PostgreSQL Source Code git master
execPartition.c
1/*-------------------------------------------------------------------------
2 *
3 * execPartition.c
4 * Support routines for partitioning.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/executor/execPartition.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/table.h"
17#include "access/tableam.h"
18#include "catalog/index.h"
19#include "catalog/partition.h"
21#include "executor/executor.h"
23#include "foreign/fdwapi.h"
24#include "mb/pg_wchar.h"
25#include "miscadmin.h"
30#include "utils/acl.h"
32#include "utils/lsyscache.h"
33#include "utils/partcache.h"
34#include "utils/rls.h"
35#include "utils/ruleutils.h"
36
37
38/*-----------------------
39 * PartitionTupleRouting - Encapsulates all information required to
40 * route a tuple inserted into a partitioned table to one of its leaf
41 * partitions.
42 *
43 * partition_root
44 * The partitioned table that's the target of the command.
45 *
46 * partition_dispatch_info
47 * Array of 'max_dispatch' elements containing a pointer to a
48 * PartitionDispatch object for every partitioned table touched by tuple
49 * routing. The entry for the target partitioned table is *always*
50 * present in the 0th element of this array. See comment for
51 * PartitionDispatchData->indexes for details on how this array is
52 * indexed.
53 *
54 * nonleaf_partitions
55 * Array of 'max_dispatch' elements containing pointers to fake
56 * ResultRelInfo objects for nonleaf partitions, useful for checking
57 * the partition constraint.
58 *
59 * num_dispatch
60 * The current number of items stored in the 'partition_dispatch_info'
61 * array. Also serves as the index of the next free array element for
62 * new PartitionDispatch objects that need to be stored.
63 *
64 * max_dispatch
65 * The current allocated size of the 'partition_dispatch_info' array.
66 *
67 * partitions
68 * Array of 'max_partitions' elements containing a pointer to a
69 * ResultRelInfo for every leaf partition touched by tuple routing.
70 * Some of these are pointers to ResultRelInfos which are borrowed out of
71 * the owning ModifyTableState node. The remainder have been built
72 * especially for tuple routing. See comment for
73 * PartitionDispatchData->indexes for details on how this array is
74 * indexed.
75 *
76 * is_borrowed_rel
77 * Array of 'max_partitions' booleans recording whether a given entry
78 * in 'partitions' is a ResultRelInfo pointer borrowed from the owning
79 * ModifyTableState node, rather than being built here.
80 *
81 * num_partitions
82 * The current number of items stored in the 'partitions' array. Also
83 * serves as the index of the next free array element for new
84 * ResultRelInfo objects that need to be stored.
85 *
86 * max_partitions
87 * The current allocated size of the 'partitions' array.
88 *
89 * memcxt
90 * Memory context used to allocate subsidiary structs.
91 *-----------------------
92 */
106
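/*
 * For reference, a minimal sketch of the struct described above, put
 * together from the field descriptions (a reconstruction, not necessarily
 * the verbatim declaration; "memcxt" is the subsidiary-allocation context
 * mentioned in the last item):
 */
typedef struct PartitionTupleRouting
{
	Relation	partition_root;
	PartitionDispatch *partition_dispatch_info;
	ResultRelInfo **nonleaf_partitions;
	int			num_dispatch;
	int			max_dispatch;
	ResultRelInfo **partitions;
	bool	   *is_borrowed_rel;
	int			num_partitions;
	int			max_partitions;
	MemoryContext memcxt;
} PartitionTupleRouting;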
107/*-----------------------
108 * PartitionDispatch - information about one partitioned table in a partition
109 * hierarchy required to route a tuple to any of its partitions. A
110 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
111 * struct and stored inside its 'partition_dispatch_info' array.
112 *
113 * reldesc
114 * Relation descriptor of the table
115 *
116 * key
117 * Partition key information of the table
118 *
119 * keystate
120 * Execution state required for expressions in the partition key
121 *
122 * partdesc
123 * Partition descriptor of the table
124 *
125 * tupslot
126 * A standalone TupleTableSlot initialized with this table's tuple
127 * descriptor, or NULL if no tuple conversion from the parent is
128 * required.
129 *
130 * tupmap
131 * TupleConversionMap to convert from the parent's rowtype to this table's
132 * rowtype (when extracting the partition key of a tuple just before
133 * routing it through this table). A NULL value is stored if no tuple
134 * conversion is required.
135 *
136 * indexes
137 * Array of partdesc->nparts elements. For leaf partitions the index
138 * corresponds to the partition's ResultRelInfo in the encapsulating
139 * PartitionTupleRouting's partitions array. For partitioned partitions,
140 * the index corresponds to the PartitionDispatch for it in its
141 * partition_dispatch_info array. -1 indicates we've not yet allocated
142 * anything in PartitionTupleRouting for the partition.
143 *-----------------------
144 */
155
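/*
 * Likewise, a sketch of the struct described above (a reconstruction; note
 * that the routing code below reads "tupmap" as an AttrMap * and that
 * "indexes" is sized to partdesc->nparts at allocation time):
 */
typedef struct PartitionDispatchData
{
	Relation	reldesc;
	PartitionKey key;
	List	   *keystate;		/* list of ExprState */
	PartitionDesc partdesc;
	TupleTableSlot *tupslot;
	AttrMap    *tupmap;
	int			indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;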
156
158 EState *estate, PartitionTupleRouting *proute,
160 ResultRelInfo *rootResultRelInfo,
161 int partidx);
162static void ExecInitRoutingInfo(ModifyTableState *mtstate,
163 EState *estate,
164 PartitionTupleRouting *proute,
167 int partidx,
168 bool is_borrowed_rel);
170 PartitionTupleRouting *proute,
172 int partidx, ResultRelInfo *rootResultRelInfo);
174 TupleTableSlot *slot,
175 EState *estate,
176 Datum *values,
177 bool *isnull);
179 const bool *isnull);
181 const Datum *values,
182 const bool *isnull,
183 int maxfieldlen);
191 PartitionDesc partdesc,
193 PlanState *planstate,
194 ExprContext *econtext);
198 int n_total_subplans);
201 bool initial_prune,
204
205
206/*
207 * ExecSetupPartitionTupleRouting - sets up information needed during
208 * tuple routing for partitioned tables, encapsulates it in
209 * PartitionTupleRouting, and returns it.
210 *
211 * Callers must use the returned PartitionTupleRouting during calls to
212 * ExecFindPartition(). The actual ResultRelInfo for a partition is only
213 * allocated when the partition is found for the first time.
214 *
215 * The current memory context is used to allocate this struct and all
216 * subsidiary structs that will be allocated from it later on. Typically
217 * it should be estate->es_query_cxt.
218 */
219PartitionTupleRouting *
220ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
221{
222 PartitionTupleRouting *proute;
223
224 /*
225 * Here we attempt to expend as little effort as possible in setting up
226 * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
227 * demand, only when we actually need to route a tuple to that partition.
228 * The reason for this is that a common case is for INSERT to insert a
229 * single tuple into a partitioned table and this must be fast.
230 */
232 proute->partition_root = rel;
234 /* Rest of members initialized by zeroing */
235
236 /*
237 * Initialize this table's PartitionDispatch object. Here we pass in the
238 * parent as NULL as we don't need to care about any parent of the target
239 * partitioned table.
240 */
242 NULL, 0, NULL);
243
244 return proute;
245}
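/*
 * Purely illustrative caller sketch (a hypothetical helper, modeled on what
 * COPY ... FROM and ExecModifyTable do); "mtstate", "rootResultRelInfo",
 * "rootRel", "myslot" and "estate" stand in for state the surrounding
 * executor code would already have set up.
 */
#ifdef NOT_USED
static void
route_one_tuple_example(ModifyTableState *mtstate,
						ResultRelInfo *rootResultRelInfo,
						Relation rootRel,
						TupleTableSlot *myslot,
						EState *estate)
{
	/* Once per statement: build the lazily-populated routing state. */
	PartitionTupleRouting *proute =
		ExecSetupPartitionTupleRouting(estate, rootRel);

	/* Per tuple: find (and, on first use, initialize) the leaf partition. */
	ResultRelInfo *partRelInfo =
		ExecFindPartition(mtstate, rootResultRelInfo, proute, myslot, estate);

	/* ... the caller would now insert the tuple into
	 * partRelInfo->ri_RelationDesc ... */

	/* At end of statement: close partitions opened for tuple routing. */
	ExecCleanupTupleRouting(mtstate, proute);
}
#endif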
246
247/*
248 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
249 * the tuple contained in *slot should belong to.
250 *
251 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
252 * one up or reuse one from mtstate's resultRelInfo array. When reusing a
253 * ResultRelInfo from the mtstate we verify that the relation is a valid
254 * target for INSERTs and initialize tuple routing information.
255 *
256 * rootResultRelInfo is the relation named in the query.
257 *
258 * estate must be non-NULL; we'll need it to compute any expressions in the
259 * partition keys. Also, its per-tuple contexts are used as evaluation
260 * scratch space.
261 *
262 * If no leaf partition is found, this routine errors out with the appropriate
263 * error message. An error may also be raised if the found target partition
264 * is not a valid target for an INSERT.
265 */
266ResultRelInfo *
267ExecFindPartition(ModifyTableState *mtstate,
268 ResultRelInfo *rootResultRelInfo,
269 PartitionTupleRouting *proute,
270 TupleTableSlot *slot, EState *estate)
271{
274 bool isnull[PARTITION_MAX_KEYS];
275 Relation rel;
277 PartitionDesc partdesc;
279 TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
280 TupleTableSlot *rootslot = slot;
284
285 /* use per-tuple context here to avoid leaking memory */
287
288 /*
289 * First check the root table's partition constraint, if any. No point in
290 * routing the tuple if it doesn't belong in the root table itself.
291 */
292 if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
293 ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
294
295 /* start with the root partitioned table */
296 dispatch = pd[0];
297 while (dispatch != NULL)
298 {
299 int partidx = -1;
300 bool is_leaf;
301
303
304 rel = dispatch->reldesc;
305 partdesc = dispatch->partdesc;
306
307 /*
308 * Extract partition key from tuple. Expression evaluation machinery
309 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
310 * point to the correct tuple slot. The slot might have changed from
311 * what was used for the parent table if the table of the current
312 * partitioning level has a different tuple descriptor from the parent.
313 * So update ecxt_scantuple accordingly.
314 */
315 ecxt->ecxt_scantuple = slot;
316 FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
317
318 /*
319 * If this partitioned table has no partitions or no partition for
320 * these values, error out.
321 */
322 if (partdesc->nparts == 0 ||
324 {
325 char *val_desc;
326
328 values, isnull, 64);
332 errmsg("no partition of relation \"%s\" found for row",
334 val_desc ?
335 errdetail("Partition key of the failing row contains %s.",
336 val_desc) : 0,
337 errtable(rel)));
338 }
339
340 is_leaf = partdesc->is_leaf[partidx];
341 if (is_leaf)
342 {
343 /*
344 * We've reached the leaf -- hurray, we're done. Look to see if
345 * we've already got a ResultRelInfo for this partition.
346 */
347 if (likely(dispatch->indexes[partidx] >= 0))
348 {
349 /* ResultRelInfo already built */
350 Assert(dispatch->indexes[partidx] < proute->num_partitions);
351 rri = proute->partitions[dispatch->indexes[partidx]];
352 }
353 else
354 {
355 /*
356 * If the partition is known in the owning ModifyTableState
357 * node, we can re-use that ResultRelInfo instead of creating
358 * a new one with ExecInitPartitionInfo().
359 */
361 partdesc->oids[partidx],
362 true, false);
363 if (rri)
364 {
365 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
366
367 /* Verify this ResultRelInfo allows INSERTs */
369 node ? node->onConflictAction : ONCONFLICT_NONE,
370 NIL);
371
372 /*
373 * Initialize information needed to insert this and
374 * subsequent tuples routed to this partition.
375 */
376 ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
377 rri, partidx, true);
378 }
379 else
380 {
381 /* We need to create a new one. */
382 rri = ExecInitPartitionInfo(mtstate, estate, proute,
383 dispatch,
384 rootResultRelInfo, partidx);
385 }
386 }
387 Assert(rri != NULL);
388
389 /* Signal to terminate the loop */
390 dispatch = NULL;
391 }
392 else
393 {
394 /*
395 * Partition is a sub-partitioned table; get the PartitionDispatch
396 */
397 if (likely(dispatch->indexes[partidx] >= 0))
398 {
399 /* Already built. */
400 Assert(dispatch->indexes[partidx] < proute->num_dispatch);
401
402 rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
403
404 /*
405 * Move down to the next partition level and search again
406 * until we find a leaf partition that matches this tuple
407 */
408 dispatch = pd[dispatch->indexes[partidx]];
409 }
410 else
411 {
412 /* Not yet built. Do that now. */
414
415 /*
416 * Create the new PartitionDispatch. We pass the current one
417 * in as the parent PartitionDispatch
418 */
420 proute,
421 partdesc->oids[partidx],
423 mtstate->rootResultRelInfo);
424 Assert(dispatch->indexes[partidx] >= 0 &&
425 dispatch->indexes[partidx] < proute->num_dispatch);
426
427 rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
429 }
430
431 /*
432 * Convert the tuple to the new parent's layout, if different from
433 * the previous parent.
434 */
435 if (dispatch->tupslot)
436 {
437 AttrMap *map = dispatch->tupmap;
439
440 myslot = dispatch->tupslot;
441 slot = execute_attr_map_slot(map, slot, myslot);
442
443 if (tempslot != NULL)
445 }
446 }
447
448 /*
449 * If this partition is the default one, we must check its partition
450 * constraint now, which may have changed concurrently due to
451 * partitions being added to the parent.
452 *
453 * (We do this here, and do not rely on ExecInsert doing it, because
454 * we don't want to miss doing it for non-leaf partitions.)
455 */
456 if (partidx == partdesc->boundinfo->default_index)
457 {
458 /*
459 * The tuple must match the partition's layout for the constraint
460 * expression to be evaluated successfully. If the partition is
461 * sub-partitioned, that would already be the case due to the code
462 * above, but for a leaf partition the tuple still matches the
463 * parent's layout.
464 *
465 * Note that we have a map to convert from root to current
466 * partition, but not from immediate parent to current partition.
467 * So if we have to convert, do it from the root slot; if not, use
468 * the root slot as-is.
469 */
470 if (is_leaf)
471 {
473
474 if (map)
476 rri->ri_PartitionTupleSlot);
477 else
478 slot = rootslot;
479 }
480
481 ExecPartitionCheck(rri, slot, estate, true);
482 }
483 }
484
485 /* Release the tuple in the lowest parent's dedicated slot. */
486 if (myslot != NULL)
488 /* and restore ecxt's scantuple */
489 ecxt->ecxt_scantuple = ecxt_scantuple_saved;
491
492 return rri;
493}
494
495/*
496 * IsIndexCompatibleAsArbiter
497 * Return true if two indexes are identical for INSERT ON CONFLICT
498 * purposes.
499 *
500 * Only indexes of the same relation are supported.
501 */
502static bool
503IsIndexCompatibleAsArbiter(Relation arbiterIndexRelation,
504 IndexInfo *arbiterIndexInfo,
505 Relation indexRelation,
506 IndexInfo *indexInfo)
507{
508 Assert(arbiterIndexRelation->rd_index->indrelid == indexRelation->rd_index->indrelid);
509
510 /* must match whether they're unique */
511 if (arbiterIndexInfo->ii_Unique != indexInfo->ii_Unique)
512 return false;
513
514 /* No support currently for comparing exclusion indexes. */
515 if (arbiterIndexInfo->ii_ExclusionOps != NULL ||
516 indexInfo->ii_ExclusionOps != NULL)
517 return false;
518
519 /* the "nulls not distinct" criterion must match */
520 if (arbiterIndexInfo->ii_NullsNotDistinct !=
521 indexInfo->ii_NullsNotDistinct)
522 return false;
523
524 /* number of key attributes must match */
525 if (arbiterIndexInfo->ii_NumIndexKeyAttrs !=
526 indexInfo->ii_NumIndexKeyAttrs)
527 return false;
528
529 for (int i = 0; i < arbiterIndexInfo->ii_NumIndexKeyAttrs; i++)
530 {
531 if (arbiterIndexRelation->rd_indcollation[i] !=
532 indexRelation->rd_indcollation[i])
533 return false;
534
535 if (arbiterIndexRelation->rd_opfamily[i] !=
536 indexRelation->rd_opfamily[i])
537 return false;
538
539 if (arbiterIndexRelation->rd_index->indkey.values[i] !=
540 indexRelation->rd_index->indkey.values[i])
541 return false;
542 }
543
545 RelationGetIndexExpressions(indexRelation)) != NIL)
546 return false;
547
549 RelationGetIndexPredicate(indexRelation)) != NIL)
550 return false;
551 return true;
552}
553
554/*
555 * ExecInitPartitionInfo
556 * Lock the partition and initialize ResultRelInfo. Also set up other
557 * information for the partition and store it in the next empty slot in
558 * the proute->partitions array.
559 *
560 * Returns the ResultRelInfo
561 */
562static ResultRelInfo *
563ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
564 PartitionTupleRouting *proute,
565 PartitionDispatch dispatch,
566 ResultRelInfo *rootResultRelInfo,
567 int partidx)
568{
569 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
570 Oid partOid = dispatch->partdesc->oids[partidx];
571 Relation partrel;
577 bool found_whole_row;
578
580
582
585 partrel,
586 0,
587 rootResultRelInfo,
588 estate->es_instrument);
589
590 /*
591 * Verify result relation is a valid target for an INSERT. An UPDATE of a
592 * partition-key becomes a DELETE+INSERT operation, so this check is still
593 * required when the operation is CMD_UPDATE.
594 */
596 node ? node->onConflictAction : ONCONFLICT_NONE, NIL);
597
598 /*
599 * Open partition indices. The user may have asked to check for conflicts
600 * within this leaf partition and do "nothing" instead of throwing an
601 * error. Be prepared in that case by initializing the index information
602 * needed by ExecInsert() to perform speculative insertions.
603 */
604 if (partrel->rd_rel->relhasindex &&
605 leaf_part_rri->ri_IndexRelationDescs == NULL)
607 (node != NULL &&
609
610 /*
611 * Build WITH CHECK OPTION constraints for the partition. Note that we
612 * didn't build the withCheckOptionList for partitions within the planner,
613 * but simple translation of varattnos will suffice. This only occurs for
614 * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
615 * didn't find a result rel to reuse.
616 */
617 if (node && node->withCheckOptionLists != NIL)
618 {
619 List *wcoList;
620 List *wcoExprs = NIL;
621 ListCell *ll;
622
623 /*
624 * In the case of INSERT on a partitioned table, there is only one
625 * plan. Likewise, there is only one WCO list, not one per partition.
626 * For UPDATE/MERGE, there are as many WCO lists as there are plans.
627 */
628 Assert((node->operation == CMD_INSERT &&
629 list_length(node->withCheckOptionLists) == 1 &&
630 list_length(node->resultRelations) == 1) ||
631 (node->operation == CMD_UPDATE &&
634 (node->operation == CMD_MERGE &&
637
638 /*
639 * Use the WCO list of the first plan as a reference to calculate
640 * attno's for the WCO list of this partition. In the INSERT case,
641 * that refers to the root partitioned table, whereas in the UPDATE
642 * tuple routing case, that refers to the first partition in the
643 * mtstate->resultRelInfo array. In any case, both that relation and
644 * this partition should have the same columns, so we should be able
645 * to map attributes successfully.
646 */
648
649 /*
650 * Convert Vars in it to contain this partition's attribute numbers.
651 */
655 false);
656 wcoList = (List *)
658 firstVarno, 0,
660 RelationGetForm(partrel)->reltype,
661 &found_whole_row);
662 /* We ignore the value of found_whole_row. */
663
664 foreach(ll, wcoList)
665 {
668 &mtstate->ps);
669
671 }
672
673 leaf_part_rri->ri_WithCheckOptions = wcoList;
674 leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
675 }
676
677 /*
678 * Build the RETURNING projection for the partition. Note that we didn't
679 * build the returningList for partitions within the planner, but simple
680 * translation of varattnos will suffice. This only occurs for the INSERT
681 * case or in the case of UPDATE/MERGE tuple routing where we didn't find
682 * a result rel to reuse.
683 */
684 if (node && node->returningLists != NIL)
685 {
686 TupleTableSlot *slot;
687 ExprContext *econtext;
688 List *returningList;
689
690 /* See the comment above for WCO lists. */
691 Assert((node->operation == CMD_INSERT &&
692 list_length(node->returningLists) == 1 &&
693 list_length(node->resultRelations) == 1) ||
694 (node->operation == CMD_UPDATE &&
697 (node->operation == CMD_MERGE &&
700
701 /*
702 * Use the RETURNING list of the first plan as a reference to
703 * calculate attno's for the RETURNING list of this partition. See
704 * the comment above for WCO lists for more details on why this is
705 * okay.
706 */
707 returningList = linitial(node->returningLists);
708
709 /*
710 * Convert Vars in it to contain this partition's attribute numbers.
711 */
712 if (part_attmap == NULL)
716 false);
717 returningList = (List *)
718 map_variable_attnos((Node *) returningList,
719 firstVarno, 0,
721 RelationGetForm(partrel)->reltype,
722 &found_whole_row);
723 /* We ignore the value of found_whole_row. */
724
725 leaf_part_rri->ri_returningList = returningList;
726
727 /*
728 * Initialize the projection itself.
729 *
730 * Use the slot and the expression context that would have been set up
731 * in ExecInitModifyTable() for projection's output.
732 */
733 Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
734 slot = mtstate->ps.ps_ResultTupleSlot;
735 Assert(mtstate->ps.ps_ExprContext != NULL);
736 econtext = mtstate->ps.ps_ExprContext;
737 leaf_part_rri->ri_projectReturning =
738 ExecBuildProjectionInfo(returningList, econtext, slot,
739 &mtstate->ps, RelationGetDescr(partrel));
740 }
741
742 /* Set up information needed for routing tuples to the partition. */
743 ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
744 leaf_part_rri, partidx, false);
745
746 /*
747 * If there is an ON CONFLICT clause, initialize state for it.
748 */
749 if (node && node->onConflictAction != ONCONFLICT_NONE)
750 {
752 ExprContext *econtext = mtstate->ps.ps_ExprContext;
753 List *arbiterIndexes = NIL;
754 int additional_arbiters = 0;
755
756 /*
757 * If there is a list of arbiter indexes, map it to a list of indexes
758 * in the partition. We also add any "identical indexes" to any of
759 * those, to cover the case where one of them is concurrently being
760 * reindexed.
761 */
762 if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
763 {
767
768 for (int listidx = 0; listidx < leaf_part_rri->ri_NumIndices; listidx++)
769 {
770 Oid indexoid;
771 List *ancestors;
772
773 /*
774 * If one of this index's ancestors is in the root's arbiter
775 * list, then use this index as arbiter for this partition.
776 * Otherwise, if this index has no parent, track it for later,
777 * in case REINDEX CONCURRENTLY is working on one of the
778 * arbiters.
779 *
780 * However, if two indexes appear to have the same parent,
781 * treat the second of these as if it had no parent. This
782 * sounds counterintuitive, but it can happen if a transaction
783 * running REINDEX CONCURRENTLY commits right between the checks
784 * of those two indexes by another process in this loop.
785 * This will have the effect of also treating that second
786 * index as arbiter.
787 *
788 * XXX get_partition_ancestors scans pg_inherits, which is not
789 * only slow, but also means the catalog snapshot can get
790 * invalidated each time through the loop (cf.
791 * GetNonHistoricCatalogSnapshot). Consider a syscache or
792 * some other way to cache?
793 */
794 indexoid = RelationGetRelid(leaf_part_rri->ri_IndexRelationDescs[listidx]);
795 ancestors = get_partition_ancestors(indexoid);
796 INJECTION_POINT("exec-init-partition-after-get-partition-ancestors", NULL);
797
798 if (ancestors != NIL &&
800 {
802 {
803 if (list_member_oid(ancestors, parent_idx))
804 {
806 arbiterIndexes = lappend_oid(arbiterIndexes, indexoid);
808 break;
809 }
810 }
811 }
812 else
814
815 list_free(ancestors);
816 }
817
818 /*
819 * If we found any indexes with no ancestors, it's possible that
820 * some arbiter index is undergoing concurrent reindex. Match all
821 * unparented indexes against arbiters; add unparented matching
822 * ones as "additional arbiters".
823 *
824 * This is critical so that all concurrent transactions use the
825 * same set as arbiters during REINDEX CONCURRENTLY, to avoid
826 * spurious "duplicate key" errors.
827 */
828 if (unparented_idxs && arbiterIndexes)
829 {
831 {
834
835 unparented_rel = leaf_part_rri->ri_IndexRelationDescs[unparented_i];
836 unparented_ii = leaf_part_rri->ri_IndexRelationInfo[unparented_i];
837
838 Assert(!list_member_oid(arbiterIndexes,
839 unparented_rel->rd_index->indexrelid));
840
841 /* Ignore indexes not ready */
842 if (!unparented_ii->ii_ReadyForInserts)
843 continue;
844
846 {
849
850 arbiter_rel = leaf_part_rri->ri_IndexRelationDescs[arbiter_i];
851 arbiter_ii = leaf_part_rri->ri_IndexRelationInfo[arbiter_i];
852
853 /*
854 * If the non-ancestor index is compatible with the
855 * arbiter, use the non-ancestor as arbiter too.
856 */
861 {
862 arbiterIndexes = lappend_oid(arbiterIndexes,
863 unparented_rel->rd_index->indexrelid);
865 break;
866 }
867 }
868 }
869 }
873 }
874
875 /*
876 * We expect to find as many arbiter indexes on this partition as the
877 * root has, plus however many "additional arbiters" (to wit: those
878 * being concurrently rebuilt) we found.
879 */
880 if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
881 list_length(arbiterIndexes) - additional_arbiters)
882 elog(ERROR, "invalid arbiter index list");
883 leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
884
885 /*
886 * In the DO UPDATE case, we have some more state to initialize.
887 */
889 {
892
894
895 Assert(node->onConflictSet != NIL);
896 Assert(rootResultRelInfo->ri_onConflict != NULL);
897
898 leaf_part_rri->ri_onConflict = onconfl;
899
900 /*
901 * Need a separate existing slot for each partition, as the
902 * partition could be of a different AM, even if the tuple
903 * descriptors match.
904 */
905 onconfl->oc_Existing =
906 table_slot_create(leaf_part_rri->ri_RelationDesc,
907 &mtstate->ps.state->es_tupleTable);
908
909 /*
910 * If the partition's tuple descriptor matches exactly the root
911 * parent (the common case), we can re-use most of the parent's ON
912 * CONFLICT SET state, skipping a bunch of work. Otherwise, we
913 * need to create state specific to this partition.
914 */
915 if (map == NULL)
916 {
917 /*
918 * It's safe to reuse these from the partition root, as we
919 * only process one tuple at a time (therefore we won't
920 * overwrite needed data in slots), and the results of
921 * projections are independent of the underlying storage.
922 * Projections and where clauses themselves don't store state
923 * / are independent of the underlying storage.
924 */
925 onconfl->oc_ProjSlot =
926 rootResultRelInfo->ri_onConflict->oc_ProjSlot;
927 onconfl->oc_ProjInfo =
928 rootResultRelInfo->ri_onConflict->oc_ProjInfo;
929 onconfl->oc_WhereClause =
930 rootResultRelInfo->ri_onConflict->oc_WhereClause;
931 }
932 else
933 {
936
937 /*
938 * Translate expressions in onConflictSet to account for
939 * different attribute numbers. For that, map partition
940 * varattnos twice: first to catch the EXCLUDED
941 * pseudo-relation (INNER_VAR), and second to handle the main
942 * target relation (firstVarno).
943 */
945 if (part_attmap == NULL)
949 false);
950 onconflset = (List *)
952 INNER_VAR, 0,
954 RelationGetForm(partrel)->reltype,
955 &found_whole_row);
956 /* We ignore the value of found_whole_row. */
957 onconflset = (List *)
959 firstVarno, 0,
961 RelationGetForm(partrel)->reltype,
962 &found_whole_row);
963 /* We ignore the value of found_whole_row. */
964
965 /* Finally, adjust the target colnos to match the partition. */
968
969 /* create the tuple slot for the UPDATE SET projection */
970 onconfl->oc_ProjSlot =
971 table_slot_create(partrel,
972 &mtstate->ps.state->es_tupleTable);
973
974 /* build UPDATE SET projection state */
975 onconfl->oc_ProjInfo =
977 true,
980 econtext,
981 onconfl->oc_ProjSlot,
982 &mtstate->ps);
983
984 /*
985 * If there is a WHERE clause, initialize state where it will
986 * be evaluated, mapping the attribute numbers appropriately.
987 * As with onConflictSet, we need to map partition varattnos
988 * to the partition's tupdesc.
989 */
990 if (node->onConflictWhere)
991 {
992 List *clause;
993
994 clause = copyObject((List *) node->onConflictWhere);
995 clause = (List *)
996 map_variable_attnos((Node *) clause,
997 INNER_VAR, 0,
999 RelationGetForm(partrel)->reltype,
1000 &found_whole_row);
1001 /* We ignore the value of found_whole_row. */
1002 clause = (List *)
1003 map_variable_attnos((Node *) clause,
1004 firstVarno, 0,
1006 RelationGetForm(partrel)->reltype,
1007 &found_whole_row);
1008 /* We ignore the value of found_whole_row. */
1009 onconfl->oc_WhereClause =
1010 ExecInitQual(clause, &mtstate->ps);
1011 }
1012 }
1013 }
1014 }
1015
1016 /*
1017 * Since we've just initialized this ResultRelInfo, it's not in any list
1018 * attached to the estate as yet. Add it, so that it can be found later.
1019 *
1020 * Note that the entries in this list appear in no predetermined order,
1021 * because partition result rels are initialized as and when they're
1022 * needed.
1023 */
1028
1029 /*
1030 * Initialize information about this partition that's needed to handle
1031 * MERGE. We take the "first" result relation's mergeActionList as
1032 * reference and make a copy for this relation, converting stuff that
1033 * references attribute numbers to match this relation's.
1034 *
1035 * This duplicates much of the logic in ExecInitMerge(), so if something
1036 * changes there, look here too.
1037 */
1038 if (node && node->operation == CMD_MERGE)
1039 {
1041 ListCell *lc;
1042 ExprContext *econtext = mtstate->ps.ps_ExprContext;
1043 Node *joinCondition;
1044
1045 if (part_attmap == NULL)
1046 part_attmap =
1049 false);
1050
1051 if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
1053
1054 /* Initialize state for join condition checking. */
1055 joinCondition =
1057 firstVarno, 0,
1059 RelationGetForm(partrel)->reltype,
1060 &found_whole_row);
1061 /* We ignore the value of found_whole_row. */
1062 leaf_part_rri->ri_MergeJoinCondition =
1063 ExecInitQual((List *) joinCondition, &mtstate->ps);
1064
1065 foreach(lc, firstMergeActionList)
1066 {
1067 /* Make a copy for this relation to be safe. */
1068 MergeAction *action = copyObject(lfirst(lc));
1069 MergeActionState *action_state;
1070
1071 /* Generate the action's state for this relation */
1072 action_state = makeNode(MergeActionState);
1073 action_state->mas_action = action;
1074
1075 /* And put the action in the appropriate list */
1076 leaf_part_rri->ri_MergeActions[action->matchKind] =
1077 lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
1078 action_state);
1079
1080 switch (action->commandType)
1081 {
1082 case CMD_INSERT:
1083
1084 /*
1085 * ExecCheckPlanOutput() was already done on the targetlist
1086 * when the "first" result relation was initialized, and it is
1087 * the same for all result relations.
1088 */
1089 action_state->mas_proj =
1090 ExecBuildProjectionInfo(action->targetList, econtext,
1091 leaf_part_rri->ri_newTupleSlot,
1092 &mtstate->ps,
1093 RelationGetDescr(partrel));
1094 break;
1095 case CMD_UPDATE:
1096
1097 /*
1098 * Convert updateColnos from "first" result relation
1099 * attribute numbers to this result rel's.
1100 */
1101 if (part_attmap)
1102 action->updateColnos =
1103 adjust_partition_colnos_using_map(action->updateColnos,
1104 part_attmap);
1105 action_state->mas_proj =
1106 ExecBuildUpdateProjection(action->targetList,
1107 true,
1108 action->updateColnos,
1109 RelationGetDescr(leaf_part_rri->ri_RelationDesc),
1110 econtext,
1111 leaf_part_rri->ri_newTupleSlot,
1112 NULL);
1113 break;
1114 case CMD_DELETE:
1115 case CMD_NOTHING:
1116 /* Nothing to do */
1117 break;
1118
1119 default:
1120 elog(ERROR, "unknown action in MERGE WHEN clause");
1121 }
1122
1123 /* found_whole_row intentionally ignored. */
1124 action->qual =
1125 map_variable_attnos(action->qual,
1126 firstVarno, 0,
1128 RelationGetForm(partrel)->reltype,
1129 &found_whole_row);
1130 action_state->mas_whenqual =
1131 ExecInitQual((List *) action->qual, &mtstate->ps);
1132 }
1133 }
1135
1136 return leaf_part_rri;
1137}
1138
1139/*
1140 * ExecInitRoutingInfo
1141 * Set up information needed for translating tuples between root
1142 * partitioned table format and partition format, and keep track of it
1143 * in PartitionTupleRouting.
1144 */
1145static void
1146ExecInitRoutingInfo(ModifyTableState *mtstate,
1147 EState *estate,
1148 PartitionTupleRouting *proute,
1149 PartitionDispatch dispatch,
1150 ResultRelInfo *partRelInfo,
1151 int partidx,
1152 bool is_borrowed_rel)
1153{
1155 int rri_index;
1156
1158
1159 /*
1160 * Set up tuple conversion between root parent and the partition if the
1161 * two have different rowtypes. If conversion is indeed required, also
1162 * initialize a slot dedicated to storing this partition's converted
1163 * tuples. Various operations that are applied to tuples after routing,
1164 * such as checking constraints, will refer to this slot.
1165 */
1166 if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
1167 {
1168 Relation partrel = partRelInfo->ri_RelationDesc;
1169
1170 /*
1171 * This pins the partition's TupleDesc, which will be released at the
1172 * end of the command.
1173 */
1174 partRelInfo->ri_PartitionTupleSlot =
1175 table_slot_create(partrel, &estate->es_tupleTable);
1176 }
1177 else
1178 partRelInfo->ri_PartitionTupleSlot = NULL;
1179
1180 /*
1181 * If the partition is a foreign table, let the FDW init itself for
1182 * routing tuples to the partition.
1183 */
1184 if (partRelInfo->ri_FdwRoutine != NULL &&
1185 partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1186 partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1187
1188 /*
1189 * Determine if the FDW supports batch insert and determine the batch size
1190 * (an FDW may support batching, but it may be disabled for the
1191 * server/table or for this particular query).
1192 *
1193 * If the FDW does not support batching, we set the batch size to 1.
1194 */
1195 if (partRelInfo->ri_FdwRoutine != NULL &&
1196 partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
1197 partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
1198 partRelInfo->ri_BatchSize =
1199 partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1200 else
1201 partRelInfo->ri_BatchSize = 1;
1202
1203 Assert(partRelInfo->ri_BatchSize >= 1);
1204
1205 partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1206
1207 /*
1208 * Keep track of it in the PartitionTupleRouting->partitions array.
1209 */
1210 Assert(dispatch->indexes[partidx] == -1);
1211
1212 rri_index = proute->num_partitions++;
1213
1214 /* Allocate or enlarge the array, as needed */
1215 if (proute->num_partitions >= proute->max_partitions)
1216 {
1217 if (proute->max_partitions == 0)
1218 {
1219 proute->max_partitions = 8;
1221 proute->is_borrowed_rel = palloc_array(bool, proute->max_partitions);
1222 }
1223 else
1224 {
1225 proute->max_partitions *= 2;
1226 proute->partitions = (ResultRelInfo **)
1227 repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1228 proute->max_partitions);
1229 proute->is_borrowed_rel = (bool *)
1230 repalloc(proute->is_borrowed_rel, sizeof(bool) *
1231 proute->max_partitions);
1232 }
1233 }
1234
1235 proute->partitions[rri_index] = partRelInfo;
1236 proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
1237 dispatch->indexes[partidx] = rri_index;
1238
1240}
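/*
 * Sketch of how the dedicated slot set up above gets used once a tuple has
 * been routed here (compare the default-partition handling in
 * ExecFindPartition); "rootslot" holds the tuple in the root table's
 * rowtype and is an assumed caller-side variable:
 *
 *	TupleConversionMap *map = ExecGetRootToChildMap(partRelInfo, estate);
 *
 *	if (map != NULL)
 *		slot = execute_attr_map_slot(map->attrMap, rootslot,
 *									 partRelInfo->ri_PartitionTupleSlot);
 *	else
 *		slot = rootslot;		(rowtypes match, no conversion needed)
 */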
1241
1242/*
1243 * ExecInitPartitionDispatchInfo
1244 * Lock the partitioned table (if not locked already) and initialize
1245 * PartitionDispatch for a partitioned table and store it in the next
1246 * available slot in the proute->partition_dispatch_info array. Also,
1247 * record the index into this array in the parent_pd->indexes[] array in
1248 * the partidx element so that we can properly retrieve the newly created
1249 * PartitionDispatch later.
1250 */
1251static PartitionDispatch
1252ExecInitPartitionDispatchInfo(EState *estate,
1253 PartitionTupleRouting *proute, Oid partoid,
1254 PartitionDispatch parent_pd, int partidx,
1255 ResultRelInfo *rootResultRelInfo)
1256{
1257 Relation rel;
1258 PartitionDesc partdesc;
1260 int dispatchidx;
1262
1263 /*
1264 * For data modification, it is better that the executor does not include
1265 * partitions being detached, except when running in snapshot-isolation
1266 * mode. This means that a read-committed transaction immediately gets a
1267 * "no partition for tuple" error when a tuple is inserted into a
1268 * partition that's being detached concurrently, but a transaction in
1269 * repeatable-read mode can still use such a partition.
1270 */
1271 if (estate->es_partition_directory == NULL)
1272 estate->es_partition_directory =
1275
1277
1278 /*
1279 * Only sub-partitioned tables need to be locked here. The root
1280 * partitioned table will already have been locked as it's referenced in
1281 * the query's rtable.
1282 */
1283 if (partoid != RelationGetRelid(proute->partition_root))
1284 rel = table_open(partoid, RowExclusiveLock);
1285 else
1286 rel = proute->partition_root;
1287 partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1288
1290 partdesc->nparts * sizeof(int));
1291 pd->reldesc = rel;
1292 pd->key = RelationGetPartitionKey(rel);
1293 pd->keystate = NIL;
1294 pd->partdesc = partdesc;
1295 if (parent_pd != NULL)
1296 {
1297 TupleDesc tupdesc = RelationGetDescr(rel);
1298
1299 /*
1300 * For a sub-partitioned table whose column order differs from that of
1301 * its direct parent partitioned table, we must store a tuple table slot
1302 * initialized with its tuple descriptor and a tuple conversion map to
1303 * convert a tuple from its parent's rowtype to its own. This is to
1304 * make sure that we are looking at the correct row using the correct
1305 * tuple descriptor when computing its partition key for tuple
1306 * routing.
1307 */
1309 tupdesc,
1310 false);
1311 pd->tupslot = pd->tupmap ?
1313 }
1314 else
1315 {
1316 /* Not required for the root partitioned table */
1317 pd->tupmap = NULL;
1318 pd->tupslot = NULL;
1319 }
1320
1321 /*
1322 * Initialize with -1 to signify that the corresponding partition's
1323 * ResultRelInfo or PartitionDispatch has not been created yet.
1324 */
1325 memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1326
1327 /* Track in PartitionTupleRouting for later use */
1328 dispatchidx = proute->num_dispatch++;
1329
1330 /* Allocate or enlarge the array, as needed */
1331 if (proute->num_dispatch >= proute->max_dispatch)
1332 {
1333 if (proute->max_dispatch == 0)
1334 {
1335 proute->max_dispatch = 4;
1338 }
1339 else
1340 {
1341 proute->max_dispatch *= 2;
1344 sizeof(PartitionDispatch) * proute->max_dispatch);
1345 proute->nonleaf_partitions = (ResultRelInfo **)
1347 sizeof(ResultRelInfo *) * proute->max_dispatch);
1348 }
1349 }
1351
1352 /*
1353 * If setting up a PartitionDispatch for a sub-partitioned table, we may
1354 * also need a minimally valid ResultRelInfo for checking the partition
1355 * constraint later; set that up now.
1356 */
1357 if (parent_pd)
1358 {
1360
1361 InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
1363 }
1364 else
1366
1367 /*
1368 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1369 * install a downlink in the parent to allow quick descent.
1370 */
1371 if (parent_pd)
1372 {
1373 Assert(parent_pd->indexes[partidx] == -1);
1374 parent_pd->indexes[partidx] = dispatchidx;
1375 }
1376
1378
1379 return pd;
1380}
1381
1382/*
1383 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1384 * routing.
1385 *
1386 * Close all the partitioned tables, leaf partitions, and their indices.
1387 */
1388void
1389ExecCleanupTupleRouting(ModifyTableState *mtstate,
1390 PartitionTupleRouting *proute)
1391{
1392 int i;
1393
1394 /*
1395 * Remember, proute->partition_dispatch_info[0] corresponds to the root
1396 * partitioned table, which we must not try to close, because it is the
1397 * main target table of the query that will be closed by callers such as
1398 * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1399 * partitioned table.
1400 */
1401 for (i = 1; i < proute->num_dispatch; i++)
1402 {
1404
1406
1407 if (pd->tupslot)
1409 }
1410
1411 for (i = 0; i < proute->num_partitions; i++)
1412 {
1413 ResultRelInfo *resultRelInfo = proute->partitions[i];
1414
1415 /* Allow any FDWs to shut down */
1416 if (resultRelInfo->ri_FdwRoutine != NULL &&
1417 resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1418 resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1419 resultRelInfo);
1420
1421 /*
1422 * Close it if it's not one of the result relations borrowed from the
1423 * owning ModifyTableState; those will be closed by ExecEndPlan().
1424 */
1425 if (proute->is_borrowed_rel[i])
1426 continue;
1427
1428 ExecCloseIndices(resultRelInfo);
1429 table_close(resultRelInfo->ri_RelationDesc, NoLock);
1430 }
1431}
1432
1433/* ----------------
1434 * FormPartitionKeyDatum
1435 * Construct values[] and isnull[] arrays for the partition key
1436 * of a tuple.
1437 *
1438 * pd Partition dispatch object of the partitioned table
1439 * slot Heap tuple from which to extract partition key
1440 * estate executor state for evaluating any partition key
1441 * expressions (must be non-NULL)
1442 * values Array of partition key Datums (output area)
1443 * isnull Array of is-null indicators (output area)
1444 *
1445 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1446 * the heap tuple passed in.
1447 * ----------------
1448 */
1449static void
1450FormPartitionKeyDatum(PartitionDispatch pd,
1451 TupleTableSlot *slot,
1452 EState *estate,
1453 Datum *values,
1454 bool *isnull)
1455{
1457 int i;
1458
1459 if (pd->key->partexprs != NIL && pd->keystate == NIL)
1460 {
1461 /* Check caller has set up context correctly */
1462 Assert(estate != NULL &&
1463 GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1464
1465 /* First time through, set up expression evaluation state */
1466 pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1467 }
1468
1470 for (i = 0; i < pd->key->partnatts; i++)
1471 {
1473 Datum datum;
1474 bool isNull;
1475
1476 if (keycol != 0)
1477 {
1478 /* Plain column; get the value directly from the heap tuple */
1479 datum = slot_getattr(slot, keycol, &isNull);
1480 }
1481 else
1482 {
1483 /* Expression; need to evaluate it */
1484 if (partexpr_item == NULL)
1485 elog(ERROR, "wrong number of partition key expressions");
1487 GetPerTupleExprContext(estate),
1488 &isNull);
1490 }
1491 values[i] = datum;
1492 isnull[i] = isNull;
1493 }
1494
1495 if (partexpr_item != NULL)
1496 elog(ERROR, "wrong number of partition key expressions");
1497}
1498
1499/*
1500 * The number of times the same partition must be found in a row before we
1501 * switch from a binary search for the given values to just checking if the
1502 * values belong to the last found partition. This must be above 0.
1503 */
1504#define PARTITION_CACHED_FIND_THRESHOLD 16
1505
1506/*
1507 * get_partition_for_tuple
1508 * Finds partition of relation which accepts the partition key specified
1509 * in values and isnull.
1510 *
1511 * Calling this function can be quite expensive when LIST and RANGE
1512 * partitioned tables have many partitions. This is due to the binary search
1513 * that's done to find the correct partition. Many of the use cases for LIST
1514 * and RANGE partitioned tables make it likely that the same partition is
1515 * found in subsequent ExecFindPartition() calls. This is especially true for
1516 * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1517 * partition key is the current time. When asked to find a partition for a
1518 * RANGE or LIST partitioned table, we record the partition index and datum
1519 * offset we've found for the given 'values' in the PartitionDesc (which is
1520 * stored in relcache), and if we keep finding the same partition
1521 * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1522 * logic and instead of performing a binary search to find the correct
1523 * partition, we'll just double-check that 'values' still belong to the last
1524 * found partition, and if so, we'll return that partition index, thus
1525 * skipping the need for the binary search. If we fail to match the last
1526 * partition when double checking, then we fall back on doing a binary search.
1527 * In this case, unless we find 'values' belong to the DEFAULT partition,
1528 * we'll reset the number of times we've hit the same partition so that we
1529 * don't attempt to use the cache again until we've found that partition at
1530 * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1531 *
1532 * For cases where the partition changes on each lookup, the amount of
1533 * additional work required just amounts to recording the last found partition
1534 * and bound offset then resetting the found counter. This is cheap and does
1535 * not appear to cause any meaningful slowdowns for such cases.
1536 *
1537 * No caching of partitions is done when the last found partition is the
1538 * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1539 * is no bound offset storing the matching datum, so we cannot confirm the
1540 * indexes match. For the NULL partition, this is just so cheap, there's no
1541 * sense in caching.
1542 *
1543 * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1544 * found or -1 if none found.
1545 */
1546static int
1548{
1549 int bound_offset = -1;
1550 int part_index = -1;
1551 PartitionKey key = pd->key;
1552 PartitionDesc partdesc = pd->partdesc;
1553 PartitionBoundInfo boundinfo = partdesc->boundinfo;
1554
1555 /*
1556 * In the switch statement below, when we perform a cached lookup for
1557 * RANGE and LIST partitioned tables, if we find that the last found
1558 * partition matches the 'values', we return the partition index right
1559 * away. We do this instead of breaking out of the switch as we don't
1560 * want to execute the code about the DEFAULT partition or do any updates
1561 * for any of the cache-related fields. That would be a waste of effort
1562 * as we already know it's not the DEFAULT partition and have no need to
1563 * increment the number of times we found the same partition any higher
1564 * than PARTITION_CACHED_FIND_THRESHOLD.
1565 */
1566
1567 /* Route as appropriate based on partitioning strategy. */
1568 switch (key->strategy)
1569 {
1571 {
1573
1574 /* hash partitioning is too cheap to bother caching */
1575 rowHash = compute_partition_hash_value(key->partnatts,
1576 key->partsupfunc,
1577 key->partcollation,
1578 values, isnull);
1579
1580 /*
1581 * HASH partitions can't have a DEFAULT partition and we don't
1582 * do any caching work for them, so just return the part index
1583 */
1584 return boundinfo->indexes[rowHash % boundinfo->nindexes];
1585 }
1586
1588 if (isnull[0])
1589 {
1590 /* this is far too cheap to bother doing any caching */
1591 if (partition_bound_accepts_nulls(boundinfo))
1592 {
1593 /*
1594 * When there is a NULL partition we just return that
1595 * directly. We don't have a bound_offset so it's not
1596 * valid to drop into the code after the switch which
1597 * checks and updates the cache fields. We perhaps should
1598 * be invalidating the details of the last cached
1599 * partition but there's no real need to. Keeping those
1600 * fields set gives a chance at matching to the cached
1601 * partition on the next lookup.
1602 */
1603 return boundinfo->null_index;
1604 }
1605 }
1606 else
1607 {
1608 bool equal;
1609
1611 {
1613 Datum lastDatum = boundinfo->datums[last_datum_offset][0];
1614 int32 cmpval;
1615
1616 /* does the last found datum index match this datum? */
1617 cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1618 key->partcollation[0],
1619 lastDatum,
1620 values[0]));
1621
1622 if (cmpval == 0)
1623 return boundinfo->indexes[last_datum_offset];
1624
1625 /* fall-through and do a manual lookup */
1626 }
1627
1628 bound_offset = partition_list_bsearch(key->partsupfunc,
1629 key->partcollation,
1630 boundinfo,
1631 values[0], &equal);
1632 if (bound_offset >= 0 && equal)
1633 part_index = boundinfo->indexes[bound_offset];
1634 }
1635 break;
1636
1638 {
1639 bool equal = false,
1640 range_partkey_has_null = false;
1641 int i;
1642
1643 /*
1644 * No range includes NULL, so this will be accepted by the
1645 * default partition if there is one, and otherwise rejected.
1646 */
1647 for (i = 0; i < key->partnatts; i++)
1648 {
1649 if (isnull[i])
1650 {
1652 break;
1653 }
1654 }
1655
1656 /* NULLs belong in the DEFAULT partition */
1658 break;
1659
1661 {
1665 int32 cmpval;
1666
1667 /* check if the value is >= to the lower bound */
1668 cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1669 key->partcollation,
1670 lastDatums,
1671 kind,
1672 values,
1673 key->partnatts);
1674
1675 /*
1676 * If it's equal to the lower bound then no need to check
1677 * the upper bound.
1678 */
1679 if (cmpval == 0)
1680 return boundinfo->indexes[last_datum_offset + 1];
1681
1682 if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
1683 {
1684 /* check if the value is below the upper bound */
1685 lastDatums = boundinfo->datums[last_datum_offset + 1];
1686 kind = boundinfo->kind[last_datum_offset + 1];
1687 cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1688 key->partcollation,
1689 lastDatums,
1690 kind,
1691 values,
1692 key->partnatts);
1693
1694 if (cmpval > 0)
1695 return boundinfo->indexes[last_datum_offset + 1];
1696 }
1697 /* fall-through and do a manual lookup */
1698 }
1699
1701 key->partcollation,
1702 boundinfo,
1703 key->partnatts,
1704 values,
1705 &equal);
1706
1707 /*
1708 * The bound at bound_offset is less than or equal to the
1709 * tuple value, so the bound at offset+1 is the upper bound of
1710 * the partition we're looking for, if there actually exists
1711 * one.
1712 */
1713 part_index = boundinfo->indexes[bound_offset + 1];
1714 }
1715 break;
1716
1717 default:
1718 elog(ERROR, "unexpected partition strategy: %d",
1719 (int) key->strategy);
1720 }
1721
1722 /*
1723 * part_index < 0 means we failed to find a partition of this parent. Use
1724 * the default partition, if there is one.
1725 */
1726 if (part_index < 0)
1727 {
1728 /*
1729 * No need to reset the cache fields here. The next set of values
1730 * might end up belonging to the cached partition, so leaving the
1731 * cache alone improves the chances of a cache hit on the next lookup.
1732 */
1733 return boundinfo->default_index;
1734 }
1735
1736 /* we should only make it here when the code above set bound_offset */
1737 Assert(bound_offset >= 0);
1738
1739 /*
1740 * Attend to the cache fields. If the bound_offset matches the last
1741 * cached bound offset then we've found the same partition as last time,
1742 * so bump the count by one. If all goes well, we'll eventually reach
1743 * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
1744 * around. Otherwise, we'll reset the cache count back to 1 to mark that
1745 * we've found this partition for the first time.
1746 */
1747 if (bound_offset == partdesc->last_found_datum_index)
1748 partdesc->last_found_count++;
1749 else
1750 {
1751 partdesc->last_found_count = 1;
1754 }
1755
1756 return part_index;
1757}
1758
1759/*
1760 * ExecBuildSlotPartitionKeyDescription
1761 *
1762 * This works very much like BuildIndexValueDescription() and is currently
1763 * used for building error messages when ExecFindPartition() fails to find
1764 * a partition for a row.
1765 */
1766static char *
1767ExecBuildSlotPartitionKeyDescription(Relation rel,
1768 const Datum *values,
1769 const bool *isnull,
1770 int maxfieldlen)
1771{
1774 int partnatts = get_partition_natts(key);
1775 int i;
1776 Oid relid = RelationGetRelid(rel);
1778
1779 if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
1780 return NULL;
1781
1782 /* If the user has table-level access, just go build the description. */
1784 if (aclresult != ACLCHECK_OK)
1785 {
1786 /*
1787 * Step through the columns of the partition key and make sure the
1788 * user has SELECT rights on all of them.
1789 */
1790 for (i = 0; i < partnatts; i++)
1791 {
1793
1794 /*
1795 * If this partition key column is an expression, we return no
1796 * detail rather than try to figure out what column(s) the
1797 * expression includes and if the user has SELECT rights on them.
1798 */
1799 if (attnum == InvalidAttrNumber ||
1802 return NULL;
1803 }
1804 }
1805
1807 appendStringInfo(&buf, "(%s) = (",
1808 pg_get_partkeydef_columns(relid, true));
1809
1810 for (i = 0; i < partnatts; i++)
1811 {
1812 char *val;
1813 int vallen;
1814
1815 if (isnull[i])
1816 val = "null";
1817 else
1818 {
1819 Oid foutoid;
1820 bool typisvarlena;
1821
1823 &foutoid, &typisvarlena);
1825 }
1826
1827 if (i > 0)
1829
1830 /* truncate if needed */
1831 vallen = strlen(val);
1832 if (vallen <= maxfieldlen)
1833 appendBinaryStringInfo(&buf, val, vallen);
1834 else
1835 {
1836 vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1837 appendBinaryStringInfo(&buf, val, vallen);
1838 appendStringInfoString(&buf, "...");
1839 }
1840 }
1841
1843
1844 return buf.data;
1845}
1846
1847/*
1848 * adjust_partition_colnos
1849 * Adjust the list of UPDATE target column numbers to account for
1850 * attribute differences between the parent and the partition.
1851 *
1852 * Note: mustn't be called if no adjustment is required.
1853 */
1854static List *
1863
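/*
 * A sketch of the likely body (a reconstruction, not verbatim source): it
 * fetches the partition's child-to-root attribute map and delegates to the
 * helper below, roughly:
 *
 *	TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
 *
 *	Assert(map != NULL);
 *	return adjust_partition_colnos_using_map(colnos, map->attrMap);
 */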
1864/*
1865 * adjust_partition_colnos_using_map
1866 * Like adjust_partition_colnos, but uses a caller-supplied map instead
1867 * of assuming to map from the "root" result relation.
1868 *
1869 * Note: mustn't be called if no adjustment is required.
1870 */
1871static List *
1872adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
1873{
1874 List *new_colnos = NIL;
1875 ListCell *lc;
1876
1877 Assert(attrMap != NULL); /* else we shouldn't be here */
1878
1879 foreach(lc, colnos)
1880 {
1882
1883 if (parentattrno <= 0 ||
1884 parentattrno > attrMap->maplen ||
1885 attrMap->attnums[parentattrno - 1] == 0)
1886 elog(ERROR, "unexpected attno %d in target column list",
1887 parentattrno);
1889 attrMap->attnums[parentattrno - 1]);
1890 }
1891
1892 return new_colnos;
1893}
1894
1895/*-------------------------------------------------------------------------
1896 * Run-Time Partition Pruning Support.
1897 *
1898 * The following series of functions exist to support the removal of unneeded
1899 * subplans for queries against partitioned tables. The supporting functions
1900 * here are designed to work with any plan type which supports an arbitrary
1901 * number of subplans, e.g. Append, MergeAppend.
1902 *
1903 * When pruning involves comparison of a partition key to a constant, it's
1904 * done by the planner. However, if we have a comparison to a non-constant
1905 * but not volatile expression, that presents an opportunity for run-time
1906 * pruning by the executor, allowing irrelevant partitions to be skipped
1907 * dynamically.
1908 *
1909 * We must distinguish expressions containing PARAM_EXEC Params from
1910 * expressions that don't contain those. Even though a PARAM_EXEC Param is
1911 * considered to be a stable expression, it can change value from one plan
1912 * node scan to the next during query execution. Stable comparison
1913 * expressions that don't involve such Params allow partition pruning to be
1914 * done once during executor startup. Expressions that do involve such Params
1915 * require us to prune separately for each scan of the parent plan node.
1916 *
1917 * Note that pruning away unneeded subplans during executor startup has the
1918 * added benefit of not having to initialize the unneeded subplans at all.
1919 *
1920 *
1921 * Functions:
1922 *
1923 * ExecDoInitialPruning:
1924 * Perform runtime "initial" pruning, if necessary, to determine the set
1925 * of child subnodes that need to be initialized during ExecInitNode() for
1926 * all plan nodes that contain a PartitionPruneInfo.
1927 *
1928 * ExecInitPartitionExecPruning:
1929 * Updates the PartitionPruneState found at given part_prune_index in
1930 * EState.es_part_prune_states for use during "exec" pruning if required.
1931 * Also returns the set of subplans to initialize that would be stored at
1932 * part_prune_index in EState.es_part_prune_results by
1933 * ExecDoInitialPruning(). Maps in PartitionPruneState are updated to
1934 * account for initial pruning possibly having eliminated some of the
1935 * subplans.
1936 *
1937 * ExecFindMatchingSubPlans:
1938 * Returns indexes of matching subplans after evaluating the expressions
1939 * that are safe to evaluate at a given point. This function is first
1940 * called during ExecDoInitialPruning() to find the initially matching
1941 * subplans based on performing the initial pruning steps and then must be
1942 * called again each time the value of a Param listed in
1943 * PartitionPruneState's 'execparamids' changes.
1944 *-------------------------------------------------------------------------
1945 */
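/*
 * Rough sketch of the call sequence described above, as seen from a
 * pruning-capable parent node such as Append ("appendstate", "node",
 * "prunestate" and "validsubplans" are illustrative local names, and the
 * argument lists follow the descriptions given with each function below):
 *
 *	At executor startup, before the plan tree is initialized, all initial
 *	pruning runs once:
 *		ExecDoInitialPruning(estate);
 *
 *	ExecInitAppend() then picks up its pruning state and the set of
 *	surviving subplans, and initializes only those:
 *		prunestate = ExecInitPartitionExecPruning(&appendstate->ps,
 *												  list_length(node->appendplans),
 *												  node->part_prune_index,
 *												  node->apprelids,
 *												  &validsubplans);
 *
 *	Whenever a PARAM_EXEC parameter listed in prunestate->execparamids
 *	changes value, the node calls ExecFindMatchingSubPlans() again to
 *	recompute the set of subplans that can still match.
 */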
1946
1947
1948/*
1949 * ExecDoInitialPruning
1950 * Perform runtime "initial" pruning, if necessary, to determine the set
1951 * of child subnodes that need to be initialized during ExecInitNode() for
1952 * plan nodes that support partition pruning.
1953 *
1954 * This function iterates over each PartitionPruneInfo entry in
1955 * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
1956 * and adds it to es_part_prune_states. ExecInitPartitionExecPruning() accesses
1957 * these states through their corresponding indexes in es_part_prune_states and
1958 * assign each state to the parent node's PlanState, from where it will be used
1959 * for "exec" pruning.
1960 *
1961 * If initial pruning steps exist for a PartitionPruneInfo entry, this function
1962 * executes those pruning steps and stores the result as a bitmapset of valid
1963 * child subplans, identifying which subplans should be initialized for
1964 * execution. The results are saved in estate->es_part_prune_results.
1965 *
1966 * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
1967 * entry is still added to es_part_prune_results to maintain alignment with
1968 * es_part_prune_infos. This ensures that ExecInitPartitionExecPruning() can
1969 * use the same index to retrieve the pruning results.
1970 */
1971void
1972ExecDoInitialPruning(EState *estate)
1973{
1974 ListCell *lc;
1975
1976 foreach(lc, estate->es_part_prune_infos)
1977 {
1978 PartitionPruneInfo *pruneinfo = lfirst_node(PartitionPruneInfo, lc);
1979 PartitionPruneState *prunestate;
1980 Bitmapset *validsubplans = NULL;
1981 Bitmapset *validsubplan_rtis = NULL;
1982 Bitmapset *all_leafpart_rtis = NULL;
1983
1984 /* Create and save the PartitionPruneState. */
1985 prunestate = CreatePartitionPruneState(estate, pruneinfo,
1986 &all_leafpart_rtis);
1987 estate->es_part_prune_states = lappend(estate->es_part_prune_states,
1988 prunestate);
1989
1990 /*
1991 * Perform initial pruning steps, if any, and save the result
1992 * bitmapset or NULL as described in the header comment.
1993 */
1994 if (prunestate->do_initial_prune)
1995 validsubplans = ExecFindMatchingSubPlans(prunestate, true,
1996 &validsubplan_rtis);
1997 else
1998 validsubplan_rtis = all_leafpart_rtis;
1999
2000 estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
2001 validsubplan_rtis);
2002 estate->es_part_prune_results = lappend(estate->es_part_prune_results,
2003 validsubplans);
2004 }
2005}
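/*
 * Editor's note (hedged): after ExecDoInitialPruning() has run, the three
 * EState lists can be thought of as parallel arrays indexed by
 * part_prune_index:
 *
 *     es_part_prune_infos[i]   - PartitionPruneInfo from the planner
 *     es_part_prune_states[i]  - PartitionPruneState created above
 *     es_part_prune_results[i] - bitmapset of surviving subplans, or NULL
 *                                when no initial pruning was done for entry i
 *
 * ExecInitPartitionExecPruning() below looks all three up by that same index.
 */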
2006
2007/*
2008 * ExecInitPartitionExecPruning
2009 * Initialize the data structures needed for runtime "exec" partition
2010 * pruning and return the result of initial pruning, if available.
2011 *
2012 * 'relids' identifies the relation to which both the parent plan and the
2013 * PartitionPruneInfo given by 'part_prune_index' belong.
2014 *
2015 * On return, *initially_valid_subplans is assigned the set of indexes of
2016 * child subplans that must be initialized along with the parent plan node.
2017 * Initial pruning would have been performed by ExecDoInitialPruning(), if
2018 * necessary, and the bitmapset of surviving subplans' indexes would have
2019 * been stored as the part_prune_index'th element of
2020 * EState.es_part_prune_results.
2021 *
2022 * If subplans were indeed pruned during initial pruning, the subplan_map
2023 * arrays in the returned PartitionPruneState are re-sequenced to exclude those
2024 * subplans, but only if the maps will be needed for subsequent execution
2025 * pruning passes.
2026 */
2027PartitionPruneState *
2028ExecInitPartitionExecPruning(PlanState *planstate,
2029 int n_total_subplans,
2030 int part_prune_index,
2031 Bitmapset *relids,
2032 Bitmapset **initially_valid_subplans)
2033{
2034 PartitionPruneState *prunestate;
2035 EState *estate = planstate->state;
2036 PartitionPruneInfo *pruneinfo;
2037
2038 /* Obtain the pruneinfo we need. */
2039 pruneinfo = list_nth_node(PartitionPruneInfo, estate->es_part_prune_infos,
2040 part_prune_index);
2041
2042 /* Its relids had better match the plan node's, or the planner messed up. */
2043 if (!bms_equal(relids, pruneinfo->relids))
2044 elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
2045 bmsToString(pruneinfo->relids), part_prune_index,
2046 bmsToString(relids));
2047
2048 /*
2049 * The PartitionPruneState would have been created by
2050 * ExecDoInitialPruning() and stored as the part_prune_index'th element of
2051 * EState.es_part_prune_states.
2052 */
2053 prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
2054 Assert(prunestate != NULL);
2055
2056 /* Use the result of initial pruning done by ExecDoInitialPruning(). */
2057 if (prunestate->do_initial_prune)
2058 *initially_valid_subplans = list_nth_node(Bitmapset,
2059 estate->es_part_prune_results,
2060 part_prune_index);
2061 else
2062 {
2063 /* No pruning, so we'll need to initialize all subplans */
2064 Assert(n_total_subplans > 0);
2065 *initially_valid_subplans = bms_add_range(NULL, 0,
2066 n_total_subplans - 1);
2067 }
2068
2069 /*
2070 * The exec pruning state must also be initialized, if needed, before it
2071 * can be used for pruning during execution.
2072 *
2073 * This also re-sequences subplan indexes contained in prunestate to
2074 * account for any that were removed due to initial pruning; refer to the
2075 * condition in InitExecPartitionPruneContexts() that is used to determine
2076 * whether to do this. If no exec pruning needs to be done, we would thus
2077 * leave the maps in an invalid state, but that's OK since that data
2078 * won't be consulted again (cf initial Assert in
2079 * ExecFindMatchingSubPlans).
2080 */
2081 if (prunestate->do_exec_prune)
2082 InitExecPartitionPruneContexts(prunestate, planstate,
2083 *initially_valid_subplans,
2084 n_total_subplans);
2085
2086 return prunestate;
2087}
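/*
 * Editor's sketch (hedged; variable names below are illustrative, not taken
 * verbatim from any caller): a parent node such as Append calls this from
 * its ExecInit function and then initializes only the subplans that
 * survived initial pruning, roughly like so:
 *
 *     Bitmapset  *validsubplans = NULL;
 *     PartitionPruneState *prunestate;
 *
 *     prunestate = ExecInitPartitionExecPruning(parent_planstate,
 *                                               n_subplans,
 *                                               plan_part_prune_index,
 *                                               plan_relids,
 *                                               &validsubplans);
 *     (ExecInitNode() is then called only for members of validsubplans.)
 *
 * The real caller-side logic lives in nodeAppend.c and nodeMergeAppend.c.
 */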
2088
2089/*
2090 * CreatePartitionPruneState
2091 * Build the data structure required for calling ExecFindMatchingSubPlans
2092 *
2093 * This includes PartitionPruneContexts (stored in each
2094 * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
2095 * which hold the ExprStates needed to evaluate pruning expressions, and
2096 * mapping arrays to convert partition indexes from the pruning logic
2097 * into subplan indexes in the parent plan node's list of child subplans.
2098 *
2099 * 'pruneinfo' is a PartitionPruneInfo as generated by
2100 * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
2101 * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
2102 * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
2103 * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
2104 * system is needed to keep from confusing the different hierarchies when a
2105 * UNION ALL contains multiple partitioned tables as children. The data
2106 * stored in each PartitionedRelPruningData can be re-used each time we
2107 * re-evaluate which partitions match the pruning steps provided in each
2108 * PartitionedRelPruneInfo.
2109 *
2110 * Note that only the PartitionPruneContexts for initial pruning are
2111 * initialized here. Those required for exec pruning are initialized later in
2112 * ExecInitPartitionExecPruning(), as they depend on the availability of the
2113 * parent plan node's PlanState.
2114 *
2115 * If initial pruning steps are to be skipped (e.g., during EXPLAIN
2116 * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
2117 * all leaf partitions whose scanning subnode is included in the parent plan
2118 * node's list of child plans. The caller must add these RT indexes to
2119 * estate->es_unpruned_relids.
2120 */
2121static PartitionPruneState *
2122CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo,
2123 Bitmapset **all_leafpart_rtis)
2124{
2125 PartitionPruneState *prunestate;
2126 int n_part_hierarchies;
2127 ListCell *lc;
2128 int i;
2129
2130 /*
2131 * Expression context that will be used by partkey_datum_from_expr() to
2132 * evaluate expressions for comparison against partition bounds.
2133 */
2134 ExprContext *econtext = CreateExprContext(estate);
2135
2136 /* For data reading, executor always includes detached partitions */
2137 if (estate->es_partition_directory == NULL)
2138 estate->es_partition_directory =
2139 CreatePartitionDirectory(estate->es_query_cxt, false);
2140
2141 n_part_hierarchies = list_length(pruneinfo->prune_infos);
2142 Assert(n_part_hierarchies > 0);
2143
2144 /*
2145 * Allocate the data structure
2146 */
2147 prunestate = (PartitionPruneState *)
2148 palloc(offsetof(PartitionPruneState, partprunedata) +
2149 sizeof(PartitionPruningData *) * n_part_hierarchies);
2150
2151 /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
2152 prunestate->econtext = econtext;
2153 prunestate->execparamids = NULL;
2154 /* other_subplans can change at runtime, so we need our own copy */
2155 prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2156 prunestate->do_initial_prune = false; /* may be set below */
2157 prunestate->do_exec_prune = false; /* may be set below */
2158 prunestate->num_partprunedata = n_part_hierarchies;
2159
2160 /*
2161 * Create a short-term memory context which we'll use when making calls to
2162 * the partition pruning functions. This avoids possible memory leaks,
2163 * since the pruning functions call comparison functions that aren't under
2164 * our control.
2165 */
2166 prunestate->prune_context =
2168 "Partition Prune",
2170
2171 i = 0;
2172 foreach(lc, pruneinfo->prune_infos)
2173 {
2174 List *partrelpruneinfos = lfirst_node(List, lc);
2175 int npartrelpruneinfos = list_length(partrelpruneinfos);
2176 PartitionPruningData *prunedata;
2177 ListCell *lc2;
2178 int j;
2179
2180 prunedata = (PartitionPruningData *)
2181 palloc(offsetof(PartitionPruningData, partrelprunedata) +
2182 npartrelpruneinfos * sizeof(PartitionedRelPruningData));
2183 prunestate->partprunedata[i] = prunedata;
2184 prunedata->num_partrelprunedata = npartrelpruneinfos;
2185
2186 j = 0;
2187 foreach(lc2, partrelpruneinfos)
2188 {
2189 PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
2190 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2191 Relation partrel;
2192 PartitionDesc partdesc;
2193 PartitionKey partkey;
2194
2195 /*
2196 * We can rely on the copies of the partitioned table's partition
2197 * key and partition descriptor appearing in its relcache entry,
2198 * because that entry will be held open and locked for the
2199 * duration of this executor run.
2200 */
2201 partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2202
2203 /* Remember for InitExecPartitionPruneContexts(). */
2204 pprune->partrel = partrel;
2205
2206 partkey = RelationGetPartitionKey(partrel);
2207 partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2208 partrel);
2209
2210 /*
2211 * Initialize the subplan_map and subpart_map.
2212 *
2213 * The set of partitions that exist now might not be the same that
2214 * existed when the plan was made. The normal case is that it is;
2215 * optimize for that case with a quick comparison, and just copy
2216 * the subplan_map, making subpart_map and leafpart_rti_map point to
2217 * the ones in the PruneInfo.
2218 *
2219 * For the case where they aren't identical, we could have more
2220 * partitions on either side; or even exactly the same number of
2221 * them on both sides, but the set of OIDs doesn't match fully. Handle
2222 * this by creating new subplan_map and subpart_map arrays that
2223 * correspond to the ones in the PruneInfo where the new
2224 * partition descriptor's OIDs match. Any that don't match can be
2225 * set to -1, as if they were pruned. By construction, both
2226 * arrays are in partition bounds order.
2227 */
2228 pprune->nparts = partdesc->nparts;
2229 pprune->subplan_map = palloc_array(int, partdesc->nparts);
2230
2231 if (partdesc->nparts == pinfo->nparts &&
2232 memcmp(partdesc->oids, pinfo->relid_map,
2233 sizeof(int) * partdesc->nparts) == 0)
2234 {
2235 pprune->subpart_map = pinfo->subpart_map;
2236 pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2237 memcpy(pprune->subplan_map, pinfo->subplan_map,
2238 sizeof(int) * pinfo->nparts);
2239 }
2240 else
2241 {
2242 int pd_idx = 0;
2243 int pp_idx;
2244
2245 /*
2246 * When the partition arrays are not identical, there could be
2247 * some new ones but it's also possible that one was removed;
2248 * we cope with both situations by walking the arrays and
2249 * discarding those that don't match.
2250 *
2251 * If the number of partitions on both sides match, it's still
2252 * possible that one partition has been detached and another
2253 * attached. Cope with that by creating a map that skips any
2254 * mismatches.
2255 */
2256 pprune->subpart_map = palloc_array(int, partdesc->nparts);
2257 pprune->leafpart_rti_map = palloc_array(int, partdesc->nparts);
2258
2259 for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2260 {
2261 /* Skip any InvalidOid relid_map entries */
2262 while (pd_idx < pinfo->nparts &&
2263 !OidIsValid(pinfo->relid_map[pd_idx]))
2264 pd_idx++;
2265
2266 recheck:
2267 if (pd_idx < pinfo->nparts &&
2268 pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2269 {
2270 /* match... */
2271 pprune->subplan_map[pp_idx] =
2272 pinfo->subplan_map[pd_idx];
2273 pprune->subpart_map[pp_idx] =
2274 pinfo->subpart_map[pd_idx];
2275 pprune->leafpart_rti_map[pp_idx] =
2276 pinfo->leafpart_rti_map[pd_idx];
2277 pd_idx++;
2278 continue;
2279 }
2280
2281 /*
2282 * There isn't an exact match in the corresponding
2283 * positions of both arrays. Peek ahead in
2284 * pinfo->relid_map to see if we have a match for the
2285 * current partition in partdesc. Normally if a match
2286 * exists it's just one element ahead, and it means the
2287 * planner saw one extra partition that we no longer see
2288 * now (its concurrent detach finished just in between);
2289 * so we skip that one by updating pd_idx to the new
2290 * location and jumping above. We can then continue to
2291 * match the rest of the elements after skipping the OID
2292 * with no match; no future matches are tried for the
2293 * element that was skipped, because we know the arrays to
2294 * be in the same order.
2295 *
2296 * If we don't see a match anywhere in the rest of the
2297 * pinfo->relid_map array, that means we see an element
2298 * now that the planner didn't see, so mark that one as
2299 * pruned and move on.
2300 */
2301 for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2302 {
2303 if (pd_idx2 >= pinfo->nparts)
2304 break;
2305 if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2306 {
2307 pd_idx = pd_idx2;
2308 goto recheck;
2309 }
2310 }
2311
2312 pprune->subpart_map[pp_idx] = -1;
2313 pprune->subplan_map[pp_idx] = -1;
2314 pprune->leafpart_rti_map[pp_idx] = 0;
2315 }
2316 }
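/*
 * Worked example (editor's addition; the OIDs are made up).  Suppose the
 * plan-time relid_map is {1001, 1002, 1003} but partition 1002 was
 * concurrently detached, so partdesc->oids is now {1001, 1003}.  The loop
 * above pairs 1001 with 1001, fails to match 1003 against 1002, peeks
 * ahead, finds 1003 in the next relid_map slot, and resumes from there.
 * Conversely, an OID that appears only in partdesc->oids (a partition the
 * planner didn't see) gets -1 / 0 map entries, as if it had been pruned.
 */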
2317
2318 /* present_parts is also subject to later modification */
2319 pprune->present_parts = bms_copy(pinfo->present_parts);
2320
2321 /*
2322 * Only initial_context is initialized here. exec_context is
2323 * initialized during ExecInitPartitionExecPruning() when the
2324 * parent plan's PlanState is available.
2325 *
2326 * Note that we must skip execution-time (both "init" and "exec")
2327 * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2328 * values may be missing.
2329 */
2330 pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
2331 if (pinfo->initial_pruning_steps &&
2332 !(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2333 {
2334 InitPartitionPruneContext(&pprune->initial_context,
2335 pprune->initial_pruning_steps,
2336 partdesc, partkey, NULL,
2337 econtext);
2338 /* Record whether initial pruning is needed at any level */
2339 prunestate->do_initial_prune = true;
2340 }
2341 pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
2342 if (pinfo->exec_pruning_steps &&
2343 !(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2344 {
2345 /* Record whether exec pruning is needed at any level */
2346 prunestate->do_exec_prune = true;
2347 }
2348
2349 /*
2350 * Accumulate the IDs of all PARAM_EXEC Params affecting the
2351 * partitioning decisions at this plan node.
2352 */
2353 prunestate->execparamids = bms_add_members(prunestate->execparamids,
2354 pinfo->execparamids);
2355
2356 /*
2357 * Return all leaf partition indexes if we're skipping pruning in
2358 * the EXPLAIN (GENERIC_PLAN) case.
2359 */
2360 if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2361 {
2362 int part_index = -1;
2363
2364 while ((part_index = bms_next_member(pprune->present_parts,
2365 part_index)) >= 0)
2366 {
2367 Index rtindex = pprune->leafpart_rti_map[part_index];
2368
2369 if (rtindex)
2370 *all_leafpart_rtis = bms_add_member(*all_leafpart_rtis,
2371 rtindex);
2372 }
2373 }
2374
2375 j++;
2376 }
2377 i++;
2378 }
2379
2380 return prunestate;
2381}
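/*
 * Worked example (editor's addition; names and numbers are made up).
 * Consider a hierarchy "root" with leaf partitions p1 and p2 and a
 * sub-partitioned child "sub" whose leaves are s1 and s2, where the parent
 * plan's subplans [0..3] scan p1, p2, s1, s2.  The PartitionedRelPruningData
 * for "root" (bound order p1, p2, sub) would then look like:
 *
 *     subplan_map   = { 0, 1, -1 }    ("sub" has no subplan of its own)
 *     subpart_map   = { -1, -1, 1 }   (index into partrelprunedata[])
 *     present_parts = { 0, 1, 2 }
 *
 * and for "sub" (bound order s1, s2): subplan_map = { 2, 3 }, subpart_map =
 * { -1, -1 }.  leafpart_rti_map holds each leaf partition's RT index, or 0
 * for entries that aren't leaves.
 */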
2382
2383/*
2384 * Initialize a PartitionPruneContext for the given list of pruning steps.
2385 */
2386static void
2387InitPartitionPruneContext(PartitionPruneContext *context,
2388 List *pruning_steps,
2389 PartitionDesc partdesc,
2390 PartitionKey partkey,
2391 PlanState *planstate,
2392 ExprContext *econtext)
2393{
2394 int n_steps;
2395 int partnatts;
2396 ListCell *lc;
2397
2398 n_steps = list_length(pruning_steps);
2399
2400 context->strategy = partkey->strategy;
2401 context->partnatts = partnatts = partkey->partnatts;
2402 context->nparts = partdesc->nparts;
2403 context->boundinfo = partdesc->boundinfo;
2404 context->partcollation = partkey->partcollation;
2405 context->partsupfunc = partkey->partsupfunc;
2406
2407 /* We'll look up type-specific support functions as needed */
2408 context->stepcmpfuncs = palloc0_array(FmgrInfo, n_steps * partnatts);
2409
2410 context->ppccontext = CurrentMemoryContext;
2411 context->planstate = planstate;
2412 context->exprcontext = econtext;
2413
2414 /* Initialize expression state for each expression we need */
2415 context->exprstates = palloc0_array(ExprState *, n_steps * partnatts);
2416 foreach(lc, pruning_steps)
2417 {
2418 PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
2419 ListCell *lc2 = list_head(step->exprs);
2420 int keyno;
2421
2422 /* not needed for other step kinds */
2423 if (!IsA(step, PartitionPruneStepOp))
2424 continue;
2425
2426 Assert(list_length(step->exprs) <= partnatts);
2427
2428 for (keyno = 0; keyno < partnatts; keyno++)
2429 {
2430 if (bms_is_member(keyno, step->nullkeys))
2431 continue;
2432
2433 if (lc2 != NULL)
2434 {
2435 Expr *expr = lfirst(lc2);
2436
2437 /* not needed for Consts */
2438 if (!IsA(expr, Const))
2439 {
2440 int stateidx = PruneCxtStateIdx(partnatts,
2441 step->step.step_id,
2442 keyno);
2443
2444 /*
2445 * When planstate is NULL, pruning_steps is known not to
2446 * contain any expressions that depend on the parent plan.
2447 * Information about any available EXTERN parameters must be
2448 * passed explicitly in that case, which the caller must
2449 * have made available via econtext.
2450 */
2451 if (planstate == NULL)
2452 context->exprstates[stateidx] =
2453 ExecInitExprWithParams(expr,
2454 econtext->ecxt_param_list_info);
2455 else
2456 context->exprstates[stateidx] =
2457 ExecInitExpr(expr, context->planstate);
2458 }
2459 lc2 = lnext(step->exprs, lc2);
2460 }
2461 }
2462 }
2463}
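/*
 * Editor's note (hedged): exprstates and stepcmpfuncs above are flat arrays
 * logically indexed by (step_id, keyno); PruneCxtStateIdx() in partprune.h
 * computes the flat offset, conceptually step_id * partnatts + keyno.  For
 * example, with a two-column partition key, step 3 / key 1 lands at offset 7.
 */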
2464
2465/*
2466 * InitExecPartitionPruneContexts
2467 * Initialize exec pruning contexts deferred by CreatePartitionPruneState()
2468 *
2469 * This function finalizes exec pruning setup for a PartitionPruneState by
2470 * initializing contexts for pruning steps that require the parent plan's
2471 * PlanState. It iterates over PartitionPruningData entries and sets up the
2472 * necessary execution contexts for pruning during query execution.
2473 *
2474 * Also fix the mapping of partition indexes to subplan indexes contained in
2475 * prunestate by considering the new list of subplans that survived initial
2476 * pruning.
2477 *
2478 * Current values of the indexes present in PartitionPruneState count all the
2479 * subplans that would be present before initial pruning was done. If initial
2480 * pruning got rid of some of the subplans, any subsequent pruning passes will
2481 * be looking at a different set of target subplans to choose from than those
2482 * in the pre-initial-pruning set, so the maps in PartitionPruneState
2483 * containing those indexes must be updated to reflect the new indexes of
2484 * subplans in the post-initial-pruning set.
2485 */
2486static void
2487InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
2488 PlanState *parent_plan,
2489 Bitmapset *initially_valid_subplans,
2490 int n_total_subplans)
2491{
2492 EState *estate;
2493 int *new_subplan_indexes = NULL;
2494 Bitmapset *new_other_subplans;
2495 int i;
2496 int newidx;
2497 bool fix_subplan_map = false;
2498
2499 Assert(prunestate->do_exec_prune);
2501 estate = parent_plan->state;
2502
2503 /*
2504 * No need to fix subplans maps if initial pruning didn't eliminate any
2505 * subplans.
2506 */
2507 if (bms_num_members(initially_valid_subplans) < n_total_subplans)
2508 {
2509 fix_subplan_map = true;
2510
2511 /*
2512 * First we must build a temporary array which maps old subplan
2513 * indexes to new ones. For convenience of initialization, we use
2514 * 1-based indexes in this array and leave pruned items as 0.
2515 */
2516 new_subplan_indexes = palloc0_array(int, n_total_subplans);
2517 newidx = 1;
2518 i = -1;
2519 while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2520 {
2521 Assert(i < n_total_subplans);
2522 new_subplan_indexes[i] = newidx++;
2523 }
2524 }
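/*
 * Worked example (editor's addition; numbers are made up).  With
 * n_total_subplans = 5 and initially_valid_subplans = {0, 2, 3}, the loop
 * above yields new_subplan_indexes = {1, 0, 2, 3, 0} (1-based, 0 meaning
 * pruned).  In the loop below, an old subplan_map entry of 3 therefore
 * becomes 3 - 1 = 2, while an entry of 1 becomes 0 - 1 = -1, i.e. pruned.
 */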
2525
2526 /*
2527 * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2528 * subplan indexes. We must also recompute its present_parts bitmap.
2529 */
2530 for (i = 0; i < prunestate->num_partprunedata; i++)
2531 {
2532 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2533 int j;
2534
2535 /*
2536 * Within each hierarchy, we perform this loop in back-to-front order
2537 * so that we determine present_parts for the lowest-level partitioned
2538 * tables first. This way we can tell whether a sub-partitioned
2539 * table's partitions were entirely pruned so we can exclude it from
2540 * the current level's present_parts.
2541 */
2542 for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2543 {
2544 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2545 int nparts = pprune->nparts;
2546 int k;
2547
2548 /* Initialize PartitionPruneContext for exec pruning, if needed. */
2549 if (pprune->exec_pruning_steps != NIL)
2550 {
2551 PartitionKey partkey;
2552 PartitionDesc partdesc;
2553
2554 /*
2555 * See the comment in CreatePartitionPruneState() regarding
2556 * the usage of partdesc and partkey.
2557 */
2558 partkey = RelationGetPartitionKey(pprune->partrel);
2559 partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2560 pprune->partrel);
2561
2562 InitPartitionPruneContext(&pprune->exec_context,
2563 pprune->exec_pruning_steps,
2564 partdesc, partkey, parent_plan,
2565 prunestate->econtext);
2566 }
2567
2568 if (!fix_subplan_map)
2569 continue;
2570
2571 /* We just rebuild present_parts from scratch */
2572 bms_free(pprune->present_parts);
2573 pprune->present_parts = NULL;
2574
2575 for (k = 0; k < nparts; k++)
2576 {
2577 int oldidx = pprune->subplan_map[k];
2578 int subidx;
2579
2580 /*
2581 * If this partition existed as a subplan then change the old
2582 * subplan index to the new subplan index. The new index may
2583 * become -1 if the partition was pruned above, or it may just
2584 * come earlier in the subplan list due to some subplans being
2585 * removed earlier in the list. If it's a subpartition, add
2586 * it to present_parts unless it's entirely pruned.
2587 */
2588 if (oldidx >= 0)
2589 {
2590 Assert(oldidx < n_total_subplans);
2591 pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2592
2593 if (new_subplan_indexes[oldidx] > 0)
2594 pprune->present_parts =
2595 bms_add_member(pprune->present_parts, k);
2596 }
2597 else if ((subidx = pprune->subpart_map[k]) >= 0)
2598 {
2599 PartitionedRelPruningData *subprune;
2600
2601 subprune = &prunedata->partrelprunedata[subidx];
2602
2603 if (!bms_is_empty(subprune->present_parts))
2604 pprune->present_parts =
2605 bms_add_member(pprune->present_parts, k);
2606 }
2607 }
2608 }
2609 }
2610
2611 /*
2612 * If we fixed subplan maps, we must also recompute the other_subplans
2613 * set, since indexes in it may change.
2614 */
2615 if (fix_subplan_map)
2616 {
2617 new_other_subplans = NULL;
2618 i = -1;
2619 while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2620 new_other_subplans = bms_add_member(new_other_subplans,
2621 new_subplan_indexes[i] - 1);
2622
2623 bms_free(prunestate->other_subplans);
2624 prunestate->other_subplans = new_other_subplans;
2625
2626 pfree(new_subplan_indexes);
2627 }
2628}
2629
2630/*
2631 * ExecFindMatchingSubPlans
2632 * Determine which subplans match the pruning steps detailed in
2633 * 'prunestate' for the current comparison expression values.
2634 *
2635 * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2636 * differentiates the initial executor-time pruning step from later
2637 * runtime pruning.
2638 *
2639 * The caller must pass a non-NULL validsubplan_rtis during initial pruning
2640 * to collect the RT indexes of leaf partitions whose subnodes will be
2641 * executed. These RT indexes are later added to EState.es_unpruned_relids.
2642 */
2643Bitmapset *
2644ExecFindMatchingSubPlans(PartitionPruneState *prunestate,
2645 bool initial_prune,
2646 Bitmapset **validsubplan_rtis)
2647{
2648 Bitmapset *result = NULL;
2649 MemoryContext oldcontext;
2650 int i;
2651
2652 /*
2653 * Either we're here on the initial prune done during pruning
2654 * initialization, or we're at a point where PARAM_EXEC Params can be
2655 * evaluated *and* there are steps in which to do so.
2656 */
2657 Assert(initial_prune || prunestate->do_exec_prune);
2659
2660 /*
2661 * Switch to a temp context to avoid leaking memory in the executor's
2662 * query-lifespan memory context.
2663 */
2664 oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2665
2666 /*
2667 * For each hierarchy, do the pruning tests, and add nondeletable
2668 * subplans' indexes to "result".
2669 */
2670 for (i = 0; i < prunestate->num_partprunedata; i++)
2671 {
2672 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2673 PartitionedRelPruningData *pprune;
2674
2675 /*
2676 * We pass the zeroth item, belonging to the root table of the
2677 * hierarchy, and find_matching_subplans_recurse() takes care of
2678 * recursing to other (lower-level) parents as needed.
2679 */
2680 pprune = &prunedata->partrelprunedata[0];
2682 &result, validsubplan_rtis);
2683
2684 /*
2685 * Expression eval may have used space in ExprContext too. Avoid
2686 * accessing exec_context during initial pruning, as it is not valid
2687 * at that stage.
2688 */
2689 if (!initial_prune && pprune->exec_pruning_steps)
2690 ResetExprContext(pprune->exec_context.exprcontext);
2691 }
2692
2693 /* Add in any subplans that partition pruning didn't account for */
2694 result = bms_add_members(result, prunestate->other_subplans);
2695
2696 MemoryContextSwitchTo(oldcontext);
2697
2698 /* Copy result out of the temp context before we reset it */
2699 result = bms_copy(result);
2700 if (validsubplan_rtis)
2701 *validsubplan_rtis = bms_copy(*validsubplan_rtis);
2702
2703 MemoryContextReset(prunestate->prune_context);
2704
2705 return result;
2706}
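/*
 * Editor's sketch (hedged): callers such as Append/MergeAppend cache the
 * result and, per the header comment near the top of this section, call
 * this again with initial_prune = false only when a Param listed in
 * prunestate->execparamids has changed, roughly:
 *
 *     if (bms_overlap(node->ps.chgParam, prunestate->execparamids))
 *         validsubplans = ExecFindMatchingSubPlans(prunestate, false, NULL);
 *
 * The exact caller-side logic lives in nodeAppend.c and nodeMergeAppend.c.
 */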
2707
2708/*
2709 * find_matching_subplans_recurse
2710 * Recursive worker function for ExecFindMatchingSubPlans
2711 *
2712 * Adds valid (non-prunable) subplan IDs to *validsubplans. If
2713 * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
2714 * corresponding partitions, but only if they are leaf partitions.
2715 */
2716static void
2717find_matching_subplans_recurse(PartitionPruningData *prunedata,
2718 PartitionedRelPruningData *pprune,
2719 bool initial_prune,
2720 Bitmapset **validsubplans,
2721 Bitmapset **validsubplan_rtis)
2722{
2723 Bitmapset *partset;
2724 int i;
2725
2726 /* Guard against stack overflow due to overly deep partition hierarchy. */
2727 check_stack_depth();
2728
2729 /*
2730 * Prune as appropriate, if we have pruning steps matching the current
2731 * execution context. Otherwise just include all partitions at this
2732 * level.
2733 */
2734 if (initial_prune && pprune->initial_pruning_steps)
2735 partset = get_matching_partitions(&pprune->initial_context,
2736 pprune->initial_pruning_steps);
2737 else if (!initial_prune && pprune->exec_pruning_steps)
2738 partset = get_matching_partitions(&pprune->exec_context,
2739 pprune->exec_pruning_steps);
2740 else
2741 partset = pprune->present_parts;
2742
2743 /* Translate partset into subplan indexes */
2744 i = -1;
2745 while ((i = bms_next_member(partset, i)) >= 0)
2746 {
2747 if (pprune->subplan_map[i] >= 0)
2748 {
2749 *validsubplans = bms_add_member(*validsubplans,
2750 pprune->subplan_map[i]);
2751
2752 /*
2753 * Only report leaf partitions. Non-leaf partitions may appear
2754 * here when they use an unflattened Append or MergeAppend.
2755 */
2756 if (validsubplan_rtis && pprune->leafpart_rti_map[i])
2757 *validsubplan_rtis = bms_add_member(*validsubplan_rtis,
2758 pprune->leafpart_rti_map[i]);
2759 }
2760 else
2761 {
2762 int partidx = pprune->subpart_map[i];
2763
2764 if (partidx >= 0)
2766 &prunedata->partrelprunedata[partidx],
2769 else
2770 {
2771 /*
2772 * We get here if the planner already pruned all the sub-
2773 * partitions for this partition. Silently ignore this
2774 * partition in this case. The end result is the same: we
2775 * would have pruned all partitions just the same, but we
2776 * don't have any pruning steps to execute to verify this.
2777 */
2778 }
2779 }
2780 }
2781}