PostgreSQL Source Code git master
Loading...
Searching...
No Matches
execPartition.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * execPartition.c
4 * Support routines for partitioning.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/executor/execPartition.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/table.h"
17#include "access/tableam.h"
18#include "catalog/index.h"
19#include "catalog/partition.h"
21#include "executor/executor.h"
23#include "foreign/fdwapi.h"
24#include "mb/pg_wchar.h"
25#include "miscadmin.h"
30#include "utils/acl.h"
32#include "utils/lsyscache.h"
33#include "utils/partcache.h"
34#include "utils/rls.h"
35#include "utils/ruleutils.h"
36
37
38/*-----------------------
39 * PartitionTupleRouting - Encapsulates all information required to
40 * route a tuple inserted into a partitioned table to one of its leaf
41 * partitions.
42 *
43 * partition_root
44 * The partitioned table that's the target of the command.
45 *
46 * partition_dispatch_info
47 * Array of 'max_dispatch' elements containing a pointer to a
48 * PartitionDispatch object for every partitioned table touched by tuple
49 * routing. The entry for the target partitioned table is *always*
50 * present in the 0th element of this array. See comment for
51 * PartitionDispatchData->indexes for details on how this array is
52 * indexed.
53 *
54 * nonleaf_partitions
55 * Array of 'max_dispatch' elements containing pointers to fake
56 * ResultRelInfo objects for nonleaf partitions, useful for checking
57 * the partition constraint.
58 *
59 * num_dispatch
60 * The current number of items stored in the 'partition_dispatch_info'
61 * array. Also serves as the index of the next free array element for
62 * new PartitionDispatch objects that need to be stored.
63 *
64 * max_dispatch
65 * The current allocated size of the 'partition_dispatch_info' array.
66 *
67 * partitions
68 * Array of 'max_partitions' elements containing a pointer to a
69 * ResultRelInfo for every leaf partition touched by tuple routing.
70 * Some of these are pointers to ResultRelInfos which are borrowed out of
71 * the owning ModifyTableState node. The remainder have been built
72 * especially for tuple routing. See comment for
73 * PartitionDispatchData->indexes for details on how this array is
74 * indexed.
75 *
76 * is_borrowed_rel
77 * Array of 'max_partitions' booleans recording whether a given entry
78 * in 'partitions' is a ResultRelInfo pointer borrowed from the owning
79 * ModifyTableState node, rather than being built here.
80 *
81 * num_partitions
82 * The current number of items stored in the 'partitions' array. Also
83 * serves as the index of the next free array element for new
84 * ResultRelInfo objects that need to be stored.
85 *
86 * max_partitions
87 * The current allocated size of the 'partitions' array.
88 *
89 * memcxt
90 * Memory context used to allocate subsidiary structs.
91 *-----------------------
92 */
106
107/*-----------------------
108 * PartitionDispatch - information about one partitioned table in a partition
109 * hierarchy required to route a tuple to any of its partitions. A
110 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
111 * struct and stored inside its 'partition_dispatch_info' array.
112 *
113 * reldesc
114 * Relation descriptor of the table
115 *
116 * key
117 * Partition key information of the table
118 *
119 * keystate
120 * Execution state required for expressions in the partition key
121 *
122 * partdesc
123 * Partition descriptor of the table
124 *
125 * tupslot
126 * A standalone TupleTableSlot initialized with this table's tuple
127 * descriptor, or NULL if no tuple conversion between the parent is
128 * required.
129 *
130 * tupmap
131 * TupleConversionMap to convert from the parent's rowtype to this table's
132 * rowtype (when extracting the partition key of a tuple just before
133 * routing it through this table). A NULL value is stored if no tuple
134 * conversion is required.
135 *
136 * indexes
137 * Array of partdesc->nparts elements. For leaf partitions the index
138 * corresponds to the partition's ResultRelInfo in the encapsulating
139 * PartitionTupleRouting's partitions array. For partitioned partitions,
140 * the index corresponds to the PartitionDispatch for it in its
141 * partition_dispatch_info array. -1 indicates we've not yet allocated
142 * anything in PartitionTupleRouting for the partition.
143 *-----------------------
144 */
155
156
158 EState *estate, PartitionTupleRouting *proute,
160 ResultRelInfo *rootResultRelInfo,
161 int partidx);
162static void ExecInitRoutingInfo(ModifyTableState *mtstate,
163 EState *estate,
164 PartitionTupleRouting *proute,
167 int partidx,
168 bool is_borrowed_rel);
170 PartitionTupleRouting *proute,
172 int partidx, ResultRelInfo *rootResultRelInfo);
174 TupleTableSlot *slot,
175 EState *estate,
176 Datum *values,
177 bool *isnull);
179 const bool *isnull);
181 const Datum *values,
182 const bool *isnull,
183 int maxfieldlen);
191 PartitionDesc partdesc,
193 PlanState *planstate,
194 ExprContext *econtext);
198 int n_total_subplans);
201 bool initial_prune,
204
205
206/*
207 * ExecSetupPartitionTupleRouting - sets up information needed during
208 * tuple routing for partitioned tables, encapsulates it in
209 * PartitionTupleRouting, and returns it.
210 *
211 * Callers must use the returned PartitionTupleRouting during calls to
212 * ExecFindPartition(). The actual ResultRelInfo for a partition is only
213 * allocated when the partition is found for the first time.
214 *
215 * The current memory context is used to allocate this struct and all
216 * subsidiary structs that will be allocated from it later on. Typically
217 * it should be estate->es_query_cxt.
218 */
221{
222 PartitionTupleRouting *proute;
223
224 /*
225 * Here we attempt to expend as little effort as possible in setting up
226 * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
227 * demand, only when we actually need to route a tuple to that partition.
228 * The reason for this is that a common case is for INSERT to insert a
229 * single tuple into a partitioned table and this must be fast.
230 */
232 proute->partition_root = rel;
234 /* Rest of members initialized by zeroing */
235
236 /*
237 * Initialize this table's PartitionDispatch object. Here we pass in the
238 * parent as NULL as we don't need to care about any parent of the target
239 * partitioned table.
240 */
242 NULL, 0, NULL);
243
244 return proute;
245}
246
247/*
248 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
249 * the tuple contained in *slot should belong to.
250 *
251 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
252 * one up or reuse one from mtstate's resultRelInfo array. When reusing a
253 * ResultRelInfo from the mtstate we verify that the relation is a valid
254 * target for INSERTs and initialize tuple routing information.
255 *
256 * rootResultRelInfo is the relation named in the query.
257 *
258 * estate must be non-NULL; we'll need it to compute any expressions in the
259 * partition keys. Also, its per-tuple contexts are used as evaluation
260 * scratch space.
261 *
262 * If no leaf partition is found, this routine errors out with the appropriate
263 * error message. An error may also be raised if the found target partition
264 * is not a valid target for an INSERT.
265 */
268 ResultRelInfo *rootResultRelInfo,
269 PartitionTupleRouting *proute,
270 TupleTableSlot *slot, EState *estate)
271{
274 bool isnull[PARTITION_MAX_KEYS];
275 Relation rel;
277 PartitionDesc partdesc;
279 TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
280 TupleTableSlot *rootslot = slot;
284
285 /* use per-tuple context here to avoid leaking memory */
287
288 /*
289 * First check the root table's partition constraint, if any. No point in
290 * routing the tuple if it doesn't belong in the root table itself.
291 */
292 if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
293 ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
294
295 /* start with the root partitioned table */
296 dispatch = pd[0];
297 while (dispatch != NULL)
298 {
299 int partidx = -1;
300 bool is_leaf;
301
303
304 rel = dispatch->reldesc;
305 partdesc = dispatch->partdesc;
306
307 /*
308 * Extract partition key from tuple. Expression evaluation machinery
309 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
310 * point to the correct tuple slot. The slot might have changed from
311 * what was used for the parent table if the table of the current
312 * partitioning level has different tuple descriptor from the parent.
313 * So update ecxt_scantuple accordingly.
314 */
315 ecxt->ecxt_scantuple = slot;
316 FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
317
318 /*
319 * If this partitioned table has no partitions or no partition for
320 * these values, error out.
321 */
322 if (partdesc->nparts == 0 ||
324 {
325 char *val_desc;
326
328 values, isnull, 64);
332 errmsg("no partition of relation \"%s\" found for row",
334 val_desc ?
335 errdetail("Partition key of the failing row contains %s.",
336 val_desc) : 0,
337 errtable(rel)));
338 }
339
340 is_leaf = partdesc->is_leaf[partidx];
341 if (is_leaf)
342 {
343 /*
344 * We've reached the leaf -- hurray, we're done. Look to see if
345 * we've already got a ResultRelInfo for this partition.
346 */
347 if (likely(dispatch->indexes[partidx] >= 0))
348 {
349 /* ResultRelInfo already built */
350 Assert(dispatch->indexes[partidx] < proute->num_partitions);
351 rri = proute->partitions[dispatch->indexes[partidx]];
352 }
353 else
354 {
355 /*
356 * If the partition is known in the owning ModifyTableState
357 * node, we can re-use that ResultRelInfo instead of creating
358 * a new one with ExecInitPartitionInfo().
359 */
361 partdesc->oids[partidx],
362 true, false);
363 if (rri)
364 {
365 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
366
367 /* Verify this ResultRelInfo allows INSERTs */
369 node ? node->onConflictAction : ONCONFLICT_NONE,
370 NIL);
371
372 /*
373 * Initialize information needed to insert this and
374 * subsequent tuples routed to this partition.
375 */
376 ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
377 rri, partidx, true);
378 }
379 else
380 {
381 /* We need to create a new one. */
382 rri = ExecInitPartitionInfo(mtstate, estate, proute,
383 dispatch,
384 rootResultRelInfo, partidx);
385 }
386 }
387 Assert(rri != NULL);
388
389 /* Signal to terminate the loop */
390 dispatch = NULL;
391 }
392 else
393 {
394 /*
395 * Partition is a sub-partitioned table; get the PartitionDispatch
396 */
397 if (likely(dispatch->indexes[partidx] >= 0))
398 {
399 /* Already built. */
400 Assert(dispatch->indexes[partidx] < proute->num_dispatch);
401
402 rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
403
404 /*
405 * Move down to the next partition level and search again
406 * until we find a leaf partition that matches this tuple
407 */
408 dispatch = pd[dispatch->indexes[partidx]];
409 }
410 else
411 {
412 /* Not yet built. Do that now. */
414
415 /*
416 * Create the new PartitionDispatch. We pass the current one
417 * in as the parent PartitionDispatch
418 */
420 proute,
421 partdesc->oids[partidx],
423 mtstate->rootResultRelInfo);
424 Assert(dispatch->indexes[partidx] >= 0 &&
425 dispatch->indexes[partidx] < proute->num_dispatch);
426
427 rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
429 }
430
431 /*
432 * Convert the tuple to the new parent's layout, if different from
433 * the previous parent.
434 */
435 if (dispatch->tupslot)
436 {
437 AttrMap *map = dispatch->tupmap;
439
440 myslot = dispatch->tupslot;
441 slot = execute_attr_map_slot(map, slot, myslot);
442
443 if (tempslot != NULL)
445 }
446 }
447
448 /*
449 * If this partition is the default one, we must check its partition
450 * constraint now, which may have changed concurrently due to
451 * partitions being added to the parent.
452 *
453 * (We do this here, and do not rely on ExecInsert doing it, because
454 * we don't want to miss doing it for non-leaf partitions.)
455 */
456 if (partidx == partdesc->boundinfo->default_index)
457 {
458 /*
459 * The tuple must match the partition's layout for the constraint
460 * expression to be evaluated successfully. If the partition is
461 * sub-partitioned, that would already be the case due to the code
462 * above, but for a leaf partition the tuple still matches the
463 * parent's layout.
464 *
465 * Note that we have a map to convert from root to current
466 * partition, but not from immediate parent to current partition.
467 * So if we have to convert, do it from the root slot; if not, use
468 * the root slot as-is.
469 */
470 if (is_leaf)
471 {
473
474 if (map)
476 rri->ri_PartitionTupleSlot);
477 else
478 slot = rootslot;
479 }
480
481 ExecPartitionCheck(rri, slot, estate, true);
482 }
483 }
484
485 /* Release the tuple in the lowest parent's dedicated slot. */
486 if (myslot != NULL)
488 /* and restore ecxt's scantuple */
489 ecxt->ecxt_scantuple = ecxt_scantuple_saved;
491
492 return rri;
493}
494
495/*
496 * IsIndexCompatibleAsArbiter
497 * Return true if two indexes are identical for INSERT ON CONFLICT
498 * purposes.
499 *
500 * Only indexes of the same relation are supported.
501 */
502static bool
505 Relation indexRelation,
506 IndexInfo *indexInfo)
507{
508 Assert(arbiterIndexRelation->rd_index->indrelid == indexRelation->rd_index->indrelid);
509
510 /* must match whether they're unique */
511 if (arbiterIndexInfo->ii_Unique != indexInfo->ii_Unique)
512 return false;
513
514 /* No support currently for comparing exclusion indexes. */
515 if (arbiterIndexInfo->ii_ExclusionOps != NULL ||
516 indexInfo->ii_ExclusionOps != NULL)
517 return false;
518
519 /* the "nulls not distinct" criterion must match */
520 if (arbiterIndexInfo->ii_NullsNotDistinct !=
521 indexInfo->ii_NullsNotDistinct)
522 return false;
523
524 /* number of key attributes must match */
525 if (arbiterIndexInfo->ii_NumIndexKeyAttrs !=
526 indexInfo->ii_NumIndexKeyAttrs)
527 return false;
528
529 for (int i = 0; i < arbiterIndexInfo->ii_NumIndexKeyAttrs; i++)
530 {
531 if (arbiterIndexRelation->rd_indcollation[i] !=
532 indexRelation->rd_indcollation[i])
533 return false;
534
535 if (arbiterIndexRelation->rd_opfamily[i] !=
536 indexRelation->rd_opfamily[i])
537 return false;
538
539 if (arbiterIndexRelation->rd_index->indkey.values[i] !=
540 indexRelation->rd_index->indkey.values[i])
541 return false;
542 }
543
545 RelationGetIndexExpressions(indexRelation)) != NIL)
546 return false;
547
549 RelationGetIndexPredicate(indexRelation)) != NIL)
550 return false;
551 return true;
552}
553
554/*
555 * ExecInitPartitionInfo
556 * Lock the partition and initialize ResultRelInfo. Also setup other
557 * information for the partition and store it in the next empty slot in
558 * the proute->partitions array.
559 *
560 * Returns the ResultRelInfo
561 */
/*
 * NOTE(review): this listing is a doxygen text extraction; the integer at
 * the start of each line is the upstream source line number.  Gaps in that
 * numbering mark lines dropped by the extraction -- consult the upstream
 * repository before editing this function.  Code below is kept byte-for-byte
 * as extracted; only review comments have been added.
 */
562static ResultRelInfo *
 564 PartitionTupleRouting *proute,
 566 ResultRelInfo *rootResultRelInfo,
 567 int partidx)
 568{
 569 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
 570 Oid partOid = dispatch->partdesc->oids[partidx];
 571 Relation partrel;
/*
 * NOTE(review): upstream lines 572-576 (the remaining local declarations --
 * presumably firstVarno/firstResultRel, leaf_part_rri, the saved memory
 * context and part_attmap, given later uses) were dropped -- TODO confirm
 * against upstream.
 */
 577 bool found_whole_row;
 578
 580
 582
/*
 * NOTE(review): upstream lines 579-584 were dropped -- presumably the switch
 * into proute->memcxt, the table_open() of the partition, and the head of
 * the InitResultRelInfo() call whose trailing arguments follow -- verify
 * against upstream.
 */
 585 partrel,
 586 0,
 587 rootResultRelInfo,
 588 estate->es_instrument);
 589
 590 /*
 591 * Verify result relation is a valid target for an INSERT. An UPDATE of a
 592 * partition-key becomes a DELETE+INSERT operation, so this check is still
 593 * required when the operation is CMD_UPDATE.
 594 */
/*
 * NOTE(review): upstream line 595 (the head of this validity-check call,
 * presumably CheckValidResultRel(leaf_part_rri, CMD_INSERT, ...)) was
 * dropped -- TODO confirm.
 */
 596 node ? node->onConflictAction : ONCONFLICT_NONE, NIL);
 597
 598 /*
 599 * Open partition indices. The user may have asked to check for conflicts
 600 * within this leaf partition and do "nothing" instead of throwing an
 601 * error. Be prepared in that case by initializing the index information
 602 * needed by ExecInsert() to perform speculative insertions.
 603 */
 604 if (partrel->rd_rel->relhasindex &&
 605 leaf_part_rri->ri_IndexRelationDescs == NULL)
 607 (node != NULL &&
 609
 610 /*
 611 * Build WITH CHECK OPTION constraints for the partition. Note that we
 612 * didn't build the withCheckOptionList for partitions within the planner,
 613 * but simple translation of varattnos will suffice. This only occurs for
 614 * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
 615 * didn't find a result rel to reuse.
 616 */
 617 if (node && node->withCheckOptionLists != NIL)
 618 {
 619 List *wcoList;
 620 List *wcoExprs = NIL;
 621 ListCell *ll;
 622
 623 /*
 624 * In the case of INSERT on a partitioned table, there is only one
 625 * plan. Likewise, there is only one WCO list, not one per partition.
 626 * For UPDATE/MERGE, there are as many WCO lists as there are plans.
 627 */
 628 Assert((node->operation == CMD_INSERT &&
 629 list_length(node->withCheckOptionLists) == 1 &&
 630 list_length(node->resultRelations) == 1) ||
 631 (node->operation == CMD_UPDATE &&
 634 (node->operation == CMD_MERGE &&
 637
 638 /*
 639 * Use the WCO list of the first plan as a reference to calculate
 640 * attno's for the WCO list of this partition. In the INSERT case,
 641 * that refers to the root partitioned table, whereas in the UPDATE
 642 * tuple routing case, that refers to the first partition in the
 643 * mtstate->resultRelInfo array. In any case, both that relation and
 644 * this partition should have the same columns, so we should be able
 645 * to map attributes successfully.
 646 */
 648
 649 /*
 650 * Convert Vars in it to contain this partition's attribute numbers.
 651 */
 655 false);
 656 wcoList = (List *)
 658 firstVarno, 0,
 660 RelationGetForm(partrel)->reltype,
 661 &found_whole_row);
 662 /* We ignore the value of found_whole_row. */
 663
 664 foreach(ll, wcoList)
 665 {
 668 &mtstate->ps);
 669
 671 }
 672
 673 leaf_part_rri->ri_WithCheckOptions = wcoList;
 674 leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
 675 }
 676
 677 /*
 678 * Build the RETURNING projection for the partition. Note that we didn't
 679 * build the returningList for partitions within the planner, but simple
 680 * translation of varattnos will suffice. This only occurs for the INSERT
 681 * case or in the case of UPDATE/MERGE tuple routing where we didn't find
 682 * a result rel to reuse.
 683 */
 684 if (node && node->returningLists != NIL)
 685 {
 686 TupleTableSlot *slot;
 687 ExprContext *econtext;
 688 List *returningList;
 689
 690 /* See the comment above for WCO lists. */
 691 Assert((node->operation == CMD_INSERT &&
 692 list_length(node->returningLists) == 1 &&
 693 list_length(node->resultRelations) == 1) ||
 694 (node->operation == CMD_UPDATE &&
 697 (node->operation == CMD_MERGE &&
 700
 701 /*
 702 * Use the RETURNING list of the first plan as a reference to
 703 * calculate attno's for the RETURNING list of this partition. See
 704 * the comment above for WCO lists for more details on why this is
 705 * okay.
 706 */
 707 returningList = linitial(node->returningLists);
 708
 709 /*
 710 * Convert Vars in it to contain this partition's attribute numbers.
 711 */
 712 if (part_attmap == NULL)
 716 false);
 717 returningList = (List *)
 718 map_variable_attnos((Node *) returningList,
 719 firstVarno, 0,
 721 RelationGetForm(partrel)->reltype,
 722 &found_whole_row);
 723 /* We ignore the value of found_whole_row. */
 724
 725 leaf_part_rri->ri_returningList = returningList;
 726
 727 /*
 728 * Initialize the projection itself.
 729 *
 730 * Use the slot and the expression context that would have been set up
 731 * in ExecInitModifyTable() for projection's output.
 732 */
 733 Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
 734 slot = mtstate->ps.ps_ResultTupleSlot;
 735 Assert(mtstate->ps.ps_ExprContext != NULL);
 736 econtext = mtstate->ps.ps_ExprContext;
 737 leaf_part_rri->ri_projectReturning =
 738 ExecBuildProjectionInfo(returningList, econtext, slot,
 739 &mtstate->ps, RelationGetDescr(partrel));
 740 }
 741
 742 /* Set up information needed for routing tuples to the partition. */
 743 ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
 744 leaf_part_rri, partidx, false);
 745
 746 /*
 747 * If there is an ON CONFLICT clause, initialize state for it.
 748 */
 749 if (node && node->onConflictAction != ONCONFLICT_NONE)
 750 {
 752 ExprContext *econtext = mtstate->ps.ps_ExprContext;
 753 List *arbiterIndexes = NIL;
 754 int additional_arbiters = 0;
 755
 756 /*
 757 * If there is a list of arbiter indexes, map it to a list of indexes
 758 * in the partition. We also add any "identical indexes" to any of
 759 * those, to cover the case where one of them is concurrently being
 760 * reindexed.
 761 */
 762 if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
 763 {
/*
 * NOTE(review): upstream lines 764-767 (local declarations for the
 * arbiter-matching code below, presumably the unparented_idxs/arbiter
 * index-position trackers it later reads) were dropped -- TODO confirm
 * against upstream.
 */
 767
 768 for (int listidx = 0; listidx < leaf_part_rri->ri_NumIndices; listidx++)
 769 {
 770 Oid indexoid;
 771 List *ancestors;
 772
 773 /*
 774 * If one of this index's ancestors is in the root's arbiter
 775 * list, then use this index as arbiter for this partition.
 776 * Otherwise, if this index has no parent, track it for later,
 777 * in case REINDEX CONCURRENTLY is working on one of the
 778 * arbiters.
 779 *
 780 * However, if two indexes appear to have the same parent,
 781 * treat the second of these as if it had no parent. This
 782 * sounds counterintuitive, but it can happen if a transaction
 783 * running REINDEX CONCURRENTLY commits right between those
 784 * two indexes are checked by another process in this loop.
 785 * This will have the effect of also treating that second
 786 * index as arbiter.
 787 *
 788 * XXX get_partition_ancestors scans pg_inherits, which is not
 789 * only slow, but also means the catalog snapshot can get
 790 * invalidated each time through the loop (cf.
 791 * GetNonHistoricCatalogSnapshot). Consider a syscache or
 792 * some other way to cache?
 793 */
 794 indexoid = RelationGetRelid(leaf_part_rri->ri_IndexRelationDescs[listidx]);
 795 ancestors = get_partition_ancestors(indexoid);
 796 INJECTION_POINT("exec-init-partition-after-get-partition-ancestors", NULL);
 797
 798 if (ancestors != NIL &&
 800 {
 802 {
 803 if (list_member_oid(ancestors, parent_idx))
 804 {
 806 arbiterIndexes = lappend_oid(arbiterIndexes, indexoid);
 808 break;
 809 }
 810 }
 811 }
 812 else
 814
 815 list_free(ancestors);
 816 }
 817
 818 /*
 819 * If we found any indexes with no ancestors, it's possible that
 820 * some arbiter index is undergoing concurrent reindex. Match all
 821 * unparented indexes against arbiters; add unparented matching
 822 * ones as "additional arbiters".
 823 *
 824 * This is critical so that all concurrent transactions use the
 825 * same set as arbiters during REINDEX CONCURRENTLY, to avoid
 826 * spurious "duplicate key" errors.
 827 */
 828 if (unparented_idxs && arbiterIndexes)
 829 {
 831 {
 834
 835 unparented_rel = leaf_part_rri->ri_IndexRelationDescs[unparented_i];
 836 unparented_ii = leaf_part_rri->ri_IndexRelationInfo[unparented_i];
 837
 838 Assert(!list_member_oid(arbiterIndexes,
 839 unparented_rel->rd_index->indexrelid));
 840
 841 /* Ignore indexes not ready */
 842 if (!unparented_ii->ii_ReadyForInserts)
 843 continue;
 844
 846 {
 849
 850 arbiter_rel = leaf_part_rri->ri_IndexRelationDescs[arbiter_i];
 851 arbiter_ii = leaf_part_rri->ri_IndexRelationInfo[arbiter_i];
 852
 853 /*
 854 * If the non-ancestor index is compatible with the
 855 * arbiter, use the non-ancestor as arbiter too.
 856 */
 861 {
 862 arbiterIndexes = lappend_oid(arbiterIndexes,
 863 unparented_rel->rd_index->indexrelid);
 865 break;
 866 }
 867 }
 868 }
 869 }
 873 }
 874
 875 /*
 876 * We expect to find as many arbiter indexes on this partition as the
 877 * root has, plus however many "additional arbiters" (to wit: those
 878 * being concurrently rebuilt) we found.
 879 */
 880 if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
 881 list_length(arbiterIndexes) - additional_arbiters)
 882 elog(ERROR, "invalid arbiter index list");
 883 leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
 884
 885 /*
 886 * In the DO UPDATE and DO SELECT cases, we have some more state to
 887 * initialize.
 888 */
 889 if (node->onConflictAction == ONCONFLICT_UPDATE ||
 891 {
 894
 896
 897 Assert(node->onConflictSet != NIL ||
 899 Assert(rootResultRelInfo->ri_onConflict != NULL);
 900
 901 leaf_part_rri->ri_onConflict = onconfl;
 902
 903 /* Lock strength for DO SELECT [FOR UPDATE/SHARE] */
 904 onconfl->oc_LockStrength =
 905 rootResultRelInfo->ri_onConflict->oc_LockStrength;
 906
 907 /*
 908 * Need a separate existing slot for each partition, as the
 909 * partition could be of a different AM, even if the tuple
 910 * descriptors match.
 911 */
 912 onconfl->oc_Existing =
 913 table_slot_create(leaf_part_rri->ri_RelationDesc,
 914 &mtstate->ps.state->es_tupleTable);
 915
 916 /*
 917 * If the partition's tuple descriptor matches exactly the root
 918 * parent (the common case), we can re-use most of the parent's ON
 919 * CONFLICT action state, skipping a bunch of work. Otherwise, we
 920 * need to create state specific to this partition.
 921 */
 922 if (map == NULL)
 923 {
 924 /*
 925 * It's safe to reuse these from the partition root, as we
 926 * only process one tuple at a time (therefore we won't
 927 * overwrite needed data in slots), and the results of any
 928 * projections are independent of the underlying storage.
 929 * Projections and where clauses themselves don't store state
 930 * / are independent of the underlying storage.
 931 */
 932 onconfl->oc_ProjSlot =
 933 rootResultRelInfo->ri_onConflict->oc_ProjSlot;
 934 onconfl->oc_ProjInfo =
 935 rootResultRelInfo->ri_onConflict->oc_ProjInfo;
 936 onconfl->oc_WhereClause =
 937 rootResultRelInfo->ri_onConflict->oc_WhereClause;
 938 }
 939 else
 940 {
 941 /*
 942 * For ON CONFLICT DO UPDATE, translate expressions in
 943 * onConflictSet to account for different attribute numbers.
 944 * For that, map partition varattnos twice: first to catch the
 945 * EXCLUDED pseudo-relation (INNER_VAR), and second to handle
 946 * the main target relation (firstVarno).
 947 */
 949 {
 952
 954 if (part_attmap == NULL)
 958 false);
 959 onconflset = (List *)
 961 INNER_VAR, 0,
 963 RelationGetForm(partrel)->reltype,
 964 &found_whole_row);
 965 /* We ignore the value of found_whole_row. */
 966 onconflset = (List *)
 968 firstVarno, 0,
 970 RelationGetForm(partrel)->reltype,
 971 &found_whole_row);
 972 /* We ignore the value of found_whole_row. */
 973
 974 /*
 975 * Finally, adjust the target colnos to match the
 976 * partition.
 977 */
 980
 981 /* create the tuple slot for the UPDATE SET projection */
 982 onconfl->oc_ProjSlot =
 983 table_slot_create(partrel,
 984 &mtstate->ps.state->es_tupleTable);
 985
 986 /* build UPDATE SET projection state */
 987 onconfl->oc_ProjInfo =
 989 true,
 992 econtext,
 993 onconfl->oc_ProjSlot,
 994 &mtstate->ps);
 995 }
 996
 997 /*
 998 * For both ON CONFLICT DO UPDATE and ON CONFLICT DO SELECT,
 999 * there may be a WHERE clause. If so, initialize state where
 1000 * it will be evaluated, mapping the attribute numbers
 1001 * appropriately. As with onConflictSet, we need to map
 1002 * partition varattnos twice, to catch both the EXCLUDED
 1003 * pseudo-relation (INNER_VAR), and the main target relation
 1004 * (firstVarno).
 1005 */
 1006 if (node->onConflictWhere)
 1007 {
 1008 List *clause;
 1009
 1010 if (part_attmap == NULL)
 1011 part_attmap =
 1014 false);
 1015
 1016 clause = copyObject((List *) node->onConflictWhere);
 1017 clause = (List *)
 1018 map_variable_attnos((Node *) clause,
 1019 INNER_VAR, 0,
 1021 RelationGetForm(partrel)->reltype,
 1022 &found_whole_row);
 1023 /* We ignore the value of found_whole_row. */
 1024 clause = (List *)
 1025 map_variable_attnos((Node *) clause,
 1026 firstVarno, 0,
 1028 RelationGetForm(partrel)->reltype,
 1029 &found_whole_row);
 1030 /* We ignore the value of found_whole_row. */
 1031 onconfl->oc_WhereClause =
 1032 ExecInitQual(clause, &mtstate->ps);
 1033 }
 1034 }
 1035 }
 1036 }
 1037
 1038 /*
 1039 * Since we've just initialized this ResultRelInfo, it's not in any list
 1040 * attached to the estate as yet. Add it, so that it can be found later.
 1041 *
 1042 * Note that the entries in this list appear in no predetermined order,
 1043 * because partition result rels are initialized as and when they're
 1044 * needed.
 1045 */
/*
 * NOTE(review): upstream lines 1046-1049 were dropped -- presumably the
 * append of leaf_part_rri to the estate's tuple-routing result-rel list and
 * the restore of the saved memory context -- verify against upstream.
 */
 1050
 1051 /*
 1052 * Initialize information about this partition that's needed to handle
 1053 * MERGE. We take the "first" result relation's mergeActionList as
 1054 * reference and make copy for this relation, converting stuff that
 1055 * references attribute numbers to match this relation's.
 1056 *
 1057 * This duplicates much of the logic in ExecInitMerge(), so if something
 1058 * changes there, look here too.
 1059 */
 1060 if (node && node->operation == CMD_MERGE)
 1061 {
 1063 ListCell *lc;
 1064 ExprContext *econtext = mtstate->ps.ps_ExprContext;
 1065 Node *joinCondition;
 1066
 1067 if (part_attmap == NULL)
 1068 part_attmap =
 1071 false);
 1072
 1073 if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
 1075
 1076 /* Initialize state for join condition checking. */
 1077 joinCondition =
 1079 firstVarno, 0,
 1081 RelationGetForm(partrel)->reltype,
 1082 &found_whole_row);
 1083 /* We ignore the value of found_whole_row. */
 1084 leaf_part_rri->ri_MergeJoinCondition =
 1085 ExecInitQual((List *) joinCondition, &mtstate->ps);
 1086
 1087 foreach(lc, firstMergeActionList)
 1088 {
 1089 /* Make a copy for this relation to be safe. */
 1090 MergeAction *action = copyObject(lfirst(lc));
 1091 MergeActionState *action_state;
 1092
 1093 /* Generate the action's state for this relation */
 1094 action_state = makeNode(MergeActionState);
 1095 action_state->mas_action = action;
 1096
 1097 /* And put the action in the appropriate list */
 1098 leaf_part_rri->ri_MergeActions[action->matchKind] =
 1099 lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
 1100 action_state);
 1101
 1102 switch (action->commandType)
 1103 {
 1104 case CMD_INSERT:
 1105
 1106 /*
 1107 * ExecCheckPlanOutput() already done on the targetlist
 1108 * when "first" result relation initialized and it is same
 1109 * for all result relations.
 1110 */
 1111 action_state->mas_proj =
 1112 ExecBuildProjectionInfo(action->targetList, econtext,
 1113 leaf_part_rri->ri_newTupleSlot,
 1114 &mtstate->ps,
 1115 RelationGetDescr(partrel));
 1116 break;
 1117 case CMD_UPDATE:
 1118
 1119 /*
 1120 * Convert updateColnos from "first" result relation
 1121 * attribute numbers to this result rel's.
 1122 */
 1123 if (part_attmap)
 1124 action->updateColnos =
 1125 adjust_partition_colnos_using_map(action->updateColnos,
 1126 part_attmap);
 1127 action_state->mas_proj =
 1128 ExecBuildUpdateProjection(action->targetList,
 1129 true,
 1130 action->updateColnos,
 1131 RelationGetDescr(leaf_part_rri->ri_RelationDesc),
 1132 econtext,
 1133 leaf_part_rri->ri_newTupleSlot,
 1134 NULL);
 1135 break;
 1136 case CMD_DELETE:
 1137 case CMD_NOTHING:
 1138 /* Nothing to do */
 1139 break;
 1140
 1141 default:
 1142 elog(ERROR, "unknown action in MERGE WHEN clause");
 1143 }
 1144
 1145 /* found_whole_row intentionally ignored. */
 1146 action->qual =
 1147 map_variable_attnos(action->qual,
 1148 firstVarno, 0,
 1150 RelationGetForm(partrel)->reltype,
 1151 &found_whole_row);
 1152 action_state->mas_whenqual =
 1153 ExecInitQual((List *) action->qual, &mtstate->ps);
 1154 }
 1155 }
 1157
 1158 return leaf_part_rri;
 1159}
1160
1161/*
1162 * ExecInitRoutingInfo
1163 * Set up information needed for translating tuples between root
1164 * partitioned table format and partition format, and keep track of it
1165 * in PartitionTupleRouting.
1166 */
1167static void
1169 EState *estate,
1170 PartitionTupleRouting *proute,
1173 int partidx,
1174 bool is_borrowed_rel)
1175{
 /* NOTE(review): the function name/parameter lines declaring 'mtstate',
  * 'dispatch' and 'partRelInfo' are elided in this extract — confirm
  * against the full source. */
1177 int rri_index;
1178
1180
1181 /*
1182 * Set up tuple conversion between root parent and the partition if the
1183 * two have different rowtypes. If conversion is indeed required, also
1184 * initialize a slot dedicated to storing this partition's converted
1185 * tuples. Various operations that are applied to tuples after routing,
1186 * such as checking constraints, will refer to this slot.
1187 */
1188 if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
1189 {
1190 Relation partrel = partRelInfo->ri_RelationDesc;
1191
1192 /*
1193 * This pins the partition's TupleDesc, which will be released at the
1194 * end of the command.
1195 */
1196 partRelInfo->ri_PartitionTupleSlot =
1197 table_slot_create(partrel, &estate->es_tupleTable);
1198 }
1199 else
1200 partRelInfo->ri_PartitionTupleSlot = NULL;
1201
1202 /*
1203 * If the partition is a foreign table, let the FDW init itself for
1204 * routing tuples to the partition.
1205 */
1206 if (partRelInfo->ri_FdwRoutine != NULL &&
1207 partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1208 partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1209
1210 /*
1211 * Determine if the FDW supports batch insert and determine the batch size
1212 * (a FDW may support batching, but it may be disabled for the
1213 * server/table or for this particular query).
1214 *
1215 * If the FDW does not support batching, we set the batch size to 1.
1216 */
1217 if (partRelInfo->ri_FdwRoutine != NULL &&
1218 partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
1219 partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
1220 partRelInfo->ri_BatchSize =
1221 partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1222 else
1223 partRelInfo->ri_BatchSize = 1;
1224
1225 Assert(partRelInfo->ri_BatchSize >= 1);
1226
1227 partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1228
1229 /*
1230 * Keep track of it in the PartitionTupleRouting->partitions array.
1231 */
1232 Assert(dispatch->indexes[partidx] == -1);
1233
1234 rri_index = proute->num_partitions++;
1235
1236 /* Allocate or enlarge the array, as needed */
1237 if (proute->num_partitions >= proute->max_partitions)
1238 {
1239 if (proute->max_partitions == 0)
1240 {
1241 proute->max_partitions = 8;
1243 proute->is_borrowed_rel = palloc_array(bool, proute->max_partitions);
1244 }
1245 else
1246 {
 /* Double the capacity each time we run out, for amortized O(1) appends. */
1247 proute->max_partitions *= 2;
1248 proute->partitions = (ResultRelInfo **)
1249 repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1250 proute->max_partitions);
1251 proute->is_borrowed_rel = (bool *)
1252 repalloc(proute->is_borrowed_rel, sizeof(bool) *
1253 proute->max_partitions);
1254 }
1255 }
1256
1257 proute->partitions[rri_index] = partRelInfo;
1258 proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
1259 dispatch->indexes[partidx] = rri_index;
1260
1262}
1263
1264/*
1265 * ExecInitPartitionDispatchInfo
1266 * Lock the partitioned table (if not locked already) and initialize
1267 * PartitionDispatch for a partitioned table and store it in the next
1268 * available slot in the proute->partition_dispatch_info array. Also,
1269 * record the index into this array in the parent_pd->indexes[] array in
1270 * the partidx element so that we can properly retrieve the newly created
1271 * PartitionDispatch later.
1272 */
1273static PartitionDispatch
1275 PartitionTupleRouting *proute, Oid partoid,
1277 ResultRelInfo *rootResultRelInfo)
1278{
 /* NOTE(review): declarations of 'pd', 'parent_pd' and 'partidx' appear on
  * lines elided in this extract — confirm against the full source. */
1279 Relation rel;
1280 PartitionDesc partdesc;
1282 int dispatchidx;
1284
1285 /*
1286 * For data modification, it is better that executor does not include
1287 * partitions being detached, except when running in snapshot-isolation
1288 * mode. This means that a read-committed transaction immediately gets a
1289 * "no partition for tuple" error when a tuple is inserted into a
1290 * partition that's being detached concurrently, but a transaction in
1291 * repeatable-read mode can still use such a partition.
1292 */
1293 if (estate->es_partition_directory == NULL)
1294 estate->es_partition_directory =
1297
1299
1300 /*
1301 * Only sub-partitioned tables need to be locked here. The root
1302 * partitioned table will already have been locked as it's referenced in
1303 * the query's rtable.
1304 */
1305 if (partoid != RelationGetRelid(proute->partition_root))
1306 rel = table_open(partoid, RowExclusiveLock);
1307 else
1308 rel = proute->partition_root;
1309 partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1310
1312 partdesc->nparts * sizeof(int));
1313 pd->reldesc = rel;
1314 pd->key = RelationGetPartitionKey(rel);
1315 pd->keystate = NIL;
1316 pd->partdesc = partdesc;
1317 if (parent_pd != NULL)
1318 {
1319 TupleDesc tupdesc = RelationGetDescr(rel);
1320
1321 /*
1322 * For sub-partitioned tables where the column order differs from its
1323 * direct parent partitioned table, we must store a tuple table slot
1324 * initialized with its tuple descriptor and a tuple conversion map to
1325 * convert a tuple from its parent's rowtype to its own. This is to
1326 * make sure that we are looking at the correct row using the correct
1327 * tuple descriptor when computing its partition key for tuple
1328 * routing.
1329 */
1331 tupdesc,
1332 false);
1333 pd->tupslot = pd->tupmap ?
1335 }
1336 else
1337 {
1338 /* Not required for the root partitioned table */
1339 pd->tupmap = NULL;
1340 pd->tupslot = NULL;
1341 }
1342
1343 /*
1344 * Initialize with -1 to signify that the corresponding partition's
1345 * ResultRelInfo or PartitionDispatch has not been created yet.
1346 */
1347 memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1348
1349 /* Track in PartitionTupleRouting for later use */
1350 dispatchidx = proute->num_dispatch++;
1351
1352 /* Allocate or enlarge the array, as needed */
1353 if (proute->num_dispatch >= proute->max_dispatch)
1354 {
1355 if (proute->max_dispatch == 0)
1356 {
1357 proute->max_dispatch = 4;
1360 }
1361 else
1362 {
 /* Double the capacity each time we run out, for amortized O(1) appends. */
1363 proute->max_dispatch *= 2;
1366 sizeof(PartitionDispatch) * proute->max_dispatch);
1367 proute->nonleaf_partitions = (ResultRelInfo **)
1369 sizeof(ResultRelInfo *) * proute->max_dispatch);
1370 }
1371 }
1373
1374 /*
1375 * If setting up a PartitionDispatch for a sub-partitioned table, we may
1376 * also need a minimally valid ResultRelInfo for checking the partition
1377 * constraint later; set that up now.
1378 */
1379 if (parent_pd)
1380 {
1382
1383 InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
1385 }
1386 else
1388
1389 /*
1390 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1391 * install a downlink in the parent to allow quick descent.
1392 */
1393 if (parent_pd)
1394 {
1395 Assert(parent_pd->indexes[partidx] == -1);
1396 parent_pd->indexes[partidx] = dispatchidx;
1397 }
1398
1400
1401 return pd;
1402}
1403
1404/*
1405 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1406 * routing.
1407 *
1408 * Close all the partitioned tables, leaf partitions, and their indices.
1409 */
1410void
1412 PartitionTupleRouting *proute)
1413{
1414 int i;
1415
1416 /*
1417 * Remember, proute->partition_dispatch_info[0] corresponds to the root
1418 * partitioned table, which we must not try to close, because it is the
1419 * main target table of the query that will be closed by callers such as
1420 * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1421 * partitioned table.
1422 */
1423 for (i = 1; i < proute->num_dispatch; i++)
1424 {
 /* NOTE(review): the PartitionDispatch fetch and the table_close()/
  * slot-drop calls in this loop are on lines elided in this extract —
  * confirm against the full source. */
1426
1428
1429 if (pd->tupslot)
1431 }
1432
 /* Second pass: let FDWs shut down, then close each non-borrowed leaf
  * partition's indexes and relation. */
1433 for (i = 0; i < proute->num_partitions; i++)
1434 {
1435 ResultRelInfo *resultRelInfo = proute->partitions[i];
1436
1437 /* Allow any FDWs to shut down */
1438 if (resultRelInfo->ri_FdwRoutine != NULL &&
1439 resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1440 resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1441 resultRelInfo);
1442
1443 /*
1444 * Close it if it's not one of the result relations borrowed from the
1445 * owning ModifyTableState; those will be closed by ExecEndPlan().
1446 */
1447 if (proute->is_borrowed_rel[i])
1448 continue;
1449
1450 ExecCloseIndices(resultRelInfo);
1451 table_close(resultRelInfo->ri_RelationDesc, NoLock);
1452 }
1453}
1454
1455/* ----------------
1456 * FormPartitionKeyDatum
1457 * Construct values[] and isnull[] arrays for the partition key
1458 * of a tuple.
1459 *
1460 * pd Partition dispatch object of the partitioned table
1461 * slot Heap tuple from which to extract partition key
1462 * estate executor state for evaluating any partition key
1463 * expressions (must be non-NULL)
1464 * values Array of partition key Datums (output area)
1465 * isnull Array of is-null indicators (output area)
1466 *
1467 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1468 * the heap tuple passed in.
1469 * ----------------
1470 */
1471static void
1473 TupleTableSlot *slot,
1474 EState *estate,
1475 Datum *values,
1476 bool *isnull)
1477{
 /* NOTE(review): the declaration and per-iteration advance of
  * 'partexpr_item' (and the 'keycol' declaration) are on lines elided in
  * this extract — confirm against the full source. */
1479 int i;
1480
1481 if (pd->key->partexprs != NIL && pd->keystate == NIL)
1482 {
1483 /* Check caller has set up context correctly */
1484 Assert(estate != NULL &&
1485 GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1486
1487 /* First time through, set up expression evaluation state */
1488 pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1489 }
1490
1492 for (i = 0; i < pd->key->partnatts; i++)
1493 {
1495 Datum datum;
1496 bool isNull;
1497
1498 if (keycol != 0)
1499 {
1500 /* Plain column; get the value directly from the heap tuple */
1501 datum = slot_getattr(slot, keycol, &isNull);
1502 }
1503 else
1504 {
1505 /* Expression; need to evaluate it */
1506 if (partexpr_item == NULL)
1507 elog(ERROR, "wrong number of partition key expressions");
1509 GetPerTupleExprContext(estate),
1510 &isNull);
1512 }
1513 values[i] = datum;
1514 isnull[i] = isNull;
1515 }
1516
 /* If any prepared expressions remain unconsumed, the key definition and
  * the expression list are out of sync. */
1517 if (partexpr_item != NULL)
1518 elog(ERROR, "wrong number of partition key expressions")
1519}
1520
1521/*
1522 * The number of times the same partition must be found in a row before we
1523 * switch from a binary search for the given values to just checking if the
1524 * values belong to the last found partition. This must be above 0.
 *
 * Consulted by get_partition_for_tuple()'s cached-lookup fast path below.
1525 */
1526#define PARTITION_CACHED_FIND_THRESHOLD 16
1527
1528/*
1529 * get_partition_for_tuple
1530 * Finds partition of relation which accepts the partition key specified
1531 * in values and isnull.
1532 *
1533 * Calling this function can be quite expensive when LIST and RANGE
1534 * partitioned tables have many partitions. This is due to the binary search
1535 * that's done to find the correct partition. Many of the use cases for LIST
1536 * and RANGE partitioned tables make it likely that the same partition is
1537 * found in subsequent ExecFindPartition() calls. This is especially true for
1538 * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1539 * partition key is the current time. When asked to find a partition for a
1540 * RANGE or LIST partitioned table, we record the partition index and datum
1541 * offset we've found for the given 'values' in the PartitionDesc (which is
1542 * stored in relcache), and if we keep finding the same partition
1543 * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1544 * logic and instead of performing a binary search to find the correct
1545 * partition, we'll just double-check that 'values' still belong to the last
1546 * found partition, and if so, we'll return that partition index, thus
1547 * skipping the need for the binary search. If we fail to match the last
1548 * partition when double checking, then we fall back on doing a binary search.
1549 * In this case, unless we find 'values' belong to the DEFAULT partition,
1550 * we'll reset the number of times we've hit the same partition so that we
1551 * don't attempt to use the cache again until we've found that partition at
1552 * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1553 *
1554 * For cases where the partition changes on each lookup, the amount of
1555 * additional work required just amounts to recording the last found partition
1556 * and bound offset then resetting the found counter. This is cheap and does
1557 * not appear to cause any meaningful slowdowns for such cases.
1558 *
1559 * No caching of partitions is done when the last found partition is the
1560 * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1561 * is no bound offset storing the matching datum, so we cannot confirm the
1562 * indexes match. For the NULL partition, this is just so cheap, there's no
1563 * sense in caching.
1564 *
1565 * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1566 * found or -1 if none found.
1567 */
1568static int
1570{
 /* NOTE(review): the signature, the 'case PARTITION_STRATEGY_*' labels, and
  * the cache-threshold guard conditions are on lines elided in this
  * extract — confirm against the full source. */
1571 int bound_offset = -1;
1572 int part_index = -1;
1573 PartitionKey key = pd->key;
1574 PartitionDesc partdesc = pd->partdesc;
1575 PartitionBoundInfo boundinfo = partdesc->boundinfo;
1576
1577 /*
1578 * In the switch statement below, when we perform a cached lookup for
1579 * RANGE and LIST partitioned tables, if we find that the last found
1580 * partition matches the 'values', we return the partition index right
1581 * away. We do this instead of breaking out of the switch as we don't
1582 * want to execute the code about the DEFAULT partition or do any updates
1583 * for any of the cache-related fields. That would be a waste of effort
1584 * as we already know it's not the DEFAULT partition and have no need to
1585 * increment the number of times we found the same partition any higher
1586 * than PARTITION_CACHED_FIND_THRESHOLD.
1587 */
1588
1589 /* Route as appropriate based on partitioning strategy. */
1590 switch (key->strategy)
1591 {
1593 {
1595
1596 /* hash partitioning is too cheap to bother caching */
1597 rowHash = compute_partition_hash_value(key->partnatts,
1598 key->partsupfunc,
1599 key->partcollation,
1600 values, isnull);
1601
1602 /*
1603 * HASH partitions can't have a DEFAULT partition and we don't
1604 * do any caching work for them, so just return the part index
1605 */
1606 return boundinfo->indexes[rowHash % boundinfo->nindexes];
1607 }
1608
1610 if (isnull[0])
1611 {
1612 /* this is far too cheap to bother doing any caching */
1613 if (partition_bound_accepts_nulls(boundinfo))
1614 {
1615 /*
1616 * When there is a NULL partition we just return that
1617 * directly. We don't have a bound_offset so it's not
1618 * valid to drop into the code after the switch which
1619 * checks and updates the cache fields. We perhaps should
1620 * be invalidating the details of the last cached
1621 * partition but there's no real need to. Keeping those
1622 * fields set gives a chance at matching to the cached
1623 * partition on the next lookup.
1624 */
1625 return boundinfo->null_index;
1626 }
1627 }
1628 else
1629 {
1630 bool equal;
1631
1633 {
1635 Datum lastDatum = boundinfo->datums[last_datum_offset][0];
1636 int32 cmpval;
1637
1638 /* does the last found datum index match this datum? */
1639 cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1640 key->partcollation[0],
1641 lastDatum,
1642 values[0]));
1643
1644 if (cmpval == 0)
1645 return boundinfo->indexes[last_datum_offset];
1646
1647 /* fall-through and do a manual lookup */
1648 }
1649
1650 bound_offset = partition_list_bsearch(key->partsupfunc,
1651 key->partcollation,
1652 boundinfo,
1653 values[0], &equal);
1654 if (bound_offset >= 0 && equal)
1655 part_index = boundinfo->indexes[bound_offset];
1656 }
1657 break;
1658
1660 {
1661 bool equal = false,
1662 range_partkey_has_null = false;
1663 int i;
1664
1665 /*
1666 * No range includes NULL, so this will be accepted by the
1667 * default partition if there is one, and otherwise rejected.
1668 */
1669 for (i = 0; i < key->partnatts; i++)
1670 {
1671 if (isnull[i])
1672 {
1674 break;
1675 }
1676 }
1677
1678 /* NULLs belong in the DEFAULT partition */
1680 break;
1681
1683 {
1687 int32 cmpval;
1688
1689 /* check if the value is >= to the lower bound */
1690 cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1691 key->partcollation,
1692 lastDatums,
1693 kind,
1694 values,
1695 key->partnatts);
1696
1697 /*
1698 * If it's equal to the lower bound then no need to check
1699 * the upper bound.
1700 */
1701 if (cmpval == 0)
1702 return boundinfo->indexes[last_datum_offset + 1];
1703
1704 if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
1705 {
1706 /* check if the value is below the upper bound */
1707 lastDatums = boundinfo->datums[last_datum_offset + 1];
1708 kind = boundinfo->kind[last_datum_offset + 1];
1709 cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1710 key->partcollation,
1711 lastDatums,
1712 kind,
1713 values,
1714 key->partnatts);
1715
1716 if (cmpval > 0)
1717 return boundinfo->indexes[last_datum_offset + 1];
1718 }
1719 /* fall-through and do a manual lookup */
1720 }
1721
1723 key->partcollation,
1724 boundinfo,
1725 key->partnatts,
1726 values,
1727 &equal);
1728
1729 /*
1730 * The bound at bound_offset is less than or equal to the
1731 * tuple value, so the bound at offset+1 is the upper bound of
1732 * the partition we're looking for, if there actually exists
1733 * one.
1734 */
1735 part_index = boundinfo->indexes[bound_offset + 1];
1736 }
1737 break;
1738
1739 default:
1740 elog(ERROR, "unexpected partition strategy: %d",
1741 (int) key->strategy);
1742 }
1743
1744 /*
1745 * part_index < 0 means we failed to find a partition of this parent. Use
1746 * the default partition, if there is one.
1747 */
1748 if (part_index < 0)
1749 {
1750 /*
1751 * No need to reset the cache fields here. The next set of values
1752 * might end up belonging to the cached partition, so leaving the
1753 * cache alone improves the chances of a cache hit on the next lookup.
1754 */
1755 return boundinfo->default_index;
1756 }
1757
1758 /* we should only make it here when the code above set bound_offset */
1759 Assert(bound_offset >= 0);
1760
1761 /*
1762 * Attend to the cache fields. If the bound_offset matches the last
1763 * cached bound offset then we've found the same partition as last time,
1764 * so bump the count by one. If all goes well, we'll eventually reach
1765 * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
1766 * around. Otherwise, we'll reset the cache count back to 1 to mark that
1767 * we've found this partition for the first time.
1768 */
1769 if (bound_offset == partdesc->last_found_datum_index)
1770 partdesc->last_found_count++;
1771 else
1772 {
1773 partdesc->last_found_count = 1;
1776 }
1777
1778 return part_index;
1779}
1780
1781/*
1782 * ExecBuildSlotPartitionKeyDescription
1783 *
1784 * This works very much like BuildIndexValueDescription() and is currently
1785 * used for building error messages when ExecFindPartition() fails to find
1786 * partition for a row.
 *
 * Returns NULL (no detail) when row-level security is enabled on the rel or
 * the user lacks the privileges checked below; otherwise returns a palloc'd
 * string of the form "(keycols) = (values)" with long values truncated to
 * maxfieldlen bytes.
1787 */
1788static char *
1790 const Datum *values,
1791 const bool *isnull,
1792 int maxfieldlen)
1793{
 /* NOTE(review): local declarations ('buf', 'key', 'aclresult') and a few
  * call lines are elided in this extract — confirm against the full
  * source. */
1796 int partnatts = get_partition_natts(key);
1797 int i;
1798 Oid relid = RelationGetRelid(rel);
1800
 /* Don't leak key values through error details when RLS is in force. */
1801 if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
1802 return NULL;
1803
1804 /* If the user has table-level access, just go build the description. */
1806 if (aclresult != ACLCHECK_OK)
1807 {
1808 /*
1809 * Step through the columns of the partition key and make sure the
1810 * user has SELECT rights on all of them.
1811 */
1812 for (i = 0; i < partnatts; i++)
1813 {
1815
1816 /*
1817 * If this partition key column is an expression, we return no
1818 * detail rather than try to figure out what column(s) the
1819 * expression includes and if the user has SELECT rights on them.
1820 */
1821 if (attnum == InvalidAttrNumber ||
1824 return NULL;
1825 }
1826 }
1827
1829 appendStringInfo(&buf, "(%s) = (",
1830 pg_get_partkeydef_columns(relid, true));
1831
1832 for (i = 0; i < partnatts; i++)
1833 {
1834 char *val;
1835 int vallen;
1836
1837 if (isnull[i])
1838 val = "null";
1839 else
1840 {
1841 Oid foutoid;
1842 bool typisvarlena;
1843
1845 &foutoid, &typisvarlena);
1847 }
1848
1849 if (i > 0)
1851
1852 /* truncate if needed */
1853 vallen = strlen(val);
1854 if (vallen <= maxfieldlen)
1855 appendBinaryStringInfo(&buf, val, vallen);
1856 else
1857 {
 /* Clip at a multibyte character boundary, then mark the truncation. */
1858 vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1859 appendBinaryStringInfo(&buf, val, vallen);
1860 appendStringInfoString(&buf, "...");
1861 }
1862 }
1863
1865
1866 return buf.data;
1867}
1868
1869/*
1870 * adjust_partition_colnos
1871 * Adjust the list of UPDATE target column numbers to account for
1872 * attribute differences between the parent and the partition.
1873 *
1874 * Note: mustn't be called if no adjustment is required.
1875 */
 /* NOTE(review): the function body is not visible in this extract; per the
  * sibling adjust_partition_colnos_using_map's comment, this variant maps
  * from the "root" result relation — confirm against the full source. */
1876static List *
1885
1886/*
1887 * adjust_partition_colnos_using_map
1888 * Like adjust_partition_colnos, but uses a caller-supplied map instead
1889 * of assuming to map from the "root" result relation.
1890 *
1891 * Note: mustn't be called if no adjustment is required.
 *
 * Returns a freshly built list; the input list is not modified.
1892 */
1893static List *
1895{
 /* NOTE(review): the signature line (taking the colnos List and the
  * AttrMap) is elided in this extract — confirm against the full source. */
1896 List *new_colnos = NIL;
1897 ListCell *lc;
1898
1899 Assert(attrMap != NULL); /* else we shouldn't be here */
1900
1901 foreach(lc, colnos)
1902 {
1904
 /* Reject parent attnos that fall outside the map or map to a dropped/
  * missing column (attnums entry of 0). */
1905 if (parentattrno <= 0 ||
1906 parentattrno > attrMap->maplen ||
1907 attrMap->attnums[parentattrno - 1] == 0)
1908 elog(ERROR, "unexpected attno %d in target column list",
1909 parentattrno);
1911 attrMap->attnums[parentattrno - 1]);
1912 }
1913
1914 return new_colnos;
1915}
1916
1917/*-------------------------------------------------------------------------
1918 * Run-Time Partition Pruning Support.
1919 *
1920 * The following series of functions exist to support the removal of unneeded
1921 * subplans for queries against partitioned tables. The supporting functions
1922 * here are designed to work with any plan type which supports an arbitrary
1923 * number of subplans, e.g. Append, MergeAppend.
1924 *
1925 * When pruning involves comparison of a partition key to a constant, it's
1926 * done by the planner. However, if we have a comparison to a non-constant
1927 * but not volatile expression, that presents an opportunity for run-time
1928 * pruning by the executor, allowing irrelevant partitions to be skipped
1929 * dynamically.
1930 *
1931 * We must distinguish expressions containing PARAM_EXEC Params from
1932 * expressions that don't contain those. Even though a PARAM_EXEC Param is
1933 * considered to be a stable expression, it can change value from one plan
1934 * node scan to the next during query execution. Stable comparison
1935 * expressions that don't involve such Params allow partition pruning to be
1936 * done once during executor startup. Expressions that do involve such Params
1937 * require us to prune separately for each scan of the parent plan node.
1938 *
1939 * Note that pruning away unneeded subplans during executor startup has the
1940 * added benefit of not having to initialize the unneeded subplans at all.
1941 *
1942 *
1943 * Functions:
1944 *
1945 * ExecDoInitialPruning:
1946 * Perform runtime "initial" pruning, if necessary, to determine the set
1947 * of child subnodes that need to be initialized during ExecInitNode() for
1948 * all plan nodes that contain a PartitionPruneInfo.
1949 *
1950 * ExecInitPartitionExecPruning:
1951 * Updates the PartitionPruneState found at given part_prune_index in
1952 * EState.es_part_prune_states for use during "exec" pruning if required.
1953 * Also returns the set of subplans to initialize that would be stored at
1954 * part_prune_index in EState.es_part_prune_results by
1955 * ExecDoInitialPruning(). Maps in PartitionPruneState are updated to
1956 * account for initial pruning possibly having eliminated some of the
1957 * subplans.
1958 *
1959 * ExecFindMatchingSubPlans:
1960 * Returns indexes of matching subplans after evaluating the expressions
1961 * that are safe to evaluate at a given point. This function is first
1962 * called during ExecDoInitialPruning() to find the initially matching
1963 * subplans based on performing the initial pruning steps and then must be
1964 * called again each time the value of a Param listed in
1965 * PartitionPruneState's 'execparamids' changes.
1966 *-------------------------------------------------------------------------
1967 */
1968
1969
1970/*
1971 * ExecDoInitialPruning
1972 * Perform runtime "initial" pruning, if necessary, to determine the set
1973 * of child subnodes that need to be initialized during ExecInitNode() for
1974 * plan nodes that support partition pruning.
1975 *
1976 * This function iterates over each PartitionPruneInfo entry in
1977 * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
1978 * and adds it to es_part_prune_states. ExecInitPartitionExecPruning() accesses
1979 * these states through their corresponding indexes in es_part_prune_states and
1980 * assigns each state to the parent node's PlanState, from where it will be used
1981 * for "exec" pruning.
1982 *
1983 * If initial pruning steps exist for a PartitionPruneInfo entry, this function
1984 * executes those pruning steps and stores the result as a bitmapset of valid
1985 * child subplans, identifying which subplans should be initialized for
1986 * execution. The results are saved in estate->es_part_prune_results.
1987 *
1988 * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
1989 * entry is still added to es_part_prune_results to maintain alignment with
1990 * es_part_prune_infos. This ensures that ExecInitPartitionExecPruning() can
1991 * use the same index to retrieve the pruning results.
1992 */
1993void
1995{
 /* NOTE(review): the signature line and several loop-body lines (the
  * pruneinfo extraction, the CreatePartitionPruneState() call and the list
  * appends) are elided in this extract — confirm against the full
  * source. */
1996 ListCell *lc;
1997
1998 foreach(lc, estate->es_part_prune_infos)
1999 {
2005
2006 /* Create and save the PartitionPruneState. */
2010 prunestate);
2011
2012 /*
2013 * Perform initial pruning steps, if any, and save the result
2014 * bitmapset or NULL as described in the header comment.
2015 */
2016 if (prunestate->do_initial_prune)
2019 else
2021
2026 }
2027}
2028
2029/*
2030 * ExecInitPartitionExecPruning
2031 * Initialize the data structures needed for runtime "exec" partition
2032 * pruning and return the result of initial pruning, if available.
2033 *
2034 * 'relids' identifies the relation to which both the parent plan and the
2035 * PartitionPruneInfo given by 'part_prune_index' belong.
2036 *
2037 * On return, *initially_valid_subplans is assigned the set of indexes of
2038 * child subplans that must be initialized along with the parent plan node.
2039 * Initial pruning would have been performed by ExecDoInitialPruning(), if
2040 * necessary, and the bitmapset of surviving subplans' indexes would have
2041 * been stored as the part_prune_index'th element of
2042 * EState.es_part_prune_results.
2043 *
2044 * If subplans were indeed pruned during initial pruning, the subplan_map
2045 * arrays in the returned PartitionPruneState are re-sequenced to exclude those
2046 * subplans, but only if the maps will be needed for subsequent execution
2047 * pruning passes.
2048 */
2051 int n_total_subplans,
2052 int part_prune_index,
2053 Bitmapset *relids,
2055{
 /* NOTE(review): the return type / function name lines and a few
  * declarations ('prunestate', 'pruneinfo') are elided in this extract —
  * confirm against the full source. */
2057 EState *estate = planstate->state;
2059
2060 /* Obtain the pruneinfo we need. */
2062 part_prune_index);
2063
2064 /* Its relids better match the plan node's or the planner messed up. */
2065 if (!bms_equal(relids, pruneinfo->relids))
2066 elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
2067 bmsToString(pruneinfo->relids), part_prune_index,
2068 bmsToString(relids));
2069
2070 /*
2071 * The PartitionPruneState would have been created by
2072 * ExecDoInitialPruning() and stored as the part_prune_index'th element of
2073 * EState.es_part_prune_states.
2074 */
2075 prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
2077
2078 /* Use the result of initial pruning done by ExecDoInitialPruning(). */
2079 if (prunestate->do_initial_prune)
2081 estate->es_part_prune_results,
2082 part_prune_index);
2083 else
2084 {
2085 /* No pruning, so we'll need to initialize all subplans */
2088 n_total_subplans - 1);
2089 }
2090
2091 /*
2092 * The exec pruning state must also be initialized, if needed, before it
2093 * can be used for pruning during execution.
2094 *
2095 * This also re-sequences subplan indexes contained in prunestate to
2096 * account for any that were removed due to initial pruning; refer to the
2097 * condition in InitExecPartitionPruneContexts() that is used to determine
2098 * whether to do this. If no exec pruning needs to be done, we would thus
2099 * leave the maps to be in an invalid state, but that's ok since that data
2100 * won't be consulted again (cf initial Assert in
2101 * ExecFindMatchingSubPlans).
2102 */
2103 if (prunestate->do_exec_prune)
2107
2108 return prunestate;
2109}
2110
2111/*
2112 * CreatePartitionPruneState
2113 * Build the data structure required for calling ExecFindMatchingSubPlans
2114 *
2115 * This includes PartitionPruneContexts (stored in each
2116 * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
2117 * which hold the ExprStates needed to evaluate pruning expressions, and
2118 * mapping arrays to convert partition indexes from the pruning logic
2119 * into subplan indexes in the parent plan node's list of child subplans.
2120 *
2121 * 'pruneinfo' is a PartitionPruneInfo as generated by
2122 * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
2123 * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
2124 * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
2125 * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
2126 * system is needed to keep from confusing the different hierarchies when a
2127 * UNION ALL contains multiple partitioned tables as children. The data
2128 * stored in each PartitionedRelPruningData can be re-used each time we
2129 * re-evaluate which partitions match the pruning steps provided in each
2130 * PartitionedRelPruneInfo.
2131 *
2132 * Note that only the PartitionPruneContexts for initial pruning are
2133 * initialized here. Those required for exec pruning are initialized later in
2134 * ExecInitPartitionExecPruning(), as they depend on the availability of the
2135 * parent plan node's PlanState.
2136 *
2137 * If initial pruning steps are to be skipped (e.g., during EXPLAIN
2138 * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
2139 * all leaf partitions whose scanning subnode is included in the parent plan
2140 * node's list of child plans. The caller must add these RT indexes to
2141 * estate->es_unpruned_relids.
2142 */
2143static PartitionPruneState *
2146{
2149 ListCell *lc;
2150 int i;
2151
2152 /*
2153 * Expression context that will be used by partkey_datum_from_expr() to
2154 * evaluate expressions for comparison against partition bounds.
2155 */
2156 ExprContext *econtext = CreateExprContext(estate);
2157
2158 /* For data reading, executor always includes detached partitions */
2159 if (estate->es_partition_directory == NULL)
2160 estate->es_partition_directory =
2161 CreatePartitionDirectory(estate->es_query_cxt, false);
2162
2165
2166 /*
2167 * Allocate the data structure
2168 */
2170 palloc(offsetof(PartitionPruneState, partprunedata) +
2172
2173 /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
2174 prunestate->econtext = econtext;
2175 prunestate->execparamids = NULL;
2176 /* other_subplans can change at runtime, so we need our own copy */
2177 prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2178 prunestate->do_initial_prune = false; /* may be set below */
2179 prunestate->do_exec_prune = false; /* may be set below */
2180 prunestate->num_partprunedata = n_part_hierarchies;
2181
2182 /*
2183 * Create a short-term memory context which we'll use when making calls to
2184 * the partition pruning functions. This avoids possible memory leaks,
2185 * since the pruning functions call comparison functions that aren't under
2186 * our control.
2187 */
2188 prunestate->prune_context =
2190 "Partition Prune",
2192
2193 i = 0;
2194 foreach(lc, pruneinfo->prune_infos)
2195 {
2199 ListCell *lc2;
2200 int j;
2201
2203 palloc(offsetof(PartitionPruningData, partrelprunedata) +
2205 prunestate->partprunedata[i] = prunedata;
2206 prunedata->num_partrelprunedata = npartrelpruneinfos;
2207
2208 j = 0;
2209 foreach(lc2, partrelpruneinfos)
2210 {
2212 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2213 Relation partrel;
2214 PartitionDesc partdesc;
2216
2217 /*
2218 * We can rely on the copies of the partitioned table's partition
2219 * key and partition descriptor appearing in its relcache entry,
2220 * because that entry will be held open and locked for the
2221 * duration of this executor run.
2222 */
2223 partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2224
2225 /* Remember for InitExecPartitionPruneContexts(). */
2226 pprune->partrel = partrel;
2227
2230 partrel);
2231
2232 /*
2233 * Initialize the subplan_map and subpart_map.
2234 *
2235 * The set of partitions that exist now might not be the same that
2236 * existed when the plan was made. The normal case is that it is;
2237 * optimize for that case with a quick comparison, and just copy
2238 * the subplan_map and make subpart_map, leafpart_rti_map point to
2239 * the ones in PruneInfo.
2240 *
2241 * For the case where they aren't identical, we could have more
2242 * partitions on either side; or even exactly the same number of
2243 * them on both but the set of OIDs doesn't match fully. Handle
2244 * this by creating new subplan_map and subpart_map arrays that
2245 * corresponds to the ones in the PruneInfo where the new
2246 * partition descriptor's OIDs match. Any that don't match can be
2247 * set to -1, as if they were pruned. By construction, both
2248 * arrays are in partition bounds order.
2249 */
2250 pprune->nparts = partdesc->nparts;
2251 pprune->subplan_map = palloc_array(int, partdesc->nparts);
2252
2253 if (partdesc->nparts == pinfo->nparts &&
2254 memcmp(partdesc->oids, pinfo->relid_map,
2255 sizeof(int) * partdesc->nparts) == 0)
2256 {
2257 pprune->subpart_map = pinfo->subpart_map;
2258 pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2259 memcpy(pprune->subplan_map, pinfo->subplan_map,
2260 sizeof(int) * pinfo->nparts);
2261 }
2262 else
2263 {
2264 int pd_idx = 0;
2265 int pp_idx;
2266
2267 /*
2268 * When the partition arrays are not identical, there could be
2269 * some new ones but it's also possible that one was removed;
2270 * we cope with both situations by walking the arrays and
2271 * discarding those that don't match.
2272 *
2273 * If the number of partitions on both sides match, it's still
2274 * possible that one partition has been detached and another
2275 * attached. Cope with that by creating a map that skips any
2276 * mismatches.
2277 */
2278 pprune->subpart_map = palloc_array(int, partdesc->nparts);
2279 pprune->leafpart_rti_map = palloc_array(int, partdesc->nparts);
2280
2281 for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2282 {
2283 /* Skip any InvalidOid relid_map entries */
2284 while (pd_idx < pinfo->nparts &&
2285 !OidIsValid(pinfo->relid_map[pd_idx]))
2286 pd_idx++;
2287
2288 recheck:
2289 if (pd_idx < pinfo->nparts &&
2290 pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2291 {
2292 /* match... */
2293 pprune->subplan_map[pp_idx] =
2294 pinfo->subplan_map[pd_idx];
2295 pprune->subpart_map[pp_idx] =
2296 pinfo->subpart_map[pd_idx];
2297 pprune->leafpart_rti_map[pp_idx] =
2298 pinfo->leafpart_rti_map[pd_idx];
2299 pd_idx++;
2300 continue;
2301 }
2302
2303 /*
2304 * There isn't an exact match in the corresponding
2305 * positions of both arrays. Peek ahead in
2306 * pinfo->relid_map to see if we have a match for the
2307 * current partition in partdesc. Normally if a match
2308 * exists it's just one element ahead, and it means the
2309 * planner saw one extra partition that we no longer see
2310 * now (its concurrent detach finished just in between);
2311 * so we skip that one by updating pd_idx to the new
2312 * location and jumping above. We can then continue to
2313 * match the rest of the elements after skipping the OID
2314 * with no match; no future matches are tried for the
2315 * element that was skipped, because we know the arrays to
2316 * be in the same order.
2317 *
2318 * If we don't see a match anywhere in the rest of the
2319 * pinfo->relid_map array, that means we see an element
2320 * now that the planner didn't see, so mark that one as
2321 * pruned and move on.
2322 */
2323 for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2324 {
2325 if (pd_idx2 >= pinfo->nparts)
2326 break;
2327 if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2328 {
2329 pd_idx = pd_idx2;
2330 goto recheck;
2331 }
2332 }
2333
2334 pprune->subpart_map[pp_idx] = -1;
2335 pprune->subplan_map[pp_idx] = -1;
2336 pprune->leafpart_rti_map[pp_idx] = 0;
2337 }
2338 }
2339
2340 /* present_parts is also subject to later modification */
2342
2343 /*
2344 * Only initial_context is initialized here. exec_context is
2345 * initialized during ExecInitPartitionExecPruning() when the
2346 * parent plan's PlanState is available.
2347 *
2348 * Note that we must skip execution-time (both "init" and "exec")
2349 * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2350 * values may be missing.
2351 */
2352 pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
2353 if (pinfo->initial_pruning_steps &&
2355 {
2356 InitPartitionPruneContext(&pprune->initial_context,
2357 pprune->initial_pruning_steps,
2358 partdesc, partkey, NULL,
2359 econtext);
2360 /* Record whether initial pruning is needed at any level */
2361 prunestate->do_initial_prune = true;
2362 }
2363 pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
2364 if (pinfo->exec_pruning_steps &&
2366 {
2367 /* Record whether exec pruning is needed at any level */
2368 prunestate->do_exec_prune = true;
2369 }
2370
2371 /*
2372 * Accumulate the IDs of all PARAM_EXEC Params affecting the
2373 * partitioning decisions at this plan node.
2374 */
2375 prunestate->execparamids = bms_add_members(prunestate->execparamids,
2376 pinfo->execparamids);
2377
2378 /*
2379 * Return all leaf partition indexes if we're skipping pruning in
2380 * the EXPLAIN (GENERIC_PLAN) case.
2381 */
2382 if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2383 {
2384 int part_index = -1;
2385
2386 while ((part_index = bms_next_member(pprune->present_parts,
2387 part_index)) >= 0)
2388 {
2389 Index rtindex = pprune->leafpart_rti_map[part_index];
2390
2391 if (rtindex)
2393 rtindex);
2394 }
2395 }
2396
2397 j++;
2398 }
2399 i++;
2400 }
2401
2402 return prunestate;
2403}
2404
/*
 * Initialize a PartitionPruneContext for the given list of pruning steps.
 */
2408static void
2411 PartitionDesc partdesc,
2413 PlanState *planstate,
2414 ExprContext *econtext)
2415{
2416 int n_steps;
2417 int partnatts;
2418 ListCell *lc;
2419
2421
2422 context->strategy = partkey->strategy;
2423 context->partnatts = partnatts = partkey->partnatts;
2424 context->nparts = partdesc->nparts;
2425 context->boundinfo = partdesc->boundinfo;
2426 context->partcollation = partkey->partcollation;
2427 context->partsupfunc = partkey->partsupfunc;
2428
2429 /* We'll look up type-specific support functions as needed */
2430 context->stepcmpfuncs = palloc0_array(FmgrInfo, n_steps * partnatts);
2431
2433 context->planstate = planstate;
2434 context->exprcontext = econtext;
2435
2436 /* Initialize expression state for each expression we need */
2437 context->exprstates = palloc0_array(ExprState *, n_steps * partnatts);
2438 foreach(lc, pruning_steps)
2439 {
2441 ListCell *lc2 = list_head(step->exprs);
2442 int keyno;
2443
2444 /* not needed for other step kinds */
2445 if (!IsA(step, PartitionPruneStepOp))
2446 continue;
2447
2448 Assert(list_length(step->exprs) <= partnatts);
2449
2450 for (keyno = 0; keyno < partnatts; keyno++)
2451 {
2452 if (bms_is_member(keyno, step->nullkeys))
2453 continue;
2454
2455 if (lc2 != NULL)
2456 {
2457 Expr *expr = lfirst(lc2);
2458
2459 /* not needed for Consts */
2460 if (!IsA(expr, Const))
2461 {
2462 int stateidx = PruneCxtStateIdx(partnatts,
2463 step->step.step_id,
2464 keyno);
2465
2466 /*
2467 * When planstate is NULL, pruning_steps is known not to
2468 * contain any expressions that depend on the parent plan.
2469 * Information of any available EXTERN parameters must be
2470 * passed explicitly in that case, which the caller must
2471 * have made available via econtext.
2472 */
2473 if (planstate == NULL)
2474 context->exprstates[stateidx] =
2476 econtext->ecxt_param_list_info);
2477 else
2478 context->exprstates[stateidx] =
2479 ExecInitExpr(expr, context->planstate);
2480 }
2481 lc2 = lnext(step->exprs, lc2);
2482 }
2483 }
2484 }
2485}
2486
/*
 * InitExecPartitionPruneContexts
 *		Initialize exec pruning contexts deferred by CreatePartitionPruneState()
 *
 * This function finalizes exec pruning setup for a PartitionPruneState by
 * initializing contexts for pruning steps that require the parent plan's
 * PlanState.  It iterates over PartitionPruningData entries and sets up the
 * necessary execution contexts for pruning during query execution.
 *
 * Also fix the mapping of partition indexes to subplan indexes contained in
 * prunestate by considering the new list of subplans that survived initial
 * pruning.
 *
 * Current values of the indexes present in PartitionPruneState count all the
 * subplans that would be present before initial pruning was done.  If initial
 * pruning got rid of some of the subplans, any subsequent pruning passes will
 * be looking at a different set of target subplans to choose from than those
 * in the pre-initial-pruning set, so the maps in PartitionPruneState
 * containing those indexes must be updated to reflect the new indexes of
 * subplans in the post-initial-pruning set.
 */
2508static void
2512 int n_total_subplans)
2513{
2514 EState *estate;
2517 int i;
2518 int newidx;
2519 bool fix_subplan_map = false;
2520
2521 Assert(prunestate->do_exec_prune);
2523 estate = parent_plan->state;
2524
2525 /*
2526 * No need to fix subplans maps if initial pruning didn't eliminate any
2527 * subplans.
2528 */
2530 {
2531 fix_subplan_map = true;
2532
2533 /*
2534 * First we must build a temporary array which maps old subplan
2535 * indexes to new ones. For convenience of initialization, we use
2536 * 1-based indexes in this array and leave pruned items as 0.
2537 */
2539 newidx = 1;
2540 i = -1;
2541 while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2542 {
2545 }
2546 }
2547
2548 /*
2549 * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2550 * subplan indexes. We must also recompute its present_parts bitmap.
2551 */
2552 for (i = 0; i < prunestate->num_partprunedata; i++)
2553 {
2554 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2555 int j;
2556
2557 /*
2558 * Within each hierarchy, we perform this loop in back-to-front order
2559 * so that we determine present_parts for the lowest-level partitioned
2560 * tables first. This way we can tell whether a sub-partitioned
2561 * table's partitions were entirely pruned so we can exclude it from
2562 * the current level's present_parts.
2563 */
2564 for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2565 {
2566 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2567 int nparts = pprune->nparts;
2568 int k;
2569
2570 /* Initialize PartitionPruneContext for exec pruning, if needed. */
2571 if (pprune->exec_pruning_steps != NIL)
2572 {
2574 PartitionDesc partdesc;
2575
2576 /*
2577 * See the comment in CreatePartitionPruneState() regarding
2578 * the usage of partdesc and partkey.
2579 */
2582 pprune->partrel);
2583
2584 InitPartitionPruneContext(&pprune->exec_context,
2585 pprune->exec_pruning_steps,
2586 partdesc, partkey, parent_plan,
2587 prunestate->econtext);
2588 }
2589
2590 if (!fix_subplan_map)
2591 continue;
2592
2593 /* We just rebuild present_parts from scratch */
2594 bms_free(pprune->present_parts);
2595 pprune->present_parts = NULL;
2596
2597 for (k = 0; k < nparts; k++)
2598 {
2599 int oldidx = pprune->subplan_map[k];
2600 int subidx;
2601
2602 /*
2603 * If this partition existed as a subplan then change the old
2604 * subplan index to the new subplan index. The new index may
2605 * become -1 if the partition was pruned above, or it may just
2606 * come earlier in the subplan list due to some subplans being
2607 * removed earlier in the list. If it's a subpartition, add
2608 * it to present_parts unless it's entirely pruned.
2609 */
2610 if (oldidx >= 0)
2611 {
2613 pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2614
2615 if (new_subplan_indexes[oldidx] > 0)
2616 pprune->present_parts =
2617 bms_add_member(pprune->present_parts, k);
2618 }
2619 else if ((subidx = pprune->subpart_map[k]) >= 0)
2620 {
2622
2623 subprune = &prunedata->partrelprunedata[subidx];
2624
2625 if (!bms_is_empty(subprune->present_parts))
2627 bms_add_member(pprune->present_parts, k);
2628 }
2629 }
2630 }
2631 }
2632
2633 /*
2634 * If we fixed subplan maps, we must also recompute the other_subplans
2635 * set, since indexes in it may change.
2636 */
2637 if (fix_subplan_map)
2638 {
2640 i = -1;
2641 while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2643 new_subplan_indexes[i] - 1);
2644
2645 bms_free(prunestate->other_subplans);
2646 prunestate->other_subplans = new_other_subplans;
2647
2649 }
2650}
2651
/*
 * ExecFindMatchingSubPlans
 *		Determine which subplans match the pruning steps detailed in
 *		'prunestate' for the current comparison expression values.
 *
 * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated.  This
 * differentiates the initial executor-time pruning step from later
 * runtime pruning.
 *
 * The caller must pass a non-NULL validsubplan_rtis during initial pruning
 * to collect the RT indexes of leaf partitions whose subnodes will be
 * executed.  These RT indexes are later added to EState.es_unpruned_relids.
 */
2665Bitmapset *
2667 bool initial_prune,
2669{
2670 Bitmapset *result = NULL;
2671 MemoryContext oldcontext;
2672 int i;
2673
2674 /*
2675 * Either we're here on the initial prune done during pruning
2676 * initialization, or we're at a point where PARAM_EXEC Params can be
2677 * evaluated *and* there are steps in which to do so.
2678 */
2679 Assert(initial_prune || prunestate->do_exec_prune);
2681
2682 /*
2683 * Switch to a temp context to avoid leaking memory in the executor's
2684 * query-lifespan memory context.
2685 */
2686 oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2687
2688 /*
2689 * For each hierarchy, do the pruning tests, and add nondeletable
2690 * subplans' indexes to "result".
2691 */
2692 for (i = 0; i < prunestate->num_partprunedata; i++)
2693 {
2694 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2696
2697 /*
2698 * We pass the zeroth item, belonging to the root table of the
2699 * hierarchy, and find_matching_subplans_recurse() takes care of
2700 * recursing to other (lower-level) parents as needed.
2701 */
2702 pprune = &prunedata->partrelprunedata[0];
2704 &result, validsubplan_rtis);
2705
2706 /*
2707 * Expression eval may have used space in ExprContext too. Avoid
2708 * accessing exec_context during initial pruning, as it is not valid
2709 * at that stage.
2710 */
2711 if (!initial_prune && pprune->exec_pruning_steps)
2712 ResetExprContext(pprune->exec_context.exprcontext);
2713 }
2714
2715 /* Add in any subplans that partition pruning didn't account for */
2716 result = bms_add_members(result, prunestate->other_subplans);
2717
2718 MemoryContextSwitchTo(oldcontext);
2719
2720 /* Copy result out of the temp context before we reset it */
2721 result = bms_copy(result);
2724
2725 MemoryContextReset(prunestate->prune_context);
2726
2727 return result;
2728}
2729
/*
 * find_matching_subplans_recurse
 *		Recursive worker function for ExecFindMatchingSubPlans
 *
 * Adds valid (non-prunable) subplan IDs to *validsubplans.  If
 * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
 * corresponding partitions, but only if they are leaf partitions.
 */
2738static void
2741 bool initial_prune,
2744{
2746 int i;
2747
2748 /* Guard against stack overflow due to overly deep partition hierarchy. */
2750
2751 /*
2752 * Prune as appropriate, if we have pruning steps matching the current
2753 * execution context. Otherwise just include all partitions at this
2754 * level.
2755 */
2756 if (initial_prune && pprune->initial_pruning_steps)
2757 partset = get_matching_partitions(&pprune->initial_context,
2758 pprune->initial_pruning_steps);
2759 else if (!initial_prune && pprune->exec_pruning_steps)
2760 partset = get_matching_partitions(&pprune->exec_context,
2761 pprune->exec_pruning_steps);
2762 else
2763 partset = pprune->present_parts;
2764
2765 /* Translate partset into subplan indexes */
2766 i = -1;
2767 while ((i = bms_next_member(partset, i)) >= 0)
2768 {
2769 if (pprune->subplan_map[i] >= 0)
2770 {
2772 pprune->subplan_map[i]);
2773
2774 /*
2775 * Only report leaf partitions. Non-leaf partitions may appear
2776 * here when they use an unflattened Append or MergeAppend.
2777 */
2778 if (validsubplan_rtis && pprune->leafpart_rti_map[i])
2780 pprune->leafpart_rti_map[i]);
2781 }
2782 else
2783 {
2784 int partidx = pprune->subpart_map[i];
2785
2786 if (partidx >= 0)
2788 &prunedata->partrelprunedata[partidx],
2791 else
2792 {
2793 /*
2794 * We get here if the planner already pruned all the sub-
2795 * partitions for this partition. Silently ignore this
2796 * partition in this case. The end result is the same: we
2797 * would have pruned all partitions just the same, but we
2798 * don't have any pruning steps to execute to verify this.
2799 */
2800 }
2801 }
2802 }
2803}
AclResult
Definition acl.h:182
@ ACLCHECK_OK
Definition acl.h:183
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition aclchk.c:3886
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition aclchk.c:4057
AttrMap * build_attrmap_by_name(TupleDesc indesc, TupleDesc outdesc, bool missing_ok)
Definition attmap.c:175
AttrMap * build_attrmap_by_name_if_req(TupleDesc indesc, TupleDesc outdesc, bool missing_ok)
Definition attmap.c:261
int16 AttrNumber
Definition attnum.h:21
#define InvalidAttrNumber
Definition attnum.h:23
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:142
int bms_next_member(const Bitmapset *a, int prevbit)
Definition bitmapset.c:1290
Bitmapset * bms_add_range(Bitmapset *a, int lower, int upper)
Definition bitmapset.c:1003
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition bitmapset.c:744
bool bms_is_member(int x, const Bitmapset *a)
Definition bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition bitmapset.c:799
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:901
Bitmapset * bms_copy(const Bitmapset *a)
Definition bitmapset.c:122
#define bms_is_empty(a)
Definition bitmapset.h:118
static Datum values[MAXATTR]
Definition bootstrap.c:147
#define likely(x)
Definition c.h:423
#define Assert(condition)
Definition c.h:885
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:492
int32_t int32
Definition c.h:554
uint64_t uint64
Definition c.h:559
#define unlikely(x)
Definition c.h:424
unsigned int Index
Definition c.h:640
#define OidIsValid(objectId)
Definition c.h:800
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
int errdetail(const char *fmt,...) pg_attribute_printf(1
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
bool equal(const void *a, const void *b)
Definition equalfuncs.c:223
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition execExpr.c:143
ProjectionInfo * ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent, TupleDesc inputDesc)
Definition execExpr.c:370
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition execExpr.c:229
ExprState * ExecInitExprWithParams(Expr *node, ParamListInfo ext_params)
Definition execExpr.c:180
ProjectionInfo * ExecBuildUpdateProjection(List *targetList, bool evalTargetList, List *targetColnos, TupleDesc relDesc, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent)
Definition execExpr.c:547
List * ExecPrepareExprList(List *nodes, EState *estate)
Definition execExpr.c:839
void ExecCloseIndices(ResultRelInfo *resultRelInfo)
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, OnConflictAction onConflictAction, List *mergeActions)
Definition execMain.c:1054
bool ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool emitError)
Definition execMain.c:1860
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options)
Definition execMain.c:1247
static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate, PlanState *parent_plan, Bitmapset *initially_valid_subplans, int n_total_subplans)
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate, PartitionTupleRouting *proute, Oid partoid, PartitionDispatch parent_pd, int partidx, ResultRelInfo *rootResultRelInfo)
static bool IsIndexCompatibleAsArbiter(Relation arbiterIndexRelation, IndexInfo *arbiterIndexInfo, Relation indexRelation, IndexInfo *indexInfo)
void ExecDoInitialPruning(EState *estate)
static ResultRelInfo * ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *rootResultRelInfo, int partidx)
PartitionPruneState * ExecInitPartitionExecPruning(PlanState *planstate, int n_total_subplans, int part_prune_index, Bitmapset *relids, Bitmapset **initially_valid_subplans)
Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, bool initial_prune, Bitmapset **validsubplan_rtis)
static void ExecInitRoutingInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *partRelInfo, int partidx, bool is_borrowed_rel)
static char * ExecBuildSlotPartitionKeyDescription(Relation rel, const Datum *values, const bool *isnull, int maxfieldlen)
static void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
static int get_partition_for_tuple(PartitionDispatch pd, const Datum *values, const bool *isnull)
#define PARTITION_CACHED_FIND_THRESHOLD
PartitionTupleRouting * ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
static List * adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
static List * adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
ResultRelInfo * ExecFindPartition(ModifyTableState *mtstate, ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, TupleTableSlot *slot, EState *estate)
static void InitPartitionPruneContext(PartitionPruneContext *context, List *pruning_steps, PartitionDesc partdesc, PartitionKey partkey, PlanState *planstate, ExprContext *econtext)
static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans, Bitmapset **validsubplan_rtis)
static PartitionPruneState * CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo, Bitmapset **all_leafpart_rtis)
void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute)
struct PartitionDispatchData * PartitionDispatch
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
const TupleTableSlotOps TTSOpsVirtual
Definition execTuples.c:84
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Relation ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel)
Definition execUtils.c:825
TupleConversionMap * ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
Definition execUtils.c:1326
ExprContext * CreateExprContext(EState *estate)
Definition execUtils.c:307
TupleConversionMap * ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
Definition execUtils.c:1300
#define GetPerTupleExprContext(estate)
Definition executor.h:656
#define EXEC_FLAG_EXPLAIN_GENERIC
Definition executor.h:67
#define ResetExprContext(econtext)
Definition executor.h:650
#define GetPerTupleMemoryContext(estate)
Definition executor.h:661
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition executor.h:436
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition fmgr.c:1150
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition fmgr.c:1763
long val
Definition informix.c:689
#define INJECTION_POINT(name, arg)
int j
Definition isn.c:78
int i
Definition isn.c:77
List * list_difference(const List *list1, const List *list2)
Definition list.c:1237
List * lappend(List *list, void *datum)
Definition list.c:339
List * lappend_int(List *list, int datum)
Definition list.c:357
List * lappend_oid(List *list, Oid datum)
Definition list.c:375
void list_free(List *list)
Definition list.c:1546
bool list_member_oid(const List *list, Oid datum)
Definition list.c:722
#define NoLock
Definition lockdefs.h:34
#define RowExclusiveLock
Definition lockdefs.h:38
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition lsyscache.c:3059
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition mbutils.c:1211
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
Oid GetUserId(void)
Definition miscinit.c:469
ResultRelInfo * ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, bool missing_ok, bool update_cache)
void ExecInitMergeTupleSlots(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo)
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define copyObject(obj)
Definition nodes.h:232
@ ONCONFLICT_NONE
Definition nodes.h:428
@ ONCONFLICT_SELECT
Definition nodes.h:431
@ ONCONFLICT_UPDATE
Definition nodes.h:430
@ CMD_MERGE
Definition nodes.h:279
@ CMD_INSERT
Definition nodes.h:277
@ CMD_DELETE
Definition nodes.h:278
@ CMD_UPDATE
Definition nodes.h:276
@ CMD_NOTHING
Definition nodes.h:282
#define makeNode(_type_)
Definition nodes.h:161
#define castNode(_type_, nodeptr)
Definition nodes.h:182
char * bmsToString(const Bitmapset *bms)
Definition outfuncs.c:819
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
@ PARTITION_STRATEGY_HASH
Definition parsenodes.h:903
@ PARTITION_STRATEGY_LIST
Definition parsenodes.h:901
@ PARTITION_STRATEGY_RANGE
Definition parsenodes.h:902
PartitionRangeDatumKind
Definition parsenodes.h:952
#define ACL_SELECT
Definition parsenodes.h:77
int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, const Datum *rb_datums, PartitionRangeDatumKind *rb_kind, const Datum *tuple_datums, int n_tuple_datums)
uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation, const Datum *values, const bool *isnull)
int partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, int nvalues, const Datum *values, bool *is_equal)
int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, Datum value, bool *is_equal)
#define partition_bound_accepts_nulls(bi)
Definition partbounds.h:98
PartitionKey RelationGetPartitionKey(Relation rel)
Definition partcache.c:51
static int16 get_partition_col_attnum(PartitionKey key, int col)
Definition partcache.h:80
static int get_partition_natts(PartitionKey key)
Definition partcache.h:65
static Oid get_partition_col_typid(PartitionKey key, int col)
Definition partcache.h:86
PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
Definition partdesc.c:423
PartitionDesc PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
Definition partdesc.c:456
List * get_partition_ancestors(Oid relid)
Definition partition.c:134
Bitmapset * get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
Definition partprune.c:845
#define PruneCxtStateIdx(partnatts, step_id, keyno)
Definition partprune.h:70
int16 attnum
#define PARTITION_MAX_KEYS
#define lfirst(lc)
Definition pg_list.h:172
#define lfirst_node(type, lc)
Definition pg_list.h:176
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
#define lfirst_int(lc)
Definition pg_list.h:173
static void * list_nth(const List *list, int n)
Definition pg_list.h:299
#define linitial(l)
Definition pg_list.h:178
static ListCell * list_head(const List *l)
Definition pg_list.h:128
#define foreach_oid(var, lst)
Definition pg_list.h:471
#define list_nth_node(type, list, n)
Definition pg_list.h:327
static ListCell * lnext(const List *l, const ListCell *c)
Definition pg_list.h:343
#define linitial_oid(l)
Definition pg_list.h:180
#define foreach_int(var, lst)
Definition pg_list.h:470
static char buf[DEFAULT_XLOG_SEG_SIZE]
uint64_t Datum
Definition postgres.h:70
static int32 DatumGetInt32(Datum X)
Definition postgres.h:212
#define InvalidOid
unsigned int Oid
static int fb(int x)
#define INNER_VAR
Definition primnodes.h:243
#define RelationGetForm(relation)
Definition rel.h:508
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetDescr(relation)
Definition rel.h:540
#define RelationGetRelationName(relation)
Definition rel.h:548
List * RelationGetIndexPredicate(Relation relation)
Definition relcache.c:5205
List * RelationGetIndexExpressions(Relation relation)
Definition relcache.c:5092
int errtable(Relation rel)
Definition relcache.c:6044
Node * map_variable_attnos(Node *node, int target_varno, int sublevels_up, const AttrMap *attno_map, Oid to_rowtype, bool *found_whole_row)
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition rls.c:52
@ RLS_ENABLED
Definition rls.h:45
char * pg_get_partkeydef_columns(Oid relid, bool pretty)
Definition ruleutils.c:1924
void check_stack_depth(void)
Definition stack_depth.c:95
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition stringinfo.c:242
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int maplen
Definition attmap.h:37
AttrNumber * attnums
Definition attmap.h:36
List * es_part_prune_infos
Definition execnodes.h:673
List * es_tuple_routing_result_relations
Definition execnodes.h:701
int es_top_eflags
Definition execnodes.h:722
int es_instrument
Definition execnodes.h:723
Bitmapset * es_unpruned_relids
Definition execnodes.h:676
List * es_part_prune_states
Definition execnodes.h:674
MemoryContext es_query_cxt
Definition execnodes.h:713
List * es_tupleTable
Definition execnodes.h:715
PartitionDirectory es_partition_directory
Definition execnodes.h:695
List * es_part_prune_results
Definition execnodes.h:675
ParamListInfo ecxt_param_list_info
Definition execnodes.h:287
struct EState * ecxt_estate
Definition execnodes.h:317
EndForeignInsert_function EndForeignInsert
Definition fdwapi.h:239
bool ii_Unique
Definition execnodes.h:202
Oid * ii_ExclusionOps
Definition execnodes.h:190
bool ii_NullsNotDistinct
Definition execnodes.h:204
int ii_NumIndexKeyAttrs
Definition execnodes.h:171
Definition pg_list.h:54
MergeAction * mas_action
Definition execnodes.h:452
ProjectionInfo * mas_proj
Definition execnodes.h:453
ExprState * mas_whenqual
Definition execnodes.h:455
ResultRelInfo * resultRelInfo
Definition execnodes.h:1411
ResultRelInfo * rootResultRelInfo
Definition execnodes.h:1419
List * onConflictCols
Definition plannodes.h:376
List * mergeJoinConditions
Definition plannodes.h:386
CmdType operation
Definition plannodes.h:340
List * resultRelations
Definition plannodes.h:348
List * onConflictSet
Definition plannodes.h:374
List * mergeActionLists
Definition plannodes.h:384
List * returningLists
Definition plannodes.h:358
List * withCheckOptionLists
Definition plannodes.h:352
Node * onConflictWhere
Definition plannodes.h:378
OnConflictAction onConflictAction
Definition plannodes.h:368
Definition nodes.h:135
ExprState * oc_WhereClause
Definition execnodes.h:439
ProjectionInfo * oc_ProjInfo
Definition execnodes.h:437
TupleTableSlot * oc_ProjSlot
Definition execnodes.h:436
LockClauseStrength oc_LockStrength
Definition execnodes.h:438
PartitionRangeDatumKind ** kind
Definition partbounds.h:84
int last_found_datum_index
Definition partdesc.h:46
PartitionBoundInfo boundinfo
Definition partdesc.h:38
int last_found_part_index
Definition partdesc.h:52
TupleTableSlot * tupslot
int indexes[FLEXIBLE_ARRAY_MEMBER]
AttrNumber * partattrs
Definition partcache.h:29
FmgrInfo * partsupfunc
Definition partprune.h:56
ExprContext * exprcontext
Definition partprune.h:60
MemoryContext ppccontext
Definition partprune.h:58
PartitionBoundInfo boundinfo
Definition partprune.h:54
PlanState * planstate
Definition partprune.h:59
FmgrInfo * stepcmpfuncs
Definition partprune.h:57
ExprState ** exprstates
Definition partprune.h:61
PartitionPruneStep step
Definition plannodes.h:1777
Bitmapset * nullkeys
Definition plannodes.h:1782
PartitionDispatch * partition_dispatch_info
ResultRelInfo ** partitions
ResultRelInfo ** nonleaf_partitions
Bitmapset * present_parts
Definition plannodes.h:1702
Plan * plan
Definition execnodes.h:1168
EState * state
Definition execnodes.h:1170
ExprContext * ps_ExprContext
Definition execnodes.h:1207
TupleTableSlot * ps_ResultTupleSlot
Definition execnodes.h:1206
Form_pg_index rd_index
Definition rel.h:192
Oid * rd_opfamily
Definition rel.h:207
Oid * rd_indcollation
Definition rel.h:217
Form_pg_class rd_rel
Definition rel.h:111
OnConflictActionState * ri_onConflict
Definition execnodes.h:586
List * ri_onConflictArbiterIndexes
Definition execnodes.h:583
Relation ri_RelationDesc
Definition execnodes.h:483
Index ri_RangeTableIndex
Definition execnodes.h:480
struct FdwRoutine * ri_FdwRoutine
Definition execnodes.h:536
AttrMap * attrMap
Definition tupconvert.h:28
void table_close(Relation relation, LOCKMODE lockmode)
Definition table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition tableam.c:92
TupleTableSlot * execute_attr_map_slot(AttrMap *attrMap, TupleTableSlot *in_slot, TupleTableSlot *out_slot)
Definition tupconvert.c:193
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition tuptable.h:398
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457
#define IsolationUsesXactSnapshot()
Definition xact.h:52