PostgreSQL Source Code git master
execPartition.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * execPartition.c
4 * Support routines for partitioning.
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/executor/execPartition.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/table.h"
17#include "access/tableam.h"
18#include "catalog/index.h"
19#include "catalog/partition.h"
21#include "executor/executor.h"
23#include "foreign/fdwapi.h"
24#include "mb/pg_wchar.h"
25#include "miscadmin.h"
30#include "utils/acl.h"
31#include "utils/lsyscache.h"
32#include "utils/partcache.h"
33#include "utils/rls.h"
34#include "utils/ruleutils.h"
35
36
37/*-----------------------
38 * PartitionTupleRouting - Encapsulates all information required to
39 * route a tuple inserted into a partitioned table to one of its leaf
40 * partitions.
41 *
42 * partition_root
43 * The partitioned table that's the target of the command.
44 *
45 * partition_dispatch_info
46 * Array of 'max_dispatch' elements containing a pointer to a
47 * PartitionDispatch object for every partitioned table touched by tuple
48 * routing. The entry for the target partitioned table is *always*
49 * present in the 0th element of this array. See comment for
50 * PartitionDispatchData->indexes for details on how this array is
51 * indexed.
52 *
53 * nonleaf_partitions
54 * Array of 'max_dispatch' elements containing pointers to fake
55 * ResultRelInfo objects for nonleaf partitions, useful for checking
56 * the partition constraint.
57 *
58 * num_dispatch
59 * The current number of items stored in the 'partition_dispatch_info'
60 * array. Also serves as the index of the next free array element for
61 * new PartitionDispatch objects that need to be stored.
62 *
63 * max_dispatch
64 * The current allocated size of the 'partition_dispatch_info' array.
65 *
66 * partitions
67 * Array of 'max_partitions' elements containing a pointer to a
68 * ResultRelInfo for every leaf partition touched by tuple routing.
69 * Some of these are pointers to ResultRelInfos which are borrowed out of
70 * the owning ModifyTableState node. The remainder have been built
71 * especially for tuple routing. See comment for
72 * PartitionDispatchData->indexes for details on how this array is
73 * indexed.
74 *
75 * is_borrowed_rel
76 * Array of 'max_partitions' booleans recording whether a given entry
77 * in 'partitions' is a ResultRelInfo pointer borrowed from the owning
78 * ModifyTableState node, rather than being built here.
79 *
80 * num_partitions
81 * The current number of items stored in the 'partitions' array. Also
82 * serves as the index of the next free array element for new
83 * ResultRelInfo objects that need to be stored.
84 *
85 * max_partitions
86 * The current allocated size of the 'partitions' array.
87 *
88 * memcxt
89 * Memory context used to allocate subsidiary structs.
90 *-----------------------
91 */
93{
104};
105
106/*-----------------------
107 * PartitionDispatch - information about one partitioned table in a partition
108 * hierarchy required to route a tuple to any of its partitions. A
109 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
110 * struct and stored inside its 'partition_dispatch_info' array.
111 *
112 * reldesc
113 * Relation descriptor of the table
114 *
115 * key
116 * Partition key information of the table
117 *
118 * keystate
119 * Execution state required for expressions in the partition key
120 *
121 * partdesc
122 * Partition descriptor of the table
123 *
124 * tupslot
125 * A standalone TupleTableSlot initialized with this table's tuple
126 * descriptor, or NULL if no tuple conversion between the parent is
127 * required.
128 *
129 * tupmap
130 * TupleConversionMap to convert from the parent's rowtype to this table's
131 * rowtype (when extracting the partition key of a tuple just before
132 * routing it through this table). A NULL value is stored if no tuple
133 * conversion is required.
134 *
135 * indexes
136 * Array of partdesc->nparts elements. For leaf partitions the index
137 * corresponds to the partition's ResultRelInfo in the encapsulating
138 * PartitionTupleRouting's partitions array. For partitioned partitions,
139 * the index corresponds to the PartitionDispatch for it in its
140 * partition_dispatch_info array. -1 indicates we've not yet allocated
141 * anything in PartitionTupleRouting for the partition.
142 *-----------------------
143 */
145{
148 List *keystate; /* list of ExprState */
154
155
157 EState *estate, PartitionTupleRouting *proute,
158 PartitionDispatch dispatch,
159 ResultRelInfo *rootResultRelInfo,
160 int partidx);
161static void ExecInitRoutingInfo(ModifyTableState *mtstate,
162 EState *estate,
163 PartitionTupleRouting *proute,
164 PartitionDispatch dispatch,
165 ResultRelInfo *partRelInfo,
166 int partidx,
167 bool is_borrowed_rel);
169 PartitionTupleRouting *proute,
170 Oid partoid, PartitionDispatch parent_pd,
171 int partidx, ResultRelInfo *rootResultRelInfo);
173 TupleTableSlot *slot,
174 EState *estate,
175 Datum *values,
176 bool *isnull);
178 const bool *isnull);
180 const Datum *values,
181 const bool *isnull,
182 int maxfieldlen);
183static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
184static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
186 PartitionPruneInfo *pruneinfo,
187 Bitmapset **all_leafpart_rtis);
189 List *pruning_steps,
190 PartitionDesc partdesc,
191 PartitionKey partkey,
192 PlanState *planstate,
193 ExprContext *econtext);
195 PlanState *parent_plan,
196 Bitmapset *initially_valid_subplans,
197 int n_total_subplans);
200 bool initial_prune,
201 Bitmapset **validsubplans,
202 Bitmapset **validsubplan_rtis);
203
204
205/*
206 * ExecSetupPartitionTupleRouting - sets up information needed during
207 * tuple routing for partitioned tables, encapsulates it in
208 * PartitionTupleRouting, and returns it.
209 *
210 * Callers must use the returned PartitionTupleRouting during calls to
211 * ExecFindPartition(). The actual ResultRelInfo for a partition is only
212 * allocated when the partition is found for the first time.
213 *
214 * The current memory context is used to allocate this struct and all
215 * subsidiary structs that will be allocated from it later on. Typically
216 * it should be estate->es_query_cxt.
217 */
220{
221 PartitionTupleRouting *proute;
222
223 /*
224 * Here we attempt to expend as little effort as possible in setting up
225 * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
226 * demand, only when we actually need to route a tuple to that partition.
227 * The reason for this is that a common case is for INSERT to insert a
228 * single tuple into a partitioned table and this must be fast.
229 */
231 proute->partition_root = rel;
233 /* Rest of members initialized by zeroing */
234
235 /*
236 * Initialize this table's PartitionDispatch object. Here we pass in the
237 * parent as NULL as we don't need to care about any parent of the target
238 * partitioned table.
239 */
241 NULL, 0, NULL);
242
243 return proute;
244}
245
246/*
247 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
248 * the tuple contained in *slot should belong to.
249 *
250 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
251 * one up or reuse one from mtstate's resultRelInfo array. When reusing a
252 * ResultRelInfo from the mtstate we verify that the relation is a valid
253 * target for INSERTs and initialize tuple routing information.
254 *
255 * rootResultRelInfo is the relation named in the query.
256 *
257 * estate must be non-NULL; we'll need it to compute any expressions in the
258 * partition keys. Also, its per-tuple contexts are used as evaluation
259 * scratch space.
260 *
261 * If no leaf partition is found, this routine errors out with the appropriate
262 * error message. An error may also be raised if the found target partition
263 * is not a valid target for an INSERT.
264 */
267 ResultRelInfo *rootResultRelInfo,
268 PartitionTupleRouting *proute,
269 TupleTableSlot *slot, EState *estate)
270{
273 bool isnull[PARTITION_MAX_KEYS];
274 Relation rel;
275 PartitionDispatch dispatch;
276 PartitionDesc partdesc;
277 ExprContext *ecxt = GetPerTupleExprContext(estate);
278 TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
279 TupleTableSlot *rootslot = slot;
280 TupleTableSlot *myslot = NULL;
281 MemoryContext oldcxt;
282 ResultRelInfo *rri = NULL;
283
284 /* use per-tuple context here to avoid leaking memory */
286
287 /*
288 * First check the root table's partition constraint, if any. No point in
289 * routing the tuple if it doesn't belong in the root table itself.
290 */
291 if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
292 ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
293
294 /* start with the root partitioned table */
295 dispatch = pd[0];
296 while (dispatch != NULL)
297 {
298 int partidx = -1;
299 bool is_leaf;
300
302
303 rel = dispatch->reldesc;
304 partdesc = dispatch->partdesc;
305
306 /*
307 * Extract partition key from tuple. Expression evaluation machinery
308 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
309 * point to the correct tuple slot. The slot might have changed from
310 * what was used for the parent table if the table of the current
311 * partitioning level has different tuple descriptor from the parent.
312 * So update ecxt_scantuple accordingly.
313 */
314 ecxt->ecxt_scantuple = slot;
315 FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
316
317 /*
318 * If this partitioned table has no partitions or no partition for
319 * these values, error out.
320 */
321 if (partdesc->nparts == 0 ||
322 (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
323 {
324 char *val_desc;
325
327 values, isnull, 64);
330 (errcode(ERRCODE_CHECK_VIOLATION),
331 errmsg("no partition of relation \"%s\" found for row",
333 val_desc ?
334 errdetail("Partition key of the failing row contains %s.",
335 val_desc) : 0,
336 errtable(rel)));
337 }
338
339 is_leaf = partdesc->is_leaf[partidx];
340 if (is_leaf)
341 {
342 /*
343 * We've reached the leaf -- hurray, we're done. Look to see if
344 * we've already got a ResultRelInfo for this partition.
345 */
346 if (likely(dispatch->indexes[partidx] >= 0))
347 {
348 /* ResultRelInfo already built */
349 Assert(dispatch->indexes[partidx] < proute->num_partitions);
350 rri = proute->partitions[dispatch->indexes[partidx]];
351 }
352 else
353 {
354 /*
355 * If the partition is known in the owning ModifyTableState
356 * node, we can re-use that ResultRelInfo instead of creating
357 * a new one with ExecInitPartitionInfo().
358 */
359 rri = ExecLookupResultRelByOid(mtstate,
360 partdesc->oids[partidx],
361 true, false);
362 if (rri)
363 {
364 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
365
366 /* Verify this ResultRelInfo allows INSERTs */
368 node ? node->onConflictAction : ONCONFLICT_NONE,
369 NIL);
370
371 /*
372 * Initialize information needed to insert this and
373 * subsequent tuples routed to this partition.
374 */
375 ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
376 rri, partidx, true);
377 }
378 else
379 {
380 /* We need to create a new one. */
381 rri = ExecInitPartitionInfo(mtstate, estate, proute,
382 dispatch,
383 rootResultRelInfo, partidx);
384 }
385 }
386 Assert(rri != NULL);
387
388 /* Signal to terminate the loop */
389 dispatch = NULL;
390 }
391 else
392 {
393 /*
394 * Partition is a sub-partitioned table; get the PartitionDispatch
395 */
396 if (likely(dispatch->indexes[partidx] >= 0))
397 {
398 /* Already built. */
399 Assert(dispatch->indexes[partidx] < proute->num_dispatch);
400
401 rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
402
403 /*
404 * Move down to the next partition level and search again
405 * until we find a leaf partition that matches this tuple
406 */
407 dispatch = pd[dispatch->indexes[partidx]];
408 }
409 else
410 {
411 /* Not yet built. Do that now. */
412 PartitionDispatch subdispatch;
413
414 /*
415 * Create the new PartitionDispatch. We pass the current one
416 * in as the parent PartitionDispatch
417 */
418 subdispatch = ExecInitPartitionDispatchInfo(estate,
419 proute,
420 partdesc->oids[partidx],
421 dispatch, partidx,
422 mtstate->rootResultRelInfo);
423 Assert(dispatch->indexes[partidx] >= 0 &&
424 dispatch->indexes[partidx] < proute->num_dispatch);
425
426 rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
427 dispatch = subdispatch;
428 }
429
430 /*
431 * Convert the tuple to the new parent's layout, if different from
432 * the previous parent.
433 */
434 if (dispatch->tupslot)
435 {
436 AttrMap *map = dispatch->tupmap;
437 TupleTableSlot *tempslot = myslot;
438
439 myslot = dispatch->tupslot;
440 slot = execute_attr_map_slot(map, slot, myslot);
441
442 if (tempslot != NULL)
443 ExecClearTuple(tempslot);
444 }
445 }
446
447 /*
448 * If this partition is the default one, we must check its partition
449 * constraint now, which may have changed concurrently due to
450 * partitions being added to the parent.
451 *
452 * (We do this here, and do not rely on ExecInsert doing it, because
453 * we don't want to miss doing it for non-leaf partitions.)
454 */
455 if (partidx == partdesc->boundinfo->default_index)
456 {
457 /*
458 * The tuple must match the partition's layout for the constraint
459 * expression to be evaluated successfully. If the partition is
460 * sub-partitioned, that would already be the case due to the code
461 * above, but for a leaf partition the tuple still matches the
462 * parent's layout.
463 *
464 * Note that we have a map to convert from root to current
465 * partition, but not from immediate parent to current partition.
466 * So if we have to convert, do it from the root slot; if not, use
467 * the root slot as-is.
468 */
469 if (is_leaf)
470 {
471 TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);
472
473 if (map)
474 slot = execute_attr_map_slot(map->attrMap, rootslot,
476 else
477 slot = rootslot;
478 }
479
480 ExecPartitionCheck(rri, slot, estate, true);
481 }
482 }
483
484 /* Release the tuple in the lowest parent's dedicated slot. */
485 if (myslot != NULL)
486 ExecClearTuple(myslot);
487 /* and restore ecxt's scantuple */
488 ecxt->ecxt_scantuple = ecxt_scantuple_saved;
489 MemoryContextSwitchTo(oldcxt);
490
491 return rri;
492}
493
494/*
495 * IsIndexCompatibleAsArbiter
496 * Return true if two indexes are identical for INSERT ON CONFLICT
497 * purposes.
498 *
499 * Only indexes of the same relation are supported.
500 */
501static bool
503 IndexInfo *arbiterIndexInfo,
504 Relation indexRelation,
505 IndexInfo *indexInfo)
506{
507 Assert(arbiterIndexRelation->rd_index->indrelid == indexRelation->rd_index->indrelid);
508
509 /* must match whether they're unique */
510 if (arbiterIndexInfo->ii_Unique != indexInfo->ii_Unique)
511 return false;
512
513 /* No support currently for comparing exclusion indexes. */
514 if (arbiterIndexInfo->ii_ExclusionOps != NULL ||
515 indexInfo->ii_ExclusionOps != NULL)
516 return false;
517
518 /* the "nulls not distinct" criterion must match */
519 if (arbiterIndexInfo->ii_NullsNotDistinct !=
520 indexInfo->ii_NullsNotDistinct)
521 return false;
522
523 /* number of key attributes must match */
524 if (arbiterIndexInfo->ii_NumIndexKeyAttrs !=
525 indexInfo->ii_NumIndexKeyAttrs)
526 return false;
527
528 for (int i = 0; i < arbiterIndexInfo->ii_NumIndexKeyAttrs; i++)
529 {
530 if (arbiterIndexRelation->rd_indcollation[i] !=
531 indexRelation->rd_indcollation[i])
532 return false;
533
534 if (arbiterIndexRelation->rd_opfamily[i] !=
535 indexRelation->rd_opfamily[i])
536 return false;
537
538 if (arbiterIndexRelation->rd_index->indkey.values[i] !=
539 indexRelation->rd_index->indkey.values[i])
540 return false;
541 }
542
543 if (list_difference(RelationGetIndexExpressions(arbiterIndexRelation),
544 RelationGetIndexExpressions(indexRelation)) != NIL)
545 return false;
546
547 if (list_difference(RelationGetIndexPredicate(arbiterIndexRelation),
548 RelationGetIndexPredicate(indexRelation)) != NIL)
549 return false;
550 return true;
551}
552
553/*
554 * ExecInitPartitionInfo
555 * Lock the partition and initialize ResultRelInfo. Also setup other
556 * information for the partition and store it in the next empty slot in
557 * the proute->partitions array.
558 *
559 * Returns the ResultRelInfo
560 */
561static ResultRelInfo *
563 PartitionTupleRouting *proute,
564 PartitionDispatch dispatch,
565 ResultRelInfo *rootResultRelInfo,
566 int partidx)
567{
568 ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
569 Oid partOid = dispatch->partdesc->oids[partidx];
570 Relation partrel;
571 int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
572 Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
573 ResultRelInfo *leaf_part_rri;
574 MemoryContext oldcxt;
575 AttrMap *part_attmap = NULL;
576 bool found_whole_row;
577
578 oldcxt = MemoryContextSwitchTo(proute->memcxt);
579
580 partrel = table_open(partOid, RowExclusiveLock);
581
582 leaf_part_rri = makeNode(ResultRelInfo);
583 InitResultRelInfo(leaf_part_rri,
584 partrel,
585 0,
586 rootResultRelInfo,
587 estate->es_instrument);
588
589 /*
590 * Verify result relation is a valid target for an INSERT. An UPDATE of a
591 * partition-key becomes a DELETE+INSERT operation, so this check is still
592 * required when the operation is CMD_UPDATE.
593 */
594 CheckValidResultRel(leaf_part_rri, CMD_INSERT,
595 node ? node->onConflictAction : ONCONFLICT_NONE, NIL);
596
597 /*
598 * Open partition indices. The user may have asked to check for conflicts
599 * within this leaf partition and do "nothing" instead of throwing an
600 * error. Be prepared in that case by initializing the index information
601 * needed by ExecInsert() to perform speculative insertions.
602 */
603 if (partrel->rd_rel->relhasindex &&
604 leaf_part_rri->ri_IndexRelationDescs == NULL)
605 ExecOpenIndices(leaf_part_rri,
606 (node != NULL &&
608
609 /*
610 * Build WITH CHECK OPTION constraints for the partition. Note that we
611 * didn't build the withCheckOptionList for partitions within the planner,
612 * but simple translation of varattnos will suffice. This only occurs for
613 * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
614 * didn't find a result rel to reuse.
615 */
616 if (node && node->withCheckOptionLists != NIL)
617 {
618 List *wcoList;
619 List *wcoExprs = NIL;
620 ListCell *ll;
621
622 /*
623 * In the case of INSERT on a partitioned table, there is only one
624 * plan. Likewise, there is only one WCO list, not one per partition.
625 * For UPDATE/MERGE, there are as many WCO lists as there are plans.
626 */
627 Assert((node->operation == CMD_INSERT &&
628 list_length(node->withCheckOptionLists) == 1 &&
629 list_length(node->resultRelations) == 1) ||
630 (node->operation == CMD_UPDATE &&
633 (node->operation == CMD_MERGE &&
636
637 /*
638 * Use the WCO list of the first plan as a reference to calculate
639 * attno's for the WCO list of this partition. In the INSERT case,
640 * that refers to the root partitioned table, whereas in the UPDATE
641 * tuple routing case, that refers to the first partition in the
642 * mtstate->resultRelInfo array. In any case, both that relation and
643 * this partition should have the same columns, so we should be able
644 * to map attributes successfully.
645 */
646 wcoList = linitial(node->withCheckOptionLists);
647
648 /*
649 * Convert Vars in it to contain this partition's attribute numbers.
650 */
651 part_attmap =
653 RelationGetDescr(firstResultRel),
654 false);
655 wcoList = (List *)
656 map_variable_attnos((Node *) wcoList,
657 firstVarno, 0,
658 part_attmap,
659 RelationGetForm(partrel)->reltype,
660 &found_whole_row);
661 /* We ignore the value of found_whole_row. */
662
663 foreach(ll, wcoList)
664 {
666 ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
667 &mtstate->ps);
668
669 wcoExprs = lappend(wcoExprs, wcoExpr);
670 }
671
672 leaf_part_rri->ri_WithCheckOptions = wcoList;
673 leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
674 }
675
676 /*
677 * Build the RETURNING projection for the partition. Note that we didn't
678 * build the returningList for partitions within the planner, but simple
679 * translation of varattnos will suffice. This only occurs for the INSERT
680 * case or in the case of UPDATE/MERGE tuple routing where we didn't find
681 * a result rel to reuse.
682 */
683 if (node && node->returningLists != NIL)
684 {
685 TupleTableSlot *slot;
686 ExprContext *econtext;
687 List *returningList;
688
689 /* See the comment above for WCO lists. */
690 Assert((node->operation == CMD_INSERT &&
691 list_length(node->returningLists) == 1 &&
692 list_length(node->resultRelations) == 1) ||
693 (node->operation == CMD_UPDATE &&
696 (node->operation == CMD_MERGE &&
699
700 /*
701 * Use the RETURNING list of the first plan as a reference to
702 * calculate attno's for the RETURNING list of this partition. See
703 * the comment above for WCO lists for more details on why this is
704 * okay.
705 */
706 returningList = linitial(node->returningLists);
707
708 /*
709 * Convert Vars in it to contain this partition's attribute numbers.
710 */
711 if (part_attmap == NULL)
712 part_attmap =
714 RelationGetDescr(firstResultRel),
715 false);
716 returningList = (List *)
717 map_variable_attnos((Node *) returningList,
718 firstVarno, 0,
719 part_attmap,
720 RelationGetForm(partrel)->reltype,
721 &found_whole_row);
722 /* We ignore the value of found_whole_row. */
723
724 leaf_part_rri->ri_returningList = returningList;
725
726 /*
727 * Initialize the projection itself.
728 *
729 * Use the slot and the expression context that would have been set up
730 * in ExecInitModifyTable() for projection's output.
731 */
732 Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
733 slot = mtstate->ps.ps_ResultTupleSlot;
734 Assert(mtstate->ps.ps_ExprContext != NULL);
735 econtext = mtstate->ps.ps_ExprContext;
736 leaf_part_rri->ri_projectReturning =
737 ExecBuildProjectionInfo(returningList, econtext, slot,
738 &mtstate->ps, RelationGetDescr(partrel));
739 }
740
741 /* Set up information needed for routing tuples to the partition. */
742 ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
743 leaf_part_rri, partidx, false);
744
745 /*
746 * If there is an ON CONFLICT clause, initialize state for it.
747 */
748 if (node && node->onConflictAction != ONCONFLICT_NONE)
749 {
750 TupleDesc partrelDesc = RelationGetDescr(partrel);
751 ExprContext *econtext = mtstate->ps.ps_ExprContext;
752 List *arbiterIndexes = NIL;
753 int additional_arbiters = 0;
754
755 /*
756 * If there is a list of arbiter indexes, map it to a list of indexes
757 * in the partition. We also add any "identical indexes" to any of
758 * those, to cover the case where one of them is concurrently being
759 * reindexed.
760 */
761 if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
762 {
763 List *unparented_idxs = NIL,
764 *arbiters_listidxs = NIL;
765
766 for (int listidx = 0; listidx < leaf_part_rri->ri_NumIndices; listidx++)
767 {
768 Oid indexoid;
769 List *ancestors;
770
771 /*
772 * If one of this index's ancestors is in the root's arbiter
773 * list, then use this index as arbiter for this partition.
774 * Otherwise, if this index has no parent, track it for later,
775 * in case REINDEX CONCURRENTLY is working on one of the
776 * arbiters.
777 *
778 * XXX get_partition_ancestors is slow: it scans pg_inherits
779 * each time. Consider a syscache or some other way to cache?
780 */
781 indexoid = RelationGetRelid(leaf_part_rri->ri_IndexRelationDescs[listidx]);
782 ancestors = get_partition_ancestors(indexoid);
783 if (ancestors != NIL)
784 {
785 foreach_oid(parent_idx, rootResultRelInfo->ri_onConflictArbiterIndexes)
786 {
787 if (list_member_oid(ancestors, parent_idx))
788 {
789 arbiterIndexes = lappend_oid(arbiterIndexes, indexoid);
790 arbiters_listidxs = lappend_int(arbiters_listidxs, listidx);
791 break;
792 }
793 }
794 }
795 else
796 unparented_idxs = lappend_int(unparented_idxs, listidx);
797 list_free(ancestors);
798 }
799
800 /*
801 * If we found any indexes with no ancestors, it's possible that
802 * some arbiter index is undergoing concurrent reindex. Match all
803 * unparented indexes against arbiters; add unparented matching
804 * ones as "additional arbiters".
805 *
806 * This is critical so that all concurrent transactions use the
807 * same set as arbiters during REINDEX CONCURRENTLY, to avoid
808 * spurious "duplicate key" errors.
809 */
810 if (unparented_idxs && arbiterIndexes)
811 {
812 foreach_int(unparented_i, unparented_idxs)
813 {
814 Relation unparented_rel;
815 IndexInfo *unparenred_ii;
816
817 unparented_rel = leaf_part_rri->ri_IndexRelationDescs[unparented_i];
818 unparenred_ii = leaf_part_rri->ri_IndexRelationInfo[unparented_i];
819
820 Assert(!list_member_oid(arbiterIndexes,
821 unparented_rel->rd_index->indexrelid));
822
823 /* Ignore indexes not ready */
824 if (!unparenred_ii->ii_ReadyForInserts)
825 continue;
826
827 foreach_int(arbiter_i, arbiters_listidxs)
828 {
829 Relation arbiter_rel;
830 IndexInfo *arbiter_ii;
831
832 arbiter_rel = leaf_part_rri->ri_IndexRelationDescs[arbiter_i];
833 arbiter_ii = leaf_part_rri->ri_IndexRelationInfo[arbiter_i];
834
835 /*
836 * If the non-ancestor index is compatible with the
837 * arbiter, use the non-ancestor as arbiter too.
838 */
839 if (IsIndexCompatibleAsArbiter(arbiter_rel,
840 arbiter_ii,
841 unparented_rel,
842 unparenred_ii))
843 {
844 arbiterIndexes = lappend_oid(arbiterIndexes,
845 unparented_rel->rd_index->indexrelid);
846 additional_arbiters++;
847 break;
848 }
849 }
850 }
851 }
852 list_free(unparented_idxs);
853 list_free(arbiters_listidxs);
854 }
855
856 /*
857 * We expect to find as many arbiter indexes on this partition as the
858 * root has, plus however many "additional arbiters" (to wit: those
859 * being concurrently rebuilt) we found.
860 */
861 if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
862 list_length(arbiterIndexes) - additional_arbiters)
863 elog(ERROR, "invalid arbiter index list");
864 leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
865
866 /*
867 * In the DO UPDATE case, we have some more state to initialize.
868 */
870 {
873
874 map = ExecGetRootToChildMap(leaf_part_rri, estate);
875
876 Assert(node->onConflictSet != NIL);
877 Assert(rootResultRelInfo->ri_onConflict != NULL);
878
879 leaf_part_rri->ri_onConflict = onconfl;
880
881 /*
882 * Need a separate existing slot for each partition, as the
883 * partition could be of a different AM, even if the tuple
884 * descriptors match.
885 */
886 onconfl->oc_Existing =
887 table_slot_create(leaf_part_rri->ri_RelationDesc,
888 &mtstate->ps.state->es_tupleTable);
889
890 /*
891 * If the partition's tuple descriptor matches exactly the root
892 * parent (the common case), we can re-use most of the parent's ON
893 * CONFLICT SET state, skipping a bunch of work. Otherwise, we
894 * need to create state specific to this partition.
895 */
896 if (map == NULL)
897 {
898 /*
899 * It's safe to reuse these from the partition root, as we
900 * only process one tuple at a time (therefore we won't
901 * overwrite needed data in slots), and the results of
902 * projections are independent of the underlying storage.
903 * Projections and where clauses themselves don't store state
904 * / are independent of the underlying storage.
905 */
906 onconfl->oc_ProjSlot =
907 rootResultRelInfo->ri_onConflict->oc_ProjSlot;
908 onconfl->oc_ProjInfo =
909 rootResultRelInfo->ri_onConflict->oc_ProjInfo;
910 onconfl->oc_WhereClause =
911 rootResultRelInfo->ri_onConflict->oc_WhereClause;
912 }
913 else
914 {
915 List *onconflset;
916 List *onconflcols;
917
918 /*
919 * Translate expressions in onConflictSet to account for
920 * different attribute numbers. For that, map partition
921 * varattnos twice: first to catch the EXCLUDED
922 * pseudo-relation (INNER_VAR), and second to handle the main
923 * target relation (firstVarno).
924 */
925 onconflset = copyObject(node->onConflictSet);
926 if (part_attmap == NULL)
927 part_attmap =
929 RelationGetDescr(firstResultRel),
930 false);
931 onconflset = (List *)
932 map_variable_attnos((Node *) onconflset,
933 INNER_VAR, 0,
934 part_attmap,
935 RelationGetForm(partrel)->reltype,
936 &found_whole_row);
937 /* We ignore the value of found_whole_row. */
938 onconflset = (List *)
939 map_variable_attnos((Node *) onconflset,
940 firstVarno, 0,
941 part_attmap,
942 RelationGetForm(partrel)->reltype,
943 &found_whole_row);
944 /* We ignore the value of found_whole_row. */
945
946 /* Finally, adjust the target colnos to match the partition. */
947 onconflcols = adjust_partition_colnos(node->onConflictCols,
948 leaf_part_rri);
949
950 /* create the tuple slot for the UPDATE SET projection */
951 onconfl->oc_ProjSlot =
952 table_slot_create(partrel,
953 &mtstate->ps.state->es_tupleTable);
954
955 /* build UPDATE SET projection state */
956 onconfl->oc_ProjInfo =
957 ExecBuildUpdateProjection(onconflset,
958 true,
959 onconflcols,
960 partrelDesc,
961 econtext,
962 onconfl->oc_ProjSlot,
963 &mtstate->ps);
964
965 /*
966 * If there is a WHERE clause, initialize state where it will
967 * be evaluated, mapping the attribute numbers appropriately.
968 * As with onConflictSet, we need to map partition varattnos
969 * to the partition's tupdesc.
970 */
971 if (node->onConflictWhere)
972 {
973 List *clause;
974
975 clause = copyObject((List *) node->onConflictWhere);
976 clause = (List *)
977 map_variable_attnos((Node *) clause,
978 INNER_VAR, 0,
979 part_attmap,
980 RelationGetForm(partrel)->reltype,
981 &found_whole_row);
982 /* We ignore the value of found_whole_row. */
983 clause = (List *)
984 map_variable_attnos((Node *) clause,
985 firstVarno, 0,
986 part_attmap,
987 RelationGetForm(partrel)->reltype,
988 &found_whole_row);
989 /* We ignore the value of found_whole_row. */
990 onconfl->oc_WhereClause =
991 ExecInitQual(clause, &mtstate->ps);
992 }
993 }
994 }
995 }
996
997 /*
998 * Since we've just initialized this ResultRelInfo, it's not in any list
999 * attached to the estate as yet. Add it, so that it can be found later.
1000 *
1001 * Note that the entries in this list appear in no predetermined order,
1002 * because partition result rels are initialized as and when they're
1003 * needed.
1004 */
1008 leaf_part_rri);
1009
1010 /*
1011 * Initialize information about this partition that's needed to handle
1012 * MERGE. We take the "first" result relation's mergeActionList as
1013 * reference and make copy for this relation, converting stuff that
1014 * references attribute numbers to match this relation's.
1015 *
1016 * This duplicates much of the logic in ExecInitMerge(), so if something
1017 * changes there, look here too.
1018 */
1019 if (node && node->operation == CMD_MERGE)
1020 {
1021 List *firstMergeActionList = linitial(node->mergeActionLists);
1022 ListCell *lc;
1023 ExprContext *econtext = mtstate->ps.ps_ExprContext;
1024 Node *joinCondition;
1025
1026 if (part_attmap == NULL)
1027 part_attmap =
1029 RelationGetDescr(firstResultRel),
1030 false);
1031
1032 if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
1033 ExecInitMergeTupleSlots(mtstate, leaf_part_rri);
1034
1035 /* Initialize state for join condition checking. */
1036 joinCondition =
1038 firstVarno, 0,
1039 part_attmap,
1040 RelationGetForm(partrel)->reltype,
1041 &found_whole_row);
1042 /* We ignore the value of found_whole_row. */
1043 leaf_part_rri->ri_MergeJoinCondition =
1044 ExecInitQual((List *) joinCondition, &mtstate->ps);
1045
1046 foreach(lc, firstMergeActionList)
1047 {
1048 /* Make a copy for this relation to be safe. */
1050 MergeActionState *action_state;
1051
1052 /* Generate the action's state for this relation */
1053 action_state = makeNode(MergeActionState);
1054 action_state->mas_action = action;
1055
1056 /* And put the action in the appropriate list */
1057 leaf_part_rri->ri_MergeActions[action->matchKind] =
1058 lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
1059 action_state);
1060
1061 switch (action->commandType)
1062 {
1063 case CMD_INSERT:
1064
1065 /*
1066 * ExecCheckPlanOutput() already done on the targetlist
1067 * when "first" result relation initialized and it is same
1068 * for all result relations.
1069 */
1070 action_state->mas_proj =
1071 ExecBuildProjectionInfo(action->targetList, econtext,
1072 leaf_part_rri->ri_newTupleSlot,
1073 &mtstate->ps,
1074 RelationGetDescr(partrel));
1075 break;
1076 case CMD_UPDATE:
1077
1078 /*
1079 * Convert updateColnos from "first" result relation
1080 * attribute numbers to this result rel's.
1081 */
1082 if (part_attmap)
1083 action->updateColnos =
1085 part_attmap);
1086 action_state->mas_proj =
1088 true,
1089 action->updateColnos,
1090 RelationGetDescr(leaf_part_rri->ri_RelationDesc),
1091 econtext,
1092 leaf_part_rri->ri_newTupleSlot,
1093 NULL);
1094 break;
1095 case CMD_DELETE:
1096 case CMD_NOTHING:
1097 /* Nothing to do */
1098 break;
1099
1100 default:
1101 elog(ERROR, "unknown action in MERGE WHEN clause");
1102 }
1103
1104 /* found_whole_row intentionally ignored. */
1105 action->qual =
1107 firstVarno, 0,
1108 part_attmap,
1109 RelationGetForm(partrel)->reltype,
1110 &found_whole_row);
1111 action_state->mas_whenqual =
1112 ExecInitQual((List *) action->qual, &mtstate->ps);
1113 }
1114 }
1115 MemoryContextSwitchTo(oldcxt);
1116
1117 return leaf_part_rri;
1118}
1119
1120/*
1121 * ExecInitRoutingInfo
1122 * Set up information needed for translating tuples between root
1123 * partitioned table format and partition format, and keep track of it
1124 * in PartitionTupleRouting.
1125 */
1126static void
1128 EState *estate,
1129 PartitionTupleRouting *proute,
1130 PartitionDispatch dispatch,
1131 ResultRelInfo *partRelInfo,
1132 int partidx,
1133 bool is_borrowed_rel)
1134{
1135 MemoryContext oldcxt;
1136 int rri_index;
1137
1138 oldcxt = MemoryContextSwitchTo(proute->memcxt);
1139
1140 /*
1141 * Set up tuple conversion between root parent and the partition if the
1142 * two have different rowtypes. If conversion is indeed required, also
1143 * initialize a slot dedicated to storing this partition's converted
1144 * tuples. Various operations that are applied to tuples after routing,
1145 * such as checking constraints, will refer to this slot.
1146 */
1147 if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
1148 {
1149 Relation partrel = partRelInfo->ri_RelationDesc;
1150
1151 /*
1152 * This pins the partition's TupleDesc, which will be released at the
1153 * end of the command.
1154 */
1155 partRelInfo->ri_PartitionTupleSlot =
1156 table_slot_create(partrel, &estate->es_tupleTable);
1157 }
1158 else
1159 partRelInfo->ri_PartitionTupleSlot = NULL;
1160
1161 /*
1162 * If the partition is a foreign table, let the FDW init itself for
1163 * routing tuples to the partition.
1164 */
1165 if (partRelInfo->ri_FdwRoutine != NULL &&
1166 partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1167 partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1168
1169 /*
1170 * Determine if the FDW supports batch insert and determine the batch size
1171 * (a FDW may support batching, but it may be disabled for the
1172 * server/table or for this particular query).
1173 *
1174 * If the FDW does not support batching, we set the batch size to 1.
1175 */
1176 if (partRelInfo->ri_FdwRoutine != NULL &&
1179 partRelInfo->ri_BatchSize =
1180 partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1181 else
1182 partRelInfo->ri_BatchSize = 1;
1183
1184 Assert(partRelInfo->ri_BatchSize >= 1);
1185
1186 partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1187
1188 /*
1189 * Keep track of it in the PartitionTupleRouting->partitions array.
1190 */
1191 Assert(dispatch->indexes[partidx] == -1);
1192
1193 rri_index = proute->num_partitions++;
1194
1195 /* Allocate or enlarge the array, as needed */
1196 if (proute->num_partitions >= proute->max_partitions)
1197 {
1198 if (proute->max_partitions == 0)
1199 {
1200 proute->max_partitions = 8;
1201 proute->partitions = (ResultRelInfo **)
1202 palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
1203 proute->is_borrowed_rel = (bool *)
1204 palloc(sizeof(bool) * proute->max_partitions);
1205 }
1206 else
1207 {
1208 proute->max_partitions *= 2;
1209 proute->partitions = (ResultRelInfo **)
1210 repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1211 proute->max_partitions);
1212 proute->is_borrowed_rel = (bool *)
1213 repalloc(proute->is_borrowed_rel, sizeof(bool) *
1214 proute->max_partitions);
1215 }
1216 }
1217
1218 proute->partitions[rri_index] = partRelInfo;
1219 proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
1220 dispatch->indexes[partidx] = rri_index;
1221
1222 MemoryContextSwitchTo(oldcxt);
1223}
1224
1225/*
1226 * ExecInitPartitionDispatchInfo
1227 * Lock the partitioned table (if not locked already) and initialize
1228 * PartitionDispatch for a partitioned table and store it in the next
1229 * available slot in the proute->partition_dispatch_info array. Also,
1230 * record the index into this array in the parent_pd->indexes[] array in
1231 * the partidx element so that we can properly retrieve the newly created
1232 * PartitionDispatch later.
1233 */
1234static PartitionDispatch
1236 PartitionTupleRouting *proute, Oid partoid,
1237 PartitionDispatch parent_pd, int partidx,
1238 ResultRelInfo *rootResultRelInfo)
1239{
1240 Relation rel;
1241 PartitionDesc partdesc;
1243 int dispatchidx;
1244 MemoryContext oldcxt;
1245
1246 /*
1247 * For data modification, it is better that executor does not include
1248 * partitions being detached, except when running in snapshot-isolation
1249 * mode. This means that a read-committed transaction immediately gets a
1250 * "no partition for tuple" error when a tuple is inserted into a
1251 * partition that's being detached concurrently, but a transaction in
1252 * repeatable-read mode can still use such a partition.
1253 */
1254 if (estate->es_partition_directory == NULL)
1255 estate->es_partition_directory =
1258
1259 oldcxt = MemoryContextSwitchTo(proute->memcxt);
1260
1261 /*
1262 * Only sub-partitioned tables need to be locked here. The root
1263 * partitioned table will already have been locked as it's referenced in
1264 * the query's rtable.
1265 */
1266 if (partoid != RelationGetRelid(proute->partition_root))
1267 rel = table_open(partoid, RowExclusiveLock);
1268 else
1269 rel = proute->partition_root;
1270 partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1271
1272 pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
1273 partdesc->nparts * sizeof(int));
1274 pd->reldesc = rel;
1275 pd->key = RelationGetPartitionKey(rel);
1276 pd->keystate = NIL;
1277 pd->partdesc = partdesc;
1278 if (parent_pd != NULL)
1279 {
1280 TupleDesc tupdesc = RelationGetDescr(rel);
1281
1282 /*
1283 * For sub-partitioned tables where the column order differs from its
1284 * direct parent partitioned table, we must store a tuple table slot
1285 * initialized with its tuple descriptor and a tuple conversion map to
1286 * convert a tuple from its parent's rowtype to its own. This is to
1287 * make sure that we are looking at the correct row using the correct
1288 * tuple descriptor when computing its partition key for tuple
1289 * routing.
1290 */
1292 tupdesc,
1293 false);
1294 pd->tupslot = pd->tupmap ?
1295 MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
1296 }
1297 else
1298 {
1299 /* Not required for the root partitioned table */
1300 pd->tupmap = NULL;
1301 pd->tupslot = NULL;
1302 }
1303
1304 /*
1305 * Initialize with -1 to signify that the corresponding partition's
1306 * ResultRelInfo or PartitionDispatch has not been created yet.
1307 */
1308 memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1309
1310 /* Track in PartitionTupleRouting for later use */
1311 dispatchidx = proute->num_dispatch++;
1312
1313 /* Allocate or enlarge the array, as needed */
1314 if (proute->num_dispatch >= proute->max_dispatch)
1315 {
1316 if (proute->max_dispatch == 0)
1317 {
1318 proute->max_dispatch = 4;
1320 palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
1321 proute->nonleaf_partitions = (ResultRelInfo **)
1322 palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
1323 }
1324 else
1325 {
1326 proute->max_dispatch *= 2;
1329 sizeof(PartitionDispatch) * proute->max_dispatch);
1330 proute->nonleaf_partitions = (ResultRelInfo **)
1332 sizeof(ResultRelInfo *) * proute->max_dispatch);
1333 }
1334 }
1335 proute->partition_dispatch_info[dispatchidx] = pd;
1336
1337 /*
1338 * If setting up a PartitionDispatch for a sub-partitioned table, we may
1339 * also need a minimally valid ResultRelInfo for checking the partition
1340 * constraint later; set that up now.
1341 */
1342 if (parent_pd)
1343 {
1345
1346 InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
1347 proute->nonleaf_partitions[dispatchidx] = rri;
1348 }
1349 else
1350 proute->nonleaf_partitions[dispatchidx] = NULL;
1351
1352 /*
1353 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1354 * install a downlink in the parent to allow quick descent.
1355 */
1356 if (parent_pd)
1357 {
1358 Assert(parent_pd->indexes[partidx] == -1);
1359 parent_pd->indexes[partidx] = dispatchidx;
1360 }
1361
1362 MemoryContextSwitchTo(oldcxt);
1363
1364 return pd;
1365}
1366
1367/*
1368 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1369 * routing.
1370 *
1371 * Close all the partitioned tables, leaf partitions, and their indices.
1372 */
1373void
1375 PartitionTupleRouting *proute)
1376{
1377 int i;
1378
1379 /*
1380 * Remember, proute->partition_dispatch_info[0] corresponds to the root
1381 * partitioned table, which we must not try to close, because it is the
1382 * main target table of the query that will be closed by callers such as
1383 * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1384 * partitioned table.
1385 */
1386 for (i = 1; i < proute->num_dispatch; i++)
1387 {
1389
1391
1392 if (pd->tupslot)
1394 }
1395
1396 for (i = 0; i < proute->num_partitions; i++)
1397 {
1398 ResultRelInfo *resultRelInfo = proute->partitions[i];
1399
1400 /* Allow any FDWs to shut down */
1401 if (resultRelInfo->ri_FdwRoutine != NULL &&
1402 resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1403 resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1404 resultRelInfo);
1405
1406 /*
1407 * Close it if it's not one of the result relations borrowed from the
1408 * owning ModifyTableState; those will be closed by ExecEndPlan().
1409 */
1410 if (proute->is_borrowed_rel[i])
1411 continue;
1412
1413 ExecCloseIndices(resultRelInfo);
1414 table_close(resultRelInfo->ri_RelationDesc, NoLock);
1415 }
1416}
1417
1418/* ----------------
1419 * FormPartitionKeyDatum
1420 * Construct values[] and isnull[] arrays for the partition key
1421 * of a tuple.
1422 *
1423 * pd Partition dispatch object of the partitioned table
1424 * slot Heap tuple from which to extract partition key
1425 * estate executor state for evaluating any partition key
1426 * expressions (must be non-NULL)
1427 * values Array of partition key Datums (output area)
1428 * isnull Array of is-null indicators (output area)
1429 *
1430 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1431 * the heap tuple passed in.
1432 * ----------------
1433 */
1434static void
1436 TupleTableSlot *slot,
1437 EState *estate,
1438 Datum *values,
1439 bool *isnull)
1440{
1441 ListCell *partexpr_item;
1442 int i;
1443
1444 if (pd->key->partexprs != NIL && pd->keystate == NIL)
1445 {
1446 /* Check caller has set up context correctly */
1447 Assert(estate != NULL &&
1448 GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1449
1450 /* First time through, set up expression evaluation state */
1451 pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1452 }
1453
1454 partexpr_item = list_head(pd->keystate);
1455 for (i = 0; i < pd->key->partnatts; i++)
1456 {
1457 AttrNumber keycol = pd->key->partattrs[i];
1458 Datum datum;
1459 bool isNull;
1460
1461 if (keycol != 0)
1462 {
1463 /* Plain column; get the value directly from the heap tuple */
1464 datum = slot_getattr(slot, keycol, &isNull);
1465 }
1466 else
1467 {
1468 /* Expression; need to evaluate it */
1469 if (partexpr_item == NULL)
1470 elog(ERROR, "wrong number of partition key expressions");
1471 datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1472 GetPerTupleExprContext(estate),
1473 &isNull);
1474 partexpr_item = lnext(pd->keystate, partexpr_item);
1475 }
1476 values[i] = datum;
1477 isnull[i] = isNull;
1478 }
1479
1480 if (partexpr_item != NULL)
1481 elog(ERROR, "wrong number of partition key expressions");
1482}
1483
1484/*
1485 * The number of times the same partition must be found in a row before we
1486 * switch from a binary search for the given values to just checking if the
1487 * values belong to the last found partition. This must be above 0.
1488 */
1489#define PARTITION_CACHED_FIND_THRESHOLD 16
1490
1491/*
1492 * get_partition_for_tuple
1493 * Finds partition of relation which accepts the partition key specified
1494 * in values and isnull.
1495 *
1496 * Calling this function can be quite expensive when LIST and RANGE
1497 * partitioned tables have many partitions. This is due to the binary search
1498 * that's done to find the correct partition. Many of the use cases for LIST
1499 * and RANGE partitioned tables make it likely that the same partition is
1500 * found in subsequent ExecFindPartition() calls. This is especially true for
1501 * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1502 * partition key is the current time. When asked to find a partition for a
1503 * RANGE or LIST partitioned table, we record the partition index and datum
1504 * offset we've found for the given 'values' in the PartitionDesc (which is
1505 * stored in relcache), and if we keep finding the same partition
1506 * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1507 * logic and instead of performing a binary search to find the correct
1508 * partition, we'll just double-check that 'values' still belong to the last
1509 * found partition, and if so, we'll return that partition index, thus
1510 * skipping the need for the binary search. If we fail to match the last
1511 * partition when double checking, then we fall back on doing a binary search.
1512 * In this case, unless we find 'values' belong to the DEFAULT partition,
1513 * we'll reset the number of times we've hit the same partition so that we
1514 * don't attempt to use the cache again until we've found that partition at
1515 * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1516 *
1517 * For cases where the partition changes on each lookup, the amount of
1518 * additional work required just amounts to recording the last found partition
1519 * and bound offset then resetting the found counter. This is cheap and does
1520 * not appear to cause any meaningful slowdowns for such cases.
1521 *
1522 * No caching of partitions is done when the last found partition is the
1523 * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1524 * is no bound offset storing the matching datum, so we cannot confirm the
1525 * indexes match. For the NULL partition, this is just so cheap, there's no
1526 * sense in caching.
1527 *
1528 * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1529 * found or -1 if none found.
1530 */
1531static int
1533{
1534 int bound_offset = -1;
1535 int part_index = -1;
1536 PartitionKey key = pd->key;
1537 PartitionDesc partdesc = pd->partdesc;
1538 PartitionBoundInfo boundinfo = partdesc->boundinfo;
1539
1540 /*
1541 * In the switch statement below, when we perform a cached lookup for
1542 * RANGE and LIST partitioned tables, if we find that the last found
1543 * partition matches the 'values', we return the partition index right
1544 * away. We do this instead of breaking out of the switch as we don't
1545 * want to execute the code about the DEFAULT partition or do any updates
1546 * for any of the cache-related fields. That would be a waste of effort
1547 * as we already know it's not the DEFAULT partition and have no need to
1548 * increment the number of times we found the same partition any higher
1549 * than PARTITION_CACHED_FIND_THRESHOLD.
1550 */
1551
1552 /* Route as appropriate based on partitioning strategy. */
1553 switch (key->strategy)
1554 {
1556 {
1557 uint64 rowHash;
1558
1559 /* hash partitioning is too cheap to bother caching */
1560 rowHash = compute_partition_hash_value(key->partnatts,
1561 key->partsupfunc,
1562 key->partcollation,
1563 values, isnull);
1564
1565 /*
1566 * HASH partitions can't have a DEFAULT partition and we don't
1567 * do any caching work for them, so just return the part index
1568 */
1569 return boundinfo->indexes[rowHash % boundinfo->nindexes];
1570 }
1571
1573 if (isnull[0])
1574 {
1575 /* this is far too cheap to bother doing any caching */
1576 if (partition_bound_accepts_nulls(boundinfo))
1577 {
1578 /*
1579 * When there is a NULL partition we just return that
1580 * directly. We don't have a bound_offset so it's not
1581 * valid to drop into the code after the switch which
1582 * checks and updates the cache fields. We perhaps should
1583 * be invalidating the details of the last cached
1584 * partition but there's no real need to. Keeping those
1585 * fields set gives a chance at matching to the cached
1586 * partition on the next lookup.
1587 */
1588 return boundinfo->null_index;
1589 }
1590 }
1591 else
1592 {
1593 bool equal;
1594
1596 {
1597 int last_datum_offset = partdesc->last_found_datum_index;
1598 Datum lastDatum = boundinfo->datums[last_datum_offset][0];
1599 int32 cmpval;
1600
1601 /* does the last found datum index match this datum? */
1602 cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1603 key->partcollation[0],
1604 lastDatum,
1605 values[0]));
1606
1607 if (cmpval == 0)
1608 return boundinfo->indexes[last_datum_offset];
1609
1610 /* fall-through and do a manual lookup */
1611 }
1612
1613 bound_offset = partition_list_bsearch(key->partsupfunc,
1614 key->partcollation,
1615 boundinfo,
1616 values[0], &equal);
1617 if (bound_offset >= 0 && equal)
1618 part_index = boundinfo->indexes[bound_offset];
1619 }
1620 break;
1621
1623 {
1624 bool equal = false,
1625 range_partkey_has_null = false;
1626 int i;
1627
1628 /*
1629 * No range includes NULL, so this will be accepted by the
1630 * default partition if there is one, and otherwise rejected.
1631 */
1632 for (i = 0; i < key->partnatts; i++)
1633 {
1634 if (isnull[i])
1635 {
1636 range_partkey_has_null = true;
1637 break;
1638 }
1639 }
1640
1641 /* NULLs belong in the DEFAULT partition */
1642 if (range_partkey_has_null)
1643 break;
1644
1646 {
1647 int last_datum_offset = partdesc->last_found_datum_index;
1648 Datum *lastDatums = boundinfo->datums[last_datum_offset];
1649 PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
1650 int32 cmpval;
1651
1652 /* check if the value is >= to the lower bound */
1653 cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1654 key->partcollation,
1655 lastDatums,
1656 kind,
1657 values,
1658 key->partnatts);
1659
1660 /*
1661 * If it's equal to the lower bound then no need to check
1662 * the upper bound.
1663 */
1664 if (cmpval == 0)
1665 return boundinfo->indexes[last_datum_offset + 1];
1666
1667 if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
1668 {
1669 /* check if the value is below the upper bound */
1670 lastDatums = boundinfo->datums[last_datum_offset + 1];
1671 kind = boundinfo->kind[last_datum_offset + 1];
1672 cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1673 key->partcollation,
1674 lastDatums,
1675 kind,
1676 values,
1677 key->partnatts);
1678
1679 if (cmpval > 0)
1680 return boundinfo->indexes[last_datum_offset + 1];
1681 }
1682 /* fall-through and do a manual lookup */
1683 }
1684
1685 bound_offset = partition_range_datum_bsearch(key->partsupfunc,
1686 key->partcollation,
1687 boundinfo,
1688 key->partnatts,
1689 values,
1690 &equal);
1691
1692 /*
1693 * The bound at bound_offset is less than or equal to the
1694 * tuple value, so the bound at offset+1 is the upper bound of
1695 * the partition we're looking for, if there actually exists
1696 * one.
1697 */
1698 part_index = boundinfo->indexes[bound_offset + 1];
1699 }
1700 break;
1701
1702 default:
1703 elog(ERROR, "unexpected partition strategy: %d",
1704 (int) key->strategy);
1705 }
1706
1707 /*
1708 * part_index < 0 means we failed to find a partition of this parent. Use
1709 * the default partition, if there is one.
1710 */
1711 if (part_index < 0)
1712 {
1713 /*
1714 * No need to reset the cache fields here. The next set of values
1715 * might end up belonging to the cached partition, so leaving the
1716 * cache alone improves the chances of a cache hit on the next lookup.
1717 */
1718 return boundinfo->default_index;
1719 }
1720
1721 /* we should only make it here when the code above set bound_offset */
1722 Assert(bound_offset >= 0);
1723
1724 /*
1725 * Attend to the cache fields. If the bound_offset matches the last
1726 * cached bound offset then we've found the same partition as last time,
1727 * so bump the count by one. If all goes well, we'll eventually reach
1728 * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
1729 * around. Otherwise, we'll reset the cache count back to 1 to mark that
1730 * we've found this partition for the first time.
1731 */
1732 if (bound_offset == partdesc->last_found_datum_index)
1733 partdesc->last_found_count++;
1734 else
1735 {
1736 partdesc->last_found_count = 1;
1737 partdesc->last_found_part_index = part_index;
1738 partdesc->last_found_datum_index = bound_offset;
1739 }
1740
1741 return part_index;
1742}
1743
1744/*
1745 * ExecBuildSlotPartitionKeyDescription
1746 *
1747 * This works very much like BuildIndexValueDescription() and is currently
1748 * used for building error messages when ExecFindPartition() fails to find
1749 * partition for a row.
1750 */
1751static char *
1753 const Datum *values,
1754 const bool *isnull,
1755 int maxfieldlen)
1756{
1759 int partnatts = get_partition_natts(key);
1760 int i;
1761 Oid relid = RelationGetRelid(rel);
1762 AclResult aclresult;
1763
1764 if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
1765 return NULL;
1766
1767 /* If the user has table-level access, just go build the description. */
1768 aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
1769 if (aclresult != ACLCHECK_OK)
1770 {
1771 /*
1772 * Step through the columns of the partition key and make sure the
1773 * user has SELECT rights on all of them.
1774 */
1775 for (i = 0; i < partnatts; i++)
1776 {
1778
1779 /*
1780 * If this partition key column is an expression, we return no
1781 * detail rather than try to figure out what column(s) the
1782 * expression includes and if the user has SELECT rights on them.
1783 */
1784 if (attnum == InvalidAttrNumber ||
1787 return NULL;
1788 }
1789 }
1790
1792 appendStringInfo(&buf, "(%s) = (",
1793 pg_get_partkeydef_columns(relid, true));
1794
1795 for (i = 0; i < partnatts; i++)
1796 {
1797 char *val;
1798 int vallen;
1799
1800 if (isnull[i])
1801 val = "null";
1802 else
1803 {
1804 Oid foutoid;
1805 bool typisvarlena;
1806
1808 &foutoid, &typisvarlena);
1809 val = OidOutputFunctionCall(foutoid, values[i]);
1810 }
1811
1812 if (i > 0)
1814
1815 /* truncate if needed */
1816 vallen = strlen(val);
1817 if (vallen <= maxfieldlen)
1818 appendBinaryStringInfo(&buf, val, vallen);
1819 else
1820 {
1821 vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1822 appendBinaryStringInfo(&buf, val, vallen);
1823 appendStringInfoString(&buf, "...");
1824 }
1825 }
1826
1828
1829 return buf.data;
1830}
1831
1832/*
1833 * adjust_partition_colnos
1834 * Adjust the list of UPDATE target column numbers to account for
1835 * attribute differences between the parent and the partition.
1836 *
1837 * Note: mustn't be called if no adjustment is required.
1838 */
1839static List *
1841{
1842 TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
1843
1844 Assert(map != NULL);
1845
1846 return adjust_partition_colnos_using_map(colnos, map->attrMap);
1847}
1848
1849/*
1850 * adjust_partition_colnos_using_map
1851 * Like adjust_partition_colnos, but uses a caller-supplied map instead
1852 * of assuming to map from the "root" result relation.
1853 *
1854 * Note: mustn't be called if no adjustment is required.
1855 */
1856static List *
1858{
1859 List *new_colnos = NIL;
1860 ListCell *lc;
1861
1862 Assert(attrMap != NULL); /* else we shouldn't be here */
1863
1864 foreach(lc, colnos)
1865 {
1866 AttrNumber parentattrno = lfirst_int(lc);
1867
1868 if (parentattrno <= 0 ||
1869 parentattrno > attrMap->maplen ||
1870 attrMap->attnums[parentattrno - 1] == 0)
1871 elog(ERROR, "unexpected attno %d in target column list",
1872 parentattrno);
1873 new_colnos = lappend_int(new_colnos,
1874 attrMap->attnums[parentattrno - 1]);
1875 }
1876
1877 return new_colnos;
1878}
1879
1880/*-------------------------------------------------------------------------
1881 * Run-Time Partition Pruning Support.
1882 *
1883 * The following series of functions exist to support the removal of unneeded
1884 * subplans for queries against partitioned tables. The supporting functions
1885 * here are designed to work with any plan type which supports an arbitrary
1886 * number of subplans, e.g. Append, MergeAppend.
1887 *
1888 * When pruning involves comparison of a partition key to a constant, it's
1889 * done by the planner. However, if we have a comparison to a non-constant
1890 * but not volatile expression, that presents an opportunity for run-time
1891 * pruning by the executor, allowing irrelevant partitions to be skipped
1892 * dynamically.
1893 *
1894 * We must distinguish expressions containing PARAM_EXEC Params from
1895 * expressions that don't contain those. Even though a PARAM_EXEC Param is
1896 * considered to be a stable expression, it can change value from one plan
1897 * node scan to the next during query execution. Stable comparison
1898 * expressions that don't involve such Params allow partition pruning to be
1899 * done once during executor startup. Expressions that do involve such Params
1900 * require us to prune separately for each scan of the parent plan node.
1901 *
1902 * Note that pruning away unneeded subplans during executor startup has the
1903 * added benefit of not having to initialize the unneeded subplans at all.
1904 *
1905 *
1906 * Functions:
1907 *
1908 * ExecDoInitialPruning:
1909 * Perform runtime "initial" pruning, if necessary, to determine the set
1910 * of child subnodes that need to be initialized during ExecInitNode() for
1911 * all plan nodes that contain a PartitionPruneInfo.
1912 *
1913 * ExecInitPartitionExecPruning:
1914 * Updates the PartitionPruneState found at given part_prune_index in
1915 * EState.es_part_prune_states for use during "exec" pruning if required.
1916 * Also returns the set of subplans to initialize that would be stored at
1917 * part_prune_index in EState.es_part_prune_results by
1918 * ExecDoInitialPruning(). Maps in PartitionPruneState are updated to
1919 * account for initial pruning possibly having eliminated some of the
1920 * subplans.
1921 *
1922 * ExecFindMatchingSubPlans:
1923 * Returns indexes of matching subplans after evaluating the expressions
1924 * that are safe to evaluate at a given point. This function is first
1925 * called during ExecDoInitialPruning() to find the initially matching
1926 * subplans based on performing the initial pruning steps and then must be
1927 * called again each time the value of a Param listed in
1928 * PartitionPruneState's 'execparamids' changes.
1929 *-------------------------------------------------------------------------
1930 */
1931
1932
1933/*
1934 * ExecDoInitialPruning
1935 * Perform runtime "initial" pruning, if necessary, to determine the set
1936 * of child subnodes that need to be initialized during ExecInitNode() for
1937 * plan nodes that support partition pruning.
1938 *
1939 * This function iterates over each PartitionPruneInfo entry in
1940 * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
1941 * and adds it to es_part_prune_states. ExecInitPartitionExecPruning() accesses
1942 * these states through their corresponding indexes in es_part_prune_states and
1943 * assign each state to the parent node's PlanState, from where it will be used
1944 * for "exec" pruning.
1945 *
1946 * If initial pruning steps exist for a PartitionPruneInfo entry, this function
1947 * executes those pruning steps and stores the result as a bitmapset of valid
1948 * child subplans, identifying which subplans should be initialized for
1949 * execution. The results are saved in estate->es_part_prune_results.
1950 *
1951 * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
1952 * entry is still added to es_part_prune_results to maintain alignment with
1953 * es_part_prune_infos. This ensures that ExecInitPartitionExecPruning() can
1954 * use the same index to retrieve the pruning results.
1955 */
1956void
1958{
1959 ListCell *lc;
1960
1961 foreach(lc, estate->es_part_prune_infos)
1962 {
1964 PartitionPruneState *prunestate;
1965 Bitmapset *validsubplans = NULL;
1966 Bitmapset *all_leafpart_rtis = NULL;
1967 Bitmapset *validsubplan_rtis = NULL;
1968
1969 /* Create and save the PartitionPruneState. */
1970 prunestate = CreatePartitionPruneState(estate, pruneinfo,
1971 &all_leafpart_rtis);
1973 prunestate);
1974
1975 /*
1976 * Perform initial pruning steps, if any, and save the result
1977 * bitmapset or NULL as described in the header comment.
1978 */
1979 if (prunestate->do_initial_prune)
1980 validsubplans = ExecFindMatchingSubPlans(prunestate, true,
1981 &validsubplan_rtis);
1982 else
1983 validsubplan_rtis = all_leafpart_rtis;
1984
1986 validsubplan_rtis);
1988 validsubplans);
1989 }
1990}
1991
1992/*
1993 * ExecInitPartitionExecPruning
1994 * Initialize the data structures needed for runtime "exec" partition
1995 * pruning and return the result of initial pruning, if available.
1996 *
1997 * 'relids' identifies the relation to which both the parent plan and the
1998 * PartitionPruneInfo given by 'part_prune_index' belong.
1999 *
2000 * On return, *initially_valid_subplans is assigned the set of indexes of
2001 * child subplans that must be initialized along with the parent plan node.
2002 * Initial pruning would have been performed by ExecDoInitialPruning(), if
2003 * necessary, and the bitmapset of surviving subplans' indexes would have
2004 * been stored as the part_prune_index'th element of
2005 * EState.es_part_prune_results.
2006 *
2007 * If subplans were indeed pruned during initial pruning, the subplan_map
2008 * arrays in the returned PartitionPruneState are re-sequenced to exclude those
2009 * subplans, but only if the maps will be needed for subsequent execution
2010 * pruning passes.
2011 */
2014 int n_total_subplans,
2015 int part_prune_index,
2016 Bitmapset *relids,
2017 Bitmapset **initially_valid_subplans)
2018{
2019 PartitionPruneState *prunestate;
2020 EState *estate = planstate->state;
2021 PartitionPruneInfo *pruneinfo;
2022
2023 /* Obtain the pruneinfo we need. */
2025 part_prune_index);
2026
2027 /* Its relids better match the plan node's or the planner messed up. */
2028 if (!bms_equal(relids, pruneinfo->relids))
2029 elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
2030 bmsToString(pruneinfo->relids), part_prune_index,
2031 bmsToString(relids));
2032
2033 /*
2034 * The PartitionPruneState would have been created by
2035 * ExecDoInitialPruning() and stored as the part_prune_index'th element of
2036 * EState.es_part_prune_states.
2037 */
2038 prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
2039 Assert(prunestate != NULL);
2040
2041 /* Use the result of initial pruning done by ExecDoInitialPruning(). */
2042 if (prunestate->do_initial_prune)
2043 *initially_valid_subplans = list_nth_node(Bitmapset,
2044 estate->es_part_prune_results,
2045 part_prune_index);
2046 else
2047 {
2048 /* No pruning, so we'll need to initialize all subplans */
2049 Assert(n_total_subplans > 0);
2050 *initially_valid_subplans = bms_add_range(NULL, 0,
2051 n_total_subplans - 1);
2052 }
2053
2054 /*
2055 * The exec pruning state must also be initialized, if needed, before it
2056 * can be used for pruning during execution.
2057 *
2058 * This also re-sequences subplan indexes contained in prunestate to
2059 * account for any that were removed due to initial pruning; refer to the
2060 * condition in InitExecPartitionPruneContexts() that is used to determine
2061 * whether to do this. If no exec pruning needs to be done, we would thus
2062 * leave the maps to be in an invalid state, but that's ok since that data
2063 * won't be consulted again (cf initial Assert in
2064 * ExecFindMatchingSubPlans).
2065 */
2066 if (prunestate->do_exec_prune)
2067 InitExecPartitionPruneContexts(prunestate, planstate,
2068 *initially_valid_subplans,
2069 n_total_subplans);
2070
2071 return prunestate;
2072}
2073
2074/*
2075 * CreatePartitionPruneState
2076 * Build the data structure required for calling ExecFindMatchingSubPlans
2077 *
2078 * This includes PartitionPruneContexts (stored in each
2079 * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
2080 * which hold the ExprStates needed to evaluate pruning expressions, and
2081 * mapping arrays to convert partition indexes from the pruning logic
2082 * into subplan indexes in the parent plan node's list of child subplans.
2083 *
2084 * 'pruneinfo' is a PartitionPruneInfo as generated by
2085 * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
2086 * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
2087 * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
2088 * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
2089 * system is needed to keep from confusing the different hierarchies when a
2090 * UNION ALL contains multiple partitioned tables as children. The data
2091 * stored in each PartitionedRelPruningData can be re-used each time we
2092 * re-evaluate which partitions match the pruning steps provided in each
2093 * PartitionedRelPruneInfo.
2094 *
2095 * Note that only the PartitionPruneContexts for initial pruning are
2096 * initialized here. Those required for exec pruning are initialized later in
2097 * ExecInitPartitionExecPruning(), as they depend on the availability of the
2098 * parent plan node's PlanState.
2099 *
2100 * If initial pruning steps are to be skipped (e.g., during EXPLAIN
2101 * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
2102 * all leaf partitions whose scanning subnode is included in the parent plan
2103 * node's list of child plans. The caller must add these RT indexes to
2104 * estate->es_unpruned_relids.
2105 */
2106static PartitionPruneState *
2108 Bitmapset **all_leafpart_rtis)
2109{
2110 PartitionPruneState *prunestate;
2111 int n_part_hierarchies;
2112 ListCell *lc;
2113 int i;
2114
2115 /*
2116 * Expression context that will be used by partkey_datum_from_expr() to
2117 * evaluate expressions for comparison against partition bounds.
2118 */
2119 ExprContext *econtext = CreateExprContext(estate);
2120
2121 /* For data reading, executor always includes detached partitions */
2122 if (estate->es_partition_directory == NULL)
2123 estate->es_partition_directory =
2124 CreatePartitionDirectory(estate->es_query_cxt, false);
2125
2126 n_part_hierarchies = list_length(pruneinfo->prune_infos);
2127 Assert(n_part_hierarchies > 0);
2128
2129 /*
2130 * Allocate the data structure
2131 */
2132 prunestate = (PartitionPruneState *)
2133 palloc(offsetof(PartitionPruneState, partprunedata) +
2134 sizeof(PartitionPruningData *) * n_part_hierarchies);
2135
2136 /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
2137 prunestate->econtext = econtext;
2138 prunestate->execparamids = NULL;
2139 /* other_subplans can change at runtime, so we need our own copy */
2140 prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2141 prunestate->do_initial_prune = false; /* may be set below */
2142 prunestate->do_exec_prune = false; /* may be set below */
2143 prunestate->num_partprunedata = n_part_hierarchies;
2144
2145 /*
2146 * Create a short-term memory context which we'll use when making calls to
2147 * the partition pruning functions. This avoids possible memory leaks,
2148 * since the pruning functions call comparison functions that aren't under
2149 * our control.
2150 */
2151 prunestate->prune_context =
2153 "Partition Prune",
2155
2156 i = 0;
2157 foreach(lc, pruneinfo->prune_infos)
2158 {
2159 List *partrelpruneinfos = lfirst_node(List, lc);
2160 int npartrelpruneinfos = list_length(partrelpruneinfos);
2161 PartitionPruningData *prunedata;
2162 ListCell *lc2;
2163 int j;
2164
2165 prunedata = (PartitionPruningData *)
2166 palloc(offsetof(PartitionPruningData, partrelprunedata) +
2167 npartrelpruneinfos * sizeof(PartitionedRelPruningData));
2168 prunestate->partprunedata[i] = prunedata;
2169 prunedata->num_partrelprunedata = npartrelpruneinfos;
2170
2171 j = 0;
2172 foreach(lc2, partrelpruneinfos)
2173 {
2175 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2176 Relation partrel;
2177 PartitionDesc partdesc;
2178 PartitionKey partkey;
2179
2180 /*
2181 * We can rely on the copies of the partitioned table's partition
2182 * key and partition descriptor appearing in its relcache entry,
2183 * because that entry will be held open and locked for the
2184 * duration of this executor run.
2185 */
2186 partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2187
2188 /* Remember for InitExecPartitionPruneContexts(). */
2189 pprune->partrel = partrel;
2190
2191 partkey = RelationGetPartitionKey(partrel);
2193 partrel);
2194
2195 /*
2196 * Initialize the subplan_map and subpart_map.
2197 *
2198 * The set of partitions that exist now might not be the same that
2199 * existed when the plan was made. The normal case is that it is;
2200 * optimize for that case with a quick comparison, and just copy
2201 * the subplan_map and make subpart_map, leafpart_rti_map point to
2202 * the ones in PruneInfo.
2203 *
2204 * For the case where they aren't identical, we could have more
2205 * partitions on either side; or even exactly the same number of
2206 * them on both but the set of OIDs doesn't match fully. Handle
2207 * this by creating new subplan_map and subpart_map arrays that
2208 * corresponds to the ones in the PruneInfo where the new
2209 * partition descriptor's OIDs match. Any that don't match can be
2210 * set to -1, as if they were pruned. By construction, both
2211 * arrays are in partition bounds order.
2212 */
2213 pprune->nparts = partdesc->nparts;
2214 pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
2215
2216 if (partdesc->nparts == pinfo->nparts &&
2217 memcmp(partdesc->oids, pinfo->relid_map,
2218 sizeof(int) * partdesc->nparts) == 0)
2219 {
2220 pprune->subpart_map = pinfo->subpart_map;
2221 pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2222 memcpy(pprune->subplan_map, pinfo->subplan_map,
2223 sizeof(int) * pinfo->nparts);
2224 }
2225 else
2226 {
2227 int pd_idx = 0;
2228 int pp_idx;
2229
2230 /*
2231 * When the partition arrays are not identical, there could be
2232 * some new ones but it's also possible that one was removed;
2233 * we cope with both situations by walking the arrays and
2234 * discarding those that don't match.
2235 *
2236 * If the number of partitions on both sides match, it's still
2237 * possible that one partition has been detached and another
2238 * attached. Cope with that by creating a map that skips any
2239 * mismatches.
2240 */
2241 pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
2242 pprune->leafpart_rti_map = palloc(sizeof(int) * partdesc->nparts);
2243
2244 for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2245 {
2246 /* Skip any InvalidOid relid_map entries */
2247 while (pd_idx < pinfo->nparts &&
2248 !OidIsValid(pinfo->relid_map[pd_idx]))
2249 pd_idx++;
2250
2251 recheck:
2252 if (pd_idx < pinfo->nparts &&
2253 pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2254 {
2255 /* match... */
2256 pprune->subplan_map[pp_idx] =
2257 pinfo->subplan_map[pd_idx];
2258 pprune->subpart_map[pp_idx] =
2259 pinfo->subpart_map[pd_idx];
2260 pprune->leafpart_rti_map[pp_idx] =
2261 pinfo->leafpart_rti_map[pd_idx];
2262 pd_idx++;
2263 continue;
2264 }
2265
2266 /*
2267 * There isn't an exact match in the corresponding
2268 * positions of both arrays. Peek ahead in
2269 * pinfo->relid_map to see if we have a match for the
2270 * current partition in partdesc. Normally if a match
2271 * exists it's just one element ahead, and it means the
2272 * planner saw one extra partition that we no longer see
2273 * now (its concurrent detach finished just in between);
2274 * so we skip that one by updating pd_idx to the new
2275 * location and jumping above. We can then continue to
2276 * match the rest of the elements after skipping the OID
2277 * with no match; no future matches are tried for the
2278 * element that was skipped, because we know the arrays to
2279 * be in the same order.
2280 *
2281 * If we don't see a match anywhere in the rest of the
2282 * pinfo->relid_map array, that means we see an element
2283 * now that the planner didn't see, so mark that one as
2284 * pruned and move on.
2285 */
2286 for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2287 {
2288 if (pd_idx2 >= pinfo->nparts)
2289 break;
2290 if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2291 {
2292 pd_idx = pd_idx2;
2293 goto recheck;
2294 }
2295 }
2296
2297 pprune->subpart_map[pp_idx] = -1;
2298 pprune->subplan_map[pp_idx] = -1;
2299 pprune->leafpart_rti_map[pp_idx] = 0;
2300 }
2301 }
2302
2303 /* present_parts is also subject to later modification */
2304 pprune->present_parts = bms_copy(pinfo->present_parts);
2305
2306 /*
2307 * Only initial_context is initialized here. exec_context is
2308 * initialized during ExecInitPartitionExecPruning() when the
2309 * parent plan's PlanState is available.
2310 *
2311 * Note that we must skip execution-time (both "init" and "exec")
2312 * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2313 * values may be missing.
2314 */
2316 if (pinfo->initial_pruning_steps &&
2318 {
2320 pprune->initial_pruning_steps,
2321 partdesc, partkey, NULL,
2322 econtext);
2323 /* Record whether initial pruning is needed at any level */
2324 prunestate->do_initial_prune = true;
2325 }
2326 pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
2327 if (pinfo->exec_pruning_steps &&
2329 {
2330 /* Record whether exec pruning is needed at any level */
2331 prunestate->do_exec_prune = true;
2332 }
2333
2334 /*
2335 * Accumulate the IDs of all PARAM_EXEC Params affecting the
2336 * partitioning decisions at this plan node.
2337 */
2338 prunestate->execparamids = bms_add_members(prunestate->execparamids,
2339 pinfo->execparamids);
2340
2341 /*
2342 * Return all leaf partition indexes if we're skipping pruning in
2343 * the EXPLAIN (GENERIC_PLAN) case.
2344 */
2345 if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2346 {
2347 int part_index = -1;
2348
2349 while ((part_index = bms_next_member(pprune->present_parts,
2350 part_index)) >= 0)
2351 {
2352 Index rtindex = pprune->leafpart_rti_map[part_index];
2353
2354 if (rtindex)
2355 *all_leafpart_rtis = bms_add_member(*all_leafpart_rtis,
2356 rtindex);
2357 }
2358 }
2359
2360 j++;
2361 }
2362 i++;
2363 }
2364
2365 return prunestate;
2366}
2367
2368/*
2369 * Initialize a PartitionPruneContext for the given list of pruning steps.
2370 */
2371static void
2373 List *pruning_steps,
2374 PartitionDesc partdesc,
2375 PartitionKey partkey,
2376 PlanState *planstate,
2377 ExprContext *econtext)
2378{
2379 int n_steps;
2380 int partnatts;
2381 ListCell *lc;
2382
2383 n_steps = list_length(pruning_steps);
2384
2385 context->strategy = partkey->strategy;
2386 context->partnatts = partnatts = partkey->partnatts;
2387 context->nparts = partdesc->nparts;
2388 context->boundinfo = partdesc->boundinfo;
2389 context->partcollation = partkey->partcollation;
2390 context->partsupfunc = partkey->partsupfunc;
2391
2392 /* We'll look up type-specific support functions as needed */
2393 context->stepcmpfuncs = (FmgrInfo *)
2394 palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
2395
2397 context->planstate = planstate;
2398 context->exprcontext = econtext;
2399
2400 /* Initialize expression state for each expression we need */
2401 context->exprstates = (ExprState **)
2402 palloc0(sizeof(ExprState *) * n_steps * partnatts);
2403 foreach(lc, pruning_steps)
2404 {
2406 ListCell *lc2 = list_head(step->exprs);
2407 int keyno;
2408
2409 /* not needed for other step kinds */
2410 if (!IsA(step, PartitionPruneStepOp))
2411 continue;
2412
2413 Assert(list_length(step->exprs) <= partnatts);
2414
2415 for (keyno = 0; keyno < partnatts; keyno++)
2416 {
2417 if (bms_is_member(keyno, step->nullkeys))
2418 continue;
2419
2420 if (lc2 != NULL)
2421 {
2422 Expr *expr = lfirst(lc2);
2423
2424 /* not needed for Consts */
2425 if (!IsA(expr, Const))
2426 {
2427 int stateidx = PruneCxtStateIdx(partnatts,
2428 step->step.step_id,
2429 keyno);
2430
2431 /*
2432 * When planstate is NULL, pruning_steps is known not to
2433 * contain any expressions that depend on the parent plan.
2434 * Information of any available EXTERN parameters must be
2435 * passed explicitly in that case, which the caller must
2436 * have made available via econtext.
2437 */
2438 if (planstate == NULL)
2439 context->exprstates[stateidx] =
2441 econtext->ecxt_param_list_info);
2442 else
2443 context->exprstates[stateidx] =
2444 ExecInitExpr(expr, context->planstate);
2445 }
2446 lc2 = lnext(step->exprs, lc2);
2447 }
2448 }
2449 }
2450}
2451
2452/*
2453 * InitExecPartitionPruneContexts
2454 * Initialize exec pruning contexts deferred by CreatePartitionPruneState()
2455 *
2456 * This function finalizes exec pruning setup for a PartitionPruneState by
2457 * initializing contexts for pruning steps that require the parent plan's
2458 * PlanState. It iterates over PartitionPruningData entries and sets up the
2459 * necessary execution contexts for pruning during query execution.
2460 *
2461 * Also fix the mapping of partition indexes to subplan indexes contained in
2462 * prunestate by considering the new list of subplans that survived initial
2463 * pruning.
2464 *
2465 * Current values of the indexes present in PartitionPruneState count all the
2466 * subplans that would be present before initial pruning was done. If initial
2467 * pruning got rid of some of the subplans, any subsequent pruning passes will
2468 * be looking at a different set of target subplans to choose from than those
2469 * in the pre-initial-pruning set, so the maps in PartitionPruneState
2470 * containing those indexes must be updated to reflect the new indexes of
2471 * subplans in the post-initial-pruning set.
2472 */
2473static void
2475 PlanState *parent_plan,
2476 Bitmapset *initially_valid_subplans,
2477 int n_total_subplans)
2478{
2479 EState *estate;
2480 int *new_subplan_indexes = NULL;
2481 Bitmapset *new_other_subplans;
2482 int i;
2483 int newidx;
2484 bool fix_subplan_map = false;
2485
2486 Assert(prunestate->do_exec_prune);
2487 Assert(parent_plan != NULL);
2488 estate = parent_plan->state;
2489
2490 /*
2491 * No need to fix subplans maps if initial pruning didn't eliminate any
2492 * subplans.
2493 */
2494 if (bms_num_members(initially_valid_subplans) < n_total_subplans)
2495 {
2496 fix_subplan_map = true;
2497
2498 /*
2499 * First we must build a temporary array which maps old subplan
2500 * indexes to new ones. For convenience of initialization, we use
2501 * 1-based indexes in this array and leave pruned items as 0.
2502 */
2503 new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
2504 newidx = 1;
2505 i = -1;
2506 while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2507 {
2508 Assert(i < n_total_subplans);
2509 new_subplan_indexes[i] = newidx++;
2510 }
2511 }
2512
2513 /*
2514 * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2515 * subplan indexes. We must also recompute its present_parts bitmap.
2516 */
2517 for (i = 0; i < prunestate->num_partprunedata; i++)
2518 {
2519 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2520 int j;
2521
2522 /*
2523 * Within each hierarchy, we perform this loop in back-to-front order
2524 * so that we determine present_parts for the lowest-level partitioned
2525 * tables first. This way we can tell whether a sub-partitioned
2526 * table's partitions were entirely pruned so we can exclude it from
2527 * the current level's present_parts.
2528 */
2529 for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2530 {
2531 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2532 int nparts = pprune->nparts;
2533 int k;
2534
2535 /* Initialize PartitionPruneContext for exec pruning, if needed. */
2536 if (pprune->exec_pruning_steps != NIL)
2537 {
2538 PartitionKey partkey;
2539 PartitionDesc partdesc;
2540
2541 /*
2542 * See the comment in CreatePartitionPruneState() regarding
2543 * the usage of partdesc and partkey.
2544 */
2545 partkey = RelationGetPartitionKey(pprune->partrel);
2547 pprune->partrel);
2548
2550 pprune->exec_pruning_steps,
2551 partdesc, partkey, parent_plan,
2552 prunestate->econtext);
2553 }
2554
2555 if (!fix_subplan_map)
2556 continue;
2557
2558 /* We just rebuild present_parts from scratch */
2559 bms_free(pprune->present_parts);
2560 pprune->present_parts = NULL;
2561
2562 for (k = 0; k < nparts; k++)
2563 {
2564 int oldidx = pprune->subplan_map[k];
2565 int subidx;
2566
2567 /*
2568 * If this partition existed as a subplan then change the old
2569 * subplan index to the new subplan index. The new index may
2570 * become -1 if the partition was pruned above, or it may just
2571 * come earlier in the subplan list due to some subplans being
2572 * removed earlier in the list. If it's a subpartition, add
2573 * it to present_parts unless it's entirely pruned.
2574 */
2575 if (oldidx >= 0)
2576 {
2577 Assert(oldidx < n_total_subplans);
2578 pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2579
2580 if (new_subplan_indexes[oldidx] > 0)
2581 pprune->present_parts =
2582 bms_add_member(pprune->present_parts, k);
2583 }
2584 else if ((subidx = pprune->subpart_map[k]) >= 0)
2585 {
2586 PartitionedRelPruningData *subprune;
2587
2588 subprune = &prunedata->partrelprunedata[subidx];
2589
2590 if (!bms_is_empty(subprune->present_parts))
2591 pprune->present_parts =
2592 bms_add_member(pprune->present_parts, k);
2593 }
2594 }
2595 }
2596 }
2597
2598 /*
2599 * If we fixed subplan maps, we must also recompute the other_subplans
2600 * set, since indexes in it may change.
2601 */
2602 if (fix_subplan_map)
2603 {
2604 new_other_subplans = NULL;
2605 i = -1;
2606 while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2607 new_other_subplans = bms_add_member(new_other_subplans,
2608 new_subplan_indexes[i] - 1);
2609
2610 bms_free(prunestate->other_subplans);
2611 prunestate->other_subplans = new_other_subplans;
2612
2613 pfree(new_subplan_indexes);
2614 }
2615}
2616
2617/*
2618 * ExecFindMatchingSubPlans
2619 * Determine which subplans match the pruning steps detailed in
2620 * 'prunestate' for the current comparison expression values.
2621 *
2622 * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2623 * differentiates the initial executor-time pruning step from later
2624 * runtime pruning.
2625 *
2626 * The caller must pass a non-NULL validsubplan_rtis during initial pruning
2627 * to collect the RT indexes of leaf partitions whose subnodes will be
2628 * executed. These RT indexes are later added to EState.es_unpruned_relids.
2629 */
2630Bitmapset *
2632 bool initial_prune,
2633 Bitmapset **validsubplan_rtis)
2634{
2635 Bitmapset *result = NULL;
2636 MemoryContext oldcontext;
2637 int i;
2638
2639 /*
2640 * Either we're here on the initial prune done during pruning
2641 * initialization, or we're at a point where PARAM_EXEC Params can be
2642 * evaluated *and* there are steps in which to do so.
2643 */
2644 Assert(initial_prune || prunestate->do_exec_prune);
2645 Assert(validsubplan_rtis != NULL || !initial_prune);
2646
2647 /*
2648 * Switch to a temp context to avoid leaking memory in the executor's
2649 * query-lifespan memory context.
2650 */
2651 oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2652
2653 /*
2654 * For each hierarchy, do the pruning tests, and add nondeletable
2655 * subplans' indexes to "result".
2656 */
2657 for (i = 0; i < prunestate->num_partprunedata; i++)
2658 {
2659 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2661
2662 /*
2663 * We pass the zeroth item, belonging to the root table of the
2664 * hierarchy, and find_matching_subplans_recurse() takes care of
2665 * recursing to other (lower-level) parents as needed.
2666 */
2667 pprune = &prunedata->partrelprunedata[0];
2668 find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2669 &result, validsubplan_rtis);
2670
2671 /*
2672 * Expression eval may have used space in ExprContext too. Avoid
2673 * accessing exec_context during initial pruning, as it is not valid
2674 * at that stage.
2675 */
2676 if (!initial_prune && pprune->exec_pruning_steps)
2678 }
2679
2680 /* Add in any subplans that partition pruning didn't account for */
2681 result = bms_add_members(result, prunestate->other_subplans);
2682
2683 MemoryContextSwitchTo(oldcontext);
2684
2685 /* Copy result out of the temp context before we reset it */
2686 result = bms_copy(result);
2687 if (validsubplan_rtis)
2688 *validsubplan_rtis = bms_copy(*validsubplan_rtis);
2689
2690 MemoryContextReset(prunestate->prune_context);
2691
2692 return result;
2693}
2694
2695/*
2696 * find_matching_subplans_recurse
2697 * Recursive worker function for ExecFindMatchingSubPlans
2698 *
2699 * Adds valid (non-prunable) subplan IDs to *validsubplans. If
2700 * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
2701 * corresponding partitions, but only if they are leaf partitions.
2702 */
2703static void
2706 bool initial_prune,
2707 Bitmapset **validsubplans,
2708 Bitmapset **validsubplan_rtis)
2709{
2710 Bitmapset *partset;
2711 int i;
2712
2713 /* Guard against stack overflow due to overly deep partition hierarchy. */
2715
2716 /*
2717 * Prune as appropriate, if we have pruning steps matching the current
2718 * execution context. Otherwise just include all partitions at this
2719 * level.
2720 */
2721 if (initial_prune && pprune->initial_pruning_steps)
2722 partset = get_matching_partitions(&pprune->initial_context,
2723 pprune->initial_pruning_steps);
2724 else if (!initial_prune && pprune->exec_pruning_steps)
2725 partset = get_matching_partitions(&pprune->exec_context,
2726 pprune->exec_pruning_steps);
2727 else
2728 partset = pprune->present_parts;
2729
2730 /* Translate partset into subplan indexes */
2731 i = -1;
2732 while ((i = bms_next_member(partset, i)) >= 0)
2733 {
2734 if (pprune->subplan_map[i] >= 0)
2735 {
2736 *validsubplans = bms_add_member(*validsubplans,
2737 pprune->subplan_map[i]);
2738
2739 /*
2740 * Only report leaf partitions. Non-leaf partitions may appear
2741 * here when they use an unflattened Append or MergeAppend.
2742 */
2743 if (validsubplan_rtis && pprune->leafpart_rti_map[i])
2744 *validsubplan_rtis = bms_add_member(*validsubplan_rtis,
2745 pprune->leafpart_rti_map[i]);
2746 }
2747 else
2748 {
2749 int partidx = pprune->subpart_map[i];
2750
2751 if (partidx >= 0)
2753 &prunedata->partrelprunedata[partidx],
2754 initial_prune, validsubplans,
2755 validsubplan_rtis);
2756 else
2757 {
2758 /*
2759 * We get here if the planner already pruned all the sub-
2760 * partitions for this partition. Silently ignore this
2761 * partition in this case. The end result is the same: we
2762 * would have pruned all partitions just the same, but we
2763 * don't have any pruning steps to execute to verify this.
2764 */
2765 }
2766 }
2767 }
2768}
AclResult
Definition: acl.h:182
@ ACLCHECK_OK
Definition: acl.h:183
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:3866
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4037
AttrMap * build_attrmap_by_name(TupleDesc indesc, TupleDesc outdesc, bool missing_ok)
Definition: attmap.c:175
AttrMap * build_attrmap_by_name_if_req(TupleDesc indesc, TupleDesc outdesc, bool missing_ok)
Definition: attmap.c:261
int16 AttrNumber
Definition: attnum.h:21
#define InvalidAttrNumber
Definition: attnum.h:23
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:142
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1305
Bitmapset * bms_add_range(Bitmapset *a, int lower, int upper)
Definition: bitmapset.c:1018
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:750
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:814
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:916
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:122
#define bms_is_empty(a)
Definition: bitmapset.h:118
static Datum values[MAXATTR]
Definition: bootstrap.c:153
#define likely(x)
Definition: c.h:406
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:475
int32_t int32
Definition: c.h:537
uint64_t uint64
Definition: c.h:542
#define unlikely(x)
Definition: c.h:407
unsigned int Index
Definition: c.h:622
#define OidIsValid(objectId)
Definition: c.h:777
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:143
ProjectionInfo * ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent, TupleDesc inputDesc)
Definition: execExpr.c:370
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:229
ExprState * ExecInitExprWithParams(Expr *node, ParamListInfo ext_params)
Definition: execExpr.c:180
ProjectionInfo * ExecBuildUpdateProjection(List *targetList, bool evalTargetList, List *targetColnos, TupleDesc relDesc, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent)
Definition: execExpr.c:547
List * ExecPrepareExprList(List *nodes, EState *estate)
Definition: execExpr.c:839
void ExecCloseIndices(ResultRelInfo *resultRelInfo)
Definition: execIndexing.c:239
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
Definition: execIndexing.c:161
void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, OnConflictAction onConflictAction, List *mergeActions)
Definition: execMain.c:1050
bool ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool emitError)
Definition: execMain.c:1856
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options)
Definition: execMain.c:1243
static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate, PlanState *parent_plan, Bitmapset *initially_valid_subplans, int n_total_subplans)
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate, PartitionTupleRouting *proute, Oid partoid, PartitionDispatch parent_pd, int partidx, ResultRelInfo *rootResultRelInfo)
static bool IsIndexCompatibleAsArbiter(Relation arbiterIndexRelation, IndexInfo *arbiterIndexInfo, Relation indexRelation, IndexInfo *indexInfo)
void ExecDoInitialPruning(EState *estate)
static ResultRelInfo * ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *rootResultRelInfo, int partidx)
PartitionPruneState * ExecInitPartitionExecPruning(PlanState *planstate, int n_total_subplans, int part_prune_index, Bitmapset *relids, Bitmapset **initially_valid_subplans)
Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, bool initial_prune, Bitmapset **validsubplan_rtis)
static void ExecInitRoutingInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *partRelInfo, int partidx, bool is_borrowed_rel)
static char * ExecBuildSlotPartitionKeyDescription(Relation rel, const Datum *values, const bool *isnull, int maxfieldlen)
static void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
static int get_partition_for_tuple(PartitionDispatch pd, const Datum *values, const bool *isnull)
#define PARTITION_CACHED_FIND_THRESHOLD
PartitionTupleRouting * ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
static List * adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
static List * adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
ResultRelInfo * ExecFindPartition(ModifyTableState *mtstate, ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, TupleTableSlot *slot, EState *estate)
static void InitPartitionPruneContext(PartitionPruneContext *context, List *pruning_steps, PartitionDesc partdesc, PartitionKey partkey, PlanState *planstate, ExprContext *econtext)
struct PartitionDispatchData PartitionDispatchData
static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans, Bitmapset **validsubplan_rtis)
static PartitionPruneState * CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo, Bitmapset **all_leafpart_rtis)
void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute)
struct PartitionDispatchData * PartitionDispatch
Definition: execPartition.h:22
struct PartitionedRelPruningData PartitionedRelPruningData
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1427
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:84
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1443
Relation ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel)
Definition: execUtils.c:825
TupleConversionMap * ExecGetRootToChildMap(ResultRelInfo *resultRelInfo, EState *estate)
Definition: execUtils.c:1326
ExprContext * CreateExprContext(EState *estate)
Definition: execUtils.c:307
TupleConversionMap * ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
Definition: execUtils.c:1300
#define GetPerTupleExprContext(estate)
Definition: executor.h:656
#define EXEC_FLAG_EXPLAIN_GENERIC
Definition: executor.h:67
#define ResetExprContext(econtext)
Definition: executor.h:650
#define GetPerTupleMemoryContext(estate)
Definition: executor.h:661
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:436
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1763
Assert(PointerIsAligned(start, uint64))
long val
Definition: informix.c:689
int j
Definition: isn.c:78
int i
Definition: isn.c:77
List * list_difference(const List *list1, const List *list2)
Definition: list.c:1237
List * lappend(List *list, void *datum)
Definition: list.c:339
List * lappend_int(List *list, int datum)
Definition: list.c:357
List * lappend_oid(List *list, Oid datum)
Definition: list.c:375
void list_free(List *list)
Definition: list.c:1546
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:722
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:3074
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1084
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:400
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
void * palloc(Size size)
Definition: mcxt.c:1365
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
Oid GetUserId(void)
Definition: miscinit.c:469
ResultRelInfo * ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, bool missing_ok, bool update_cache)
void ExecInitMergeTupleSlots(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo)
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
#define copyObject(obj)
Definition: nodes.h:232
@ ONCONFLICT_NONE
Definition: nodes.h:428
@ ONCONFLICT_UPDATE
Definition: nodes.h:430
@ CMD_MERGE
Definition: nodes.h:279
@ CMD_INSERT
Definition: nodes.h:277
@ CMD_DELETE
Definition: nodes.h:278
@ CMD_UPDATE
Definition: nodes.h:276
@ CMD_NOTHING
Definition: nodes.h:282
#define makeNode(_type_)
Definition: nodes.h:161
#define castNode(_type_, nodeptr)
Definition: nodes.h:182
char * bmsToString(const Bitmapset *bms)
Definition: outfuncs.c:819
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
@ PARTITION_STRATEGY_HASH
Definition: parsenodes.h:902
@ PARTITION_STRATEGY_LIST
Definition: parsenodes.h:900
@ PARTITION_STRATEGY_RANGE
Definition: parsenodes.h:901
PartitionRangeDatumKind
Definition: parsenodes.h:951
#define ACL_SELECT
Definition: parsenodes.h:77
int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, const Datum *rb_datums, PartitionRangeDatumKind *rb_kind, const Datum *tuple_datums, int n_tuple_datums)
Definition: partbounds.c:3557
uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, const Oid *partcollation, const Datum *values, const bool *isnull)
Definition: partbounds.c:4723
int partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, int nvalues, const Datum *values, bool *is_equal)
Definition: partbounds.c:3696
int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, Datum value, bool *is_equal)
Definition: partbounds.c:3608
#define partition_bound_accepts_nulls(bi)
Definition: partbounds.h:98
PartitionKey RelationGetPartitionKey(Relation rel)
Definition: partcache.c:51
static int16 get_partition_col_attnum(PartitionKey key, int col)
Definition: partcache.h:80
static int get_partition_natts(PartitionKey key)
Definition: partcache.h:65
static Oid get_partition_col_typid(PartitionKey key, int col)
Definition: partcache.h:86
PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
Definition: partdesc.c:423
PartitionDesc PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
Definition: partdesc.c:456
List * get_partition_ancestors(Oid relid)
Definition: partition.c:134
Bitmapset * get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
Definition: partprune.c:846
#define PruneCxtStateIdx(partnatts, step_id, keyno)
Definition: partprune.h:70
int16 attnum
Definition: pg_attribute.h:74
#define PARTITION_MAX_KEYS
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define linitial(l)
Definition: pg_list.h:178
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define foreach_oid(var, lst)
Definition: pg_list.h:471
#define list_nth_node(type, list, n)
Definition: pg_list.h:327
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define foreach_int(var, lst)
Definition: pg_list.h:470
static char * buf
Definition: pg_test_fsync.c:72
uint64_t Datum
Definition: postgres.h:70
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
#define INNER_VAR
Definition: primnodes.h:242
#define RelationGetForm(relation)
Definition: rel.h:509
#define RelationGetRelid(relation)
Definition: rel.h:515
#define RelationGetDescr(relation)
Definition: rel.h:541
#define RelationGetRelationName(relation)
Definition: rel.h:549
List * RelationGetIndexPredicate(Relation relation)
Definition: relcache.c:5210
List * RelationGetIndexExpressions(Relation relation)
Definition: relcache.c:5097
int errtable(Relation rel)
Definition: relcache.c:6049
Node * map_variable_attnos(Node *node, int target_varno, int sublevels_up, const AttrMap *attno_map, Oid to_rowtype, bool *found_whole_row)
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition: rls.c:52
@ RLS_ENABLED
Definition: rls.h:45
char * pg_get_partkeydef_columns(Oid relid, bool pretty)
Definition: ruleutils.c:1923
void check_stack_depth(void)
Definition: stack_depth.c:95
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
Definition: attmap.h:35
int maplen
Definition: attmap.h:37
AttrNumber * attnums
Definition: attmap.h:36
List * es_part_prune_infos
Definition: execnodes.h:670
List * es_tuple_routing_result_relations
Definition: execnodes.h:698
int es_top_eflags
Definition: execnodes.h:719
int es_instrument
Definition: execnodes.h:720
Bitmapset * es_unpruned_relids
Definition: execnodes.h:673
List * es_part_prune_states
Definition: execnodes.h:671
MemoryContext es_query_cxt
Definition: execnodes.h:710
List * es_tupleTable
Definition: execnodes.h:712
PartitionDirectory es_partition_directory
Definition: execnodes.h:692
List * es_part_prune_results
Definition: execnodes.h:672
ParamListInfo ecxt_param_list_info
Definition: execnodes.h:285
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:273
struct EState * ecxt_estate
Definition: execnodes.h:315
EndForeignInsert_function EndForeignInsert
Definition: fdwapi.h:239
BeginForeignInsert_function BeginForeignInsert
Definition: fdwapi.h:238
ExecForeignBatchInsert_function ExecForeignBatchInsert
Definition: fdwapi.h:233
GetForeignModifyBatchSize_function GetForeignModifyBatchSize
Definition: fdwapi.h:234
Definition: fmgr.h:57
bool ii_Unique
Definition: execnodes.h:200
Oid * ii_ExclusionOps
Definition: execnodes.h:188
bool ii_NullsNotDistinct
Definition: execnodes.h:202
int ii_NumIndexKeyAttrs
Definition: execnodes.h:169
bool ii_ReadyForInserts
Definition: execnodes.h:204
Definition: pg_list.h:54
MergeAction * mas_action
Definition: execnodes.h:449
ProjectionInfo * mas_proj
Definition: execnodes.h:450
ExprState * mas_whenqual
Definition: execnodes.h:452
ResultRelInfo * resultRelInfo
Definition: execnodes.h:1408
PlanState ps
Definition: execnodes.h:1403
ResultRelInfo * rootResultRelInfo
Definition: execnodes.h:1416
List * onConflictCols
Definition: plannodes.h:368
List * mergeJoinConditions
Definition: plannodes.h:378
CmdType operation
Definition: plannodes.h:334
List * resultRelations
Definition: plannodes.h:342
List * onConflictSet
Definition: plannodes.h:366
List * mergeActionLists
Definition: plannodes.h:376
List * returningLists
Definition: plannodes.h:352
List * withCheckOptionLists
Definition: plannodes.h:346
Node * onConflictWhere
Definition: plannodes.h:370
OnConflictAction onConflictAction
Definition: plannodes.h:362
Definition: nodes.h:135
TupleTableSlot * oc_ProjSlot
Definition: execnodes.h:434
TupleTableSlot * oc_Existing
Definition: execnodes.h:433
ExprState * oc_WhereClause
Definition: execnodes.h:436
ProjectionInfo * oc_ProjInfo
Definition: execnodes.h:435
PartitionRangeDatumKind ** kind
Definition: partbounds.h:84
int last_found_datum_index
Definition: partdesc.h:46
PartitionBoundInfo boundinfo
Definition: partdesc.h:38
int last_found_count
Definition: partdesc.h:63
bool * is_leaf
Definition: partdesc.h:35
int last_found_part_index
Definition: partdesc.h:52
TupleTableSlot * tupslot
PartitionDesc partdesc
int indexes[FLEXIBLE_ARRAY_MEMBER]
Oid * partcollation
Definition: partcache.h:39
PartitionStrategy strategy
Definition: partcache.h:27
List * partexprs
Definition: partcache.h:31
FmgrInfo * partsupfunc
Definition: partcache.h:36
AttrNumber * partattrs
Definition: partcache.h:29
FmgrInfo * partsupfunc
Definition: partprune.h:56
ExprContext * exprcontext
Definition: partprune.h:60
MemoryContext ppccontext
Definition: partprune.h:58
PartitionBoundInfo boundinfo
Definition: partprune.h:54
PlanState * planstate
Definition: partprune.h:59
FmgrInfo * stepcmpfuncs
Definition: partprune.h:57
ExprState ** exprstates
Definition: partprune.h:61
Bitmapset * other_subplans
Definition: plannodes.h:1649
Bitmapset * relids
Definition: plannodes.h:1647
PartitionPruningData * partprunedata[FLEXIBLE_ARRAY_MEMBER]
Bitmapset * execparamids
ExprContext * econtext
Bitmapset * other_subplans
MemoryContext prune_context
PartitionPruneStep step
Definition: plannodes.h:1758
Bitmapset * nullkeys
Definition: plannodes.h:1763
PartitionedRelPruningData partrelprunedata[FLEXIBLE_ARRAY_MEMBER]
Definition: execPartition.h:87
PartitionDispatch * partition_dispatch_info
Definition: execPartition.c:95
ResultRelInfo ** partitions
Definition: execPartition.c:99
MemoryContext memcxt
ResultRelInfo ** nonleaf_partitions
Definition: execPartition.c:96
Bitmapset * present_parts
Definition: plannodes.h:1683
Bitmapset * execparamids
Definition: plannodes.h:1712
PartitionPruneContext exec_context
Definition: execPartition.h:74
PartitionPruneContext initial_context
Definition: execPartition.h:73
Plan * plan
Definition: execnodes.h:1165
EState * state
Definition: execnodes.h:1167
ExprContext * ps_ExprContext
Definition: execnodes.h:1204
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1203
Form_pg_index rd_index
Definition: rel.h:192
Oid * rd_opfamily
Definition: rel.h:207
Oid * rd_indcollation
Definition: rel.h:217
Form_pg_class rd_rel
Definition: rel.h:111
TupleTableSlot * ri_PartitionTupleSlot
Definition: execnodes.h:619
OnConflictSetState * ri_onConflict
Definition: execnodes.h:583
List * ri_onConflictArbiterIndexes
Definition: execnodes.h:580
Relation ri_RelationDesc
Definition: execnodes.h:480
struct CopyMultiInsertBuffer * ri_CopyMultiInsertBuffer
Definition: execnodes.h:622
Index ri_RangeTableIndex
Definition: execnodes.h:477
struct FdwRoutine * ri_FdwRoutine
Definition: execnodes.h:533
int ri_BatchSize
Definition: execnodes.h:544
AttrMap * attrMap
Definition: tupconvert.h:28
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:92
TupleTableSlot * execute_attr_map_slot(AttrMap *attrMap, TupleTableSlot *in_slot, TupleTableSlot *out_slot)
Definition: tupconvert.c:193
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:398
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:457
#define IsolationUsesXactSnapshot()
Definition: xact.h:52