/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *	  Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/table.h"
#include "access/tableam.h"
#include "catalog/partition.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"


/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *		The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *		Array of 'max_dispatch' elements containing a pointer to a
 *		PartitionDispatch object for every partitioned table touched by tuple
 *		routing.  The entry for the target partitioned table is *always*
 *		present in the 0th element of this array.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * nonleaf_partitions
 *		Array of 'max_dispatch' elements containing pointers to fake
 *		ResultRelInfo objects for nonleaf partitions, useful for checking
 *		the partition constraint.
 *
 * num_dispatch
 *		The current number of items stored in the 'partition_dispatch_info'
 *		array.  Also serves as the index of the next free array element for
 *		new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *		The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *		Array of 'max_partitions' elements containing a pointer to a
 *		ResultRelInfo for every leaf partition touched by tuple routing.
 *		Some of these are pointers to ResultRelInfos which are borrowed out of
 *		the owning ModifyTableState node.  The remainder have been built
 *		especially for tuple routing.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * is_borrowed_rel
 *		Array of 'max_partitions' booleans recording whether a given entry
 *		in 'partitions' is a ResultRelInfo pointer borrowed from the owning
 *		ModifyTableState node, rather than being built here.
 *
 * num_partitions
 *		The current number of items stored in the 'partitions' array.  Also
 *		serves as the index of the next free array element for new
 *		ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *		The current allocated size of the 'partitions' array.
 *
 * memcxt
 *		Memory context used to allocate subsidiary structs.
 *-----------------------
 */
struct PartitionTupleRouting
{
	Relation	partition_root;
	PartitionDispatch *partition_dispatch_info;
	ResultRelInfo **nonleaf_partitions;
	int			num_dispatch;
	int			max_dispatch;
	ResultRelInfo **partitions;
	bool	   *is_borrowed_rel;
	int			num_partitions;
	int			max_partitions;
	MemoryContext memcxt;
};

/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *		Relation descriptor of the table
 *
 * key
 *		Partition key information of the table
 *
 * keystate
 *		Execution state required for expressions in the partition key
 *
 * partdesc
 *		Partition descriptor of the table
 *
 * tupslot
 *		A standalone TupleTableSlot initialized with this table's tuple
 *		descriptor, or NULL if no tuple conversion between the parent and
 *		this table is required.
 *
 * tupmap
 *		TupleConversionMap to convert from the parent's rowtype to this table's
 *		rowtype (when extracting the partition key of a tuple just before
 *		routing it through this table).  A NULL value is stored if no tuple
 *		conversion is required.
 *
 * indexes
 *		Array of partdesc->nparts elements.  For leaf partitions the index
 *		corresponds to the partition's ResultRelInfo in the encapsulating
 *		PartitionTupleRouting's partitions array.  For partitioned partitions,
 *		the index corresponds to the PartitionDispatch for it in its
 *		partition_dispatch_info array.  -1 indicates we've not yet allocated
 *		anything in PartitionTupleRouting for the partition.
 *-----------------------
 */
typedef struct PartitionDispatchData
{
	Relation	reldesc;
	PartitionKey key;
	List	   *keystate;		/* list of ExprState */
	PartitionDesc partdesc;
	TupleTableSlot *tupslot;
	AttrMap    *tupmap;
	int			indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;

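/*
 * Illustrative sketch (not part of the upstream file): given a
 * PartitionDispatch 'pd' and a PartitionTupleRouting 'proute', a stored
 * entry in pd->indexes[] is consulted roughly like this during descent:
 *
 *		if (pd->indexes[partidx] >= 0)
 *		{
 *			if (pd->partdesc->is_leaf[partidx])
 *				rri = proute->partitions[pd->indexes[partidx]];
 *			else
 *				subpd = proute->partition_dispatch_info[pd->indexes[partidx]];
 *		}
 *
 * A value of -1 means neither structure has been allocated yet for that
 * partition.
 */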

static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
											EState *estate, PartitionTupleRouting *proute,
											PartitionDispatch dispatch,
											ResultRelInfo *rootResultRelInfo,
											int partidx);
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
								EState *estate,
								PartitionTupleRouting *proute,
								PartitionDispatch dispatch,
								ResultRelInfo *partRelInfo,
								int partidx,
								bool is_borrowed_rel);
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
													   PartitionTupleRouting *proute,
													   Oid partoid, PartitionDispatch parent_pd,
													   int partidx, ResultRelInfo *rootResultRelInfo);
static void FormPartitionKeyDatum(PartitionDispatch pd,
								  TupleTableSlot *slot,
								  EState *estate,
								  Datum *values,
								  bool *isnull);
static int	get_partition_for_tuple(PartitionDispatch pd, Datum *values,
									bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
												  Datum *values,
												  bool *isnull,
												  int maxfieldlen);
static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
static PartitionPruneState *CreatePartitionPruneState(EState *estate,
													  PartitionPruneInfo *pruneinfo,
													  Bitmapset **all_leafpart_rtis);
static void InitPartitionPruneContext(PartitionPruneContext *context,
									  List *pruning_steps,
									  PartitionDesc partdesc,
									  PartitionKey partkey,
									  PlanState *planstate,
									  ExprContext *econtext);
static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
										   PlanState *parent_plan,
										   Bitmapset *initially_valid_subplans,
										   int n_total_subplans);
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
										   PartitionedRelPruningData *pprune,
										   bool initial_prune,
										   Bitmapset **validsubplans,
										   Bitmapset **validsubplan_rtis);


/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Callers must use the returned PartitionTupleRouting during calls to
 * ExecFindPartition().  The actual ResultRelInfo for a partition is only
 * allocated when the partition is found for the first time.
 *
 * The current memory context is used to allocate this struct and all
 * subsidiary structs that will be allocated from it later on.  Typically
 * it should be estate->es_query_cxt.
 */
PartitionTupleRouting *
ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
{
	PartitionTupleRouting *proute;

	/*
	 * Here we attempt to expend as little effort as possible in setting up
	 * the PartitionTupleRouting.  Each partition's ResultRelInfo is built on
	 * demand, only when we actually need to route a tuple to that partition.
	 * The reason for this is that a common case is for INSERT to insert a
	 * single tuple into a partitioned table and this must be fast.
	 */
	proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
	proute->partition_root = rel;
	proute->memcxt = CurrentMemoryContext;
	/* Rest of members initialized by zeroing */

	/*
	 * Initialize this table's PartitionDispatch object.  Here we pass in the
	 * parent as NULL as we don't need to care about any parent of the target
	 * partitioned table.
	 */
	ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
								  NULL, 0, NULL);

	return proute;
}
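
/*
 * Illustrative usage sketch (not part of the upstream file): a caller such
 * as COPY FROM or INSERT drives the tuple-routing API roughly as follows,
 * assuming 'rel' is the target partitioned table and 'slot' carries the
 * tuple being routed:
 *
 *		PartitionTupleRouting *proute;
 *		ResultRelInfo *partRelInfo;
 *
 *		proute = ExecSetupPartitionTupleRouting(estate, rel);
 *		...
 *		partRelInfo = ExecFindPartition(mtstate, rootResultRelInfo,
 *										proute, slot, estate);
 *		... insert the tuple using partRelInfo ...
 *		ExecCleanupTupleRouting(mtstate, proute);
 */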

/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
 * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
 * ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and initialize tuple routing information.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message.  An error may also be raised if the found target partition
 * is not a valid target for an INSERT.
 */
ResultRelInfo *
ExecFindPartition(ModifyTableState *mtstate,
				  ResultRelInfo *rootResultRelInfo,
				  PartitionTupleRouting *proute,
				  TupleTableSlot *slot, EState *estate)
{
	PartitionDispatch *pd = proute->partition_dispatch_info;
	Datum		values[PARTITION_MAX_KEYS];
	bool		isnull[PARTITION_MAX_KEYS];
	Relation	rel;
	PartitionDispatch dispatch;
	PartitionDesc partdesc;
	ExprContext *ecxt = GetPerTupleExprContext(estate);
	TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
	TupleTableSlot *rootslot = slot;
	TupleTableSlot *myslot = NULL;
	MemoryContext oldcxt;
	ResultRelInfo *rri = NULL;

	/* use per-tuple context here to avoid leaking memory */
	oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	/*
	 * First check the root table's partition constraint, if any.  No point in
	 * routing the tuple if it doesn't belong in the root table itself.
	 */
	if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
		ExecPartitionCheck(rootResultRelInfo, slot, estate, true);

	/* start with the root partitioned table */
	dispatch = pd[0];
	while (dispatch != NULL)
	{
		int			partidx = -1;
		bool		is_leaf;

		CHECK_FOR_INTERRUPTS();

		rel = dispatch->reldesc;
		partdesc = dispatch->partdesc;

		/*
		 * Extract partition key from tuple.  Expression evaluation machinery
		 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
		 * point to the correct tuple slot.  The slot might have changed from
		 * what was used for the parent table if the table of the current
		 * partitioning level has a different tuple descriptor from the
		 * parent.  So update ecxt_scantuple accordingly.
		 */
		ecxt->ecxt_scantuple = slot;
		FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);

		/*
		 * If this partitioned table has no partitions or no partition for
		 * these values, error out.
		 */
		if (partdesc->nparts == 0 ||
			(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
		{
			char	   *val_desc;

			val_desc = ExecBuildSlotPartitionKeyDescription(rel,
															values, isnull, 64);
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("no partition of relation \"%s\" found for row",
							RelationGetRelationName(rel)),
					 val_desc ?
					 errdetail("Partition key of the failing row contains %s.",
							   val_desc) : 0,
					 errtable(rel)));
		}

		is_leaf = partdesc->is_leaf[partidx];
		if (is_leaf)
		{
			/*
			 * We've reached the leaf -- hurray, we're done.  Look to see if
			 * we've already got a ResultRelInfo for this partition.
			 */
			if (likely(dispatch->indexes[partidx] >= 0))
			{
				/* ResultRelInfo already built */
				Assert(dispatch->indexes[partidx] < proute->num_partitions);
				rri = proute->partitions[dispatch->indexes[partidx]];
			}
			else
			{
				/*
				 * If the partition is known in the owning ModifyTableState
				 * node, we can re-use that ResultRelInfo instead of creating
				 * a new one with ExecInitPartitionInfo().
				 */
				rri = ExecLookupResultRelByOid(mtstate,
											   partdesc->oids[partidx],
											   true, false);
				if (rri)
				{
					/* Verify this ResultRelInfo allows INSERTs */
					CheckValidResultRel(rri, CMD_INSERT, NIL);

					/*
					 * Initialize information needed to insert this and
					 * subsequent tuples routed to this partition.
					 */
					ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
										rri, partidx, true);
				}
				else
				{
					/* We need to create a new one. */
					rri = ExecInitPartitionInfo(mtstate, estate, proute,
												dispatch,
												rootResultRelInfo, partidx);
				}
			}
			Assert(rri != NULL);

			/* Signal to terminate the loop */
			dispatch = NULL;
		}
		else
		{
			/*
			 * Partition is a sub-partitioned table; get the PartitionDispatch
			 */
			if (likely(dispatch->indexes[partidx] >= 0))
			{
				/* Already built. */
				Assert(dispatch->indexes[partidx] < proute->num_dispatch);

				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];

				/*
				 * Move down to the next partition level and search again
				 * until we find a leaf partition that matches this tuple
				 */
				dispatch = pd[dispatch->indexes[partidx]];
			}
			else
			{
				/* Not yet built.  Do that now. */
				PartitionDispatch subdispatch;

				/*
				 * Create the new PartitionDispatch.  We pass the current one
				 * in as the parent PartitionDispatch
				 */
				subdispatch = ExecInitPartitionDispatchInfo(estate,
															proute,
															partdesc->oids[partidx],
															dispatch, partidx,
															mtstate->rootResultRelInfo);
				Assert(dispatch->indexes[partidx] >= 0 &&
					   dispatch->indexes[partidx] < proute->num_dispatch);

				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
				dispatch = subdispatch;
			}

			/*
			 * Convert the tuple to the new parent's layout, if different from
			 * the previous parent.
			 */
			if (dispatch->tupslot)
			{
				AttrMap    *map = dispatch->tupmap;
				TupleTableSlot *tempslot = myslot;

				myslot = dispatch->tupslot;
				slot = execute_attr_map_slot(map, slot, myslot);

				if (tempslot != NULL)
					ExecClearTuple(tempslot);
			}
		}

		/*
		 * If this partition is the default one, we must check its partition
		 * constraint now, which may have changed concurrently due to
		 * partitions being added to the parent.
		 *
		 * (We do this here, and do not rely on ExecInsert doing it, because
		 * we don't want to miss doing it for non-leaf partitions.)
		 */
		if (partidx == partdesc->boundinfo->default_index)
		{
			/*
			 * The tuple must match the partition's layout for the constraint
			 * expression to be evaluated successfully.  If the partition is
			 * sub-partitioned, that would already be the case due to the code
			 * above, but for a leaf partition the tuple still matches the
			 * parent's layout.
			 *
			 * Note that we have a map to convert from root to current
			 * partition, but not from immediate parent to current partition.
			 * So if we have to convert, do it from the root slot; if not, use
			 * the root slot as-is.
			 */
			if (is_leaf)
			{
				TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);

				if (map)
					slot = execute_attr_map_slot(map->attrMap, rootslot,
												 rri->ri_PartitionTupleSlot);
				else
					slot = rootslot;
			}

			ExecPartitionCheck(rri, slot, estate, true);
		}
	}

	/* Release the tuple in the lowest parent's dedicated slot. */
	if (myslot != NULL)
		ExecClearTuple(myslot);
	/* and restore ecxt's scantuple */
	ecxt->ecxt_scantuple = ecxt_scantuple_saved;
	MemoryContextSwitchTo(oldcxt);

	return rri;
}
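
/*
 * Illustrative trace (not part of the upstream file): routing the first
 * tuple through a two-level hierarchy root->sub->leaf proceeds roughly as:
 *
 *   1. dispatch = pd[0] (the root); get_partition_for_tuple() selects the
 *      sub-partitioned table; no downlink exists yet, so
 *      ExecInitPartitionDispatchInfo() builds its PartitionDispatch and
 *      records its array index in pd[0]->indexes[].
 *   2. dispatch becomes the sub-table's PartitionDispatch; the key is
 *      re-extracted (after converting the tuple's layout if necessary) and
 *      a leaf is selected; ExecInitPartitionInfo() builds the leaf's
 *      ResultRelInfo.
 *   3. Later tuples aimed at the same leaf take the likely() fast paths,
 *      since both indexes[] entries are now >= 0.
 */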

/*
 * ExecInitPartitionInfo
 *		Lock the partition and initialize ResultRelInfo.  Also setup other
 *		information for the partition and store it in the next empty slot in
 *		the proute->partitions array.
 *
 * Returns the ResultRelInfo
 */
static ResultRelInfo *
ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
					  PartitionTupleRouting *proute,
					  PartitionDispatch dispatch,
					  ResultRelInfo *rootResultRelInfo,
					  int partidx)
{
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	Oid			partOid = dispatch->partdesc->oids[partidx];
	Relation	partrel;
	int			firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
	Relation	firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
	ResultRelInfo *leaf_part_rri;
	MemoryContext oldcxt;
	AttrMap    *part_attmap = NULL;
	bool		found_whole_row;

	oldcxt = MemoryContextSwitchTo(proute->memcxt);

	partrel = table_open(partOid, RowExclusiveLock);

	leaf_part_rri = makeNode(ResultRelInfo);
	InitResultRelInfo(leaf_part_rri,
					  partrel,
					  0,
					  rootResultRelInfo,
					  estate->es_instrument);

	/*
	 * Verify result relation is a valid target for an INSERT.  An UPDATE of a
	 * partition-key becomes a DELETE+INSERT operation, so this check is still
	 * required when the operation is CMD_UPDATE.
	 */
	CheckValidResultRel(leaf_part_rri, CMD_INSERT, NIL);

	/*
	 * Open partition indices.  The user may have asked to check for conflicts
	 * within this leaf partition and do "nothing" instead of throwing an
	 * error.  Be prepared in that case by initializing the index information
	 * needed by ExecInsert() to perform speculative insertions.
	 */
	if (partrel->rd_rel->relhasindex &&
		leaf_part_rri->ri_IndexRelationDescs == NULL)
		ExecOpenIndices(leaf_part_rri,
						(node != NULL &&
						 node->onConflictAction != ONCONFLICT_NONE));

	/*
	 * Build WITH CHECK OPTION constraints for the partition.  Note that we
	 * didn't build the withCheckOptionList for partitions within the planner,
	 * but simple translation of varattnos will suffice.  This only occurs for
	 * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
	 * didn't find a result rel to reuse.
	 */
	if (node && node->withCheckOptionLists != NIL)
	{
		List	   *wcoList;
		List	   *wcoExprs = NIL;
		ListCell   *ll;

		/*
		 * In the case of INSERT on a partitioned table, there is only one
		 * plan.  Likewise, there is only one WCO list, not one per partition.
		 * For UPDATE/MERGE, there are as many WCO lists as there are plans.
		 */
		Assert((node->operation == CMD_INSERT &&
				list_length(node->withCheckOptionLists) == 1 &&
				list_length(node->resultRelations) == 1) ||
			   (node->operation == CMD_UPDATE &&
				list_length(node->withCheckOptionLists) ==
				list_length(node->resultRelations)) ||
			   (node->operation == CMD_MERGE &&
				list_length(node->withCheckOptionLists) ==
				list_length(node->resultRelations)));

		/*
		 * Use the WCO list of the first plan as a reference to calculate
		 * attno's for the WCO list of this partition.  In the INSERT case,
		 * that refers to the root partitioned table, whereas in the UPDATE
		 * tuple routing case, that refers to the first partition in the
		 * mtstate->resultRelInfo array.  In any case, both that relation and
		 * this partition should have the same columns, so we should be able
		 * to map attributes successfully.
		 */
		wcoList = linitial(node->withCheckOptionLists);

		/*
		 * Convert Vars in it to contain this partition's attribute numbers.
		 */
		part_attmap =
			build_attrmap_by_name(RelationGetDescr(partrel),
								  RelationGetDescr(firstResultRel),
								  false);
		wcoList = (List *)
			map_variable_attnos((Node *) wcoList,
								firstVarno, 0,
								part_attmap,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);
		/* We ignore the value of found_whole_row. */

		foreach(ll, wcoList)
		{
			WithCheckOption *wco = lfirst_node(WithCheckOption, ll);
			ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
											   &mtstate->ps);

			wcoExprs = lappend(wcoExprs, wcoExpr);
		}

		leaf_part_rri->ri_WithCheckOptions = wcoList;
		leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
	}

	/*
	 * Build the RETURNING projection for the partition.  Note that we didn't
	 * build the returningList for partitions within the planner, but simple
	 * translation of varattnos will suffice.  This only occurs for the INSERT
	 * case or in the case of UPDATE/MERGE tuple routing where we didn't find
	 * a result rel to reuse.
	 */
	if (node && node->returningLists != NIL)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;
		List	   *returningList;

		/* See the comment above for WCO lists. */
		Assert((node->operation == CMD_INSERT &&
				list_length(node->returningLists) == 1 &&
				list_length(node->resultRelations) == 1) ||
			   (node->operation == CMD_UPDATE &&
				list_length(node->returningLists) ==
				list_length(node->resultRelations)) ||
			   (node->operation == CMD_MERGE &&
				list_length(node->returningLists) ==
				list_length(node->resultRelations)));

		/*
		 * Use the RETURNING list of the first plan as a reference to
		 * calculate attno's for the RETURNING list of this partition.  See
		 * the comment above for WCO lists for more details on why this is
		 * okay.
		 */
		returningList = linitial(node->returningLists);

		/*
		 * Convert Vars in it to contain this partition's attribute numbers.
		 */
		if (part_attmap == NULL)
			part_attmap =
				build_attrmap_by_name(RelationGetDescr(partrel),
									  RelationGetDescr(firstResultRel),
									  false);
		returningList = (List *)
			map_variable_attnos((Node *) returningList,
								firstVarno, 0,
								part_attmap,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);
		/* We ignore the value of found_whole_row. */

		leaf_part_rri->ri_returningList = returningList;

		/*
		 * Initialize the projection itself.
		 *
		 * Use the slot and the expression context that would have been set up
		 * in ExecInitModifyTable() for projection's output.
		 */
		Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
		slot = mtstate->ps.ps_ResultTupleSlot;
		Assert(mtstate->ps.ps_ExprContext != NULL);
		econtext = mtstate->ps.ps_ExprContext;
		leaf_part_rri->ri_projectReturning =
			ExecBuildProjectionInfo(returningList, econtext, slot,
									&mtstate->ps, RelationGetDescr(partrel));
	}

	/* Set up information needed for routing tuples to the partition. */
	ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
						leaf_part_rri, partidx, false);

	/*
	 * If there is an ON CONFLICT clause, initialize state for it.
	 */
	if (node && node->onConflictAction != ONCONFLICT_NONE)
	{
		TupleDesc	partrelDesc = RelationGetDescr(partrel);
		ExprContext *econtext = mtstate->ps.ps_ExprContext;
		ListCell   *lc;
		List	   *arbiterIndexes = NIL;

		/*
		 * If there is a list of arbiter indexes, map it to a list of indexes
		 * in the partition.  We do that by scanning the partition's index
		 * list and searching for ancestry relationships to each index in the
		 * ancestor table.
		 */
		if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
		{
			List	   *childIdxs;

			childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);

			foreach(lc, childIdxs)
			{
				Oid			childIdx = lfirst_oid(lc);
				List	   *ancestors;
				ListCell   *lc2;

				ancestors = get_partition_ancestors(childIdx);
				foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
				{
					if (list_member_oid(ancestors, lfirst_oid(lc2)))
						arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
				}
				list_free(ancestors);
			}
		}

		/*
		 * If the resulting lists are of unequal length, something is wrong.
		 * (This shouldn't happen, since arbiter index selection should not
		 * pick up an invalid index.)
		 */
		if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
			list_length(arbiterIndexes))
			elog(ERROR, "invalid arbiter index list");
		leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;

		/*
		 * In the DO UPDATE case, we have some more state to initialize.
		 */
		if (node->onConflictAction == ONCONFLICT_UPDATE)
		{
			OnConflictSetState *onconfl = makeNode(OnConflictSetState);
			TupleConversionMap *map;

			map = ExecGetRootToChildMap(leaf_part_rri, estate);

			Assert(node->onConflictSet != NIL);
			Assert(rootResultRelInfo->ri_onConflict != NULL);

			leaf_part_rri->ri_onConflict = onconfl;

			/*
			 * Need a separate existing slot for each partition, as the
			 * partition could be of a different AM, even if the tuple
			 * descriptors match.
			 */
			onconfl->oc_Existing =
				table_slot_create(leaf_part_rri->ri_RelationDesc,
								  &mtstate->ps.state->es_tupleTable);

			/*
			 * If the partition's tuple descriptor matches exactly the root
			 * parent (the common case), we can re-use most of the parent's ON
			 * CONFLICT SET state, skipping a bunch of work.  Otherwise, we
			 * need to create state specific to this partition.
			 */
			if (map == NULL)
			{
				/*
				 * It's safe to reuse these from the partition root, as we
				 * only process one tuple at a time (therefore we won't
				 * overwrite needed data in slots), and the results of
				 * projections are independent of the underlying storage.
				 * Projections and where clauses themselves don't store state
				 * / are independent of the underlying storage.
				 */
				onconfl->oc_ProjSlot =
					rootResultRelInfo->ri_onConflict->oc_ProjSlot;
				onconfl->oc_ProjInfo =
					rootResultRelInfo->ri_onConflict->oc_ProjInfo;
				onconfl->oc_WhereClause =
					rootResultRelInfo->ri_onConflict->oc_WhereClause;
			}
			else
			{
				List	   *onconflset;
				List	   *onconflcols;

				/*
				 * Translate expressions in onConflictSet to account for
				 * different attribute numbers.  For that, map partition
				 * varattnos twice: first to catch the EXCLUDED
				 * pseudo-relation (INNER_VAR), and second to handle the main
				 * target relation (firstVarno).
				 */
				onconflset = copyObject(node->onConflictSet);
				if (part_attmap == NULL)
					part_attmap =
						build_attrmap_by_name(RelationGetDescr(partrel),
											  RelationGetDescr(firstResultRel),
											  false);
				onconflset = (List *)
					map_variable_attnos((Node *) onconflset,
										INNER_VAR, 0,
										part_attmap,
										RelationGetForm(partrel)->reltype,
										&found_whole_row);
				/* We ignore the value of found_whole_row. */
				onconflset = (List *)
					map_variable_attnos((Node *) onconflset,
										firstVarno, 0,
										part_attmap,
										RelationGetForm(partrel)->reltype,
										&found_whole_row);
				/* We ignore the value of found_whole_row. */

				/* Finally, adjust the target colnos to match the partition. */
				onconflcols = adjust_partition_colnos(node->onConflictCols,
													  leaf_part_rri);

				/* create the tuple slot for the UPDATE SET projection */
				onconfl->oc_ProjSlot =
					table_slot_create(partrel,
									  &mtstate->ps.state->es_tupleTable);

				/* build UPDATE SET projection state */
				onconfl->oc_ProjInfo =
					ExecBuildUpdateProjection(onconflset,
											  true,
											  onconflcols,
											  partrelDesc,
											  econtext,
											  onconfl->oc_ProjSlot,
											  &mtstate->ps);

				/*
				 * If there is a WHERE clause, initialize state where it will
				 * be evaluated, mapping the attribute numbers appropriately.
				 * As with onConflictSet, we need to map partition varattnos
				 * to the partition's tupdesc.
				 */
				if (node->onConflictWhere)
				{
					List	   *clause;

					clause = copyObject((List *) node->onConflictWhere);
					clause = (List *)
						map_variable_attnos((Node *) clause,
											INNER_VAR, 0,
											part_attmap,
											RelationGetForm(partrel)->reltype,
											&found_whole_row);
					/* We ignore the value of found_whole_row. */
					clause = (List *)
						map_variable_attnos((Node *) clause,
											firstVarno, 0,
											part_attmap,
											RelationGetForm(partrel)->reltype,
											&found_whole_row);
					/* We ignore the value of found_whole_row. */
					onconfl->oc_WhereClause =
						ExecInitQual((List *) clause, &mtstate->ps);
				}
			}
		}
	}

	/*
	 * Since we've just initialized this ResultRelInfo, it's not in any list
	 * attached to the estate as yet.  Add it, so that it can be found later.
	 *
	 * Note that the entries in this list appear in no predetermined order,
	 * because partition result rels are initialized as and when they're
	 * needed.
	 */
	estate->es_tuple_routing_result_relations =
		lappend(estate->es_tuple_routing_result_relations,
				leaf_part_rri);

	/*
	 * Initialize information about this partition that's needed to handle
	 * MERGE.  We take the "first" result relation's mergeActionList as
	 * reference and make a copy for this relation, converting anything that
	 * references attribute numbers to match this relation's.
	 *
	 * This duplicates much of the logic in ExecInitMerge(), so if something
	 * changes there, look here too.
	 */
	if (node && node->operation == CMD_MERGE)
	{
		List	   *firstMergeActionList = linitial(node->mergeActionLists);
		ListCell   *lc;
		ExprContext *econtext = mtstate->ps.ps_ExprContext;
		Node	   *joinCondition;

		if (part_attmap == NULL)
			part_attmap =
				build_attrmap_by_name(RelationGetDescr(partrel),
									  RelationGetDescr(firstResultRel),
									  false);

		if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
			ExecInitMergeTupleSlots(mtstate, leaf_part_rri);

		/* Initialize state for join condition checking. */
		joinCondition =
			map_variable_attnos(linitial(node->mergeJoinConditions),
								firstVarno, 0,
								part_attmap,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);
		/* We ignore the value of found_whole_row. */
		leaf_part_rri->ri_MergeJoinCondition =
			ExecInitQual((List *) joinCondition, &mtstate->ps);

		foreach(lc, firstMergeActionList)
		{
			/* Make a copy for this relation to be safe.  */
			MergeAction *action = copyObject(lfirst(lc));
			MergeActionState *action_state;

			/* Generate the action's state for this relation */
			action_state = makeNode(MergeActionState);
			action_state->mas_action = action;

			/* And put the action in the appropriate list */
			leaf_part_rri->ri_MergeActions[action->matchKind] =
				lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
						action_state);

			switch (action->commandType)
			{
				case CMD_INSERT:

					/*
					 * ExecCheckPlanOutput() was already done on the
					 * targetlist when the "first" result relation was
					 * initialized, and it is the same for all result
					 * relations.
					 */
					action_state->mas_proj =
						ExecBuildProjectionInfo(action->targetList, econtext,
												leaf_part_rri->ri_newTupleSlot,
												&mtstate->ps,
												RelationGetDescr(partrel));
					break;
				case CMD_UPDATE:

					/*
					 * Convert updateColnos from "first" result relation
					 * attribute numbers to this result rel's.
					 */
					if (part_attmap)
						action->updateColnos =
							adjust_partition_colnos_using_map(action->updateColnos,
															  part_attmap);
					action_state->mas_proj =
						ExecBuildUpdateProjection(action->targetList,
												  true,
												  action->updateColnos,
												  RelationGetDescr(leaf_part_rri->ri_RelationDesc),
												  econtext,
												  leaf_part_rri->ri_newTupleSlot,
												  NULL);
					break;
				case CMD_DELETE:
				case CMD_NOTHING:
					/* Nothing to do */
					break;

				default:
					elog(ERROR, "unknown action in MERGE WHEN clause");
			}

			/* found_whole_row intentionally ignored. */
			action->qual =
				map_variable_attnos(action->qual,
									firstVarno, 0,
									part_attmap,
									RelationGetForm(partrel)->reltype,
									&found_whole_row);
			action_state->mas_whenqual =
				ExecInitQual((List *) action->qual, &mtstate->ps);
		}
	}
	MemoryContextSwitchTo(oldcxt);

	return leaf_part_rri;
}

/*
 * ExecInitRoutingInfo
 *		Set up information needed for translating tuples between root
 *		partitioned table format and partition format, and keep track of it
 *		in PartitionTupleRouting.
 */
static void
ExecInitRoutingInfo(ModifyTableState *mtstate,
					EState *estate,
					PartitionTupleRouting *proute,
					PartitionDispatch dispatch,
					ResultRelInfo *partRelInfo,
					int partidx,
					bool is_borrowed_rel)
{
	MemoryContext oldcxt;
	int			rri_index;

	oldcxt = MemoryContextSwitchTo(proute->memcxt);

	/*
	 * Set up tuple conversion between root parent and the partition if the
	 * two have different rowtypes.  If conversion is indeed required, also
	 * initialize a slot dedicated to storing this partition's converted
	 * tuples.  Various operations that are applied to tuples after routing,
	 * such as checking constraints, will refer to this slot.
	 */
	if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
	{
		Relation	partrel = partRelInfo->ri_RelationDesc;

		/*
		 * This pins the partition's TupleDesc, which will be released at the
		 * end of the command.
		 */
		partRelInfo->ri_PartitionTupleSlot =
			table_slot_create(partrel, &estate->es_tupleTable);
	}
	else
		partRelInfo->ri_PartitionTupleSlot = NULL;

	/*
	 * If the partition is a foreign table, let the FDW init itself for
	 * routing tuples to the partition.
	 */
	if (partRelInfo->ri_FdwRoutine != NULL &&
		partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
		partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);

	/*
	 * Determine if the FDW supports batch insert and determine the batch size
	 * (a FDW may support batching, but it may be disabled for the
	 * server/table or for this particular query).
	 *
	 * If the FDW does not support batching, we set the batch size to 1.
	 */
	if (partRelInfo->ri_FdwRoutine != NULL &&
		partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
		partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
		partRelInfo->ri_BatchSize =
			partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
	else
		partRelInfo->ri_BatchSize = 1;

	Assert(partRelInfo->ri_BatchSize >= 1);

	partRelInfo->ri_CopyMultiInsertBuffer = NULL;

	/*
	 * Keep track of it in the PartitionTupleRouting->partitions array.
	 */
	Assert(dispatch->indexes[partidx] == -1);

	rri_index = proute->num_partitions++;

	/* Allocate or enlarge the array, as needed */
	if (proute->num_partitions >= proute->max_partitions)
	{
		if (proute->max_partitions == 0)
		{
			proute->max_partitions = 8;
			proute->partitions = (ResultRelInfo **)
				palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
			proute->is_borrowed_rel = (bool *)
				palloc(sizeof(bool) * proute->max_partitions);
		}
		else
		{
			proute->max_partitions *= 2;
			proute->partitions = (ResultRelInfo **)
				repalloc(proute->partitions, sizeof(ResultRelInfo *) *
						 proute->max_partitions);
			proute->is_borrowed_rel = (bool *)
				repalloc(proute->is_borrowed_rel, sizeof(bool) *
						 proute->max_partitions);
		}
	}

	proute->partitions[rri_index] = partRelInfo;
	proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
	dispatch->indexes[partidx] = rri_index;

	MemoryContextSwitchTo(oldcxt);
}

/*
 * ExecInitPartitionDispatchInfo
 *		Lock the partitioned table (if not locked already) and initialize
 *		PartitionDispatch for a partitioned table and store it in the next
 *		available slot in the proute->partition_dispatch_info array.  Also,
 *		record the index into this array in the parent_pd->indexes[] array in
 *		the partidx element so that we can properly retrieve the newly created
 *		PartitionDispatch later.
 */
static PartitionDispatch
ExecInitPartitionDispatchInfo(EState *estate,
							  PartitionTupleRouting *proute, Oid partoid,
							  PartitionDispatch parent_pd, int partidx,
							  ResultRelInfo *rootResultRelInfo)
{
	Relation	rel;
	PartitionDesc partdesc;
	PartitionDispatch pd;
	int			dispatchidx;
	MemoryContext oldcxt;

	/*
	 * For data modification, it is better that the executor does not include
	 * partitions being detached, except when running in snapshot-isolation
	 * mode.  This means that a read-committed transaction immediately gets a
	 * "no partition for tuple" error when a tuple is inserted into a
	 * partition that's being detached concurrently, but a transaction in
	 * repeatable-read mode can still use such a partition.
	 */
	if (estate->es_partition_directory == NULL)
		estate->es_partition_directory =
			CreatePartitionDirectory(estate->es_query_cxt,
									 !IsolationUsesXactSnapshot());

	oldcxt = MemoryContextSwitchTo(proute->memcxt);

	/*
	 * Only sub-partitioned tables need to be locked here.  The root
	 * partitioned table will already have been locked as it's referenced in
	 * the query's rtable.
	 */
	if (partoid != RelationGetRelid(proute->partition_root))
		rel = table_open(partoid, RowExclusiveLock);
	else
		rel = proute->partition_root;
	partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);

	pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
									partdesc->nparts * sizeof(int));
	pd->reldesc = rel;
	pd->key = RelationGetPartitionKey(rel);
	pd->keystate = NIL;
	pd->partdesc = partdesc;
	if (parent_pd != NULL)
	{
		TupleDesc	tupdesc = RelationGetDescr(rel);

		/*
		 * For sub-partitioned tables where the column order differs from its
		 * direct parent partitioned table, we must store a tuple table slot
		 * initialized with its tuple descriptor and a tuple conversion map to
		 * convert a tuple from its parent's rowtype to its own.  This is to
		 * make sure that we are looking at the correct row using the correct
		 * tuple descriptor when computing its partition key for tuple
		 * routing.
		 */
		pd->tupmap = build_attrmap_by_name_if_req(RelationGetDescr(parent_pd->reldesc),
												  tupdesc,
												  false);
		pd->tupslot = pd->tupmap ?
			MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
	}
	else
	{
		/* Not required for the root partitioned table */
		pd->tupmap = NULL;
		pd->tupslot = NULL;
	}

	/*
	 * Initialize with -1 to signify that the corresponding partition's
	 * ResultRelInfo or PartitionDispatch has not been created yet.
	 */
	memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);

	/* Track in PartitionTupleRouting for later use */
	dispatchidx = proute->num_dispatch++;

	/* Allocate or enlarge the array, as needed */
	if (proute->num_dispatch >= proute->max_dispatch)
	{
		if (proute->max_dispatch == 0)
		{
			proute->max_dispatch = 4;
			proute->partition_dispatch_info = (PartitionDispatch *)
				palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
			proute->nonleaf_partitions = (ResultRelInfo **)
				palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
		}
		else
		{
			proute->max_dispatch *= 2;
			proute->partition_dispatch_info = (PartitionDispatch *)
				repalloc(proute->partition_dispatch_info,
						 sizeof(PartitionDispatch) * proute->max_dispatch);
			proute->nonleaf_partitions = (ResultRelInfo **)
				repalloc(proute->nonleaf_partitions,
						 sizeof(ResultRelInfo *) * proute->max_dispatch);
		}
	}
	proute->partition_dispatch_info[dispatchidx] = pd;

	/*
	 * If setting up a PartitionDispatch for a sub-partitioned table, we may
	 * also need a minimally valid ResultRelInfo for checking the partition
	 * constraint later; set that up now.
	 */
	if (parent_pd)
	{
		ResultRelInfo *rri = makeNode(ResultRelInfo);

		InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
		proute->nonleaf_partitions[dispatchidx] = rri;
	}
	else
		proute->nonleaf_partitions[dispatchidx] = NULL;

	/*
	 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
	 * install a downlink in the parent to allow quick descent.
	 */
	if (parent_pd)
	{
		Assert(parent_pd->indexes[partidx] == -1);
		parent_pd->indexes[partidx] = dispatchidx;
	}

	MemoryContextSwitchTo(oldcxt);

	return pd;
}

/*
 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
 * routing.
 *
 * Close all the partitioned tables, leaf partitions, and their indices.
 */
void
ExecCleanupTupleRouting(ModifyTableState *mtstate,
						PartitionTupleRouting *proute)
{
	int			i;

	/*
	 * Remember, proute->partition_dispatch_info[0] corresponds to the root
	 * partitioned table, which we must not try to close, because it is the
	 * main target table of the query that will be closed by callers such as
	 * ExecEndPlan() or DoCopy().  Also, tupslot is NULL for the root
	 * partitioned table.
	 */
	for (i = 1; i < proute->num_dispatch; i++)
	{
		PartitionDispatch pd = proute->partition_dispatch_info[i];

		table_close(pd->reldesc, NoLock);

		if (pd->tupslot)
			ExecDropSingleTupleTableSlot(pd->tupslot);
	}

	for (i = 0; i < proute->num_partitions; i++)
	{
		ResultRelInfo *resultRelInfo = proute->partitions[i];

		/* Allow any FDWs to shut down */
		if (resultRelInfo->ri_FdwRoutine != NULL &&
			resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
			resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
														   resultRelInfo);

		/*
		 * Close it if it's not one of the result relations borrowed from the
		 * owning ModifyTableState; those will be closed by ExecEndPlan().
		 */
		if (proute->is_borrowed_rel[i])
			continue;

		ExecCloseIndices(resultRelInfo);
		table_close(resultRelInfo->ri_RelationDesc, NoLock);
	}
}

/* ----------------
 *		FormPartitionKeyDatum
 *			Construct values[] and isnull[] arrays for the partition key
 *			of a tuple.
 *
 *	pd				Partition dispatch object of the partitioned table
 *	slot			Heap tuple from which to extract partition key
 *	estate			executor state for evaluating any partition key
 *					expressions (must be non-NULL)
 *	values			Array of partition key Datums (output area)
 *	isnull			Array of is-null indicators (output area)
 *
 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
 * the heap tuple passed in.
 * ----------------
 */
static void
FormPartitionKeyDatum(PartitionDispatch pd,
					  TupleTableSlot *slot,
					  EState *estate,
					  Datum *values,
					  bool *isnull)
{
	ListCell   *partexpr_item;
	int			i;

	if (pd->key->partexprs != NIL && pd->keystate == NIL)
	{
		/* Check caller has set up context correctly */
		Assert(estate != NULL &&
			   GetPerTupleExprContext(estate)->ecxt_scantuple == slot);

		/* First time through, set up expression evaluation state */
		pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
	}

	partexpr_item = list_head(pd->keystate);
	for (i = 0; i < pd->key->partnatts; i++)
	{
		AttrNumber	keycol = pd->key->partattrs[i];
		Datum		datum;
		bool		isNull;

		if (keycol != 0)
		{
			/* Plain column; get the value directly from the heap tuple */
			datum = slot_getattr(slot, keycol, &isNull);
		}
		else
		{
			/* Expression; need to evaluate it */
			if (partexpr_item == NULL)
				elog(ERROR, "wrong number of partition key expressions");
			datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
											  GetPerTupleExprContext(estate),
											  &isNull);
			partexpr_item = lnext(pd->keystate, partexpr_item);
		}
		values[i] = datum;
		isnull[i] = isNull;
	}

	if (partexpr_item != NULL)
		elog(ERROR, "wrong number of partition key expressions");
}
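
/*
 * Illustrative example (not part of the upstream file): for a table declared
 * with, say, PARTITION BY RANGE (id, (lower(name))), the loop above fills
 * values[0]/isnull[0] straight from the 'id' column via slot_getattr(),
 * while values[1]/isnull[1] come from evaluating the lower(name) expression
 * state, since its partattrs[] entry is 0.
 */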

/*
 * The number of times the same partition must be found in a row before we
 * switch from a binary search for the given values to just checking if the
 * values belong to the last found partition.  This must be above 0.
 */
#define PARTITION_CACHED_FIND_THRESHOLD			16

/*
 * get_partition_for_tuple
 *		Finds partition of relation which accepts the partition key specified
 *		in values and isnull.
 *
 * Calling this function can be quite expensive when LIST and RANGE
 * partitioned tables have many partitions.  This is due to the binary search
 * that's done to find the correct partition.  Many of the use cases for LIST
 * and RANGE partitioned tables make it likely that the same partition is
 * found in subsequent ExecFindPartition() calls.  This is especially true for
 * cases such as RANGE partitioned tables on a TIMESTAMP column where the
 * partition key is the current time.  When asked to find a partition for a
 * RANGE or LIST partitioned table, we record the partition index and datum
 * offset we've found for the given 'values' in the PartitionDesc (which is
 * stored in relcache), and if we keep finding the same partition
 * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
 * logic and instead of performing a binary search to find the correct
 * partition, we'll just double-check that 'values' still belong to the last
 * found partition, and if so, we'll return that partition index, thus
 * skipping the need for the binary search.  If we fail to match the last
 * partition when double checking, then we fall back on doing a binary search.
 * In this case, unless we find 'values' belong to the DEFAULT partition,
 * we'll reset the number of times we've hit the same partition so that we
 * don't attempt to use the cache again until we've found that partition at
 * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
 *
 * For cases where the partition changes on each lookup, the amount of
 * additional work required just amounts to recording the last found partition
 * and bound offset then resetting the found counter.  This is cheap and does
 * not appear to cause any meaningful slowdowns for such cases.
 *
 * No caching of partitions is done when the last found partition is the
 * DEFAULT or NULL partition.  For the case of the DEFAULT partition, there
 * is no bound offset storing the matching datum, so we cannot confirm the
 * indexes match.  For the NULL partition, this is just so cheap, there's no
 * sense in caching.
 *
 * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
 * found or -1 if none found.
 */
static int
get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
{
	int			bound_offset = -1;
	int			part_index = -1;
	PartitionKey key = pd->key;
	PartitionDesc partdesc = pd->partdesc;
	PartitionBoundInfo boundinfo = partdesc->boundinfo;

	/*
	 * In the switch statement below, when we perform a cached lookup for
	 * RANGE and LIST partitioned tables, if we find that the last found
	 * partition matches the 'values', we return the partition index right
	 * away.  We do this instead of breaking out of the switch as we don't
	 * want to execute the code about the DEFAULT partition or do any updates
	 * for any of the cache-related fields.  That would be a waste of effort
	 * as we already know it's not the DEFAULT partition and have no need to
	 * increment the number of times we found the same partition any higher
	 * than PARTITION_CACHED_FIND_THRESHOLD.
	 */

	/* Route as appropriate based on partitioning strategy. */
	switch (key->strategy)
	{
		case PARTITION_STRATEGY_HASH:
			{
				uint64		rowHash;

				/* hash partitioning is too cheap to bother caching */
				rowHash = compute_partition_hash_value(key->partnatts,
													   key->partsupfunc,
													   key->partcollation,
													   values, isnull);

				/*
				 * HASH partitions can't have a DEFAULT partition and we don't
				 * do any caching work for them, so just return the part index
				 */
				return boundinfo->indexes[rowHash % boundinfo->nindexes];
			}

		case PARTITION_STRATEGY_LIST:
			if (isnull[0])
			{
				/* this is far too cheap to bother doing any caching */
				if (partition_bound_accepts_nulls(boundinfo))
				{
					/*
					 * When there is a NULL partition we just return that
					 * directly.  We don't have a bound_offset so it's not
					 * valid to drop into the code after the switch which
					 * checks and updates the cache fields.  We perhaps should
					 * be invalidating the details of the last cached
					 * partition but there's no real need to.  Keeping those
					 * fields set gives a chance at matching to the cached
					 * partition on the next lookup.
					 */
					return boundinfo->null_index;
				}
			}
			else
			{
				bool		equal;

				if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
				{
					int			last_datum_offset = partdesc->last_found_datum_index;
					Datum		lastDatum = boundinfo->datums[last_datum_offset][0];
					int32		cmpval;

					/* does the last found datum index match this datum? */
					cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
															 key->partcollation[0],
															 lastDatum,
															 values[0]));

					if (cmpval == 0)
						return boundinfo->indexes[last_datum_offset];

					/* fall-through and do a manual lookup */
				}

				bound_offset = partition_list_bsearch(key->partsupfunc,
													  key->partcollation,
													  boundinfo,
													  values[0], &equal);
				if (bound_offset >= 0 && equal)
					part_index = boundinfo->indexes[bound_offset];
			}
			break;

		case PARTITION_STRATEGY_RANGE:
			{
				bool		equal = false,
							range_partkey_has_null = false;
				int			i;

				/*
				 * No range includes NULL, so this will be accepted by the
				 * default partition if there is one, and otherwise rejected.
				 */
				for (i = 0; i < key->partnatts; i++)
				{
					if (isnull[i])
					{
						range_partkey_has_null = true;
						break;
					}
				}

				/* NULLs belong in the DEFAULT partition */
				if (range_partkey_has_null)
					break;

				if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
				{
					int			last_datum_offset = partdesc->last_found_datum_index;
					Datum	   *lastDatums = boundinfo->datums[last_datum_offset];
					PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
					int32		cmpval;

					/* check if the value is >= to the lower bound */
					cmpval = partition_rbound_datum_cmp(key->partsupfunc,
														key->partcollation,
														lastDatums,
														kind,
														values,
														key->partnatts);

					/*
					 * If it's equal to the lower bound then no need to check
					 * the upper bound.
					 */
					if (cmpval == 0)
						return boundinfo->indexes[last_datum_offset + 1];

					if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
					{
						/* check if the value is below the upper bound */
						lastDatums = boundinfo->datums[last_datum_offset + 1];
						kind = boundinfo->kind[last_datum_offset + 1];
						cmpval = partition_rbound_datum_cmp(key->partsupfunc,
															key->partcollation,
															lastDatums,
															kind,
															values,
															key->partnatts);

						if (cmpval > 0)
							return boundinfo->indexes[last_datum_offset + 1];
					}
					/* fall-through and do a manual lookup */
				}

				bound_offset = partition_range_datum_bsearch(key->partsupfunc,
															 key->partcollation,
															 boundinfo,
															 key->partnatts,
															 values,
															 &equal);

				/*
				 * The bound at bound_offset is less than or equal to the
				 * tuple value, so the bound at offset+1 is the upper bound of
				 * the partition we're looking for, if there actually exists
				 * one.
				 */
				part_index = boundinfo->indexes[bound_offset + 1];
			}
			break;

		default:
			elog(ERROR, "unexpected partition strategy: %d",
				 (int) key->strategy);
	}

	/*
	 * part_index < 0 means we failed to find a partition of this parent.  Use
	 * the default partition, if there is one.
	 */
	if (part_index < 0)
	{
		/*
		 * No need to reset the cache fields here.  The next set of values
		 * might end up belonging to the cached partition, so leaving the
		 * cache alone improves the chances of a cache hit on the next lookup.
		 */
		return boundinfo->default_index;
	}

	/* we should only make it here when the code above set bound_offset */
	Assert(bound_offset >= 0);

	/*
	 * Attend to the cache fields.  If the bound_offset matches the last
	 * cached bound offset then we've found the same partition as last time,
	 * so bump the count by one.  If all goes well, we'll eventually reach
	 * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
	 * around.  Otherwise, we'll reset the cache count back to 1 to mark that
	 * we've found this partition for the first time.
	 */
	if (bound_offset == partdesc->last_found_datum_index)
		partdesc->last_found_count++;
	else
	{
		partdesc->last_found_count = 1;
		partdesc->last_found_part_index = part_index;
		partdesc->last_found_datum_index = bound_offset;
	}

	return part_index;
}
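
/*
 * Illustrative example (not part of the upstream file): with four HASH
 * partitions of modulus 4, the return statement in the HASH case above
 * reduces to modular arithmetic; a row whose combined partition-key hash
 * came out as 11 would be routed to boundinfo->indexes[11 % 4], i.e. the
 * partition holding remainder 3.  No binary search or lookup caching is
 * involved.
 */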

/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find
 * a partition for a row.
 */
static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,
									 Datum *values,
									 bool *isnull,
									 int maxfieldlen)
{
	StringInfoData buf;
	PartitionKey key = RelationGetPartitionKey(rel);
	int			partnatts = get_partition_natts(key);
	int			i;
	Oid			relid = RelationGetRelid(rel);
	AclResult	aclresult;

	if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
		return NULL;

	/* If the user has table-level access, just go build the description. */
	aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
	{
		/*
		 * Step through the columns of the partition key and make sure the
		 * user has SELECT rights on all of them.
		 */
		for (i = 0; i < partnatts; i++)
		{
			AttrNumber	attnum = get_partition_col_attnum(key, i);

			/*
			 * If this partition key column is an expression, we return no
			 * detail rather than try to figure out what column(s) the
			 * expression includes and if the user has SELECT rights on them.
			 */
			if (attnum == InvalidAttrNumber ||
				pg_attribute_aclcheck(relid, attnum, GetUserId(),
									  ACL_SELECT) != ACLCHECK_OK)
				return NULL;
		}
	}

	initStringInfo(&buf);
	appendStringInfo(&buf, "(%s) = (",
					 pg_get_partkeydef_columns(relid, true));

	for (i = 0; i < partnatts; i++)
	{
		char	   *val;
		int			vallen;

		if (isnull[i])
			val = "null";
		else
		{
			Oid			foutoid;
			bool		typisvarlena;

			getTypeOutputInfo(key->parttypid[i],
							  &foutoid, &typisvarlena);
			val = OidOutputFunctionCall(foutoid, values[i]);
		}

		if (i > 0)
			appendStringInfoString(&buf, ", ");

		/* truncate if needed */
		vallen = strlen(val);
		if (vallen <= maxfieldlen)
			appendBinaryStringInfo(&buf, val, vallen);
		else
		{
			vallen = pg_mbcliplen(val, vallen, maxfieldlen);
			appendBinaryStringInfo(&buf, val, vallen);
			appendStringInfoString(&buf, "...");
		}
	}

	appendStringInfoChar(&buf, ')');

	return buf.data;
}

/*
 * adjust_partition_colnos
 *		Adjust the list of UPDATE target column numbers to account for
 *		attribute differences between the parent and the partition.
 *
 * Note: mustn't be called if no adjustment is required.
 */
static List *
adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
{
	TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);

	Assert(map != NULL);

	return adjust_partition_colnos_using_map(colnos, map->attrMap);
}

/*
 * adjust_partition_colnos_using_map
 *		Like adjust_partition_colnos, but uses a caller-supplied map instead
 *		of assuming that the map is from the "root" result relation.
 *
 * Note: mustn't be called if no adjustment is required.
 */
static List *
adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
{
	List	   *new_colnos = NIL;
	ListCell   *lc;

	Assert(attrMap != NULL);	/* else we shouldn't be here */

	foreach(lc, colnos)
	{
		AttrNumber	parentattrno = lfirst_int(lc);

		if (parentattrno <= 0 ||
			parentattrno > attrMap->maplen ||
			attrMap->attnums[parentattrno - 1] == 0)
			elog(ERROR, "unexpected attno %d in target column list",
				 parentattrno);
		new_colnos = lappend_int(new_colnos,
								 attrMap->attnums[parentattrno - 1]);
	}

	return new_colnos;
}
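
/*
 * Illustrative example (not part of the upstream file): if a partition was
 * created with its columns in a different order than the parent, an attrMap
 * with maplen = 3 and attnums = {2, 1, 3} translates a parent UPDATE target
 * column list of (1, 3) into partition columns (2, 3) via the
 * attnums[parentattrno - 1] lookups above.
 */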

/*-------------------------------------------------------------------------
 * Run-Time Partition Pruning Support.
 *
 * The following series of functions exist to support the removal of unneeded
 * subplans for queries against partitioned tables.  The supporting functions
 * here are designed to work with any plan type which supports an arbitrary
 * number of subplans, e.g. Append, MergeAppend.
 *
 * When pruning involves comparison of a partition key to a constant, it's
 * done by the planner.  However, if we have a comparison to a non-constant
 * but not volatile expression, that presents an opportunity for run-time
 * pruning by the executor, allowing irrelevant partitions to be skipped
 * dynamically.
 *
 * We must distinguish expressions containing PARAM_EXEC Params from
 * expressions that don't contain those.  Even though a PARAM_EXEC Param is
 * considered to be a stable expression, it can change value from one plan
 * node scan to the next during query execution.  Stable comparison
 * expressions that don't involve such Params allow partition pruning to be
 * done once during executor startup.  Expressions that do involve such Params
 * require us to prune separately for each scan of the parent plan node.
 *
 * Note that pruning away unneeded subplans during executor startup has the
 * added benefit of not having to initialize the unneeded subplans at all.
 *
 *
 * Functions:
 *
 * ExecDoInitialPruning:
 *		Perform runtime "initial" pruning, if necessary, to determine the set
 *		of child subnodes that need to be initialized during ExecInitNode() for
 *		all plan nodes that contain a PartitionPruneInfo.  This also locks the
 *		leaf partitions whose subnodes will be initialized if needed.
 *
 * ExecInitPartitionExecPruning:
 *		Updates the PartitionPruneState found at given part_prune_index in
 *		EState.es_part_prune_states for use during "exec" pruning if required.
 *		Also returns the set of subplans to initialize that would be stored at
 *		part_prune_index in EState.es_part_prune_results by
 *		ExecDoInitialPruning().  Maps in PartitionPruneState are updated to
 *		account for initial pruning possibly having eliminated some of the
 *		subplans.
 *
 * ExecFindMatchingSubPlans:
 *		Returns indexes of matching subplans after evaluating the expressions
 *		that are safe to evaluate at a given point.  This function is first
 *		called during ExecDoInitialPruning() to find the initially matching
 *		subplans based on performing the initial pruning steps and then must be
 *		called again each time the value of a Param listed in
 *		PartitionPruneState's 'execparamids' changes.
 *-------------------------------------------------------------------------
 */
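
/*
 * Illustrative sketch (not part of the upstream file): a parent node such as
 * Append re-prunes with the "exec" steps whenever a relevant PARAM_EXEC
 * Param may have changed, roughly like:
 *
 *		if (prunestate->do_exec_prune &&
 *			bms_overlap(node->ps.chgParam, prunestate->execparamids))
 *			validsubplans = ExecFindMatchingSubPlans(prunestate, false, NULL);
 *
 * whereas the initial-pruning pass happens only once, before the subplans
 * are initialized, via ExecDoInitialPruning().
 */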
1795
1796
1797/*
1798 * ExecDoInitialPruning
1799 * Perform runtime "initial" pruning, if necessary, to determine the set
1800 * of child subnodes that need to be initialized during ExecInitNode() for
1801 * plan nodes that support partition pruning. This also locks the leaf
1802 * partitions whose subnodes will be initialized if needed.
1803 *
1804 * This function iterates over each PartitionPruneInfo entry in
1805 * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
1806 * and adds it to es_part_prune_states. ExecInitPartitionExecPruning() accesses
1807 * these states through their corresponding indexes in es_part_prune_states and
1808 * assign each state to the parent node's PlanState, from where it will be used
1809 * for "exec" pruning.
1810 *
1811 * If initial pruning steps exist for a PartitionPruneInfo entry, this function
1812 * executes those pruning steps and stores the result as a bitmapset of valid
1813 * child subplans, identifying which subplans should be initialized for
1814 * execution. The results are saved in estate->es_part_prune_results.
1815 *
1816 * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
1817 * entry is still added to es_part_prune_results to maintain alignment with
1818 * es_part_prune_infos. This ensures that ExecInitPartitionExecPruning() can
1819 * use the same index to retrieve the pruning results.
1820 */
1821void
1822ExecDoInitialPruning(EState *estate)
1823{
1824 PlannedStmt *stmt = estate->es_plannedstmt;
1825 ListCell *lc;
1826 List *locked_relids = NIL;
1827
1828 foreach(lc, estate->es_part_prune_infos)
1829 {
1830 PartitionPruneInfo *pruneinfo = lfirst_node(PartitionPruneInfo, lc);
1831 PartitionPruneState *prunestate;
1832 Bitmapset *validsubplans = NULL;
1833 Bitmapset *all_leafpart_rtis = NULL;
1834 Bitmapset *validsubplan_rtis = NULL;
1835
1836 /* Create and save the PartitionPruneState. */
1837 prunestate = CreatePartitionPruneState(estate, pruneinfo,
1838 &all_leafpart_rtis);
1839 estate->es_part_prune_states = lappend(estate->es_part_prune_states,
1840 prunestate);
1841
1842 /*
1843 * Perform initial pruning steps, if any, and save the result
1844 * bitmapset or NULL as described in the header comment.
1845 */
1846 if (prunestate->do_initial_prune)
1847 validsubplans = ExecFindMatchingSubPlans(prunestate, true,
1848 &validsubplan_rtis);
1849 else
1850 validsubplan_rtis = all_leafpart_rtis;
1851
1852 if (ExecShouldLockRelations(estate))
1853 {
1854 int rtindex = -1;
1855
1856 while ((rtindex = bms_next_member(validsubplan_rtis,
1857 rtindex)) >= 0)
1858 {
1859 RangeTblEntry *rte = exec_rt_fetch(rtindex, estate);
1860
1861 Assert(rte->rtekind == RTE_RELATION &&
1862 rte->rellockmode != NoLock);
1863 LockRelationOid(rte->relid, rte->rellockmode);
1864 locked_relids = lappend_int(locked_relids, rtindex);
1865 }
1866 }
1867 estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
1868 validsubplan_rtis);
1869 estate->es_part_prune_results = lappend(estate->es_part_prune_results,
1870 validsubplans);
1871 }
1872
1873 /*
1874 * Lock the first result relation of each ModifyTable node, even if it was
1875 * pruned. This is required for ExecInitModifyTable(), which keeps its
1876 * first result relation if all other result relations have been pruned,
1877 * because some executor paths (e.g., in nodeModifyTable.c and
1878 * execPartition.c) rely on there being at least one result relation.
1879 *
1880 * There's room for improvement here --- we actually only need to do this
1881 * if all other result relations of the ModifyTable node were pruned, but
1882 * we don't have an easy way to tell that here.
1883 */
1884 if (stmt->resultRelations && ExecShouldLockRelations(estate))
1885 {
1886 foreach(lc, stmt->firstResultRels)
1887 {
1888 Index firstResultRel = lfirst_int(lc);
1889
1890 if (!bms_is_member(firstResultRel, estate->es_unpruned_relids))
1891 {
1892 RangeTblEntry *rte = exec_rt_fetch(firstResultRel, estate);
1893
1894 Assert(rte->rtekind == RTE_RELATION && rte->rellockmode != NoLock);
1895 LockRelationOid(rte->relid, rte->rellockmode);
1896 locked_relids = lappend_int(locked_relids, firstResultRel);
1897 }
1898 }
1899 }
1900
1901 /*
1902 * Release the useless locks if the plan won't be executed. This is the
1903 * same as what CheckCachedPlan() in plancache.c does.
1904 */
1905 if (!ExecPlanStillValid(estate))
1906 {
1907 foreach(lc, locked_relids)
1908 {
1909 RangeTblEntry *rte = exec_rt_fetch(lfirst_int(lc), estate);
1910
1911 UnlockRelationOid(rte->relid, rte->rellockmode);
1912 }
1913 }
1914}
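
/*
 * For illustration: the three EState lists populated above are parallel, so
 * a consumer can fetch every piece belonging to one plan node with the same
 * index.  A minimal sketch (the helper name is hypothetical):
 */
#ifdef NOT_USED
static void
demo_fetch_prune_entries(EState *estate, int idx)
{
	PartitionPruneInfo *info = list_nth_node(PartitionPruneInfo,
											 estate->es_part_prune_infos, idx);
	PartitionPruneState *state = list_nth(estate->es_part_prune_states, idx);
	Bitmapset  *result = list_nth_node(Bitmapset,
									   estate->es_part_prune_results, idx);

	/* 'result' is NULL when no initial pruning steps ran for this entry */
	(void) info;
	(void) state;
	(void) result;
}
#endif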
1915
1916/*
1917 * ExecInitPartitionExecPruning
1918 * Initialize the data structures needed for runtime "exec" partition
1919 * pruning and return the result of initial pruning, if available.
1920 *
1921 * 'relids' identifies the relation to which both the parent plan and the
1922 * PartitionPruneInfo given by 'part_prune_index' belong.
1923 *
1924 * On return, *initially_valid_subplans is assigned the set of indexes of
1925 * child subplans that must be initialized along with the parent plan node.
1926 * Initial pruning would have been performed by ExecDoInitialPruning(), if
1927 * necessary, and the bitmapset of surviving subplans' indexes would have
1928 * been stored as the part_prune_index'th element of
1929 * EState.es_part_prune_results.
1930 *
1931 * If subplans were indeed pruned during initial pruning, the subplan_map
1932 * arrays in the returned PartitionPruneState are re-sequenced to exclude those
1933 * subplans, but only if the maps will be needed for subsequent execution
1934 * pruning passes.
1935 */
1936PartitionPruneState *
1937ExecInitPartitionExecPruning(PlanState *planstate,
1938 int n_total_subplans,
1939 int part_prune_index,
1940 Bitmapset *relids,
1941 Bitmapset **initially_valid_subplans)
1942{
1943 PartitionPruneState *prunestate;
1944 EState *estate = planstate->state;
1945 PartitionPruneInfo *pruneinfo;
1946
1947 /* Obtain the pruneinfo we need. */
1948 pruneinfo = list_nth_node(PartitionPruneInfo, estate->es_part_prune_infos,
1949 part_prune_index);
1950
1951 /* Its relids better match the plan node's or the planner messed up. */
1952 if (!bms_equal(relids, pruneinfo->relids))
1953 elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
1954 bmsToString(pruneinfo->relids), part_prune_index,
1955 bmsToString(relids));
1956
1957 /*
1958 * The PartitionPruneState would have been created by
1959 * ExecDoInitialPruning() and stored as the part_prune_index'th element of
1960 * EState.es_part_prune_states.
1961 */
1962 prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
1963 Assert(prunestate != NULL);
1964
1965 /* Use the result of initial pruning done by ExecDoInitialPruning(). */
1966 if (prunestate->do_initial_prune)
1967 *initially_valid_subplans = list_nth_node(Bitmapset,
1968 estate->es_part_prune_results,
1969 part_prune_index);
1970 else
1971 {
1972 /* No pruning, so we'll need to initialize all subplans */
1973 Assert(n_total_subplans > 0);
1974 *initially_valid_subplans = bms_add_range(NULL, 0,
1975 n_total_subplans - 1);
1976 }
1977
1978 /*
1979 * The exec pruning state must also be initialized, if needed, before it
1980 * can be used for pruning during execution.
1981 *
1982 * This also re-sequences subplan indexes contained in prunestate to
1983 * account for any that were removed due to initial pruning; refer to the
1984 * condition in InitExecPartitionPruneContexts() that is used to determine
1985 * whether to do this. If no exec pruning needs to be done, we would thus
1986 * leave the maps in an invalid state, but that's ok since
1987 * that data won't be consulted again (cf initial Assert in
1988 * ExecFindMatchingSubPlans).
1989 */
1990 if (prunestate->do_exec_prune)
1991 InitExecPartitionPruneContexts(prunestate, planstate,
1992 *initially_valid_subplans,
1993 n_total_subplans);
1994
1995 return prunestate;
1996}
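
/*
 * A typical call, loosely modeled on ExecInitAppend() in nodeAppend.c
 * (sketch only; variable and field names are illustrative):
 */
#ifdef NOT_USED
	if (node->part_prune_index >= 0)
	{
		Bitmapset  *validsubplans;

		appendstate->as_prune_state =
			ExecInitPartitionExecPruning(&appendstate->ps,
										 list_length(node->appendplans),
										 node->part_prune_index,
										 node->apprelids,
										 &validsubplans);
		/* Only the surviving subplans get ExecInitNode()'d. */
		nplans = bms_num_members(validsubplans);
	}
#endif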
1997
1998/*
1999 * CreatePartitionPruneState
2000 * Build the data structure required for calling ExecFindMatchingSubPlans
2001 *
2002 * This includes PartitionPruneContexts (stored in each
2003 * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
2004 * which hold the ExprStates needed to evaluate pruning expressions, and
2005 * mapping arrays to convert partition indexes from the pruning logic
2006 * into subplan indexes in the parent plan node's list of child subplans.
2007 *
2008 * 'pruneinfo' is a PartitionPruneInfo as generated by
2009 * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
2010 * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
2011 * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
2012 * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
2013 * system is needed to keep from confusing the different hierarchies when a
2014 * UNION ALL contains multiple partitioned tables as children. The data
2015 * stored in each PartitionedRelPruningData can be re-used each time we
2016 * re-evaluate which partitions match the pruning steps provided in each
2017 * PartitionedRelPruneInfo.
2018 *
2019 * Note that only the PartitionPruneContexts for initial pruning are
2020 * initialized here. Those required for exec pruning are initialized later in
2021 * ExecInitPartitionExecPruning(), as they depend on the availability of the
2022 * parent plan node's PlanState.
2023 *
2024 * If initial pruning steps are to be skipped (e.g., during EXPLAIN
2025 * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
2026 * all leaf partitions whose scanning subnode is included in the parent plan
2027 * node's list of child plans. The caller must add these RT indexes to
2028 * estate->es_unpruned_relids.
2029 */
2030static PartitionPruneState *
2031CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo,
2032 Bitmapset **all_leafpart_rtis)
2033{
2034 PartitionPruneState *prunestate;
2035 int n_part_hierarchies;
2036 ListCell *lc;
2037 int i;
2038
2039 /*
2040 * Expression context that will be used by partkey_datum_from_expr() to
2041 * evaluate expressions for comparison against partition bounds.
2042 */
2043 ExprContext *econtext = CreateExprContext(estate);
2044
2045 /* For data reading, executor always includes detached partitions */
2046 if (estate->es_partition_directory == NULL)
2047 estate->es_partition_directory =
2048 CreatePartitionDirectory(estate->es_query_cxt, false);
2049
2050 n_part_hierarchies = list_length(pruneinfo->prune_infos);
2051 Assert(n_part_hierarchies > 0);
2052
2053 /*
2054 * Allocate the data structure
2055 */
2056 prunestate = (PartitionPruneState *)
2057 palloc(offsetof(PartitionPruneState, partprunedata) +
2058 sizeof(PartitionPruningData *) * n_part_hierarchies);
2059
2060 /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
2061 prunestate->econtext = econtext;
2062 prunestate->execparamids = NULL;
2063 /* other_subplans can change at runtime, so we need our own copy */
2064 prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2065 prunestate->do_initial_prune = false; /* may be set below */
2066 prunestate->do_exec_prune = false; /* may be set below */
2067 prunestate->num_partprunedata = n_part_hierarchies;
2068
2069 /*
2070 * Create a short-term memory context which we'll use when making calls to
2071 * the partition pruning functions. This avoids possible memory leaks,
2072 * since the pruning functions call comparison functions that aren't under
2073 * our control.
2074 */
2075 prunestate->prune_context =
2076 AllocSetContextCreate(CurrentMemoryContext,
2077 "Partition Prune",
2078 ALLOCSET_DEFAULT_SIZES);
2079
2080 i = 0;
2081 foreach(lc, pruneinfo->prune_infos)
2082 {
2083 List *partrelpruneinfos = lfirst_node(List, lc);
2084 int npartrelpruneinfos = list_length(partrelpruneinfos);
2085 PartitionPruningData *prunedata;
2086 ListCell *lc2;
2087 int j;
2088
2089 prunedata = (PartitionPruningData *)
2090 palloc(offsetof(PartitionPruningData, partrelprunedata) +
2091 npartrelpruneinfos * sizeof(PartitionedRelPruningData));
2092 prunestate->partprunedata[i] = prunedata;
2093 prunedata->num_partrelprunedata = npartrelpruneinfos;
2094
2095 j = 0;
2096 foreach(lc2, partrelpruneinfos)
2097 {
2099 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2100 Relation partrel;
2101 PartitionDesc partdesc;
2102 PartitionKey partkey;
2103
2104 /*
2105 * We can rely on the copies of the partitioned table's partition
2106 * key and partition descriptor appearing in its relcache entry,
2107 * because that entry will be held open and locked for the
2108 * duration of this executor run.
2109 */
2110 partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2111
2112 /* Remember for InitExecPartitionPruneContexts(). */
2113 pprune->partrel = partrel;
2114
2115 partkey = RelationGetPartitionKey(partrel);
2116 partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2117 partrel);
2118
2119 /*
2120 * Initialize the subplan_map and subpart_map.
2121 *
2122 * The set of partitions that exist now might not be the same that
2123 * existed when the plan was made. The normal case is that it is;
2124 * optimize for that case with a quick comparison, and just copy
2125 * the subplan_map and make subpart_map, leafpart_rti_map point to
2126 * the ones in PruneInfo.
2127 *
2128 * For the case where they aren't identical, we could have more
2129 * partitions on either side; or even exactly the same number of
2130 * them on both but the set of OIDs doesn't match fully. Handle
2131 * this by creating new subplan_map and subpart_map arrays that
2132 * correspond to the ones in the PruneInfo where the new
2133 * partition descriptor's OIDs match. Any that don't match can be
2134 * set to -1, as if they were pruned. By construction, both
2135 * arrays are in partition bounds order.
2136 */
2137 pprune->nparts = partdesc->nparts;
2138 pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
2139
2140 if (partdesc->nparts == pinfo->nparts &&
2141 memcmp(partdesc->oids, pinfo->relid_map,
2142 sizeof(int) * partdesc->nparts) == 0)
2143 {
2144 pprune->subpart_map = pinfo->subpart_map;
2145 pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2146 memcpy(pprune->subplan_map, pinfo->subplan_map,
2147 sizeof(int) * pinfo->nparts);
2148 }
2149 else
2150 {
2151 int pd_idx = 0;
2152 int pp_idx;
2153
2154 /*
2155 * When the partition arrays are not identical, there could be
2156 * some new ones but it's also possible that one was removed;
2157 * we cope with both situations by walking the arrays and
2158 * discarding those that don't match.
2159 *
2160 * If the number of partitions on both sides match, it's still
2161 * possible that one partition has been detached and another
2162 * attached. Cope with that by creating a map that skips any
2163 * mismatches.
2164 */
2165 pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
2166 pprune->leafpart_rti_map = palloc(sizeof(int) * partdesc->nparts);
2167
2168 for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2169 {
2170 /* Skip any InvalidOid relid_map entries */
2171 while (pd_idx < pinfo->nparts &&
2172 !OidIsValid(pinfo->relid_map[pd_idx]))
2173 pd_idx++;
2174
2175 recheck:
2176 if (pd_idx < pinfo->nparts &&
2177 pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2178 {
2179 /* match... */
2180 pprune->subplan_map[pp_idx] =
2181 pinfo->subplan_map[pd_idx];
2182 pprune->subpart_map[pp_idx] =
2183 pinfo->subpart_map[pd_idx];
2184 pprune->leafpart_rti_map[pp_idx] =
2185 pinfo->leafpart_rti_map[pd_idx];
2186 pd_idx++;
2187 continue;
2188 }
2189
2190 /*
2191 * There isn't an exact match in the corresponding
2192 * positions of both arrays. Peek ahead in
2193 * pinfo->relid_map to see if we have a match for the
2194 * current partition in partdesc. Normally if a match
2195 * exists it's just one element ahead, and it means the
2196 * planner saw one extra partition that we no longer see
2197 * now (its concurrent detach finished just in between);
2198 * so we skip that one by updating pd_idx to the new
2199 * location and jumping above. We can then continue to
2200 * match the rest of the elements after skipping the OID
2201 * with no match; no future matches are tried for the
2202 * element that was skipped, because we know the arrays to
2203 * be in the same order.
2204 *
2205 * If we don't see a match anywhere in the rest of the
2206 * pinfo->relid_map array, that means we see an element
2207 * now that the planner didn't see, so mark that one as
2208 * pruned and move on.
2209 */
2210 for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2211 {
2212 if (pd_idx2 >= pinfo->nparts)
2213 break;
2214 if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2215 {
2216 pd_idx = pd_idx2;
2217 goto recheck;
2218 }
2219 }
2220
2221 pprune->subpart_map[pp_idx] = -1;
2222 pprune->subplan_map[pp_idx] = -1;
2223 pprune->leafpart_rti_map[pp_idx] = 0;
2224 }
2225 }
2226
2227 /* present_parts is also subject to later modification */
2228 pprune->present_parts = bms_copy(pinfo->present_parts);
2229
2230 /*
2231 * Only initial_context is initialized here. exec_context is
2232 * initialized during ExecInitPartitionExecPruning() when the
2233 * parent plan's PlanState is available.
2234 *
2235 * Note that we must skip execution-time (both "init" and "exec")
2236 * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2237 * values may be missing.
2238 */
2239 pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
2240 if (pinfo->initial_pruning_steps &&
2241 !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2242 {
2243 InitPartitionPruneContext(&pprune->initial_context,
2244 pprune->initial_pruning_steps,
2245 partdesc, partkey, NULL,
2246 econtext);
2247 /* Record whether initial pruning is needed at any level */
2248 prunestate->do_initial_prune = true;
2249 }
2250 pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
2251 if (pinfo->exec_pruning_steps &&
2252 !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2253 {
2254 /* Record whether exec pruning is needed at any level */
2255 prunestate->do_exec_prune = true;
2256 }
2257
2258 /*
2259 * Accumulate the IDs of all PARAM_EXEC Params affecting the
2260 * partitioning decisions at this plan node.
2261 */
2262 prunestate->execparamids = bms_add_members(prunestate->execparamids,
2263 pinfo->execparamids);
2264
2265 /*
2266 * Return all leaf partition indexes if we're skipping pruning in
2267 * the EXPLAIN (GENERIC_PLAN) case.
2268 */
2269 if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2270 {
2271 int part_index = -1;
2272
2273 while ((part_index = bms_next_member(pprune->present_parts,
2274 part_index)) >= 0)
2275 {
2276 Index rtindex = pprune->leafpart_rti_map[part_index];
2277
2278 if (rtindex)
2279 *all_leafpart_rtis = bms_add_member(*all_leafpart_rtis,
2280 rtindex);
2281 }
2282 }
2283
2284 j++;
2285 }
2286 i++;
2287 }
2288
2289 return prunestate;
2290}
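
/*
 * To make the relid_map matching above concrete, here is a simplified,
 * self-contained rendition of its effect (hypothetical OIDs; the real loop
 * uses goto-based lookahead but produces the same mapping because both
 * arrays are in partition bounds order).  Plan-time OIDs {10, 20, 30} with
 * subplans {0, 1, 2} against current OIDs {10, 30, 40} -- partition 20 was
 * detached and 40 attached -- yield subplan_map = {0, 2, -1}.
 */
#ifdef NOT_USED
static void
demo_remap_subplan_map(void)
{
	Oid			plan_oids[] = {10, 20, 30}; /* pinfo->relid_map */
	int			plan_subplans[] = {0, 1, 2};	/* pinfo->subplan_map */
	Oid			now_oids[] = {10, 30, 40};	/* partdesc->oids */
	int			subplan_map[3];
	int			pd_idx = 0;

	for (int pp_idx = 0; pp_idx < 3; pp_idx++)
	{
		subplan_map[pp_idx] = -1;	/* unmatched => treat as pruned */
		for (int i = pd_idx; i < 3; i++)
		{
			if (plan_oids[i] == now_oids[pp_idx])
			{
				subplan_map[pp_idx] = plan_subplans[i];
				pd_idx = i + 1; /* never re-match skipped entries */
				break;
			}
		}
	}
	/* subplan_map is now {0, 2, -1} */
}
#endif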
2291
2292/*
2293 * Initialize a PartitionPruneContext for the given list of pruning steps.
2294 */
2295static void
2296InitPartitionPruneContext(PartitionPruneContext *context,
2297 List *pruning_steps,
2298 PartitionDesc partdesc,
2299 PartitionKey partkey,
2300 PlanState *planstate,
2301 ExprContext *econtext)
2302{
2303 int n_steps;
2304 int partnatts;
2305 ListCell *lc;
2306
2307 n_steps = list_length(pruning_steps);
2308
2309 context->strategy = partkey->strategy;
2310 context->partnatts = partnatts = partkey->partnatts;
2311 context->nparts = partdesc->nparts;
2312 context->boundinfo = partdesc->boundinfo;
2313 context->partcollation = partkey->partcollation;
2314 context->partsupfunc = partkey->partsupfunc;
2315
2316 /* We'll look up type-specific support functions as needed */
2317 context->stepcmpfuncs = (FmgrInfo *)
2318 palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
2319
2320 context->ppccontext = CurrentMemoryContext;
2321 context->planstate = planstate;
2322 context->exprcontext = econtext;
2323
2324 /* Initialize expression state for each expression we need */
2325 context->exprstates = (ExprState **)
2326 palloc0(sizeof(ExprState *) * n_steps * partnatts);
2327 foreach(lc, pruning_steps)
2328 {
2329 PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
2330 ListCell *lc2 = list_head(step->exprs);
2331 int keyno;
2332
2333 /* not needed for other step kinds */
2334 if (!IsA(step, PartitionPruneStepOp))
2335 continue;
2336
2337 Assert(list_length(step->exprs) <= partnatts);
2338
2339 for (keyno = 0; keyno < partnatts; keyno++)
2340 {
2341 if (bms_is_member(keyno, step->nullkeys))
2342 continue;
2343
2344 if (lc2 != NULL)
2345 {
2346 Expr *expr = lfirst(lc2);
2347
2348 /* not needed for Consts */
2349 if (!IsA(expr, Const))
2350 {
2351 int stateidx = PruneCxtStateIdx(partnatts,
2352 step->step.step_id,
2353 keyno);
2354
2355 /*
2356 * When planstate is NULL, pruning_steps is known not to
2357 * contain any expressions that depend on the parent plan.
2358 * Information about any available EXTERN parameters must be
2359 * passed explicitly in that case; the caller must have made
2360 * it available via econtext.
2361 */
2362 if (planstate == NULL)
2363 context->exprstates[stateidx] =
2364 ExecInitExprWithParams(expr,
2365 econtext->ecxt_param_list_info);
2366 else
2367 context->exprstates[stateidx] =
2368 ExecInitExpr(expr, context->planstate);
2369 }
2370 lc2 = lnext(step->exprs, lc2);
2371 }
2372 }
2373 }
2374}
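
/*
 * The exprstates array filled above is a flattened 2-D array indexed by
 * PruneCxtStateIdx(partnatts, step_id, keyno), i.e. row-major by pruning
 * step.  As a worked example, with partnatts = 2 the ExprState for step_id
 * 3 and keyno 1 lives at index 3 * 2 + 1 = 7.
 */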
2375
2376/*
2377 * InitExecPartitionPruneContexts
2378 * Initialize exec pruning contexts deferred by CreatePartitionPruneState()
2379 *
2380 * This function finalizes exec pruning setup for a PartitionPruneState by
2381 * initializing contexts for pruning steps that require the parent plan's
2382 * PlanState. It iterates over PartitionPruningData entries and sets up the
2383 * necessary execution contexts for pruning during query execution.
2384 *
2385 * Also fix the mapping of partition indexes to subplan indexes contained in
2386 * prunestate by considering the new list of subplans that survived initial
2387 * pruning.
2388 *
2389 * Current values of the indexes present in PartitionPruneState count all the
2390 * subplans that would be present before initial pruning was done. If initial
2391 * pruning got rid of some of the subplans, any subsequent pruning passes will
2392 * be looking at a different set of target subplans to choose from than those
2393 * in the pre-initial-pruning set, so the maps in PartitionPruneState
2394 * containing those indexes must be updated to reflect the new indexes of
2395 * subplans in the post-initial-pruning set.
2396 */
2397static void
2398InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
2399 PlanState *parent_plan,
2400 Bitmapset *initially_valid_subplans,
2401 int n_total_subplans)
2402{
2403 EState *estate;
2404 int *new_subplan_indexes = NULL;
2405 Bitmapset *new_other_subplans;
2406 int i;
2407 int newidx;
2408 bool fix_subplan_map = false;
2409
2410 Assert(prunestate->do_exec_prune);
2411 Assert(parent_plan != NULL);
2412 estate = parent_plan->state;
2413
2414 /*
2415 * No need to fix subplans maps if initial pruning didn't eliminate any
2416 * subplans.
2417 */
2418 if (bms_num_members(initially_valid_subplans) < n_total_subplans)
2419 {
2420 fix_subplan_map = true;
2421
2422 /*
2423 * First we must build a temporary array which maps old subplan
2424 * indexes to new ones. For convenience of initialization, we use
2425 * 1-based indexes in this array and leave pruned items as 0.
2426 */
2427 new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
2428 newidx = 1;
2429 i = -1;
2430 while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2431 {
2432 Assert(i < n_total_subplans);
2433 new_subplan_indexes[i] = newidx++;
2434 }
2435 }
2436
2437 /*
2438 * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2439 * subplan indexes. We must also recompute its present_parts bitmap.
2440 */
2441 for (i = 0; i < prunestate->num_partprunedata; i++)
2442 {
2443 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2444 int j;
2445
2446 /*
2447 * Within each hierarchy, we perform this loop in back-to-front order
2448 * so that we determine present_parts for the lowest-level partitioned
2449 * tables first. This way we can tell whether a sub-partitioned
2450 * table's partitions were entirely pruned so we can exclude it from
2451 * the current level's present_parts.
2452 */
2453 for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2454 {
2455 PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2456 int nparts = pprune->nparts;
2457 int k;
2458
2459 /* Initialize PartitionPruneContext for exec pruning, if needed. */
2460 if (pprune->exec_pruning_steps != NIL)
2461 {
2462 PartitionKey partkey;
2463 PartitionDesc partdesc;
2464
2465 /*
2466 * See the comment in CreatePartitionPruneState() regarding
2467 * the usage of partdesc and partkey.
2468 */
2469 partkey = RelationGetPartitionKey(pprune->partrel);
2470 partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2471 pprune->partrel);
2472
2473 InitPartitionPruneContext(&pprune->exec_context,
2474 pprune->exec_pruning_steps,
2475 partdesc, partkey, parent_plan,
2476 prunestate->econtext);
2477 }
2478
2479 if (!fix_subplan_map)
2480 continue;
2481
2482 /* We just rebuild present_parts from scratch */
2483 bms_free(pprune->present_parts);
2484 pprune->present_parts = NULL;
2485
2486 for (k = 0; k < nparts; k++)
2487 {
2488 int oldidx = pprune->subplan_map[k];
2489 int subidx;
2490
2491 /*
2492 * If this partition existed as a subplan then change the old
2493 * subplan index to the new subplan index. The new index may
2494 * become -1 if the partition was pruned above, or it may just
2495 * come earlier in the subplan list due to some subplans being
2496 * removed earlier in the list. If it's a subpartition, add
2497 * it to present_parts unless it's entirely pruned.
2498 */
2499 if (oldidx >= 0)
2500 {
2501 Assert(oldidx < n_total_subplans);
2502 pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2503
2504 if (new_subplan_indexes[oldidx] > 0)
2505 pprune->present_parts =
2506 bms_add_member(pprune->present_parts, k);
2507 }
2508 else if ((subidx = pprune->subpart_map[k]) >= 0)
2509 {
2510 PartitionedRelPruningData *subprune;
2511
2512 subprune = &prunedata->partrelprunedata[subidx];
2513
2514 if (!bms_is_empty(subprune->present_parts))
2515 pprune->present_parts =
2516 bms_add_member(pprune->present_parts, k);
2517 }
2518 }
2519 }
2520 }
2521
2522 /*
2523 * If we fixed subplan maps, we must also recompute the other_subplans
2524 * set, since indexes in it may change.
2525 */
2526 if (fix_subplan_map)
2527 {
2528 new_other_subplans = NULL;
2529 i = -1;
2530 while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2531 new_other_subplans = bms_add_member(new_other_subplans,
2532 new_subplan_indexes[i] - 1);
2533
2534 bms_free(prunestate->other_subplans);
2535 prunestate->other_subplans = new_other_subplans;
2536
2537 pfree(new_subplan_indexes);
2538 }
2539}
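
/*
 * A worked example of the re-sequencing above (hypothetical numbers): with
 * n_total_subplans = 4 and initially_valid_subplans = {0, 2}, the 1-based
 * temporary array becomes new_subplan_indexes = {1, 0, 2, 0}.  A subplan_map
 * entry of 2 is then rewritten to new_subplan_indexes[2] - 1 = 1, while an
 * entry of 1 (pruned) becomes new_subplan_indexes[1] - 1 = -1, exactly as if
 * the planner had pruned it.
 */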
2540
2541/*
2542 * ExecFindMatchingSubPlans
2543 * Determine which subplans match the pruning steps detailed in
2544 * 'prunestate' for the current comparison expression values.
2545 *
2546 * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2547 * differentiates the initial executor-time pruning step from later
2548 * runtime pruning.
2549 *
2550 * The caller must pass a non-NULL validsubplan_rtis during initial pruning
2551 * to collect the RT indexes of leaf partitions whose subnodes will be
2552 * executed. These RT indexes are later added to EState.es_unpruned_relids.
2553 */
2554Bitmapset *
2555ExecFindMatchingSubPlans(PartitionPruneState *prunestate,
2556 bool initial_prune,
2557 Bitmapset **validsubplan_rtis)
2558{
2559 Bitmapset *result = NULL;
2560 MemoryContext oldcontext;
2561 int i;
2562
2563 /*
2564 * Either we're here on the initial prune done during pruning
2565 * initialization, or we're at a point where PARAM_EXEC Params can be
2566 * evaluated *and* there are steps in which to do so.
2567 */
2568 Assert(initial_prune || prunestate->do_exec_prune);
2569 Assert(validsubplan_rtis != NULL || !initial_prune);
2570
2571 /*
2572 * Switch to a temp context to avoid leaking memory in the executor's
2573 * query-lifespan memory context.
2574 */
2575 oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2576
2577 /*
2578 * For each hierarchy, do the pruning tests, and add nondeletable
2579 * subplans' indexes to "result".
2580 */
2581 for (i = 0; i < prunestate->num_partprunedata; i++)
2582 {
2583 PartitionPruningData *prunedata = prunestate->partprunedata[i];
2584 PartitionedRelPruningData *pprune;
2585
2586 /*
2587 * We pass the zeroth item, belonging to the root table of the
2588 * hierarchy, and find_matching_subplans_recurse() takes care of
2589 * recursing to other (lower-level) parents as needed.
2590 */
2591 pprune = &prunedata->partrelprunedata[0];
2592 find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2593 &result, validsubplan_rtis);
2594
2595 /*
2596 * Expression eval may have used space in ExprContext too. Avoid
2597 * accessing exec_context during initial pruning, as it is not valid
2598 * at that stage.
2599 */
2600 if (!initial_prune && pprune->exec_pruning_steps)
2601 ResetExprContext(pprune->exec_context.exprcontext);
2602 }
2603
2604 /* Add in any subplans that partition pruning didn't account for */
2605 result = bms_add_members(result, prunestate->other_subplans);
2606
2607 MemoryContextSwitchTo(oldcontext);
2608
2609 /* Copy result out of the temp context before we reset it */
2610 result = bms_copy(result);
2611 if (validsubplan_rtis)
2612 *validsubplan_rtis = bms_copy(*validsubplan_rtis);
2613
2614 MemoryContextReset(prunestate->prune_context);
2615
2616 return result;
2617}
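
/*
 * The "exec" pruning call pattern, loosely modeled on nodeAppend.c (sketch
 * only; field names are illustrative): a rescan first checks whether any
 * Param the pruning steps depend on has changed, and only then recomputes
 * the valid set:
 */
#ifdef NOT_USED
	if (node->as_prune_state &&
		bms_overlap(node->ps.chgParam,
					node->as_prune_state->execparamids))
	{
		bms_free(node->as_valid_subplans);
		node->as_valid_subplans =
			ExecFindMatchingSubPlans(node->as_prune_state, false, NULL);
	}
#endif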
2618
2619/*
2620 * find_matching_subplans_recurse
2621 * Recursive worker function for ExecFindMatchingSubPlans
2622 *
2623 * Adds valid (non-prunable) subplan IDs to *validsubplans. If
2624 * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
2625 * corresponding partitions, but only if they are leaf partitions.
2626 */
2627static void
2628find_matching_subplans_recurse(PartitionPruningData *prunedata,
2629 PartitionedRelPruningData *pprune,
2630 bool initial_prune,
2631 Bitmapset **validsubplans,
2632 Bitmapset **validsubplan_rtis)
2633{
2634 Bitmapset *partset;
2635 int i;
2636
2637 /* Guard against stack overflow due to overly deep partition hierarchy. */
2638 check_stack_depth();
2639
2640 /*
2641 * Prune as appropriate, if we have pruning steps matching the current
2642 * execution context. Otherwise just include all partitions at this
2643 * level.
2644 */
2645 if (initial_prune && pprune->initial_pruning_steps)
2646 partset = get_matching_partitions(&pprune->initial_context,
2647 pprune->initial_pruning_steps);
2648 else if (!initial_prune && pprune->exec_pruning_steps)
2649 partset = get_matching_partitions(&pprune->exec_context,
2650 pprune->exec_pruning_steps);
2651 else
2652 partset = pprune->present_parts;
2653
2654 /* Translate partset into subplan indexes */
2655 i = -1;
2656 while ((i = bms_next_member(partset, i)) >= 0)
2657 {
2658 if (pprune->subplan_map[i] >= 0)
2659 {
2660 *validsubplans = bms_add_member(*validsubplans,
2661 pprune->subplan_map[i]);
2662
2663 /*
2664 * Only report leaf partitions. Non-leaf partitions may appear
2665 * here when they use an unflattened Append or MergeAppend.
2666 */
2667 if (validsubplan_rtis && pprune->leafpart_rti_map[i])
2668 *validsubplan_rtis = bms_add_member(*validsubplan_rtis,
2669 pprune->leafpart_rti_map[i]);
2670 }
2671 else
2672 {
2673 int partidx = pprune->subpart_map[i];
2674
2675 if (partidx >= 0)
2676 find_matching_subplans_recurse(prunedata,
2677 &prunedata->partrelprunedata[partidx],
2678 initial_prune, validsubplans,
2679 validsubplan_rtis);
2680 else
2681 {
2682 /*
2683 * We get here if the planner already pruned all the sub-
2684 * partitions for this partition. Silently ignore this
2685 * partition in this case. The end result is the same: we
2686 * would have pruned all partitions just the same, but we
2687 * don't have any pruning steps to execute to verify this.
2688 */
2689 }
2690 }
2691 }
2692}