PostgreSQL Source Code  git master
execPartition.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * execPartition.c
4  * Support routines for partitioning.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/executor/execPartition.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/table.h"
17 #include "access/tableam.h"
18 #include "catalog/partition.h"
19 #include "catalog/pg_inherits.h"
20 #include "catalog/pg_type.h"
21 #include "executor/execPartition.h"
22 #include "executor/executor.h"
23 #include "foreign/fdwapi.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "nodes/makefuncs.h"
28 #include "partitioning/partdesc.h"
29 #include "partitioning/partprune.h"
30 #include "rewrite/rewriteManip.h"
31 #include "utils/acl.h"
32 #include "utils/lsyscache.h"
33 #include "utils/partcache.h"
34 #include "utils/rls.h"
35 #include "utils/ruleutils.h"
36 
37 
38 /*-----------------------
39  * PartitionTupleRouting - Encapsulates all information required to
40  * route a tuple inserted into a partitioned table to one of its leaf
41  * partitions.
42  *
43  * partition_root
44  * The partitioned table that's the target of the command.
45  *
46  * partition_dispatch_info
47  * Array of 'max_dispatch' elements containing a pointer to a
48  * PartitionDispatch object for every partitioned table touched by tuple
49  * routing. The entry for the target partitioned table is *always*
50  * present in the 0th element of this array. See comment for
51  * PartitionDispatchData->indexes for details on how this array is
52  * indexed.
53  *
54  * nonleaf_partitions
55  * Array of 'max_dispatch' elements containing pointers to fake
56  * ResultRelInfo objects for nonleaf partitions, useful for checking
57  * the partition constraint.
58  *
59  * num_dispatch
60  * The current number of items stored in the 'partition_dispatch_info'
61  * array. Also serves as the index of the next free array element for
62  * new PartitionDispatch objects that need to be stored.
63  *
64  * max_dispatch
65  * The current allocated size of the 'partition_dispatch_info' array.
66  *
67  * partitions
68  * Array of 'max_partitions' elements containing a pointer to a
69  * ResultRelInfo for every leaf partition touched by tuple routing.
70  * Some of these are pointers to ResultRelInfos which are borrowed out of
71  * the owning ModifyTableState node. The remainder have been built
72  * especially for tuple routing. See comment for
73  * PartitionDispatchData->indexes for details on how this array is
74  * indexed.
75  *
76  * is_borrowed_rel
77  * Array of 'max_partitions' booleans recording whether a given entry
78  * in 'partitions' is a ResultRelInfo pointer borrowed from the owning
79  * ModifyTableState node, rather than being built here.
80  *
81  * num_partitions
82  * The current number of items stored in the 'partitions' array. Also
83  * serves as the index of the next free array element for new
84  * ResultRelInfo objects that need to be stored.
85  *
86  * max_partitions
87  * The current allocated size of the 'partitions' array.
88  *
89  * memcxt
90  * Memory context used to allocate subsidiary structs.
91  *-----------------------
92  */
/*
 * NOTE(review): the "struct PartitionTupleRouting" tag line (upstream line
 * 93) and every member declaration (upstream lines 95-104) were lost during
 * extraction; only the braces survive here.  Per the comment block directly
 * above, the members are: partition_root, partition_dispatch_info,
 * nonleaf_partitions, num_dispatch, max_dispatch, partitions,
 * is_borrowed_rel, num_partitions, max_partitions and memcxt.  Restore the
 * field list from the upstream source before compiling.
 */
94 {
105 };
106 
107 /*-----------------------
108  * PartitionDispatch - information about one partitioned table in a partition
109  * hierarchy required to route a tuple to any of its partitions. A
110  * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
111  * struct and stored inside its 'partition_dispatch_info' array.
112  *
113  * reldesc
114  * Relation descriptor of the table
115  *
116  * key
117  * Partition key information of the table
118  *
119  * keystate
120  * Execution state required for expressions in the partition key
121  *
122  * partdesc
123  * Partition descriptor of the table
124  *
125  * tupslot
126  * A standalone TupleTableSlot initialized with this table's tuple
127  * descriptor, or NULL if no tuple conversion between the parent is
128  * required.
129  *
130  * tupmap
131  * TupleConversionMap to convert from the parent's rowtype to this table's
132  * rowtype (when extracting the partition key of a tuple just before
133  * routing it through this table). A NULL value is stored if no tuple
134  * conversion is required.
135  *
136  * indexes
137  * Array of partdesc->nparts elements. For leaf partitions the index
138  * corresponds to the partition's ResultRelInfo in the encapsulating
139  * PartitionTupleRouting's partitions array. For partitioned partitions,
140  * the index corresponds to the PartitionDispatch for it in its
141  * partition_dispatch_info array. -1 indicates we've not yet allocated
142  * anything in PartitionTupleRouting for the partition.
143  *-----------------------
144  */
145 typedef struct PartitionDispatchData
146 {
/*
 * NOTE(review): upstream member lines 147-148 and 150-152 (per the comment
 * block above: reldesc, key, partdesc, tupslot and tupmap) as well as the
 * closing "} PartitionDispatchData;" (line 154) were dropped during
 * extraction -- restore them from the upstream source.
 */
149  List *keystate; /* list of ExprState */
153  int indexes[FLEXIBLE_ARRAY_MEMBER];
155 
156 
/*
 * Forward declarations of this file's local routines.
 *
 * NOTE(review): several declaration head lines (upstream 157, 169, 173,
 * 178, 180 and 190-191) were lost in extraction, so some fragments below
 * are parameter lists without their return type and function name
 * (e.g. the ones for ExecInitPartitionInfo, ExecInitPartitionDispatchInfo,
 * FormPartitionKeyDatum and ExecFindMatchingSubPlans-style helpers).
 * Restore from the upstream source.
 */
158  EState *estate, PartitionTupleRouting *proute,
159  PartitionDispatch dispatch,
160  ResultRelInfo *rootResultRelInfo,
161  int partidx);
162 static void ExecInitRoutingInfo(ModifyTableState *mtstate,
163  EState *estate,
164  PartitionTupleRouting *proute,
165  PartitionDispatch dispatch,
166  ResultRelInfo *partRelInfo,
167  int partidx,
168  bool is_borrowed_rel);
170  PartitionTupleRouting *proute,
171  Oid partoid, PartitionDispatch parent_pd,
172  int partidx, ResultRelInfo *rootResultRelInfo);
174  TupleTableSlot *slot,
175  EState *estate,
176  Datum *values,
177  bool *isnull);
179  bool *isnull);
181  Datum *values,
182  bool *isnull,
183  int maxfieldlen);
184 static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
185 static void ExecInitPruningContext(PartitionPruneContext *context,
186  List *pruning_steps,
187  PartitionDesc partdesc,
188  PartitionKey partkey,
189  PlanState *planstate);
192  bool initial_prune,
193  Bitmapset **validsubplans);
194 
195 
196 /*
197  * ExecSetupPartitionTupleRouting - sets up information needed during
198  * tuple routing for partitioned tables, encapsulates it in
199  * PartitionTupleRouting, and returns it.
200  *
201  * Callers must use the returned PartitionTupleRouting during calls to
202  * ExecFindPartition(). The actual ResultRelInfo for a partition is only
203  * allocated when the partition is found for the first time.
204  *
205  * The current memory context is used to allocate this struct and all
206  * subsidiary structs that will be allocated from it later on. Typically
207  * it should be estate->es_query_cxt.
208  */
/*
 * NOTE(review): the function's result type and signature line
 * ("ExecSetupPartitionTupleRouting(EState *estate, Relation rel)" per the
 * call sites and comment above; upstream lines 209-210) were dropped by
 * the extractor -- confirm against upstream.
 */
211 {
212  PartitionTupleRouting *proute;
213 
214  /*
215  * Here we attempt to expend as little effort as possible in setting up
216  * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
217  * demand, only when we actually need to route a tuple to that partition.
218  * The reason for this is that a common case is for INSERT to insert a
219  * single tuple into a partitioned table and this must be fast.
220  */
/*
 * NOTE(review): the allocation of 'proute' (upstream line 221, presumably
 * a palloc0 of sizeof(PartitionTupleRouting), matching the "initialized by
 * zeroing" comment below) is missing here.
 */
222  proute->partition_root = rel;
223  proute->memcxt = CurrentMemoryContext;
224  /* Rest of members initialized by zeroing */
225 
226  /*
227  * Initialize this table's PartitionDispatch object. Here we pass in the
228  * parent as NULL as we don't need to care about any parent of the target
229  * partitioned table.
230  */
231  ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
232  NULL, 0, NULL);
233 
234  return proute;
235 }
236 
237 /*
238  * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
239  * the tuple contained in *slot should belong to.
240  *
241  * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
242  * one up or reuse one from mtstate's resultRelInfo array. When reusing a
243  * ResultRelInfo from the mtstate we verify that the relation is a valid
244  * target for INSERTs and initialize tuple routing information.
245  *
246  * rootResultRelInfo is the relation named in the query.
247  *
248  * estate must be non-NULL; we'll need it to compute any expressions in the
249  * partition keys. Also, its per-tuple contexts are used as evaluation
250  * scratch space.
251  *
252  * If no leaf partition is found, this routine errors out with the appropriate
253  * error message. An error may also be raised if the found target partition
254  * is not a valid target for an INSERT.
255  */
/*
 * NOTE(review): the function's result type and first signature line
 * ("ExecFindPartition(ModifyTableState *mtstate," per the header comment
 * above; upstream lines 256-257) were dropped by the extractor.
 */
258  ResultRelInfo *rootResultRelInfo,
259  PartitionTupleRouting *proute,
260  TupleTableSlot *slot, EState *estate)
261 {
/*
 * NOTE(review): local declarations at upstream lines 262-263 are missing;
 * 'pd' (used below as "dispatch = pd[0]") and 'values' (passed to
 * FormPartitionKeyDatum alongside isnull) must be declared here --
 * presumably the partition_dispatch_info array pointer and a
 * Datum[PARTITION_MAX_KEYS] buffer; confirm against upstream.
 */
264  bool isnull[PARTITION_MAX_KEYS];
265  Relation rel;
266  PartitionDispatch dispatch;
267  PartitionDesc partdesc;
268  ExprContext *ecxt = GetPerTupleExprContext(estate);
269  TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
270  TupleTableSlot *rootslot = slot;
271  TupleTableSlot *myslot = NULL;
272  MemoryContext oldcxt;
273  ResultRelInfo *rri = NULL;
274 
275  /* use per-tuple context here to avoid leaking memory */
/*
 * NOTE(review): the assignment of 'oldcxt' (upstream line 276, a
 * MemoryContextSwitchTo of the per-tuple memory context, as restored at
 * the bottom of this function) is missing here.
 */
277 
278  /*
279  * First check the root table's partition constraint, if any. No point in
280  * routing the tuple if it doesn't belong in the root table itself.
281  */
282  if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
283  ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
284 
285  /* start with the root partitioned table */
286  dispatch = pd[0];
287  while (dispatch != NULL)
288  {
289  int partidx = -1;
290  bool is_leaf;
291 
/* NOTE(review): upstream line 292 is missing here -- presumably a
 * CHECK_FOR_INTERRUPTS() at the top of the loop; confirm. */
293 
294  rel = dispatch->reldesc;
295  partdesc = dispatch->partdesc;
296 
297  /*
298  * Extract partition key from tuple. Expression evaluation machinery
299  * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
300  * point to the correct tuple slot. The slot might have changed from
301  * what was used for the parent table if the table of the current
302  * partitioning level has different tuple descriptor from the parent.
303  * So update ecxt_scantuple accordingly.
304  */
305  ecxt->ecxt_scantuple = slot;
306  FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
307 
308  /*
309  * If this partitioned table has no partitions or no partition for
310  * these values, error out.
311  */
312  if (partdesc->nparts == 0 ||
313  (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
314  {
315  char *val_desc;
316 
/* NOTE(review): the head of the 'val_desc' assignment (upstream line 317,
 * the call building a partition-key description for the error detail) is
 * missing; only its trailing argument line survives below. */
318  values, isnull, 64);
320  ereport(ERROR,
321  (errcode(ERRCODE_CHECK_VIOLATION),
322  errmsg("no partition of relation \"%s\" found for row",
/* NOTE(review): upstream line 323, the errmsg argument supplying the
 * relation name (presumably RelationGetRelationName(rel)), is missing. */
324  val_desc ?
325  errdetail("Partition key of the failing row contains %s.",
326  val_desc) : 0,
327  errtable(rel)));
328  }
329 
330  is_leaf = partdesc->is_leaf[partidx];
331  if (is_leaf)
332  {
333  /*
334  * We've reached the leaf -- hurray, we're done. Look to see if
335  * we've already got a ResultRelInfo for this partition.
336  */
337  if (likely(dispatch->indexes[partidx] >= 0))
338  {
339  /* ResultRelInfo already built */
340  Assert(dispatch->indexes[partidx] < proute->num_partitions);
341  rri = proute->partitions[dispatch->indexes[partidx]];
342  }
343  else
344  {
345  /*
346  * If the partition is known in the owning ModifyTableState
347  * node, we can re-use that ResultRelInfo instead of creating
348  * a new one with ExecInitPartitionInfo().
349  */
350  rri = ExecLookupResultRelByOid(mtstate,
351  partdesc->oids[partidx],
352  true, false);
353  if (rri)
354  {
355  /* Verify this ResultRelInfo allows INSERTs */
/* NOTE(review): the verification call announced by the comment above
 * (upstream line 356, presumably CheckValidResultRel(rri, CMD_INSERT) as
 * done in ExecInitPartitionInfo below) is missing here. */
357 
358  /*
359  * Initialize information needed to insert this and
360  * subsequent tuples routed to this partition.
361  */
362  ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
363  rri, partidx, true);
364  }
365  else
366  {
367  /* We need to create a new one. */
368  rri = ExecInitPartitionInfo(mtstate, estate, proute,
369  dispatch,
370  rootResultRelInfo, partidx);
371  }
372  }
373  Assert(rri != NULL);
374 
375  /* Signal to terminate the loop */
376  dispatch = NULL;
377  }
378  else
379  {
380  /*
381  * Partition is a sub-partitioned table; get the PartitionDispatch
382  */
383  if (likely(dispatch->indexes[partidx] >= 0))
384  {
385  /* Already built. */
386  Assert(dispatch->indexes[partidx] < proute->num_dispatch);
387 
388  rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
389 
390  /*
391  * Move down to the next partition level and search again
392  * until we find a leaf partition that matches this tuple
393  */
394  dispatch = pd[dispatch->indexes[partidx]];
395  }
396  else
397  {
398  /* Not yet built. Do that now. */
399  PartitionDispatch subdispatch;
400 
401  /*
402  * Create the new PartitionDispatch. We pass the current one
403  * in as the parent PartitionDispatch
404  */
405  subdispatch = ExecInitPartitionDispatchInfo(estate,
406  proute,
407  partdesc->oids[partidx],
408  dispatch, partidx,
409  mtstate->rootResultRelInfo);
410  Assert(dispatch->indexes[partidx] >= 0 &&
411  dispatch->indexes[partidx] < proute->num_dispatch);
412 
413  rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
414  dispatch = subdispatch;
415  }
416 
417  /*
418  * Convert the tuple to the new parent's layout, if different from
419  * the previous parent.
420  */
421  if (dispatch->tupslot)
422  {
423  AttrMap *map = dispatch->tupmap;
424  TupleTableSlot *tempslot = myslot;
425 
426  myslot = dispatch->tupslot;
427  slot = execute_attr_map_slot(map, slot, myslot);
428 
429  if (tempslot != NULL)
430  ExecClearTuple(tempslot);
431  }
432  }
433 
434  /*
435  * If this partition is the default one, we must check its partition
436  * constraint now, which may have changed concurrently due to
437  * partitions being added to the parent.
438  *
439  * (We do this here, and do not rely on ExecInsert doing it, because
440  * we don't want to miss doing it for non-leaf partitions.)
441  */
442  if (partidx == partdesc->boundinfo->default_index)
443  {
444  /*
445  * The tuple must match the partition's layout for the constraint
446  * expression to be evaluated successfully. If the partition is
447  * sub-partitioned, that would already be the case due to the code
448  * above, but for a leaf partition the tuple still matches the
449  * parent's layout.
450  *
451  * Note that we have a map to convert from root to current
452  * partition, but not from immediate parent to current partition.
453  * So if we have to convert, do it from the root slot; if not, use
454  * the root slot as-is.
455  */
456  if (is_leaf)
457  {
/* NOTE(review): the declaration of 'map' (upstream line 458, presumably
 * the root-to-partition TupleConversionMap referenced by "map->attrMap"
 * just below) is missing here. */
459 
460  if (map)
461  slot = execute_attr_map_slot(map->attrMap, rootslot,
462  rri->ri_PartitionTupleSlot);
463  else
464  slot = rootslot;
465  }
466 
467  ExecPartitionCheck(rri, slot, estate, true);
468  }
469  }
470 
471  /* Release the tuple in the lowest parent's dedicated slot. */
472  if (myslot != NULL)
473  ExecClearTuple(myslot);
474  /* and restore ecxt's scantuple */
475  ecxt->ecxt_scantuple = ecxt_scantuple_saved;
476  MemoryContextSwitchTo(oldcxt);
477 
478  return rri;
479 }
480 
481 /*
482  * ExecInitPartitionInfo
483  * Lock the partition and initialize ResultRelInfo. Also setup other
484  * information for the partition and store it in the next empty slot in
485  * the proute->partitions array.
486  *
487  * Returns the ResultRelInfo
488  */
489 static ResultRelInfo *
/* NOTE(review): upstream line 490, carrying the function name and first
 * parameters ("ExecInitPartitionInfo(ModifyTableState *mtstate, EState
 * *estate," per the forward declaration fragments at the top of the file),
 * is missing here. */
491  PartitionTupleRouting *proute,
492  PartitionDispatch dispatch,
493  ResultRelInfo *rootResultRelInfo,
494  int partidx)
495 {
496  ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
497  Oid partOid = dispatch->partdesc->oids[partidx];
498  Relation partrel;
499  int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
500  Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
501  ResultRelInfo *leaf_part_rri;
502  MemoryContext oldcxt;
503  AttrMap *part_attmap = NULL;
504  bool found_whole_row;
505 
506  oldcxt = MemoryContextSwitchTo(proute->memcxt);
507 
508  partrel = table_open(partOid, RowExclusiveLock);
509 
510  leaf_part_rri = makeNode(ResultRelInfo);
511  InitResultRelInfo(leaf_part_rri,
512  partrel,
513  0,
514  rootResultRelInfo,
515  estate->es_instrument);
516 
517  /*
518  * Verify result relation is a valid target for an INSERT. An UPDATE of a
519  * partition-key becomes a DELETE+INSERT operation, so this check is still
520  * required when the operation is CMD_UPDATE.
521  */
522  CheckValidResultRel(leaf_part_rri, CMD_INSERT);
523 
524  /*
525  * Open partition indices. The user may have asked to check for conflicts
526  * within this leaf partition and do "nothing" instead of throwing an
527  * error. Be prepared in that case by initializing the index information
528  * needed by ExecInsert() to perform speculative insertions.
529  */
530  if (partrel->rd_rel->relhasindex &&
531  leaf_part_rri->ri_IndexRelationDescs == NULL)
532  ExecOpenIndices(leaf_part_rri,
533  (node != NULL &&
/* NOTE(review): upstream line 534, completing the speculative-insertion
 * condition passed to ExecOpenIndices (presumably a test of
 * node->onConflictAction), is missing here. */
535 
536  /*
537  * Build WITH CHECK OPTION constraints for the partition. Note that we
538  * didn't build the withCheckOptionList for partitions within the planner,
539  * but simple translation of varattnos will suffice. This only occurs for
540  * the INSERT case or in the case of UPDATE tuple routing where we didn't
541  * find a result rel to reuse.
542  */
543  if (node && node->withCheckOptionLists != NIL)
544  {
545  List *wcoList;
546  List *wcoExprs = NIL;
547  ListCell *ll;
548 
549  /*
550  * In the case of INSERT on a partitioned table, there is only one
551  * plan. Likewise, there is only one WCO list, not one per partition.
552  * For UPDATE, there are as many WCO lists as there are plans.
553  */
554  Assert((node->operation == CMD_INSERT &&
555  list_length(node->withCheckOptionLists) == 1 &&
556  list_length(node->resultRelations) == 1) ||
557  (node->operation == CMD_UPDATE &&
/* NOTE(review): upstream line 558 is missing -- by symmetry with the
 * RETURNING Assert below, presumably
 * "list_length(node->withCheckOptionLists) ==". */
559  list_length(node->resultRelations)));
560 
561  /*
562  * Use the WCO list of the first plan as a reference to calculate
563  * attno's for the WCO list of this partition. In the INSERT case,
564  * that refers to the root partitioned table, whereas in the UPDATE
565  * tuple routing case, that refers to the first partition in the
566  * mtstate->resultRelInfo array. In any case, both that relation and
567  * this partition should have the same columns, so we should be able
568  * to map attributes successfully.
569  */
570  wcoList = linitial(node->withCheckOptionLists);
571 
572  /*
573  * Convert Vars in it to contain this partition's attribute numbers.
574  */
575  part_attmap =
/* NOTE(review): upstream line 576, the head of the attribute-map-building
 * call whose trailing argument appears below, is missing (the same gap
 * recurs at upstream lines 633 and 775). */
577  RelationGetDescr(firstResultRel));
578  wcoList = (List *)
579  map_variable_attnos((Node *) wcoList,
580  firstVarno, 0,
581  part_attmap,
582  RelationGetForm(partrel)->reltype,
583  &found_whole_row);
584  /* We ignore the value of found_whole_row. */
585 
586  foreach(ll, wcoList)
587  {
/* NOTE(review): upstream line 588, declaring 'wco' (the WithCheckOption
 * drawn from the current list cell, referenced as wco->qual below), is
 * missing here. */
589  ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
590  &mtstate->ps);
591 
592  wcoExprs = lappend(wcoExprs, wcoExpr);
593  }
594 
595  leaf_part_rri->ri_WithCheckOptions = wcoList;
596  leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
597  }
598 
599  /*
600  * Build the RETURNING projection for the partition. Note that we didn't
601  * build the returningList for partitions within the planner, but simple
602  * translation of varattnos will suffice. This only occurs for the INSERT
603  * case or in the case of UPDATE tuple routing where we didn't find a
604  * result rel to reuse.
605  */
606  if (node && node->returningLists != NIL)
607  {
608  TupleTableSlot *slot;
609  ExprContext *econtext;
610  List *returningList;
611 
612  /* See the comment above for WCO lists. */
613  Assert((node->operation == CMD_INSERT &&
614  list_length(node->returningLists) == 1 &&
615  list_length(node->resultRelations) == 1) ||
616  (node->operation == CMD_UPDATE &&
617  list_length(node->returningLists) ==
618  list_length(node->resultRelations)));
619 
620  /*
621  * Use the RETURNING list of the first plan as a reference to
622  * calculate attno's for the RETURNING list of this partition. See
623  * the comment above for WCO lists for more details on why this is
624  * okay.
625  */
626  returningList = linitial(node->returningLists);
627 
628  /*
629  * Convert Vars in it to contain this partition's attribute numbers.
630  */
631  if (part_attmap == NULL)
632  part_attmap =
/* NOTE(review): upstream line 633, the head of the attribute-map-building
 * call (same gap as at line 576), is missing here. */
634  RelationGetDescr(firstResultRel));
635  returningList = (List *)
636  map_variable_attnos((Node *) returningList,
637  firstVarno, 0,
638  part_attmap,
639  RelationGetForm(partrel)->reltype,
640  &found_whole_row);
641  /* We ignore the value of found_whole_row. */
642 
643  leaf_part_rri->ri_returningList = returningList;
644 
645  /*
646  * Initialize the projection itself.
647  *
648  * Use the slot and the expression context that would have been set up
649  * in ExecInitModifyTable() for projection's output.
650  */
651  Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
652  slot = mtstate->ps.ps_ResultTupleSlot;
653  Assert(mtstate->ps.ps_ExprContext != NULL);
654  econtext = mtstate->ps.ps_ExprContext;
655  leaf_part_rri->ri_projectReturning =
656  ExecBuildProjectionInfo(returningList, econtext, slot,
657  &mtstate->ps, RelationGetDescr(partrel));
658  }
659 
660  /* Set up information needed for routing tuples to the partition. */
661  ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
662  leaf_part_rri, partidx, false);
663 
664  /*
665  * If there is an ON CONFLICT clause, initialize state for it.
666  */
667  if (node && node->onConflictAction != ONCONFLICT_NONE)
668  {
669  TupleDesc partrelDesc = RelationGetDescr(partrel);
670  ExprContext *econtext = mtstate->ps.ps_ExprContext;
671  ListCell *lc;
672  List *arbiterIndexes = NIL;
673 
674  /*
675  * If there is a list of arbiter indexes, map it to a list of indexes
676  * in the partition. We do that by scanning the partition's index
677  * list and searching for ancestry relationships to each index in the
678  * ancestor table.
679  */
680  if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0)
681  {
682  List *childIdxs;
683 
684  childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
685 
686  foreach(lc, childIdxs)
687  {
688  Oid childIdx = lfirst_oid(lc);
689  List *ancestors;
690  ListCell *lc2;
691 
692  ancestors = get_partition_ancestors(childIdx);
693  foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
694  {
695  if (list_member_oid(ancestors, lfirst_oid(lc2)))
696  arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
697  }
698  list_free(ancestors);
699  }
700  }
701 
702  /*
703  * If the resulting lists are of inequal length, something is wrong.
704  * (This shouldn't happen, since arbiter index selection should not
705  * pick up an invalid index.)
706  */
707  if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
708  list_length(arbiterIndexes))
709  elog(ERROR, "invalid arbiter index list");
710  leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
711 
712  /*
713  * In the DO UPDATE case, we have some more state to initialize.
714  */
715  if (node->onConflictAction == ONCONFLICT_UPDATE)
716  {
/* NOTE(review): upstream line 717, declaring and initializing 'onconfl'
 * (the OnConflictSetState assigned to ri_onConflict and populated through
 * oc_Existing/oc_ProjSlot/oc_ProjInfo/oc_WhereClause below), is missing
 * here. */
718  TupleConversionMap *map;
719 
720  map = leaf_part_rri->ri_RootToPartitionMap;
721 
722  Assert(node->onConflictSet != NIL);
723  Assert(rootResultRelInfo->ri_onConflict != NULL);
724 
725  leaf_part_rri->ri_onConflict = onconfl;
726 
727  /*
728  * Need a separate existing slot for each partition, as the
729  * partition could be of a different AM, even if the tuple
730  * descriptors match.
731  */
732  onconfl->oc_Existing =
733  table_slot_create(leaf_part_rri->ri_RelationDesc,
734  &mtstate->ps.state->es_tupleTable);
735 
736  /*
737  * If the partition's tuple descriptor matches exactly the root
738  * parent (the common case), we can re-use most of the parent's ON
739  * CONFLICT SET state, skipping a bunch of work. Otherwise, we
740  * need to create state specific to this partition.
741  */
742  if (map == NULL)
743  {
744  /*
745  * It's safe to reuse these from the partition root, as we
746  * only process one tuple at a time (therefore we won't
747  * overwrite needed data in slots), and the results of
748  * projections are independent of the underlying storage.
749  * Projections and where clauses themselves don't store state
750  * / are independent of the underlying storage.
751  */
752  onconfl->oc_ProjSlot =
753  rootResultRelInfo->ri_onConflict->oc_ProjSlot;
754  onconfl->oc_ProjInfo =
755  rootResultRelInfo->ri_onConflict->oc_ProjInfo;
756  onconfl->oc_WhereClause =
757  rootResultRelInfo->ri_onConflict->oc_WhereClause;
758  }
759  else
760  {
761  List *onconflset;
762  List *onconflcols;
763  bool found_whole_row;
764 
765  /*
766  * Translate expressions in onConflictSet to account for
767  * different attribute numbers. For that, map partition
768  * varattnos twice: first to catch the EXCLUDED
769  * pseudo-relation (INNER_VAR), and second to handle the main
770  * target relation (firstVarno).
771  */
772  onconflset = copyObject(node->onConflictSet);
773  if (part_attmap == NULL)
774  part_attmap =
/* NOTE(review): upstream line 775, the head of the attribute-map-building
 * call (same gap as at lines 576 and 633), is missing here. */
776  RelationGetDescr(firstResultRel));
777  onconflset = (List *)
778  map_variable_attnos((Node *) onconflset,
779  INNER_VAR, 0,
780  part_attmap,
781  RelationGetForm(partrel)->reltype,
782  &found_whole_row);
783  /* We ignore the value of found_whole_row. */
784  onconflset = (List *)
785  map_variable_attnos((Node *) onconflset,
786  firstVarno, 0,
787  part_attmap,
788  RelationGetForm(partrel)->reltype,
789  &found_whole_row);
790  /* We ignore the value of found_whole_row. */
791 
792  /* Finally, adjust the target colnos to match the partition. */
793  onconflcols = adjust_partition_colnos(node->onConflictCols,
794  leaf_part_rri);
795 
796  /* create the tuple slot for the UPDATE SET projection */
797  onconfl->oc_ProjSlot =
798  table_slot_create(partrel,
799  &mtstate->ps.state->es_tupleTable);
800 
801  /* build UPDATE SET projection state */
802  onconfl->oc_ProjInfo =
803  ExecBuildUpdateProjection(onconflset,
804  true,
805  onconflcols,
806  partrelDesc,
807  econtext,
808  onconfl->oc_ProjSlot,
809  &mtstate->ps);
810 
811  /*
812  * If there is a WHERE clause, initialize state where it will
813  * be evaluated, mapping the attribute numbers appropriately.
814  * As with onConflictSet, we need to map partition varattnos
815  * to the partition's tupdesc.
816  */
817  if (node->onConflictWhere)
818  {
819  List *clause;
820 
821  clause = copyObject((List *) node->onConflictWhere);
822  clause = (List *)
823  map_variable_attnos((Node *) clause,
824  INNER_VAR, 0,
825  part_attmap,
826  RelationGetForm(partrel)->reltype,
827  &found_whole_row);
828  /* We ignore the value of found_whole_row. */
829  clause = (List *)
830  map_variable_attnos((Node *) clause,
831  firstVarno, 0,
832  part_attmap,
833  RelationGetForm(partrel)->reltype,
834  &found_whole_row);
835  /* We ignore the value of found_whole_row. */
836  onconfl->oc_WhereClause =
837  ExecInitQual((List *) clause, &mtstate->ps);
838  }
839  }
840  }
841  }
842 
843  /*
844  * Since we've just initialized this ResultRelInfo, it's not in any list
845  * attached to the estate as yet. Add it, so that it can be found later.
846  *
847  * Note that the entries in this list appear in no predetermined order,
848  * because partition result rels are initialized as and when they're
849  * needed.
850  */
/* NOTE(review): upstream lines 851-853, the head of the statement that
 * appends leaf_part_rri to an estate-owned list (per the comment above),
 * are missing; only the trailing argument line survives below. */
854  leaf_part_rri);
855 
856  MemoryContextSwitchTo(oldcxt);
857 
858  return leaf_part_rri;
859 }
860 
861 /*
862  * ExecInitRoutingInfo
863  * Set up information needed for translating tuples between root
864  * partitioned table format and partition format, and keep track of it
865  * in PartitionTupleRouting.
866  */
867 static void
/* NOTE(review): upstream line 868, carrying the function name and first
 * parameter ("ExecInitRoutingInfo(ModifyTableState *mtstate," per the
 * forward declaration at the top of the file), is missing here. */
869  EState *estate,
870  PartitionTupleRouting *proute,
871  PartitionDispatch dispatch,
872  ResultRelInfo *partRelInfo,
873  int partidx,
874  bool is_borrowed_rel)
875 {
876  ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo;
877  MemoryContext oldcxt;
878  int rri_index;
879 
880  oldcxt = MemoryContextSwitchTo(proute->memcxt);
881 
882  /*
883  * Set up a tuple conversion map to convert a tuple routed to the
884  * partition from the parent's type to the partition's.
885  */
886  partRelInfo->ri_RootToPartitionMap =
/* NOTE(review): upstream line 887, the head of the conversion-map-building
 * call (presumably taking rootRelInfo's tuple descriptor, given that
 * 'rootRelInfo' is otherwise unused in the surviving lines), is missing
 * here. */
888  RelationGetDescr(partRelInfo->ri_RelationDesc));
889 
890  /*
891  * If a partition has a different rowtype than the root parent, initialize
892  * a slot dedicated to storing this partition's tuples. The slot is used
893  * for various operations that are applied to tuples after routing, such
894  * as checking constraints.
895  */
896  if (partRelInfo->ri_RootToPartitionMap != NULL)
897  {
898  Relation partrel = partRelInfo->ri_RelationDesc;
899 
900  /*
901  * Initialize the slot itself setting its descriptor to this
902  * partition's TupleDesc; TupleDesc reference will be released at the
903  * end of the command.
904  */
905  partRelInfo->ri_PartitionTupleSlot =
906  table_slot_create(partrel, &estate->es_tupleTable);
907  }
908  else
909  partRelInfo->ri_PartitionTupleSlot = NULL;
910 
911  /*
912  * If the partition is a foreign table, let the FDW init itself for
913  * routing tuples to the partition.
914  */
915  if (partRelInfo->ri_FdwRoutine != NULL &&
916  partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
917  partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
918 
919  /*
920  * Determine if the FDW supports batch insert and determine the batch size
921  * (a FDW may support batching, but it may be disabled for the
922  * server/table or for this particular query).
923  *
924  * If the FDW does not support batching, we set the batch size to 1.
925  */
926  if (mtstate->operation == CMD_INSERT &&
927  partRelInfo->ri_FdwRoutine != NULL &&
/* NOTE(review): upstream lines 928-929, completing the condition -- per
 * the comment above and the call below, presumably checks that the FDW
 * provides GetForeignModifyBatchSize and batch-insert support -- are
 * missing here. */
930  partRelInfo->ri_BatchSize =
931  partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
932  else
933  partRelInfo->ri_BatchSize = 1;
934 
935  Assert(partRelInfo->ri_BatchSize >= 1);
936 
937  partRelInfo->ri_CopyMultiInsertBuffer = NULL;
938 
939  /*
940  * Keep track of it in the PartitionTupleRouting->partitions array.
941  */
942  Assert(dispatch->indexes[partidx] == -1);
943 
944  rri_index = proute->num_partitions++;
945 
946  /* Allocate or enlarge the array, as needed */
947  if (proute->num_partitions >= proute->max_partitions)
948  {
949  if (proute->max_partitions == 0)
950  {
951  proute->max_partitions = 8;
952  proute->partitions = (ResultRelInfo **)
953  palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
954  proute->is_borrowed_rel = (bool *)
955  palloc(sizeof(bool) * proute->max_partitions);
956  }
957  else
958  {
959  proute->max_partitions *= 2;
960  proute->partitions = (ResultRelInfo **)
961  repalloc(proute->partitions, sizeof(ResultRelInfo *) *
962  proute->max_partitions);
963  proute->is_borrowed_rel = (bool *)
964  repalloc(proute->is_borrowed_rel, sizeof(bool) *
965  proute->max_partitions);
966  }
967  }
968 
969  proute->partitions[rri_index] = partRelInfo;
970  proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
971  dispatch->indexes[partidx] = rri_index;
972 
973  MemoryContextSwitchTo(oldcxt);
974 }
975 
976 /*
977  * ExecInitPartitionDispatchInfo
978  * Lock the partitioned table (if not locked already) and initialize
979  * PartitionDispatch for a partitioned table and store it in the next
980  * available slot in the proute->partition_dispatch_info array. Also,
981  * record the index into this array in the parent_pd->indexes[] array in
982  * the partidx element so that we can properly retrieve the newly created
983  * PartitionDispatch later.
984  */
985 static PartitionDispatch
/*
 * NOTE(review): this listing is a doc-generator rendering; the embedded
 * numbers are the original file's line numbers, and lines that were
 * hyperlink anchors were dropped.  Line 986 is missing here -- presumably
 * the function-name line "ExecInitPartitionDispatchInfo(EState *estate," --
 * TODO confirm against the real execPartition.c.
 */
987  PartitionTupleRouting *proute, Oid partoid,
988  PartitionDispatch parent_pd, int partidx,
989  ResultRelInfo *rootResultRelInfo)
990 {
991  Relation rel;
992  PartitionDesc partdesc;
/* NOTE(review): line 993 missing -- likely the PartitionDispatch "pd" declaration used below. */
994  int dispatchidx;
995  MemoryContext oldcxt;
996 
997  /*
998  * For data modification, it is better that executor does not include
999  * partitions being detached, except when running in snapshot-isolation
1000  * mode. This means that a read-committed transaction immediately gets a
1001  * "no partition for tuple" error when a tuple is inserted into a
1002  * partition that's being detached concurrently, but a transaction in
1003  * repeatable-read mode can still use such a partition.
1004  */
1005  if (estate->es_partition_directory == NULL)
1006  estate->es_partition_directory =
/* NOTE(review): lines 1007-1008 missing -- presumably the CreatePartitionDirectory(...) call that completes this assignment; verify its arguments in the real file. */
1009 
1010  oldcxt = MemoryContextSwitchTo(proute->memcxt);
1011 
1012  /*
1013  * Only sub-partitioned tables need to be locked here. The root
1014  * partitioned table will already have been locked as it's referenced in
1015  * the query's rtable.
1016  */
1017  if (partoid != RelationGetRelid(proute->partition_root))
1018  rel = table_open(partoid, RowExclusiveLock);
1019  else
1020  rel = proute->partition_root;
1021  partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1022 
/* NOTE(review): line 1023 missing -- presumably the palloc of the PartitionDispatchData struct whose trailing indexes[] array is sized by the next (continuation) line. */
1024  partdesc->nparts * sizeof(int));
1025  pd->reldesc = rel;
1026  pd->key = RelationGetPartitionKey(rel);
1027  pd->keystate = NIL;
1028  pd->partdesc = partdesc;
1029  if (parent_pd != NULL)
1030  {
1031  TupleDesc tupdesc = RelationGetDescr(rel);
1032 
1033  /*
1034  * For sub-partitioned tables where the column order differs from its
1035  * direct parent partitioned table, we must store a tuple table slot
1036  * initialized with its tuple descriptor and a tuple conversion map to
1037  * convert a tuple from its parent's rowtype to its own. This is to
1038  * make sure that we are looking at the correct row using the correct
1039  * tuple descriptor when computing its partition key for tuple
1040  * routing.
1041  */
/* NOTE(review): line 1042 missing -- presumably the pd->tupmap assignment (parent-to-child attribute map) that the next continuation line completes. */
1043  tupdesc);
1044  pd->tupslot = pd->tupmap ?
1045  MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
1046  }
1047  else
1048  {
1049  /* Not required for the root partitioned table */
1050  pd->tupmap = NULL;
1051  pd->tupslot = NULL;
1052  }
1053 
1054  /*
1055  * Initialize with -1 to signify that the corresponding partition's
1056  * ResultRelInfo or PartitionDispatch has not been created yet.
1057  */
1058  memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1059 
1060  /* Track in PartitionTupleRouting for later use */
1061  dispatchidx = proute->num_dispatch++;
1062 
1063  /* Allocate or enlarge the array, as needed */
1064  if (proute->num_dispatch >= proute->max_dispatch)
1065  {
1066  if (proute->max_dispatch == 0)
1067  {
1068  proute->max_dispatch = 4;
/* NOTE(review): line 1069 missing -- likely "proute->partition_dispatch_info = (PartitionDispatch *)" preceding this palloc. */
1070  palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
1071  proute->nonleaf_partitions = (ResultRelInfo **)
1072  palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
1073  }
1074  else
1075  {
1076  proute->max_dispatch *= 2;
/* NOTE(review): lines 1077-1078 missing -- likely the repalloc of proute->partition_dispatch_info that the next continuation line completes. */
1079  sizeof(PartitionDispatch) * proute->max_dispatch);
1080  proute->nonleaf_partitions = (ResultRelInfo **)
1081  repalloc(proute->nonleaf_partitions,
1082  sizeof(ResultRelInfo *) * proute->max_dispatch);
1083  }
1084  }
1085  proute->partition_dispatch_info[dispatchidx] = pd;
1086 
1087  /*
1088  * If setting up a PartitionDispatch for a sub-partitioned table, we may
1089  * also need a minimally valid ResultRelInfo for checking the partition
1090  * constraint later; set that up now.
1091  */
1092  if (parent_pd)
1093  {
/* NOTE(review): line 1094 missing -- likely the declaration/allocation of "rri" (makeNode(ResultRelInfo)); TODO confirm. */
1095 
1096  InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
1097  proute->nonleaf_partitions[dispatchidx] = rri;
1098  }
1099  else
1100  proute->nonleaf_partitions[dispatchidx] = NULL;
1101 
1102  /*
1103  * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1104  * install a downlink in the parent to allow quick descent.
1105  */
1106  if (parent_pd)
1107  {
1108  Assert(parent_pd->indexes[partidx] == -1);
1109  parent_pd->indexes[partidx] = dispatchidx;
1110  }
1111 
1112  MemoryContextSwitchTo(oldcxt);
1113 
1114  return pd;
1115 }
1116 
1117 /*
1118  * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1119  * routing.
1120  *
1121  * Close all the partitioned tables, leaf partitions, and their indices.
1122  */
1123 void
/* NOTE(review): line 1124 missing from this rendering -- presumably "ExecCleanupTupleRouting(ModifyTableState *mtstate," (the doc generator dropped hyperlinked lines); TODO confirm. */
1125  PartitionTupleRouting *proute)
1126 {
1127  int i;
1128 
1129  /*
1130  * Remember, proute->partition_dispatch_info[0] corresponds to the root
1131  * partitioned table, which we must not try to close, because it is the
1132  * main target table of the query that will be closed by callers such as
1133  * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1134  * partitioned table.
1135  */
1136  for (i = 1; i < proute->num_dispatch; i++)
1137  {
/* NOTE(review): line 1138 missing -- likely "PartitionDispatch pd = proute->partition_dispatch_info[i];". */
1139 
1140  table_close(pd->reldesc, NoLock);
1141 
1142  if (pd->tupslot)
/* NOTE(review): line 1143 missing -- likely a call dropping pd->tupslot (ExecDropSingleTupleTableSlot); TODO confirm. */
1144  }
1145 
1146  for (i = 0; i < proute->num_partitions; i++)
1147  {
1148  ResultRelInfo *resultRelInfo = proute->partitions[i];
1149 
1150  /* Allow any FDWs to shut down */
1151  if (resultRelInfo->ri_FdwRoutine != NULL &&
1152  resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1153  resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1154  resultRelInfo);
1155 
1156  /*
1157  * Close it if it's not one of the result relations borrowed from the
1158  * owning ModifyTableState; those will be closed by ExecEndPlan().
1159  */
1160  if (proute->is_borrowed_rel[i])
1161  continue;
1162 
1163  ExecCloseIndices(resultRelInfo);
1164  table_close(resultRelInfo->ri_RelationDesc, NoLock);
1165  }
1166 }
1167 
1168 /* ----------------
1169  * FormPartitionKeyDatum
1170  * Construct values[] and isnull[] arrays for the partition key
1171  * of a tuple.
1172  *
1173  * pd Partition dispatch object of the partitioned table
1174  * slot Heap tuple from which to extract partition key
1175  * estate executor state for evaluating any partition key
1176  * expressions (must be non-NULL)
1177  * values Array of partition key Datums (output area)
1178  * isnull Array of is-null indicators (output area)
1179  *
1180  * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1181  * the heap tuple passed in.
1182  * ----------------
1183  */
1184 static void
/* NOTE(review): line 1185 missing from this rendering -- presumably "FormPartitionKeyDatum(PartitionDispatch pd," (dropped hyperlink anchor); TODO confirm. */
1186  TupleTableSlot *slot,
1187  EState *estate,
1188  Datum *values,
1189  bool *isnull)
1190 {
1191  ListCell *partexpr_item;
1192  int i;
1193 
/* Lazily build expression-evaluation state for key expressions on first call; cached in pd->keystate for later tuples. */
1194  if (pd->key->partexprs != NIL && pd->keystate == NIL)
1195  {
1196  /* Check caller has set up context correctly */
1197  Assert(estate != NULL &&
1198  GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1199 
1200  /* First time through, set up expression evaluation state */
1201  pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1202  }
1203 
1204  partexpr_item = list_head(pd->keystate);
1205  for (i = 0; i < pd->key->partnatts; i++)
1206  {
1207  AttrNumber keycol = pd->key->partattrs[i];
1208  Datum datum;
1209  bool isNull;
1210 
1211  if (keycol != 0)
1212  {
1213  /* Plain column; get the value directly from the heap tuple */
1214  datum = slot_getattr(slot, keycol, &isNull);
1215  }
1216  else
1217  {
1218  /* Expression; need to evaluate it */
1219  if (partexpr_item == NULL)
1220  elog(ERROR, "wrong number of partition key expressions");
1221  datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1222  GetPerTupleExprContext(estate),
1223  &isNull);
1224  partexpr_item = lnext(pd->keystate, partexpr_item);
1225  }
1226  values[i] = datum;
1227  isnull[i] = isNull;
1228  }
1229 
/* Cross-check: every prepared key expression must have been consumed. */
1230  if (partexpr_item != NULL)
1231  elog(ERROR, "wrong number of partition key expressions");
1232 }
1233 
1234 /*
1235  * get_partition_for_tuple
1236  * Finds partition of relation which accepts the partition key specified
1237  * in values and isnull
1238  *
1239  * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1240  * found or -1 if none found.
1241  */
1242 static int
/* NOTE(review): line 1243 missing from this rendering -- presumably "get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)"; the cross-reference residue at the bottom of this page shows that signature. */
1244 {
1245  int bound_offset;
1246  int part_index = -1;
1247  PartitionKey key = pd->key;
1248  PartitionDesc partdesc = pd->partdesc;
1249  PartitionBoundInfo boundinfo = partdesc->boundinfo;
1250 
1251  /* Route as appropriate based on partitioning strategy. */
1252  switch (key->strategy)
1253  {
/* NOTE(review): line 1254 missing -- presumably the hash-strategy case label; TODO confirm. */
1255  {
1256  uint64 rowHash;
1257 
1258  rowHash = compute_partition_hash_value(key->partnatts,
1259  key->partsupfunc,
1260  key->partcollation,
1261  values, isnull);
1262 
1263  part_index = boundinfo->indexes[rowHash % boundinfo->nindexes];
1264  }
1265  break;
1266 
/* NOTE(review): line 1267 missing -- presumably the list-strategy case label; TODO confirm. */
1268  if (isnull[0])
1269  {
1270  if (partition_bound_accepts_nulls(boundinfo))
1271  part_index = boundinfo->null_index;
1272  }
1273  else
1274  {
1275  bool equal = false;
1276 
1277  bound_offset = partition_list_bsearch(key->partsupfunc,
1278  key->partcollation,
1279  boundinfo,
1280  values[0], &equal);
1281  if (bound_offset >= 0 && equal)
1282  part_index = boundinfo->indexes[bound_offset];
1283  }
1284  break;
1285 
/* NOTE(review): line 1286 missing -- presumably the range-strategy case label; TODO confirm. */
1287  {
1288  bool equal = false,
1289  range_partkey_has_null = false;
1290  int i;
1291 
1292  /*
1293  * No range includes NULL, so this will be accepted by the
1294  * default partition if there is one, and otherwise rejected.
1295  */
1296  for (i = 0; i < key->partnatts; i++)
1297  {
1298  if (isnull[i])
1299  {
1300  range_partkey_has_null = true;
1301  break;
1302  }
1303  }
1304 
1305  if (!range_partkey_has_null)
1306  {
1307  bound_offset = partition_range_datum_bsearch(key->partsupfunc,
1308  key->partcollation,
1309  boundinfo,
1310  key->partnatts,
1311  values,
1312  &equal);
1313 
1314  /*
1315  * The bound at bound_offset is less than or equal to the
1316  * tuple value, so the bound at offset+1 is the upper
1317  * bound of the partition we're looking for, if there
1318  * actually exists one.
1319  */
1320  part_index = boundinfo->indexes[bound_offset + 1];
1321  }
1322  }
1323  break;
1324 
1325  default:
1326  elog(ERROR, "unexpected partition strategy: %d",
1327  (int) key->strategy);
1328  }
1329 
1330  /*
1331  * part_index < 0 means we failed to find a partition of this parent. Use
1332  * the default partition, if there is one.
1333  */
1334  if (part_index < 0)
1335  part_index = boundinfo->default_index;
1336 
1337  return part_index;
1338 }
1339 
1340 /*
1341  * ExecBuildSlotPartitionKeyDescription
1342  *
1343  * This works very much like BuildIndexValueDescription() and is currently
1344  * used for building error messages when ExecFindPartition() fails to find
1345  * partition for a row.
1346  */
1347 static char *
/* NOTE(review): line 1348 missing from this rendering -- presumably "ExecBuildSlotPartitionKeyDescription(Relation rel," (the "rel" parameter is used below); TODO confirm. */
1349  Datum *values,
1350  bool *isnull,
1351  int maxfieldlen)
1352 {
/* NOTE(review): lines 1353-1354 missing -- likely the StringInfoData "buf" and PartitionKey "key" declarations used below. */
1355  int partnatts = get_partition_natts(key);
1356  int i;
1357  Oid relid = RelationGetRelid(rel);
1358  AclResult aclresult;
1359 
/* Refuse to leak key values when row-level security is in force on the relation. */
1360  if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
1361  return NULL;
1362 
1363  /* If the user has table-level access, just go build the description. */
1364  aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
1365  if (aclresult != ACLCHECK_OK)
1366  {
1367  /*
1368  * Step through the columns of the partition key and make sure the
1369  * user has SELECT rights on all of them.
1370  */
1371  for (i = 0; i < partnatts; i++)
1372  {
/* NOTE(review): line 1373 missing -- likely the AttrNumber "attnum" declaration/initialization for key column i. */
1374 
1375  /*
1376  * If this partition key column is an expression, we return no
1377  * detail rather than try to figure out what column(s) the
1378  * expression includes and if the user has SELECT rights on them.
1379  */
1380  if (attnum == InvalidAttrNumber ||
1381  pg_attribute_aclcheck(relid, attnum, GetUserId(),
1382  ACL_SELECT) != ACLCHECK_OK)
1383  return NULL;
1384  }
1385  }
1386 
1387  initStringInfo(&buf);
1388  appendStringInfo(&buf, "(%s) = (",
1389  pg_get_partkeydef_columns(relid, true));
1390 
1391  for (i = 0; i < partnatts; i++)
1392  {
1393  char *val;
1394  int vallen;
1395 
1396  if (isnull[i])
1397  val = "null";
1398  else
1399  {
1400  Oid foutoid;
1401  bool typisvarlena;
1402 
/* NOTE(review): line 1403 missing -- likely the first line of the getTypeOutputInfo(...) call completed by the continuation below. */
1404  &foutoid, &typisvarlena);
1405  val = OidOutputFunctionCall(foutoid, values[i]);
1406  }
1407 
1408  if (i > 0)
1409  appendStringInfoString(&buf, ", ");
1410 
1411  /* truncate if needed */
1412  vallen = strlen(val);
1413  if (vallen <= maxfieldlen)
1414  appendBinaryStringInfo(&buf, val, vallen);
1415  else
1416  {
/* Clip at a character boundary so multibyte encodings are not split mid-character. */
1417  vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1418  appendBinaryStringInfo(&buf, val, vallen);
1419  appendStringInfoString(&buf, "...");
1420  }
1421  }
1422 
1423  appendStringInfoChar(&buf, ')');
1424 
1425  return buf.data;
1426 }
1427 
1428 /*
1429  * adjust_partition_colnos
1430  * Adjust the list of UPDATE target column numbers to account for
1431  * attribute differences between the parent and the partition.
1432  */
1433 static List *
/* NOTE(review): line 1434 missing from this rendering -- presumably "adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)" (both names are used below); TODO confirm. */
1435 {
1436  List *new_colnos = NIL;
1437  TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
1438  AttrMap *attrMap;
1439  ListCell *lc;
1440 
1441  Assert(map != NULL); /* else we shouldn't be here */
1442  attrMap = map->attrMap;
1443 
/* Translate each parent attno through the child-to-root attribute map; a zero or out-of-range mapping means the column doesn't exist in the partition, which should be impossible here. */
1444  foreach(lc, colnos)
1445  {
1446  AttrNumber parentattrno = lfirst_int(lc);
1447 
1448  if (parentattrno <= 0 ||
1449  parentattrno > attrMap->maplen ||
1450  attrMap->attnums[parentattrno - 1] == 0)
1451  elog(ERROR, "unexpected attno %d in target column list",
1452  parentattrno);
1453  new_colnos = lappend_int(new_colnos,
1454  attrMap->attnums[parentattrno - 1]);
1455  }
1456 
1457  return new_colnos;
1458 }
1459 
1460 /*-------------------------------------------------------------------------
1461  * Run-Time Partition Pruning Support.
1462  *
1463  * The following series of functions exist to support the removal of unneeded
1464  * subplans for queries against partitioned tables. The supporting functions
1465  * here are designed to work with any plan type which supports an arbitrary
1466  * number of subplans, e.g. Append, MergeAppend.
1467  *
1468  * When pruning involves comparison of a partition key to a constant, it's
1469  * done by the planner. However, if we have a comparison to a non-constant
1470  * but not volatile expression, that presents an opportunity for run-time
1471  * pruning by the executor, allowing irrelevant partitions to be skipped
1472  * dynamically.
1473  *
1474  * We must distinguish expressions containing PARAM_EXEC Params from
1475  * expressions that don't contain those. Even though a PARAM_EXEC Param is
1476  * considered to be a stable expression, it can change value from one plan
1477  * node scan to the next during query execution. Stable comparison
1478  * expressions that don't involve such Params allow partition pruning to be
1479  * done once during executor startup. Expressions that do involve such Params
1480  * require us to prune separately for each scan of the parent plan node.
1481  *
1482  * Note that pruning away unneeded subplans during executor startup has the
1483  * added benefit of not having to initialize the unneeded subplans at all.
1484  *
1485  *
1486  * Functions:
1487  *
1488  * ExecCreatePartitionPruneState:
1489  * Creates the PartitionPruneState required by each of the two pruning
1490  * functions. Details stored include how to map the partition index
1491  * returned by the partition pruning code into subplan indexes.
1492  *
1493  * ExecFindInitialMatchingSubPlans:
1494  * Returns indexes of matching subplans. Partition pruning is attempted
1495  * without any evaluation of expressions containing PARAM_EXEC Params.
1496  * This function must be called during executor startup for the parent
1497  * plan before the subplans themselves are initialized. Subplans which
1498  * are found not to match by this function must be removed from the
1499  * plan's list of subplans during execution, as this function performs a
1500  * remap of the partition index to subplan index map and the newly
1501  * created map provides indexes only for subplans which remain after
1502  * calling this function.
1503  *
1504  * ExecFindMatchingSubPlans:
1505  * Returns indexes of matching subplans after evaluating all available
1506  * expressions. This function can only be called during execution and
1507  * must be called again each time the value of a Param listed in
1508  * PartitionPruneState's 'execparamids' changes.
1509  *-------------------------------------------------------------------------
1510  */
1511 
1512 /*
1513  * ExecCreatePartitionPruneState
1514  * Build the data structure required for calling
1515  * ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
1516  *
1517  * 'planstate' is the parent plan node's execution state.
1518  *
1519  * 'partitionpruneinfo' is a PartitionPruneInfo as generated by
1520  * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
1521  * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
1522  * partitionpruneinfo->prune_infos), each of which contains a
1523  * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in
1524  * that sublist. This two-level system is needed to keep from confusing the
1525  * different hierarchies when a UNION ALL contains multiple partitioned tables
1526  * as children. The data stored in each PartitionedRelPruningData can be
1527  * re-used each time we re-evaluate which partitions match the pruning steps
1528  * provided in each PartitionedRelPruneInfo.
1529  */
/*
 * NOTE(review): the rendered file lines 1530-1531 are missing here --
 * presumably the return type and function-name line of
 * ExecCreatePartitionPruneState(PlanState *planstate, ...); the line below
 * is the tail of that parameter list.  TODO confirm against the real file.
 */
1532  PartitionPruneInfo *partitionpruneinfo)
1533 {
1534  EState *estate = planstate->state;
1535  PartitionPruneState *prunestate;
1536  int n_part_hierarchies;
1537  ListCell *lc;
1538  int i;
1539 
1540  /* For data reading, executor always omits detached partitions */
1541  if (estate->es_partition_directory == NULL)
1542  estate->es_partition_directory =
1543  CreatePartitionDirectory(estate->es_query_cxt, false);
1544 
1545  n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
1546  Assert(n_part_hierarchies > 0);
1547 
1548  /*
1549  * Allocate the data structure
1550  */
1551  prunestate = (PartitionPruneState *)
1552  palloc(offsetof(PartitionPruneState, partprunedata) +
1553  sizeof(PartitionPruningData *) * n_part_hierarchies);
1554 
1555  prunestate->execparamids = NULL;
1556  /* other_subplans can change at runtime, so we need our own copy */
1557  prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans);
1558  prunestate->do_initial_prune = false; /* may be set below */
1559  prunestate->do_exec_prune = false; /* may be set below */
1560  prunestate->num_partprunedata = n_part_hierarchies;
1561 
1562  /*
1563  * Create a short-term memory context which we'll use when making calls to
1564  * the partition pruning functions. This avoids possible memory leaks,
1565  * since the pruning functions call comparison functions that aren't under
1566  * our control.
1567  */
1568  prunestate->prune_context =
/* NOTE(review): lines 1569 and 1571 missing -- likely the AllocSetContextCreate parent-context and size-args lines surrounding the context name. */
1570  "Partition Prune",
1572 
1573  i = 0;
1574  foreach(lc, partitionpruneinfo->prune_infos)
1575  {
1576  List *partrelpruneinfos = lfirst_node(List, lc);
1577  int npartrelpruneinfos = list_length(partrelpruneinfos);
1578  PartitionPruningData *prunedata;
1579  ListCell *lc2;
1580  int j;
1581 
1582  prunedata = (PartitionPruningData *)
1583  palloc(offsetof(PartitionPruningData, partrelprunedata) +
1584  npartrelpruneinfos * sizeof(PartitionedRelPruningData));
1585  prunestate->partprunedata[i] = prunedata;
1586  prunedata->num_partrelprunedata = npartrelpruneinfos;
1587 
1588  j = 0;
1589  foreach(lc2, partrelpruneinfos)
1590  {
/* NOTE(review): line 1591 missing -- likely the PartitionedRelPruneInfo "pinfo" declaration (lfirst_node from lc2) used throughout this loop body. */
1592  PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
1593  Relation partrel;
1594  PartitionDesc partdesc;
1595  PartitionKey partkey;
1596 
1597  /*
1598  * We can rely on the copies of the partitioned table's partition
1599  * key and partition descriptor appearing in its relcache entry,
1600  * because that entry will be held open and locked for the
1601  * duration of this executor run.
1602  */
1603  partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex);
1604  partkey = RelationGetPartitionKey(partrel);
/* NOTE(review): line 1605 missing -- likely "partdesc = PartitionDirectoryLookup(estate->es_partition_directory," completed by the continuation below. */
1606  partrel);
1607 
1608  /*
1609  * Initialize the subplan_map and subpart_map.
1610  *
1611  * Because we request detached partitions to be included, and
1612  * detaching waits for old transactions, it is safe to assume that
1613  * no partitions have disappeared since this query was planned.
1614  *
1615  * However, new partitions may have been added.
1616  */
1617  Assert(partdesc->nparts >= pinfo->nparts);
1618  pprune->nparts = partdesc->nparts;
1619  pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
1620  if (partdesc->nparts == pinfo->nparts)
1621  {
1622  /*
1623  * There are no new partitions, so this is simple. We can
1624  * simply point to the subpart_map from the plan, but we must
1625  * copy the subplan_map since we may change it later.
1626  */
1627  pprune->subpart_map = pinfo->subpart_map;
1628  memcpy(pprune->subplan_map, pinfo->subplan_map,
1629  sizeof(int) * pinfo->nparts);
1630 
1631  /*
1632  * Double-check that the list of unpruned relations has not
1633  * changed. (Pruned partitions are not in relid_map[].)
1634  */
1635 #ifdef USE_ASSERT_CHECKING
1636  for (int k = 0; k < pinfo->nparts; k++)
1637  {
1638  Assert(partdesc->oids[k] == pinfo->relid_map[k] ||
1639  pinfo->subplan_map[k] == -1);
1640  }
1641 #endif
1642  }
1643  else
1644  {
1645  int pd_idx = 0;
1646  int pp_idx;
1647 
1648  /*
1649  * Some new partitions have appeared since plan time, and
1650  * those are reflected in our PartitionDesc but were not
1651  * present in the one used to construct subplan_map and
1652  * subpart_map. So we must construct new and longer arrays
1653  * where the partitions that were originally present map to
1654  * the same sub-structures, and any added partitions map to
1655  * -1, as if the new partitions had been pruned.
1656  *
1657  * Note: pinfo->relid_map[] may contain InvalidOid entries for
1658  * partitions pruned by the planner. We cannot tell exactly
1659  * which of the partdesc entries these correspond to, but we
1660  * don't have to; just skip over them. The non-pruned
1661  * relid_map entries, however, had better be a subset of the
1662  * partdesc entries and in the same order.
1663  */
1664  pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
1665  for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
1666  {
1667  /* Skip any InvalidOid relid_map entries */
1668  while (pd_idx < pinfo->nparts &&
1669  !OidIsValid(pinfo->relid_map[pd_idx]))
1670  pd_idx++;
1671 
1672  if (pd_idx < pinfo->nparts &&
1673  pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
1674  {
1675  /* match... */
1676  pprune->subplan_map[pp_idx] =
1677  pinfo->subplan_map[pd_idx];
1678  pprune->subpart_map[pp_idx] =
1679  pinfo->subpart_map[pd_idx];
1680  pd_idx++;
1681  }
1682  else
1683  {
1684  /* this partdesc entry is not in the plan */
1685  pprune->subplan_map[pp_idx] = -1;
1686  pprune->subpart_map[pp_idx] = -1;
1687  }
1688  }
1689 
1690  /*
1691  * It might seem that we need to skip any trailing InvalidOid
1692  * entries in pinfo->relid_map before checking that we scanned
1693  * all of the relid_map. But we will have skipped them above,
1694  * because they must correspond to some partdesc->oids
1695  * entries; we just couldn't tell which.
1696  */
1697  if (pd_idx != pinfo->nparts)
1698  elog(ERROR, "could not match partition child tables to plan elements");
1699  }
1700 
1701  /* present_parts is also subject to later modification */
1702  pprune->present_parts = bms_copy(pinfo->present_parts);
1703 
1704  /*
1705  * Initialize pruning contexts as needed.
1706  */
/* NOTE(review): line 1707 missing -- likely the assignment of pprune->initial_pruning_steps from pinfo. */
1708  if (pinfo->initial_pruning_steps)
1709  {
/* NOTE(review): line 1710 missing -- likely the first line of the ExecInitPruningContext(&pprune->initial_context, ...) call completed below. */
1711  pinfo->initial_pruning_steps,
1712  partdesc, partkey, planstate);
1713  /* Record whether initial pruning is needed at any level */
1714  prunestate->do_initial_prune = true;
1715  }
1716  pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
1717  if (pinfo->exec_pruning_steps)
1718  {
/* NOTE(review): line 1719 missing -- likely the first line of the ExecInitPruningContext(&pprune->exec_context, ...) call completed below. */
1720  pinfo->exec_pruning_steps,
1721  partdesc, partkey, planstate);
1722  /* Record whether exec pruning is needed at any level */
1723  prunestate->do_exec_prune = true;
1724  }
1725 
1726  /*
1727  * Accumulate the IDs of all PARAM_EXEC Params affecting the
1728  * partitioning decisions at this plan node.
1729  */
1730  prunestate->execparamids = bms_add_members(prunestate->execparamids,
1731  pinfo->execparamids);
1732 
1733  j++;
1734  }
1735  i++;
1736  }
1737 
1738  return prunestate;
1739 }
1740 
1741 /*
1742  * Initialize a PartitionPruneContext for the given list of pruning steps.
1743  */
1744 static void
/* NOTE(review): line 1745 missing from this rendering -- presumably "ExecInitPruningContext(PartitionPruneContext *context," (the cross-reference residue at the bottom of this page shows that full signature). */
1746  List *pruning_steps,
1747  PartitionDesc partdesc,
1748  PartitionKey partkey,
1749  PlanState *planstate)
1750 {
1751  int n_steps;
1752  int partnatts;
1753  ListCell *lc;
1754 
1755  n_steps = list_length(pruning_steps);
1756 
1757  context->strategy = partkey->strategy;
1758  context->partnatts = partnatts = partkey->partnatts;
1759  context->nparts = partdesc->nparts;
1760  context->boundinfo = partdesc->boundinfo;
1761  context->partcollation = partkey->partcollation;
1762  context->partsupfunc = partkey->partsupfunc;
1763 
1764  /* We'll look up type-specific support functions as needed */
1765  context->stepcmpfuncs = (FmgrInfo *)
1766  palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
1767 
1768  context->ppccontext = CurrentMemoryContext;
1769  context->planstate = planstate;
1770 
1771  /* Initialize expression state for each expression we need */
/* The exprstates array is indexed by (step_id, keyno) via PruneCxtStateIdx; entries for Consts stay NULL. */
1772  context->exprstates = (ExprState **)
1773  palloc0(sizeof(ExprState *) * n_steps * partnatts);
1774  foreach(lc, pruning_steps)
1775  {
/* NOTE(review): line 1776 missing -- likely the step variable declaration (lfirst from lc) tested by the IsA below. */
1777  ListCell *lc2;
1778  int keyno;
1779 
1780  /* not needed for other step kinds */
1781  if (!IsA(step, PartitionPruneStepOp))
1782  continue;
1783 
1784  Assert(list_length(step->exprs) <= partnatts);
1785 
1786  keyno = 0;
1787  foreach(lc2, step->exprs)
1788  {
1789  Expr *expr = (Expr *) lfirst(lc2);
1790 
1791  /* not needed for Consts */
1792  if (!IsA(expr, Const))
1793  {
1794  int stateidx = PruneCxtStateIdx(partnatts,
1795  step->step.step_id,
1796  keyno);
1797 
1798  context->exprstates[stateidx] =
1799  ExecInitExpr(expr, context->planstate);
1800  }
1801  keyno++;
1802  }
1803  }
1804 }
1805 
1806 /*
1807  * ExecFindInitialMatchingSubPlans
1808  * Identify the set of subplans that cannot be eliminated by initial
1809  * pruning, disregarding any pruning constraints involving PARAM_EXEC
1810  * Params.
1811  *
1812  * If additional pruning passes will be required (because of PARAM_EXEC
1813  * Params), we must also update the translation data that allows conversion
1814  * of partition indexes into subplan indexes to account for the unneeded
1815  * subplans having been removed.
1816  *
1817  * Must only be called once per 'prunestate', and only if initial pruning
1818  * is required.
1819  *
1820  * 'nsubplans' must be passed as the total number of unpruned subplans.
1821  */
1822 Bitmapset *
/* NOTE(review): line 1823 missing from this rendering -- presumably "ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans)" (both names are used below); TODO confirm. */
1824 {
1825  Bitmapset *result = NULL;
1826  MemoryContext oldcontext;
1827  int i;
1828 
1829  /* Caller error if we get here without do_initial_prune */
1830  Assert(prunestate->do_initial_prune);
1831 
1832  /*
1833  * Switch to a temp context to avoid leaking memory in the executor's
1834  * query-lifespan memory context.
1835  */
1836  oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
1837 
1838  /*
1839  * For each hierarchy, do the pruning tests, and add nondeletable
1840  * subplans' indexes to "result".
1841  */
1842  for (i = 0; i < prunestate->num_partprunedata; i++)
1843  {
1844  PartitionPruningData *prunedata;
1845  PartitionedRelPruningData *pprune;
1846 
1847  prunedata = prunestate->partprunedata[i];
1848  pprune = &prunedata->partrelprunedata[0];
1849 
1850  /* Perform pruning without using PARAM_EXEC Params */
1851  find_matching_subplans_recurse(prunedata, pprune, true, &result);
1852 
1853  /* Expression eval may have used space in node's ps_ExprContext too */
1854  if (pprune->initial_pruning_steps)
/* NOTE(review): line 1855 missing -- likely a ResetExprContext call on the plan node's expression context; TODO confirm. */
1856  }
1857 
1858  /* Add in any subplans that partition pruning didn't account for */
1859  result = bms_add_members(result, prunestate->other_subplans);
1860 
1861  MemoryContextSwitchTo(oldcontext);
1862 
1863  /* Copy result out of the temp context before we reset it */
1864  result = bms_copy(result);
1865 
1866  MemoryContextReset(prunestate->prune_context);
1867 
1868  /*
1869  * If exec-time pruning is required and we pruned subplans above, then we
1870  * must re-sequence the subplan indexes so that ExecFindMatchingSubPlans
1871  * properly returns the indexes from the subplans which will remain after
1872  * execution of this function.
1873  *
1874  * We can safely skip this when !do_exec_prune, even though that leaves
1875  * invalid data in prunestate, because that data won't be consulted again
1876  * (cf initial Assert in ExecFindMatchingSubPlans).
1877  */
1878  if (prunestate->do_exec_prune && bms_num_members(result) < nsubplans)
1879  {
1880  int *new_subplan_indexes;
1881  Bitmapset *new_other_subplans;
1882  int i;
1883  int newidx;
1884 
1885  /*
1886  * First we must build a temporary array which maps old subplan
1887  * indexes to new ones. For convenience of initialization, we use
1888  * 1-based indexes in this array and leave pruned items as 0.
1889  */
1890  new_subplan_indexes = (int *) palloc0(sizeof(int) * nsubplans);
1891  newidx = 1;
1892  i = -1;
1893  while ((i = bms_next_member(result, i)) >= 0)
1894  {
1895  Assert(i < nsubplans);
1896  new_subplan_indexes[i] = newidx++;
1897  }
1898 
1899  /*
1900  * Now we can update each PartitionedRelPruneInfo's subplan_map with
1901  * new subplan indexes. We must also recompute its present_parts
1902  * bitmap.
1903  */
1904  for (i = 0; i < prunestate->num_partprunedata; i++)
1905  {
1906  PartitionPruningData *prunedata = prunestate->partprunedata[i];
1907  int j;
1908 
1909  /*
1910  * Within each hierarchy, we perform this loop in back-to-front
1911  * order so that we determine present_parts for the lowest-level
1912  * partitioned tables first. This way we can tell whether a
1913  * sub-partitioned table's partitions were entirely pruned so we
1914  * can exclude it from the current level's present_parts.
1915  */
1916  for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
1917  {
1918  PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
1919  int nparts = pprune->nparts;
1920  int k;
1921 
1922  /* We just rebuild present_parts from scratch */
1923  bms_free(pprune->present_parts);
1924  pprune->present_parts = NULL;
1925 
1926  for (k = 0; k < nparts; k++)
1927  {
1928  int oldidx = pprune->subplan_map[k];
1929  int subidx;
1930 
1931  /*
1932  * If this partition existed as a subplan then change the
1933  * old subplan index to the new subplan index. The new
1934  * index may become -1 if the partition was pruned above,
1935  * or it may just come earlier in the subplan list due to
1936  * some subplans being removed earlier in the list. If
1937  * it's a subpartition, add it to present_parts unless
1938  * it's entirely pruned.
1939  */
1940  if (oldidx >= 0)
1941  {
1942  Assert(oldidx < nsubplans);
1943  pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
1944 
1945  if (new_subplan_indexes[oldidx] > 0)
1946  pprune->present_parts =
1947  bms_add_member(pprune->present_parts, k);
1948  }
1949  else if ((subidx = pprune->subpart_map[k]) >= 0)
1950  {
1951  PartitionedRelPruningData *subprune;
1952 
1953  subprune = &prunedata->partrelprunedata[subidx];
1954 
1955  if (!bms_is_empty(subprune->present_parts))
1956  pprune->present_parts =
1957  bms_add_member(pprune->present_parts, k);
1958  }
1959  }
1960  }
1961  }
1962 
1963  /*
1964  * We must also recompute the other_subplans set, since indexes in it
1965  * may change.
1966  */
1967  new_other_subplans = NULL;
1968  i = -1;
1969  while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
1970  new_other_subplans = bms_add_member(new_other_subplans,
1971  new_subplan_indexes[i] - 1);
1972 
1973  bms_free(prunestate->other_subplans);
1974  prunestate->other_subplans = new_other_subplans;
1975 
1976  pfree(new_subplan_indexes);
1977  }
1978 
1979  return result;
1980 }
1981 
1982 /*
1983  * ExecFindMatchingSubPlans
1984  * Determine which subplans match the pruning steps detailed in
1985  * 'prunestate' for the current comparison expression values.
1986  *
1987  * Here we assume we may evaluate PARAM_EXEC Params.
1988  */
1989 Bitmapset *
/* NOTE(review): line 1990 missing from this rendering -- presumably "ExecFindMatchingSubPlans(PartitionPruneState *prunestate)"; TODO confirm. */
1991 {
1992  Bitmapset *result = NULL;
1993  MemoryContext oldcontext;
1994  int i;
1995 
1996  /*
1997  * If !do_exec_prune, we've got problems because
1998  * ExecFindInitialMatchingSubPlans will not have bothered to update
1999  * prunestate for whatever pruning it did.
2000  */
2001  Assert(prunestate->do_exec_prune);
2002 
2003  /*
2004  * Switch to a temp context to avoid leaking memory in the executor's
2005  * query-lifespan memory context.
2006  */
2007  oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2008 
2009  /*
2010  * For each hierarchy, do the pruning tests, and add nondeletable
2011  * subplans' indexes to "result".
2012  */
2013  for (i = 0; i < prunestate->num_partprunedata; i++)
2014  {
2015  PartitionPruningData *prunedata;
2016  PartitionedRelPruningData *pprune;
2017 
2018  prunedata = prunestate->partprunedata[i];
2019  pprune = &prunedata->partrelprunedata[0];
2020 
2021  find_matching_subplans_recurse(prunedata, pprune, false, &result);
2022 
2023  /* Expression eval may have used space in node's ps_ExprContext too */
2024  if (pprune->exec_pruning_steps)
/* NOTE(review): line 2025 missing -- likely a ResetExprContext call, paralleling the initial-prune path; TODO confirm. */
2026  }
2027 
2028  /* Add in any subplans that partition pruning didn't account for */
2029  result = bms_add_members(result, prunestate->other_subplans);
2030 
2031  MemoryContextSwitchTo(oldcontext);
2032 
2033  /* Copy result out of the temp context before we reset it */
2034  result = bms_copy(result);
2035 
2036  MemoryContextReset(prunestate->prune_context);
2037 
2038  return result;
2039 }
2040 
2041 /*
2042  * find_matching_subplans_recurse
2043  * Recursive worker function for ExecFindMatchingSubPlans and
2044  * ExecFindInitialMatchingSubPlans
2045  *
2046  * Adds valid (non-prunable) subplan IDs to *validsubplans
2047  */
2048 static void
/* NOTE(review): line 2049 missing from this rendering -- presumably "find_matching_subplans_recurse(PartitionPruningData *prunedata," (the parameter list continues below); TODO confirm. */
2050  PartitionedRelPruningData *pprune,
2051  bool initial_prune,
2052  Bitmapset **validsubplans)
2053 {
2054  Bitmapset *partset;
2055  int i;
2056 
2057  /* Guard against stack overflow due to overly deep partition hierarchy. */
/* NOTE(review): line 2058 missing -- likely a check_stack_depth() call, per the comment above; TODO confirm. */
2059 
2060  /* Only prune if pruning would be useful at this level. */
2061  if (initial_prune && pprune->initial_pruning_steps)
2062  {
2063  partset = get_matching_partitions(&pprune->initial_context,
2064  pprune->initial_pruning_steps);
2065  }
2066  else if (!initial_prune && pprune->exec_pruning_steps)
2067  {
2068  partset = get_matching_partitions(&pprune->exec_context,
2069  pprune->exec_pruning_steps);
2070  }
2071  else
2072  {
2073  /*
2074  * If no pruning is to be done, just include all partitions at this
2075  * level.
2076  */
2077  partset = pprune->present_parts;
2078  }
2079 
2080  /* Translate partset into subplan indexes */
2081  i = -1;
2082  while ((i = bms_next_member(partset, i)) >= 0)
2083  {
2084  if (pprune->subplan_map[i] >= 0)
2085  *validsubplans = bms_add_member(*validsubplans,
2086  pprune->subplan_map[i]);
2087  else
2088  {
2089  int partidx = pprune->subpart_map[i];
2090 
2091  if (partidx >= 0)
/* NOTE(review): line 2092 missing -- likely the recursive find_matching_subplans_recurse( call line completed by the continuation below. */
2093  &prunedata->partrelprunedata[partidx],
2094  initial_prune, validsubplans);
2095  else
2096  {
2097  /*
2098  * We get here if the planner already pruned all the sub-
2099  * partitions for this partition. Silently ignore this
2100  * partition in this case. The end result is the same: we
2101  * would have pruned all partitions just the same, but we
2102  * don't have any pruning steps to execute to verify this.
2103  */
2104  }
2105  }
2106  }
2107 }
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91
#define NIL
Definition: pg_list.h:65
static int get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
Definition: fmgr.h:56
struct PartitionDispatchData PartitionDispatchData
FmgrInfo * partsupfunc
Definition: partprune.h:55
Relation ri_RelationDesc
Definition: execnodes.h:411
Bitmapset * execparamids
Definition: plannodes.h:1202
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
MemoryContext prune_context
#define AllocSetContextCreate
Definition: memutils.h:173
PartitionDesc partdesc
static Datum ExecEvalExprSwitchContext(ExprState *state, ExprContext *econtext, bool *isNull)
Definition: executor.h:331
char * pg_get_partkeydef_columns(Oid relid, bool pretty)
Definition: ruleutils.c:1834
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
#define likely(x)
Definition: c.h:272
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2854
static void ExecInitPruningContext(PartitionPruneContext *context, List *pruning_steps, PartitionDesc partdesc, PartitionKey partkey, PlanState *planstate)
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:74
struct CopyMultiInsertBuffer * ri_CopyMultiInsertBuffer
Definition: execnodes.h:525
PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
Definition: partdesc.c:377
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:425
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:4556
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:322
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3113
#define RelationGetDescr(relation)
Definition: rel.h:495
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1238
Oid GetUserId(void)
Definition: miscinit.c:478
FmgrInfo * stepcmpfuncs
Definition: partprune.h:56
List * withCheckOptionLists
Definition: plannodes.h:227
FmgrInfo * partsupfunc
Definition: partcache.h:35
#define castNode(_type_, nodeptr)
Definition: nodes.h:608
BeginForeignInsert_function BeginForeignInsert
Definition: fdwapi.h:238
ResultRelInfo * resultRelInfo
Definition: execnodes.h:1191
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate, PartitionTupleRouting *proute, Oid partoid, PartitionDispatch parent_pd, int partidx, ResultRelInfo *rootResultRelInfo)
#define RelationGetForm(relation)
Definition: rel.h:463
ExprContext * ps_ExprContext
Definition: execnodes.h:1004
MemoryContext ppccontext
Definition: partprune.h:57
static Oid get_partition_col_typid(PartitionKey key, int col)
Definition: partcache.h:85
PartitionPruningData * partprunedata[FLEXIBLE_ARRAY_MEMBER]
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1043
const TupleTableSlotOps TTSOpsVirtual
Definition: execTuples.c:83
int maplen
Definition: attmap.h:37
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define IsolationUsesXactSnapshot()
Definition: xact.h:51
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:350
Definition: nodes.h:539
static int get_partition_natts(PartitionKey key)
Definition: partcache.h:64
int errcode(int sqlerrcode)
Definition: elog.c:698
#define PARTITION_MAX_KEYS
CmdType operation
Definition: execnodes.h:1187
ResultRelInfo * rootResultRelInfo
Definition: execnodes.h:1199
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:143
bool * is_leaf
Definition: partdesc.h:35
List * partexprs
Definition: partcache.h:30
EState * state
Definition: execnodes.h:967
Node * map_variable_attnos(Node *node, int target_varno, int sublevels_up, const AttrMap *attno_map, Oid to_rowtype, bool *found_whole_row)
PartitionKey RelationGetPartitionKey(Relation rel)
Definition: partcache.c:54
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
List * lappend_oid(List *list, Oid datum)
Definition: list.c:372
#define OidIsValid(objectId)
Definition: c.h:710
ResultRelInfo ** partitions
ExprState * ExecInitQual(List *qual, PlanState *parent)
Definition: execExpr.c:209
Index ri_RangeTableIndex
Definition: execnodes.h:408
List * onConflictSet
Definition: plannodes.h:235
Definition: attmap.h:34
PartitionBoundInfo boundinfo
Definition: partdesc.h:38
List * resultRelations
Definition: plannodes.h:225
void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
Definition: execIndexing.c:156
TupleTableSlot * oc_Existing
Definition: execnodes.h:380
#define GetPerTupleExprContext(estate)
Definition: executor.h:533
int partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, int nvalues, Datum *values, bool *is_equal)
Definition: partbounds.c:3590
TupleTableSlot * ps_ResultTupleSlot
Definition: execnodes.h:1003
void pfree(void *pointer)
Definition: mcxt.c:1169
MemoryContext es_query_cxt
Definition: execnodes.h:599
ProjectionInfo * ExecBuildUpdateProjection(List *targetList, bool evalTargetList, List *targetColnos, TupleDesc relDesc, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent)
Definition: execExpr.c:513
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
#define linitial(l)
Definition: pg_list.h:174
AttrMap * build_attrmap_by_name_if_req(TupleDesc indesc, TupleDesc outdesc)
Definition: attmap.c:259
#define ERROR
Definition: elog.h:46
static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, Bitmapset **validsubplans)
PlanState ps
Definition: execnodes.h:1186
ProjectionInfo * oc_ProjInfo
Definition: execnodes.h:382
#define lfirst_int(lc)
Definition: pg_list.h:170
void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute)
ExprState ** exprstates
Definition: partprune.h:59
TupleTableSlot * ri_PartitionTupleSlot
Definition: execnodes.h:513
int pg_mbcliplen(const char *mbstr, int len, int limit)
Definition: mbutils.c:1026
static void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
struct ResultRelInfo * ri_RootResultRelInfo
Definition: execnodes.h:511
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
void CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
Definition: execMain.c:992
#define lfirst_node(type, lc)
Definition: pg_list.h:172
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
TupleConversionMap * convert_tuples_by_name(TupleDesc indesc, TupleDesc outdesc)
Definition: tupconvert.c:102
Bitmapset * ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans)
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:68
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1254
void check_stack_depth(void)
Definition: postgres.c:3469
#define RowExclusiveLock
Definition: lockdefs.h:38
int errdetail(const char *fmt,...)
Definition: elog.c:1042
static List * adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
TupleConversionMap * ExecGetChildToRootMap(ResultRelInfo *resultRelInfo)
Definition: execUtils.c:1234
#define RelationGetRelationName(relation)
Definition: rel.h:503
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
struct FdwRoutine * ri_FdwRoutine
Definition: execnodes.h:455
Bitmapset * present_parts
Definition: plannodes.h:1187
PartitionDispatch * partition_dispatch_info
Definition: execPartition.c:96
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
Bitmapset * execparamids
int es_instrument
Definition: execnodes.h:606
ExprState * oc_WhereClause
Definition: execnodes.h:383
PartitionPruneStep step
Definition: plannodes.h:1247
GetForeignModifyBatchSize_function GetForeignModifyBatchSize
Definition: fdwapi.h:234
AttrMap * attrMap
Definition: tupconvert.h:28
List * lappend_int(List *list, int datum)
Definition: list.c:354
List * ExecPrepareExprList(List *nodes, EState *estate)
Definition: execExpr.c:820
ResultRelInfo * ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid, bool missing_ok, bool update_cache)
PartitionPruneContext exec_context
Definition: execPartition.h:57
List * lappend(List *list, void *datum)
Definition: list.c:336
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:701
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
OnConflictSetState * ri_onConflict
Definition: execnodes.h:494
ResultRelInfo ** nonleaf_partitions
Definition: execPartition.c:97
static int16 get_partition_col_attnum(PartitionKey key, int col)
Definition: partcache.h:79
Oid * partcollation
Definition: partcache.h:38
List * es_tupleTable
Definition: execnodes.h:601
void * palloc0(Size size)
Definition: mcxt.c:1093
AclResult
Definition: acl.h:177
AttrNumber * partattrs
Definition: partcache.h:28
uintptr_t Datum
Definition: postgres.h:411
void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options)
Definition: execMain.c:1193
#define ACL_SELECT
Definition: parsenodes.h:83
TupleTableSlot * tupslot
#define PARTITION_STRATEGY_HASH
Definition: parsenodes.h:814
#define partition_bound_accepts_nulls(bi)
Definition: partbounds.h:81
Plan * plan
Definition: execnodes.h:965
int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, Datum value, bool *is_equal)
Definition: partbounds.c:3502
uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *values, bool *isnull)
Definition: partbounds.c:4621
#define InvalidOid
Definition: postgres_ext.h:36
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:381
List * es_tuple_routing_result_relations
Definition: execnodes.h:587
int16 attnum
Definition: pg_attribute.h:83
#define ereport(elevel,...)
Definition: elog.h:157
TupleTableSlot * oc_ProjSlot
Definition: execnodes.h:381
#define INNER_VAR
Definition: primnodes.h:175
AttrMap * build_attrmap_by_name(TupleDesc indesc, TupleDesc outdesc)
Definition: attmap.c:174
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition: rls.c:52
Relation ExecGetRangeTableRelation(EState *estate, Index rti)
Definition: execUtils.c:782
static ResultRelInfo * ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *rootResultRelInfo, int partidx)
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
ExecForeignBatchInsert_function ExecForeignBatchInsert
Definition: fdwapi.h:233
#define makeNode(_type_)
Definition: nodes.h:587
bool list_member_oid(const List *list, Oid datum)
Definition: list.c:689
TupleTableSlot * execute_attr_map_slot(AttrMap *attrMap, TupleTableSlot *in_slot, TupleTableSlot *out_slot)
Definition: tupconvert.c:177
Bitmapset * other_subplans
Definition: plannodes.h:1163
int ri_BatchSize
Definition: execnodes.h:465
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
OnConflictAction onConflictAction
Definition: plannodes.h:233
AttrNumber * attnums
Definition: attmap.h:36
static int list_length(const List *l)
Definition: pg_list.h:149
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:226
static void ExecInitRoutingInfo(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, PartitionDispatch dispatch, ResultRelInfo *partRelInfo, int partidx, bool is_borrowed_rel)
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
PartitionDirectory es_partition_directory
Definition: execnodes.h:581
List * onConflictCols
Definition: plannodes.h:236
#define PARTITION_STRATEGY_LIST
Definition: parsenodes.h:815
List * RelationGetIndexList(Relation relation)
Definition: relcache.c:4570
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
PartitionedRelPruningData partrelprunedata[FLEXIBLE_ARRAY_MEMBER]
Definition: execPartition.h:70
#define InvalidAttrNumber
Definition: attnum.h:23
#define GetPerTupleMemoryContext(estate)
Definition: executor.h:538
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4685
static Datum values[MAXATTR]
Definition: bootstrap.c:166
#define PARTITION_STRATEGY_RANGE
Definition: parsenodes.h:816
static char * ExecBuildSlotPartitionKeyDescription(Relation rel, Datum *values, bool *isnull, int maxfieldlen)
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1653
void * palloc(Size size)
Definition: mcxt.c:1062
PartitionBoundInfo boundinfo
Definition: partprune.h:53
TupleConversionMap * ri_RootToPartitionMap
Definition: execnodes.h:512
ProjectionInfo * ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, TupleTableSlot *slot, PlanState *parent, TupleDesc inputDesc)
Definition: execExpr.c:353
int errmsg(const char *fmt,...)
Definition: elog.c:909
Bitmapset * get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
Definition: partprune.c:819
CmdType operation
Definition: plannodes.h:220
void list_free(List *list)
Definition: list.c:1391
#define elog(elevel,...)
Definition: elog.h:232
int i
bool ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool emitError)
Definition: execMain.c:1697
List * returningLists
Definition: plannodes.h:228
PartitionDesc PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
Definition: partdesc.c:410
ExprState * ExecInitExpr(Expr *node, PlanState *parent)
Definition: execExpr.c:123
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:120
ResultRelInfo * ExecFindPartition(ModifyTableState *mtstate, ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, TupleTableSlot *slot, EState *estate)
PartitionPruneState * ExecCreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *partitionpruneinfo)
MemoryContext memcxt
#define copyObject(obj)
Definition: nodes.h:655
#define PruneCxtStateIdx(partnatts, step_id, keyno)
Definition: partprune.h:68
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
struct PartitionDispatchData * PartitionDispatch
Definition: execPartition.h:22
Definition: pg_list.h:50
int errtable(Relation rel)
Definition: relcache.c:5628
List * get_partition_ancestors(Oid relid)
Definition: partition.c:133
PartitionPruneContext initial_context
Definition: execPartition.h:56
int16 AttrNumber
Definition: attnum.h:21
#define RelationGetRelid(relation)
Definition: rel.h:469
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
long val
Definition: informix.c:664
List * ri_onConflictArbiterIndexes
Definition: execnodes.h:491
void ExecCloseIndices(ResultRelInfo *resultRelInfo)
Definition: execIndexing.c:231
#define offsetof(type, field)
Definition: c.h:727
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:793
int indexes[FLEXIBLE_ARRAY_MEMBER]
#define ResetExprContext(econtext)
Definition: executor.h:527
#define lfirst_oid(lc)
Definition: pg_list.h:171
Bitmapset * other_subplans
PlanState * planstate
Definition: partprune.h:58
EndForeignInsert_function EndForeignInsert
Definition: fdwapi.h:239
struct PartitionedRelPruningData PartitionedRelPruningData
Node * onConflictWhere
Definition: plannodes.h:237
Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate)
PartitionTupleRouting * ExecSetupPartitionTupleRouting(EState *estate, Relation rel)