nodeAppend.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeAppend.c
4  * routines to handle append nodes.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeAppend.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /* INTERFACE ROUTINES
16  * ExecInitAppend - initialize the append node
17  * ExecAppend - retrieve the next tuple from the node
18  * ExecEndAppend - shut down the append node
19  * ExecReScanAppend - rescan the append node
20  *
21  * NOTES
22  * Each append node contains a list of one or more subplans which
23  * must be iteratively processed (forwards or backwards).
24  * Tuples are retrieved by executing the 'whichplan'th subplan
25  * until the subplan stops returning tuples, at which point that
26  * plan is shut down and the next started up.
27  *
28  * Append nodes don't make use of their left and right
29  * subtrees, rather they maintain a list of subplans so
30  * a typical append node looks like this in the plan tree:
31  *
32  *                   ...
33  *                   /
34  *                Append -------+------+------+--- nil
35  *                /   \         |      |      |
36  *              nil   nil      ...    ...    ...
37  *                                 subplans
38  *
39  * Append nodes are currently used for unions, and to support
40  * inheritance queries, where several relations need to be scanned.
41  * For example, in our standard person/student/employee/student-emp
42  * example, where student and employee inherit from person
43  * and student-emp inherits from student and employee, the
44  * query:
45  *
46  * select name from person
47  *
48  * generates the plan:
49  *
50  *              |
51  *              Append -------+-------+--------+--------+
52  *              /   \         |       |        |        |
53  *            nil   nil      Scan    Scan     Scan     Scan
54  *                            |       |        |        |
55  *                          person employee student student-emp
56  */
57 
58 #include "postgres.h"
59 
60 #include "executor/execdebug.h"
61 #include "executor/nodeAppend.h"
62 #include "miscadmin.h"
63 
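/*
 * Editor's illustration (not part of the original file): the NOTES in the
 * header comment above boil down to a simple driver loop -- pull from the
 * current subplan until it is exhausted, then move on.  The sketch below is
 * illustrative only; the real ExecAppend further down also handles backward
 * scans and parallel workers through the choose_next_subplan callback.
 */
static TupleTableSlot *
append_driver_sketch(AppendState *node)
{
    while (node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans)
    {
        TupleTableSlot *slot = ExecProcNode(node->appendplans[node->as_whichplan]);

        if (!TupIsNull(slot))
            return slot;            /* current subplan still produces tuples */
        node->as_whichplan++;       /* exhausted; advance to the next subplan */
    }
    return NULL;                    /* every subplan exhausted */
}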
64 /* Shared state for parallel-aware Append. */
65 struct ParallelAppendState
66 {
67  LWLock pa_lock; /* mutual exclusion to choose next subplan */
68  int pa_next_plan; /* next plan to choose by any worker */
69 
70  /*
71  * pa_finished[i] should be true if no more workers should select subplan
72  * i.  For a non-partial plan, this should be set to true as soon as a
73  * worker selects the plan; for a partial plan, it remains false until
74  * some worker executes the plan to completion.
75  */
76  bool pa_finished[FLEXIBLE_ARRAY_MEMBER];
77 };
78 
79 #define INVALID_SUBPLAN_INDEX -1
80 
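/*
 * Editor's illustration (not part of the original file): because pa_finished
 * is a flexible array member, the shared-memory chunk holding a
 * ParallelAppendState must be sized for the number of subplans.  A minimal
 * sketch of that computation follows (the helper name is made up);
 * ExecAppendEstimate below does this for real.
 */
static Size
parallel_append_state_size_sketch(int nplans)
{
    return add_size(offsetof(ParallelAppendState, pa_finished),
                    sizeof(bool) * nplans);
}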
81 static TupleTableSlot *ExecAppend(PlanState *pstate);
82 static bool choose_next_subplan_locally(AppendState *node);
83 static bool choose_next_subplan_for_leader(AppendState *node);
84 static bool choose_next_subplan_for_worker(AppendState *node);
85 
86 /* ----------------------------------------------------------------
87  * ExecInitAppend
88  *
89  * Begin all of the subscans of the append node.
90  *
91  * (This is potentially wasteful, since the entire result of the
92  * append node may not be scanned, but this way all of the
93  * structures get allocated in the executor's top level memory
94  * block instead of that of the call to ExecAppend.)
95  * ----------------------------------------------------------------
96  */
97 AppendState *
98 ExecInitAppend(Append *node, EState *estate, int eflags)
99 {
100  AppendState *appendstate = makeNode(AppendState);
101  PlanState **appendplanstates;
102  int nplans;
103  int i;
104  ListCell *lc;
105 
106  /* check for unsupported flags */
107  Assert(!(eflags & EXEC_FLAG_MARK));
108 
109  /*
110  * Lock the non-leaf tables in the partition tree controlled by this node.
111  * It's a no-op for non-partitioned parent tables.
112  */
113  ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
114 
115  /*
116  * Set up empty vector of subplan states
117  */
118  nplans = list_length(node->appendplans);
119 
120  appendplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *));
121 
122  /*
123  * create new AppendState for our append node
124  */
125  appendstate->ps.plan = (Plan *) node;
126  appendstate->ps.state = estate;
127  appendstate->ps.ExecProcNode = ExecAppend;
128  appendstate->appendplans = appendplanstates;
129  appendstate->as_nplans = nplans;
130 
131  /*
132  * Initialize result tuple type and slot.
133  */
134  ExecInitResultTupleSlotTL(estate, &appendstate->ps);
135 
136  /*
137  * call ExecInitNode on each of the plans to be executed and save the
138  * results into the array "appendplans".
139  */
140  i = 0;
141  foreach(lc, node->appendplans)
142  {
143  Plan *initNode = (Plan *) lfirst(lc);
144 
145  appendplanstates[i] = ExecInitNode(initNode, estate, eflags);
146  i++;
147  }
148 
149  /*
150  * Miscellaneous initialization
151  *
152  * Append plans don't have expression contexts because they never call
153  * ExecQual or ExecProject.
154  */
155  appendstate->ps.ps_ProjInfo = NULL;
156 
157  /*
158  * Parallel-aware append plans must choose the first subplan to execute by
159  * looking at shared memory, but non-parallel-aware append plans can
160  * always start with the first subplan.
161  */
162  appendstate->as_whichplan =
163  appendstate->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;
164 
165  /* If parallel-aware, this will be overridden later. */
166  appendstate->choose_next_subplan = choose_next_subplan_locally;
167 
168  return appendstate;
169 }
170 
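/*
 * Editor's illustration (not part of the original file): ExecInitAppend
 * installs ExecAppend as the node's ExecProcNode method, so callers never
 * invoke ExecAppend directly.  Simplified sketch of how a caller pulls a
 * tuple from the initialized node; in reality the executor wraps the method
 * pointer for instrumentation before it is first called.
 */
static TupleTableSlot *
pull_from_append_sketch(AppendState *appendstate)
{
    return ExecProcNode(&appendstate->ps);  /* dispatches to ExecAppend */
}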
171 /* ----------------------------------------------------------------
172  * ExecAppend
173  *
174  * Handles iteration over multiple subplans.
175  * ----------------------------------------------------------------
176  */
177 static TupleTableSlot *
178 ExecAppend(PlanState *pstate)
179 {
180  AppendState *node = castNode(AppendState, pstate);
181 
182  /* If no subplan has been chosen, we must choose one before proceeding. */
183  if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
184  !node->choose_next_subplan(node))
185  return ExecClearTuple(node->ps.ps_ResultTupleSlot);
186 
187  for (;;)
188  {
189  PlanState *subnode;
190  TupleTableSlot *result;
191 
192  CHECK_FOR_INTERRUPTS();
193 
194  /*
195  * figure out which subplan we are currently processing
196  */
197  Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
198  subnode = node->appendplans[node->as_whichplan];
199 
200  /*
201  * get a tuple from the subplan
202  */
203  result = ExecProcNode(subnode);
204 
205  if (!TupIsNull(result))
206  {
207  /*
208  * If the subplan gave us something then return it as-is. We do
209  * NOT make use of the result slot that was set up in
210  * ExecInitAppend; there's no need for it.
211  */
212  return result;
213  }
214 
215  /* choose new subplan; if none, we're done */
216  if (!node->choose_next_subplan(node))
217  return ExecClearTuple(node->ps.ps_ResultTupleSlot);
218  }
219 }
220 
221 /* ----------------------------------------------------------------
222  * ExecEndAppend
223  *
224  * Shuts down the subscans of the append node.
225  *
226  * Returns nothing of interest.
227  * ----------------------------------------------------------------
228  */
229 void
230 ExecEndAppend(AppendState *node)
231 {
232  PlanState **appendplans;
233  int nplans;
234  int i;
235 
236  /*
237  * get information from the node
238  */
239  appendplans = node->appendplans;
240  nplans = node->as_nplans;
241 
242  /*
243  * shut down each of the subscans
244  */
245  for (i = 0; i < nplans; i++)
246  ExecEndNode(appendplans[i]);
247 }
248 
249 void
250 ExecReScanAppend(AppendState *node)
251 {
252  int i;
253 
254  for (i = 0; i < node->as_nplans; i++)
255  {
256  PlanState *subnode = node->appendplans[i];
257 
258  /*
259  * ExecReScan doesn't know about my subplans, so I have to do
260  * changed-parameter signaling myself.
261  */
262  if (node->ps.chgParam != NULL)
263  UpdateChangedParamSet(subnode, node->ps.chgParam);
264 
265  /*
266  * If chgParam of subnode is not null then plan will be re-scanned by
267  * first ExecProcNode.
268  */
269  if (subnode->chgParam == NULL)
270  ExecReScan(subnode);
271  }
272 
273  node->as_whichplan =
274  node->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0;
275 }
276 
277 /* ----------------------------------------------------------------
278  * Parallel Append Support
279  * ----------------------------------------------------------------
280  */
281 
282 /* ----------------------------------------------------------------
283  * ExecAppendEstimate
284  *
285  * Compute the amount of space we'll need in the parallel
286  * query DSM, and inform pcxt->estimator about our needs.
287  * ----------------------------------------------------------------
288  */
289 void
290 ExecAppendEstimate(AppendState *node,
291  ParallelContext *pcxt)
292 {
293  node->pstate_len =
294  add_size(offsetof(ParallelAppendState, pa_finished),
295  sizeof(bool) * node->as_nplans);
296 
297  shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
298  shm_toc_estimate_keys(&pcxt->estimator, 1);
299 }
300 
301 
302 /* ----------------------------------------------------------------
303  * ExecAppendInitializeDSM
304  *
305  * Set up shared state for Parallel Append.
306  * ----------------------------------------------------------------
307  */
308 void
309 ExecAppendInitializeDSM(AppendState *node,
310  ParallelContext *pcxt)
311 {
312  ParallelAppendState *pstate;
313 
314  pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
315  memset(pstate, 0, node->pstate_len);
316  LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
317  shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);
318 
319  node->as_pstate = pstate;
320  node->choose_next_subplan = choose_next_subplan_for_leader;
321 }
322 
323 /* ----------------------------------------------------------------
324  * ExecAppendReInitializeDSM
325  *
326  * Reset shared state before beginning a fresh scan.
327  * ----------------------------------------------------------------
328  */
329 void
330 ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
331 {
332  ParallelAppendState *pstate = node->as_pstate;
333 
334  pstate->pa_next_plan = 0;
335  memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
336 }
337 
338 /* ----------------------------------------------------------------
339  * ExecAppendInitializeWorker
340  *
341  * Copy relevant information from TOC into planstate, and initialize
342  * whatever is required to choose and execute the optimal subplan.
343  * ----------------------------------------------------------------
344  */
345 void
346 ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
347 {
348  node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
349  node->choose_next_subplan = choose_next_subplan_for_worker;
350 }
351 
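/*
 * Editor's illustration (not part of the original file): taken together, the
 * four functions above implement the Parallel Append side of the executor's
 * DSM handshake.  Rough sketch of the call order; the real orchestration
 * lives in the generic parallel-query machinery, and the driver function and
 * its in_worker flag here are hypothetical.
 */
static void
parallel_append_dsm_protocol_sketch(AppendState *node, ParallelContext *pcxt,
                                     ParallelWorkerContext *pwcxt, bool in_worker)
{
    if (!in_worker)
    {
        ExecAppendEstimate(node, pcxt);         /* leader: reserve space in the DSM TOC */
        ExecAppendInitializeDSM(node, pcxt);    /* leader: allocate, zero, and publish it */
        /* ... and on a rescan of the whole parallel query: */
        ExecAppendReInitializeDSM(node, pcxt);  /* leader: reset pa_next_plan/pa_finished */
    }
    else
        ExecAppendInitializeWorker(node, pwcxt);    /* worker: attach to shared state */
}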
352 /* ----------------------------------------------------------------
353  * choose_next_subplan_locally
354  *
355  * Choose next subplan for a non-parallel-aware Append,
356  * returning false if there are no more.
357  * ----------------------------------------------------------------
358  */
359 static bool
360 choose_next_subplan_locally(AppendState *node)
361 {
362  int whichplan = node->as_whichplan;
363 
364  /* We should never see INVALID_SUBPLAN_INDEX in this case. */
365  Assert(whichplan >= 0 && whichplan <= node->as_nplans);
366 
367  if (ScanDirectionIsForward(node->ps.state->es_direction))
368  {
369  if (whichplan >= node->as_nplans - 1)
370  return false;
371  node->as_whichplan++;
372  }
373  else
374  {
375  if (whichplan <= 0)
376  return false;
377  node->as_whichplan--;
378  }
379 
380  return true;
381 }
382 
383 /* ----------------------------------------------------------------
384  * choose_next_subplan_for_leader
385  *
386  * Try to pick a plan which doesn't commit us to doing much
387  * work locally, so that as much work as possible is done in
388  * the workers. Cheapest subplans are at the end.
389  * ----------------------------------------------------------------
390  */
391 static bool
392 choose_next_subplan_for_leader(AppendState *node)
393 {
394  ParallelAppendState *pstate = node->as_pstate;
395  Append *append = (Append *) node->ps.plan;
396 
397  /* Backward scan is not supported by parallel-aware plans */
398  Assert(ScanDirectionIsForward(node->ps.state->es_direction));
399 
400  LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
401 
402  if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
403  {
404  /* Mark just-completed subplan as finished. */
405  node->as_pstate->pa_finished[node->as_whichplan] = true;
406  }
407  else
408  {
409  /* Start with last subplan. */
410  node->as_whichplan = node->as_nplans - 1;
411  }
412 
413  /* Loop until we find a subplan to execute. */
414  while (pstate->pa_finished[node->as_whichplan])
415  {
416  if (node->as_whichplan == 0)
417  {
418  pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
419  node->as_whichplan = INVALID_SUBPLAN_INDEX;
420  LWLockRelease(&pstate->pa_lock);
421  return false;
422  }
423  node->as_whichplan--;
424  }
425 
426  /* If non-partial, immediately mark as finished. */
427  if (node->as_whichplan < append->first_partial_plan)
428  node->as_pstate->pa_finished[node->as_whichplan] = true;
429 
430  LWLockRelease(&pstate->pa_lock);
431 
432  return true;
433 }
434 
435 /* ----------------------------------------------------------------
436  * choose_next_subplan_for_worker
437  *
438  * Choose next subplan for a parallel-aware Append, returning
439  * false if there are no more.
440  *
441  * We start from the first plan and advance through the list;
442  * when we get back to the end, we loop back to the first
443  * partial plan. This assigns the non-partial plans first in
444  * order of descending cost and then spreads out the workers
445  * as evenly as possible across the remaining partial plans.
446  * ----------------------------------------------------------------
447  */
448 static bool
449 choose_next_subplan_for_worker(AppendState *node)
450 {
451  ParallelAppendState *pstate = node->as_pstate;
452  Append *append = (Append *) node->ps.plan;
453 
454  /* Backward scan is not supported by parallel-aware plans */
455  Assert(ScanDirectionIsForward(node->ps.state->es_direction));
456 
457  LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
458 
459  /* Mark just-completed subplan as finished. */
460  if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
461  node->as_pstate->pa_finished[node->as_whichplan] = true;
462 
463  /* If all the plans are already done, we have nothing to do */
464  if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
465  {
466  LWLockRelease(&pstate->pa_lock);
467  return false;
468  }
469 
470  /* Save the plan from which we are starting the search. */
471  node->as_whichplan = pstate->pa_next_plan;
472 
473  /* Loop until we find a subplan to execute. */
474  while (pstate->pa_finished[pstate->pa_next_plan])
475  {
476  if (pstate->pa_next_plan < node->as_nplans - 1)
477  {
478  /* Advance to next plan. */
479  pstate->pa_next_plan++;
480  }
481  else if (node->as_whichplan > append->first_partial_plan)
482  {
483  /* Loop back to first partial plan. */
484  pstate->pa_next_plan = append->first_partial_plan;
485  }
486  else
487  {
488  /*
489  * At last plan, and either there are no partial plans or we've
490  * tried them all. Arrange to bail out.
491  */
492  pstate->pa_next_plan = node->as_whichplan;
493  }
494 
495  if (pstate->pa_next_plan == node->as_whichplan)
496  {
497  /* We've tried everything! */
498  pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
499  LWLockRelease(&pstate->pa_lock);
500  return false;
501  }
502  }
503 
504  /* Pick the plan we found, and advance pa_next_plan one more time. */
505  node->as_whichplan = pstate->pa_next_plan++;
506  if (pstate->pa_next_plan >= node->as_nplans)
507  {
508  if (append->first_partial_plan < node->as_nplans)
509  pstate->pa_next_plan = append->first_partial_plan;
510  else
511  {
512  /*
513  * We have only non-partial plans, and we already chose the last
514  * one; so arrange for the other workers to immediately bail out.
515  */
516  pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
517  }
518  }
519 
520  /* If non-partial, immediately mark as finished. */
521  if (node->as_whichplan < append->first_partial_plan)
522  node->as_pstate->pa_finished[node->as_whichplan] = true;
523 
524  LWLockRelease(&pstate->pa_lock);
525 
526  return true;
527 }
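/*
 * Editor's illustration (not part of the original file): a standalone
 * simulation of the hand-out order that the choose_next_subplan_for_worker
 * header comment describes, using made-up values -- five subplans, of which
 * subplans 0-2 are non-partial and first_partial_plan = 3.  Successive
 * requests receive 0, 1, 2, 3, 4, and thereafter alternate between the
 * partial plans 3 and 4.
 */
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
    const int   nplans = 5;
    const int   first_partial_plan = 3;     /* subplans 0..2 are non-partial */
    bool        finished[5] = {false, false, false, false, false};
    int         next_plan = 0;

    for (int request = 0; request < 8; request++)
    {
        int         chosen = next_plan;

        printf("request %d -> subplan %d\n", request, chosen);

        /* A non-partial subplan is finished as soon as one worker claims it. */
        if (chosen < first_partial_plan)
            finished[chosen] = true;

        /* Advance, wrapping back to the first partial plan at the end. */
        next_plan = (chosen + 1 < nplans) ? chosen + 1 : first_partial_plan;

        /* Skip over subplans already marked finished (simplified). */
        while (finished[next_plan] && next_plan + 1 < nplans)
            next_plan++;
    }
    return 0;
}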