nodeAppend.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeAppend.c
4  * routines to handle append nodes.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/executor/nodeAppend.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /* INTERFACE ROUTINES
16  * ExecInitAppend - initialize the append node
17  * ExecAppend - retrieve the next tuple from the node
18  * ExecEndAppend - shut down the append node
19  * ExecReScanAppend - rescan the append node
20  *
21  * NOTES
22  * Each append node contains a list of one or more subplans which
23  * must be iteratively processed (forwards or backwards).
24  * Tuples are retrieved by executing the 'whichplan'th subplan
25  * until the subplan stops returning tuples, at which point that
26  * plan is shut down and the next started up.
27  *
28  * Append nodes don't make use of their left and right
29  * subtrees, rather they maintain a list of subplans so
30  * a typical append node looks like this in the plan tree:
31  *
32  *                 ...
33  *                 /
34  *               Append -------+------+------+--- nil
35  *               /   \         |      |      |
36  *             nil   nil      ...    ...    ...
37  *                                subplans
38  *
39  * Append nodes are currently used for unions, and to support
40  * inheritance queries, where several relations need to be scanned.
41  * For example, in our standard person/student/employee/student-emp
42  * example, where student and employee inherit from person
43  * and student-emp inherits from student and employee, the
44  * query:
45  *
46  * select name from person
47  *
48  * generates the plan:
49  *
50  *                 |
51  *               Append -------+-------+--------+--------+
52  *               /   \         |       |        |        |
53  *             nil   nil      Scan    Scan     Scan     Scan
54  *                             |       |        |        |
55  *                          person employee student student-emp
56  */
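/*
 * Editor's note -- illustrative sketch, not part of the original file: a
 * present-day query that produces an Append is a UNION ALL or a scan of a
 * partitioned table.  With a hypothetical range-partitioned table
 * "measurement", EXPLAIN shows the node with one subplan per partition:
 *
 *     EXPLAIN (COSTS OFF) SELECT * FROM measurement;
 *             Append
 *               ->  Seq Scan on measurement_y2024 measurement_1
 *               ->  Seq Scan on measurement_y2025 measurement_2
 */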
57 
58 #include "postgres.h"
59 
60 #include "executor/execAsync.h"
61 #include "executor/execdebug.h"
62 #include "executor/execPartition.h"
63 #include "executor/nodeAppend.h"
64 #include "miscadmin.h"
65 #include "pgstat.h"
66 #include "storage/latch.h"
67 
68 /* Shared state for parallel-aware Append. */
69 typedef struct ParallelAppendState
70 {
71  LWLock pa_lock; /* mutual exclusion to choose next subplan */
72  int pa_next_plan; /* next plan to choose by any worker */
73 
74  /*
75  * pa_finished[i] should be true if no more workers should select subplan
76  * i. for a non-partial plan, this should be set to true as soon as a
77  * worker selects the plan; for a partial plan, it remains false until
78  * some worker executes the plan to completion.
79  */
80  bool pa_finished[FLEXIBLE_ARRAY_MEMBER];
81 } ParallelAppendState;
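/*
 * Editor's note -- minimal sketch (an assumption drawn from the
 * choose_next_subplan_* functions below, not part of the original file) of
 * how this shared state is meant to be consulted.  A participant examines
 * and updates pa_finished only while holding pa_lock, and a non-partial
 * subplan is claimed exclusively by flagging it finished at once:
 *
 *     LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
 *     if (!pstate->pa_finished[plan])
 *     {
 *         if (plan < node->as_first_partial_plan)
 *             pstate->pa_finished[plan] = true;
 *         node->as_whichplan = plan;
 *     }
 *     LWLockRelease(&pstate->pa_lock);
 */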
82 
83 #define INVALID_SUBPLAN_INDEX -1
84 #define EVENT_BUFFER_SIZE 16
85 
86 static TupleTableSlot *ExecAppend(PlanState *pstate);
87 static bool choose_next_subplan_locally(AppendState *node);
88 static bool choose_next_subplan_for_leader(AppendState *node);
89 static bool choose_next_subplan_for_worker(AppendState *node);
90 static void mark_invalid_subplans_as_finished(AppendState *node);
91 static void ExecAppendAsyncBegin(AppendState *node);
92 static bool ExecAppendAsyncGetNext(AppendState *node, TupleTableSlot **result);
93 static bool ExecAppendAsyncRequest(AppendState *node, TupleTableSlot **result);
94 static void ExecAppendAsyncEventWait(AppendState *node);
95 static void classify_matching_subplans(AppendState *node);
96 
97 /* ----------------------------------------------------------------
98  * ExecInitAppend
99  *
100  * Begin all of the subscans of the append node.
101  *
102  * (This is potentially wasteful, since the entire result of the
103  * append node may not be scanned, but this way all of the
104  * structures get allocated in the executor's top level memory
105  * block instead of that of the call to ExecAppend.)
106  * ----------------------------------------------------------------
107  */
108 AppendState *
109 ExecInitAppend(Append *node, EState *estate, int eflags)
110 {
111  AppendState *appendstate = makeNode(AppendState);
112  PlanState **appendplanstates;
113  Bitmapset *validsubplans;
114  Bitmapset *asyncplans;
115  int nplans;
116  int nasyncplans;
117  int firstvalid;
118  int i,
119  j;
120 
121  /* check for unsupported flags */
122  Assert(!(eflags & EXEC_FLAG_MARK));
123 
124  /*
125  * create new AppendState for our append node
126  */
127  appendstate->ps.plan = (Plan *) node;
128  appendstate->ps.state = estate;
129  appendstate->ps.ExecProcNode = ExecAppend;
130 
131  /* Let choose_next_subplan_* function handle setting the first subplan */
132  appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;
133  appendstate->as_syncdone = false;
134  appendstate->as_begun = false;
135 
136  /* If run-time partition pruning is enabled, then set that up now */
137  if (node->part_prune_info != NULL)
138  {
139  PartitionPruneState *prunestate;
140 
141  /* We may need an expression context to evaluate partition exprs */
142  ExecAssignExprContext(estate, &appendstate->ps);
143 
144  /* Create the working data structure for pruning. */
145  prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
146  node->part_prune_info);
147  appendstate->as_prune_state = prunestate;
148 
149  /* Perform an initial partition prune, if required. */
150  if (prunestate->do_initial_prune)
151  {
152  /* Determine which subplans survive initial pruning */
153  validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
154  list_length(node->appendplans));
155 
156  nplans = bms_num_members(validsubplans);
157  }
158  else
159  {
160  /* We'll need to initialize all subplans */
161  nplans = list_length(node->appendplans);
162  Assert(nplans > 0);
163  validsubplans = bms_add_range(NULL, 0, nplans - 1);
164  }
165 
166  /*
167  * When no run-time pruning is required and there's at least one
168  * subplan, we can fill as_valid_subplans immediately, preventing
169  * later calls to ExecFindMatchingSubPlans.
170  */
171  if (!prunestate->do_exec_prune && nplans > 0)
172  appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
173  }
174  else
175  {
176  nplans = list_length(node->appendplans);
177 
178  /*
179  * When run-time partition pruning is not enabled we can just mark all
180  * subplans as valid; they must also all be initialized.
181  */
182  Assert(nplans > 0);
183  appendstate->as_valid_subplans = validsubplans =
184  bms_add_range(NULL, 0, nplans - 1);
185  appendstate->as_prune_state = NULL;
186  }
187 
188  /*
189  * Initialize result tuple type and slot.
190  */
191  ExecInitResultTupleSlotTL(&appendstate->ps, &TTSOpsVirtual);
192 
193  /* node returns slots from each of its subnodes, therefore not fixed */
194  appendstate->ps.resultopsset = true;
195  appendstate->ps.resultopsfixed = false;
196 
197  appendplanstates = (PlanState **) palloc(nplans *
198  sizeof(PlanState *));
199 
200  /*
201  * call ExecInitNode on each of the valid plans to be executed and save
202  * the results into the appendplanstates array.
203  *
204  * While at it, find out the first valid partial plan.
205  */
206  j = 0;
207  asyncplans = NULL;
208  nasyncplans = 0;
209  firstvalid = nplans;
210  i = -1;
211  while ((i = bms_next_member(validsubplans, i)) >= 0)
212  {
213  Plan *initNode = (Plan *) list_nth(node->appendplans, i);
214 
215  /*
216  * Record async subplans. When executing EvalPlanQual, we treat them
217  * as sync ones; don't do this when initializing an EvalPlanQual plan
218  * tree.
219  */
220  if (initNode->async_capable && estate->es_epq_active == NULL)
221  {
222  asyncplans = bms_add_member(asyncplans, j);
223  nasyncplans++;
224  }
225 
226  /*
227  * Record the lowest appendplans index which is a valid partial plan.
228  */
229  if (i >= node->first_partial_plan && j < firstvalid)
230  firstvalid = j;
231 
232  appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
233  }
234 
235  appendstate->as_first_partial_plan = firstvalid;
236  appendstate->appendplans = appendplanstates;
237  appendstate->as_nplans = nplans;
238 
239  /* Initialize async state */
240  appendstate->as_asyncplans = asyncplans;
241  appendstate->as_nasyncplans = nasyncplans;
242  appendstate->as_asyncrequests = NULL;
243  appendstate->as_asyncresults = (TupleTableSlot **)
244  palloc0(nasyncplans * sizeof(TupleTableSlot *));
245  appendstate->as_needrequest = NULL;
246  appendstate->as_eventset = NULL;
247 
248  if (nasyncplans > 0)
249  {
250  appendstate->as_asyncrequests = (AsyncRequest **)
251  palloc0(nplans * sizeof(AsyncRequest *));
252 
253  i = -1;
254  while ((i = bms_next_member(asyncplans, i)) >= 0)
255  {
256  AsyncRequest *areq;
257 
258  areq = palloc(sizeof(AsyncRequest));
259  areq->requestor = (PlanState *) appendstate;
260  areq->requestee = appendplanstates[i];
261  areq->request_index = i;
262  areq->callback_pending = false;
263  areq->request_complete = false;
264  areq->result = NULL;
265 
266  appendstate->as_asyncrequests[i] = areq;
267  }
268  }
269 
270  /*
271  * Miscellaneous initialization
272  */
273 
274  appendstate->ps.ps_ProjInfo = NULL;
275 
276  /* For parallel query, this will be overridden later. */
277  appendstate->choose_next_subplan = choose_next_subplan_locally;
278 
279  return appendstate;
280 }
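/*
 * Editor's note -- illustrative example (an assumption, not from the original
 * file): when do_initial_prune is set above, subplans discarded before
 * ExecInitNode are reported by EXPLAIN as "Subplans Removed".  For a
 * hypothetical partitioned table "p" scanned via a generic prepared plan:
 *
 *     PREPARE q(int) AS SELECT * FROM p WHERE part_key = $1;
 *     EXPLAIN (COSTS OFF) EXECUTE q(42);
 *             Append
 *               Subplans Removed: 3
 *               ->  Seq Scan on p_2 p_1
 *                     Filter: (part_key = $1)
 */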
281 
282 /* ----------------------------------------------------------------
283  * ExecAppend
284  *
285  * Handles iteration over multiple subplans.
286  * ----------------------------------------------------------------
287  */
288 static TupleTableSlot *
289 ExecAppend(PlanState *pstate)
290 {
291  AppendState *node = castNode(AppendState, pstate);
292  TupleTableSlot *result;
293 
294  /*
295  * If this is the first call after Init or ReScan, we need to do the
296  * initialization work.
297  */
298  if (!node->as_begun)
299  {
300  Assert(node->as_whichplan == INVALID_SUBPLAN_INDEX);
301  Assert(!node->as_syncdone);
302 
303  /* Nothing to do if there are no subplans */
304  if (node->as_nplans == 0)
305  return ExecClearTuple(node->ps.ps_ResultTupleSlot);
306 
307  /* If there are any async subplans, begin executing them. */
308  if (node->as_nasyncplans > 0)
309  ExecAppendAsyncBegin(node);
310 
311  /*
312  * If no sync subplan has been chosen, we must choose one before
313  * proceeding.
314  */
315  if (!node->choose_next_subplan(node) && node->as_nasyncremain == 0)
316  return ExecClearTuple(node->ps.ps_ResultTupleSlot);
317 
318  Assert(node->as_syncdone ||
319  (node->as_whichplan >= 0 &&
320  node->as_whichplan < node->as_nplans));
321 
322  /* And we're initialized. */
323  node->as_begun = true;
324  }
325 
326  for (;;)
327  {
328  PlanState *subnode;
329 
330  CHECK_FOR_INTERRUPTS();
331 
332  /*
333  * try to get a tuple from an async subplan if any
334  */
335  if (node->as_syncdone || !bms_is_empty(node->as_needrequest))
336  {
337  if (ExecAppendAsyncGetNext(node, &result))
338  return result;
339  Assert(!node->as_syncdone);
340  Assert(bms_is_empty(node->as_needrequest));
341  }
342 
343  /*
344  * figure out which sync subplan we are currently processing
345  */
346  Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
347  subnode = node->appendplans[node->as_whichplan];
348 
349  /*
350  * get a tuple from the subplan
351  */
352  result = ExecProcNode(subnode);
353 
354  if (!TupIsNull(result))
355  {
356  /*
357  * If the subplan gave us something then return it as-is. We do
358  * NOT make use of the result slot that was set up in
359  * ExecInitAppend; there's no need for it.
360  */
361  return result;
362  }
363 
364  /*
365  * wait or poll async events if any. We do this before checking for
366  * the end of iteration, because it might drain the remaining async
367  * subplans.
368  */
369  if (node->as_nasyncremain > 0)
370  ExecAppendAsyncEventWait(node);
371 
372  /* choose new sync subplan; if no sync/async subplans, we're done */
373  if (!node->choose_next_subplan(node) && node->as_nasyncremain == 0)
374  return ExecClearTuple(node->ps.ps_ResultTupleSlot);
375  }
376 }
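/*
 * Editor's note -- minimal usage sketch (an assumption, not part of the
 * original file): a parent node, or ExecutePlan() at the top level, simply
 * calls ExecProcNode() on the AppendState until an empty slot comes back;
 * the subplan switching above is invisible to the caller:
 *
 *     for (;;)
 *     {
 *         TupleTableSlot *slot = ExecProcNode(&appendstate->ps);
 *
 *         if (TupIsNull(slot))
 *             break;
 *         (process slot)
 *     }
 */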
377 
378 /* ----------------------------------------------------------------
379  * ExecEndAppend
380  *
381  * Shuts down the subscans of the append node.
382  *
383  * Returns nothing of interest.
384  * ----------------------------------------------------------------
385  */
386 void
387 ExecEndAppend(AppendState *node)
388 {
389  PlanState **appendplans;
390  int nplans;
391  int i;
392 
393  /*
394  * get information from the node
395  */
396  appendplans = node->appendplans;
397  nplans = node->as_nplans;
398 
399  /*
400  * shut down each of the subscans
401  */
402  for (i = 0; i < nplans; i++)
403  ExecEndNode(appendplans[i]);
404 }
405 
406 void
407 ExecReScanAppend(AppendState *node)
408 {
409  int nasyncplans = node->as_nasyncplans;
410  int i;
411 
412  /*
413  * If any PARAM_EXEC Params used in pruning expressions have changed, then
414  * we'd better unset the valid subplans so that they are reselected for
415  * the new parameter values.
416  */
417  if (node->as_prune_state &&
418  bms_overlap(node->ps.chgParam,
419  node->as_prune_state->execparamids))
420  {
421  bms_free(node->as_valid_subplans);
422  node->as_valid_subplans = NULL;
423  if (nasyncplans > 0)
424  {
425  bms_free(node->as_valid_asyncplans);
426  node->as_valid_asyncplans = NULL;
427  }
428  }
429 
430  for (i = 0; i < node->as_nplans; i++)
431  {
432  PlanState *subnode = node->appendplans[i];
433 
434  /*
435  * ExecReScan doesn't know about my subplans, so I have to do
436  * changed-parameter signaling myself.
437  */
438  if (node->ps.chgParam != NULL)
439  UpdateChangedParamSet(subnode, node->ps.chgParam);
440 
441  /*
442  * If chgParam of subnode is not null then plan will be re-scanned by
443  * first ExecProcNode.
444  */
445  if (subnode->chgParam == NULL)
446  ExecReScan(subnode);
447  }
448 
449  /* Reset async state */
450  if (nasyncplans > 0)
451  {
452  i = -1;
453  while ((i = bms_next_member(node->as_asyncplans, i)) >= 0)
454  {
455  AsyncRequest *areq = node->as_asyncrequests[i];
456 
457  areq->callback_pending = false;
458  areq->request_complete = false;
459  areq->result = NULL;
460  }
461 
462  bms_free(node->as_needrequest);
463  node->as_needrequest = NULL;
464  }
465 
466  /* Let choose_next_subplan_* function handle setting the first subplan */
467  node->as_whichplan = INVALID_SUBPLAN_INDEX;
468  node->as_syncdone = false;
469  node->as_begun = false;
470 }
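/*
 * Editor's note -- illustrative scenario (an assumption, not from the
 * original file): the chgParam check above matters when this Append is
 * rescanned repeatedly with different parameter values, for instance as the
 * parameterized inner side of a nested loop over a hypothetical partitioned
 * table "p":
 *
 *     SELECT ...
 *     FROM driver d
 *     JOIN p ON p.part_key = d.key;      -- inner Append rescanned per row
 *
 * Each rescan with a changed PARAM_EXEC value resets as_valid_subplans, so
 * ExecFindMatchingSubPlans() re-prunes for the new key on the next fetch.
 */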
471 
472 /* ----------------------------------------------------------------
473  * Parallel Append Support
474  * ----------------------------------------------------------------
475  */
476 
477 /* ----------------------------------------------------------------
478  * ExecAppendEstimate
479  *
480  * Compute the amount of space we'll need in the parallel
481  * query DSM, and inform pcxt->estimator about our needs.
482  * ----------------------------------------------------------------
483  */
484 void
485 ExecAppendEstimate(AppendState *node,
486  ParallelContext *pcxt)
487 {
488  node->pstate_len =
489  add_size(offsetof(ParallelAppendState, pa_finished),
490  sizeof(bool) * node->as_nplans);
491 
492  shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
493  shm_toc_estimate_keys(&pcxt->estimator, 1);
494 }
495 
496 
497 /* ----------------------------------------------------------------
498  * ExecAppendInitializeDSM
499  *
500  * Set up shared state for Parallel Append.
501  * ----------------------------------------------------------------
502  */
503 void
504 ExecAppendInitializeDSM(AppendState *node,
505  ParallelContext *pcxt)
506 {
507  ParallelAppendState *pstate;
508 
509  pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
510  memset(pstate, 0, node->pstate_len);
511  LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
512  shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);
513 
514  node->as_pstate = pstate;
515  node->choose_next_subplan = choose_next_subplan_for_leader;
516 }
517 
518 /* ----------------------------------------------------------------
519  * ExecAppendReInitializeDSM
520  *
521  * Reset shared state before beginning a fresh scan.
522  * ----------------------------------------------------------------
523  */
524 void
525 ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
526 {
527  ParallelAppendState *pstate = node->as_pstate;
528 
529  pstate->pa_next_plan = 0;
530  memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
531 }
532 
533 /* ----------------------------------------------------------------
534  * ExecAppendInitializeWorker
535  *
536  * Copy relevant information from TOC into planstate, and initialize
537  * whatever is required to choose and execute the optimal subplan.
538  * ----------------------------------------------------------------
539  */
540 void
541 ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
542 {
543  node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
544  node->choose_next_subplan = choose_next_subplan_for_worker;
545 }
546 
547 /* ----------------------------------------------------------------
548  * choose_next_subplan_locally
549  *
550  * Choose next sync subplan for a non-parallel-aware Append,
551  * returning false if there are no more.
552  * ----------------------------------------------------------------
553  */
554 static bool
555 choose_next_subplan_locally(AppendState *node)
556 {
557  int whichplan = node->as_whichplan;
558  int nextplan;
559 
560  /* We should never be called when there are no subplans */
561  Assert(node->as_nplans > 0);
562 
563  /* Nothing to do if syncdone */
564  if (node->as_syncdone)
565  return false;
566 
567  /*
568  * If first call then have the bms member function choose the first valid
569  * sync subplan by initializing whichplan to -1. If there happen to be
570  * no valid sync subplans then the bms member function will handle that
571  * by returning a negative number which will allow us to exit returning a
572  * false value.
573  */
574  if (whichplan == INVALID_SUBPLAN_INDEX)
575  {
576  if (node->as_nasyncplans > 0)
577  {
578  /* We'd have filled as_valid_subplans already */
579  Assert(node->as_valid_subplans);
580  }
581  else if (node->as_valid_subplans == NULL)
582  node->as_valid_subplans =
583  ExecFindMatchingSubPlans(node->as_prune_state);
584 
585  whichplan = -1;
586  }
587 
588  /* Ensure whichplan is within the expected range */
589  Assert(whichplan >= -1 && whichplan <= node->as_nplans);
590 
591  if (ScanDirectionIsForward(node->ps.state->es_direction))
592  nextplan = bms_next_member(node->as_valid_subplans, whichplan);
593  else
594  nextplan = bms_prev_member(node->as_valid_subplans, whichplan);
595 
596  if (nextplan < 0)
597  {
598  /* Set as_syncdone if in async mode */
599  if (node->as_nasyncplans > 0)
600  node->as_syncdone = true;
601  return false;
602  }
603 
604  node->as_whichplan = nextplan;
605 
606  return true;
607 }
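/*
 * Editor's note -- illustration of the "-1 trick" used above (not part of
 * the original file): bms_next_member() returns the smallest member greater
 * than the given index, and -2 when none remains, so starting from -1 yields
 * the first valid subplan:
 *
 *     Bitmapset *valid = bms_add_range(NULL, 2, 4);      (members {2,3,4})
 *
 *     bms_next_member(valid, -1);        returns 2
 *     bms_next_member(valid, 2);         returns 3
 *     bms_next_member(valid, 4);         returns -2
 */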
608 
609 /* ----------------------------------------------------------------
610  * choose_next_subplan_for_leader
611  *
612  * Try to pick a plan which doesn't commit us to doing much
613  * work locally, so that as much work as possible is done in
614  * the workers. Cheapest subplans are at the end.
615  * ----------------------------------------------------------------
616  */
617 static bool
618 choose_next_subplan_for_leader(AppendState *node)
619 {
620  ParallelAppendState *pstate = node->as_pstate;
621 
622  /* Backward scan is not supported by parallel-aware plans */
623  Assert(ScanDirectionIsForward(node->ps.state->es_direction));
624 
625  /* We should never be called when there are no subplans */
626  Assert(node->as_nplans > 0);
627 
628  LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
629 
630  if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
631  {
632  /* Mark just-completed subplan as finished. */
633  node->as_pstate->pa_finished[node->as_whichplan] = true;
634  }
635  else
636  {
637  /* Start with last subplan. */
638  node->as_whichplan = node->as_nplans - 1;
639 
640  /*
641  * If we've yet to determine the valid subplans then do so now. If
642  * run-time pruning is disabled then the valid subplans will always be
643  * set to all subplans.
644  */
645  if (node->as_valid_subplans == NULL)
646  {
647  node->as_valid_subplans =
648  ExecFindMatchingSubPlans(node->as_prune_state);
649 
650  /*
651  * Mark each invalid plan as finished to allow the loop below to
652  * select the first valid subplan.
653  */
654  mark_invalid_subplans_as_finished(node);
655  }
656  }
657 
658  /* Loop until we find a subplan to execute. */
659  while (pstate->pa_finished[node->as_whichplan])
660  {
661  if (node->as_whichplan == 0)
662  {
663  pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
664  node->as_whichplan = INVALID_SUBPLAN_INDEX;
665  LWLockRelease(&pstate->pa_lock);
666  return false;
667  }
668 
669  /*
670  * We needn't pay attention to as_valid_subplans here as all invalid
671  * plans have been marked as finished.
672  */
673  node->as_whichplan--;
674  }
675 
676  /* If non-partial, immediately mark as finished. */
677  if (node->as_whichplan < node->as_first_partial_plan)
678  node->as_pstate->pa_finished[node->as_whichplan] = true;
679 
680  LWLockRelease(&pstate->pa_lock);
681 
682  return true;
683 }
684 
685 /* ----------------------------------------------------------------
686  * choose_next_subplan_for_worker
687  *
688  * Choose next subplan for a parallel-aware Append, returning
689  * false if there are no more.
690  *
691  * We start from the first plan and advance through the list;
692  * when we get back to the end, we loop back to the first
693  * partial plan. This assigns the non-partial plans first in
694  * order of descending cost and then spreads out the workers
695  * as evenly as possible across the remaining partial plans.
696  * ----------------------------------------------------------------
697  */
698 static bool
699 choose_next_subplan_for_worker(AppendState *node)
700 {
701  ParallelAppendState *pstate = node->as_pstate;
702 
703  /* Backward scan is not supported by parallel-aware plans */
704  Assert(ScanDirectionIsForward(node->ps.state->es_direction));
705 
706  /* We should never be called when there are no subplans */
707  Assert(node->as_nplans > 0);
708 
709  LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);
710 
711  /* Mark just-completed subplan as finished. */
712  if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
713  node->as_pstate->pa_finished[node->as_whichplan] = true;
714 
715  /*
716  * If we've yet to determine the valid subplans then do so now. If
717  * run-time pruning is disabled then the valid subplans will always be set
718  * to all subplans.
719  */
720  else if (node->as_valid_subplans == NULL)
721  {
722  node->as_valid_subplans =
723  ExecFindMatchingSubPlans(node->as_prune_state);
724  mark_invalid_subplans_as_finished(node);
725  }
726 
727  /* If all the plans are already done, we have nothing to do */
728  if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
729  {
730  LWLockRelease(&pstate->pa_lock);
731  return false;
732  }
733 
734  /* Save the plan from which we are starting the search. */
735  node->as_whichplan = pstate->pa_next_plan;
736 
737  /* Loop until we find a valid subplan to execute. */
738  while (pstate->pa_finished[pstate->pa_next_plan])
739  {
740  int nextplan;
741 
742  nextplan = bms_next_member(node->as_valid_subplans,
743  pstate->pa_next_plan);
744  if (nextplan >= 0)
745  {
746  /* Advance to the next valid plan. */
747  pstate->pa_next_plan = nextplan;
748  }
749  else if (node->as_whichplan > node->as_first_partial_plan)
750  {
751  /*
752  * Try looping back to the first valid partial plan, if there is
753  * one. If there isn't, arrange to bail out below.
754  */
755  nextplan = bms_next_member(node->as_valid_subplans,
756  node->as_first_partial_plan - 1);
757  pstate->pa_next_plan =
758  nextplan < 0 ? node->as_whichplan : nextplan;
759  }
760  else
761  {
762  /*
763  * At last plan, and either there are no partial plans or we've
764  * tried them all. Arrange to bail out.
765  */
766  pstate->pa_next_plan = node->as_whichplan;
767  }
768 
769  if (pstate->pa_next_plan == node->as_whichplan)
770  {
771  /* We've tried everything! */
772  pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
773  LWLockRelease(&pstate->pa_lock);
774  return false;
775  }
776  }
777 
778  /* Pick the plan we found, and advance pa_next_plan one more time. */
779  node->as_whichplan = pstate->pa_next_plan;
780  pstate->pa_next_plan = bms_next_member(node->as_valid_subplans,
781  pstate->pa_next_plan);
782 
783  /*
784  * If there are no more valid plans then try setting the next plan to the
785  * first valid partial plan.
786  */
787  if (pstate->pa_next_plan < 0)
788  {
789  int nextplan = bms_next_member(node->as_valid_subplans,
790  node->as_first_partial_plan - 1);
791 
792  if (nextplan >= 0)
793  pstate->pa_next_plan = nextplan;
794  else
795  {
796  /*
797  * There are no valid partial plans, and we already chose the last
798  * non-partial plan; so flag that there's nothing more for our
799  * fellow workers to do.
800  */
801  pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
802  }
803  }
804 
805  /* If non-partial, immediately mark as finished. */
806  if (node->as_whichplan < node->as_first_partial_plan)
807  node->as_pstate->pa_finished[node->as_whichplan] = true;
808 
809  LWLockRelease(&pstate->pa_lock);
810 
811  return true;
812 }
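/*
 * Editor's note -- worked example (an assumption, not part of the original
 * file): suppose four subplans with first_partial_plan = 2, so subplans 0
 * and 1 are non-partial (placed by the planner in order of descending cost)
 * and 2 and 3 are partial.  Workers claiming plans via the logic above
 * proceed roughly as:
 *
 *     worker A: takes plan 0, pa_finished[0] = true immediately
 *     worker B: takes plan 1, pa_finished[1] = true immediately
 *     worker C: takes plan 2 (partial; other workers may join it)
 *     worker D: takes plan 3 (partial)
 *     worker A, done with 0: wraps around and joins plan 2 or 3
 *
 * pa_finished[] for a partial plan only becomes true once some worker has
 * executed it to completion.
 */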
813 
814 /*
815  * mark_invalid_subplans_as_finished
816  * Marks the ParallelAppendState's pa_finished as true for each invalid
817  * subplan.
818  *
819  * This function should only be called for parallel Append with run-time
820  * pruning enabled.
821  */
822 static void
823 mark_invalid_subplans_as_finished(AppendState *node)
824 {
825  int i;
826 
827  /* Only valid to call this while in parallel Append mode */
828  Assert(node->as_pstate);
829 
830  /* Shouldn't have been called when run-time pruning is not enabled */
831  Assert(node->as_prune_state);
832 
833  /* Nothing to do if all plans are valid */
834  if (bms_num_members(node->as_valid_subplans) == node->as_nplans)
835  return;
836 
837  /* Mark all non-valid plans as finished */
838  for (i = 0; i < node->as_nplans; i++)
839  {
840  if (!bms_is_member(i, node->as_valid_subplans))
841  node->as_pstate->pa_finished[i] = true;
842  }
843 }
844 
845 /* ----------------------------------------------------------------
846  * Asynchronous Append Support
847  * ----------------------------------------------------------------
848  */
849 
850 /* ----------------------------------------------------------------
851  * ExecAppendAsyncBegin
852  *
853  * Begin executing designed async-capable subplans.
854  * ----------------------------------------------------------------
855  */
856 static void
857 ExecAppendAsyncBegin(AppendState *node)
858 {
859  int i;
860 
861  /* Backward scan is not supported by async-aware Appends. */
862  Assert(ScanDirectionIsForward(node->ps.state->es_direction));
863 
864  /* We should never be called when there are no async subplans. */
865  Assert(node->as_nasyncplans > 0);
866 
867  /* If we've yet to determine the valid subplans then do so now. */
868  if (node->as_valid_subplans == NULL)
869  node->as_valid_subplans =
870  ExecFindMatchingSubPlans(node->as_prune_state);
871 
872  classify_matching_subplans(node);
873 
874  /* Nothing to do if there are no valid async subplans. */
875  if (node->as_nasyncremain == 0)
876  return;
877 
878  /* Make a request for each of the valid async subplans. */
879  i = -1;
880  while ((i = bms_next_member(node->as_valid_asyncplans, i)) >= 0)
881  {
882  AsyncRequest *areq = node->as_asyncrequests[i];
883 
884  Assert(areq->request_index == i);
885  Assert(!areq->callback_pending);
886 
887  /* Do the actual work. */
888  ExecAsyncRequest(areq);
889  }
890 }
891 
892 /* ----------------------------------------------------------------
893  * ExecAppendAsyncGetNext
894  *
895  * Get the next tuple from any of the asynchronous subplans.
896  * ----------------------------------------------------------------
897  */
898 static bool
899 ExecAppendAsyncGetNext(AppendState *node, TupleTableSlot **result)
900 {
901  *result = NULL;
902 
903  /* We should never be called when there are no valid async subplans. */
904  Assert(node->as_nasyncremain > 0);
905 
906  /* Request a tuple asynchronously. */
907  if (ExecAppendAsyncRequest(node, result))
908  return true;
909 
910  while (node->as_nasyncremain > 0)
911  {
912  CHECK_FOR_INTERRUPTS();
913 
914  /* Wait or poll async events. */
915  ExecAppendAsyncEventWait(node);
916 
917  /* Request a tuple asynchronously. */
918  if (ExecAppendAsyncRequest(node, result))
919  return true;
920 
921  /* Break from loop if there's any sync subplan that isn't complete. */
922  if (!node->as_syncdone)
923  break;
924  }
925 
926  /*
927  * If all sync subplans are complete, we're totally done scanning the
928  * given node. Otherwise, we're done with the asynchronous stuff but
929  * must continue scanning the sync subplans.
930  */
931  if (node->as_syncdone)
932  {
933  Assert(node->as_nasyncremain == 0);
934  *result = ExecClearTuple(node->ps.ps_ResultTupleSlot);
935  return true;
936  }
937 
938  return false;
939 }
940 
941 /* ----------------------------------------------------------------
942  * ExecAppendAsyncRequest
943  *
944  * Request a tuple asynchronously.
945  * ----------------------------------------------------------------
946  */
947 static bool
948 ExecAppendAsyncRequest(AppendState *node, TupleTableSlot **result)
949 {
950  Bitmapset *needrequest;
951  int i;
952 
953  /* Nothing to do if there are no async subplans needing a new request. */
954  if (bms_is_empty(node->as_needrequest))
955  return false;
956 
957  /*
958  * If there are any asynchronously-generated results that have not yet
959  * been returned, we have nothing to do; just return one of them.
960  */
961  if (node->as_nasyncresults > 0)
962  {
963  --node->as_nasyncresults;
964  *result = node->as_asyncresults[node->as_nasyncresults];
965  return true;
966  }
967 
968  /* Make a new request for each of the async subplans that need it. */
969  needrequest = node->as_needrequest;
970  node->as_needrequest = NULL;
971  i = -1;
972  while ((i = bms_next_member(needrequest, i)) >= 0)
973  {
974  AsyncRequest *areq = node->as_asyncrequests[i];
975 
976  /* Do the actual work. */
977  ExecAsyncRequest(areq);
978  }
979  bms_free(needrequest);
980 
981  /* Return one of the asynchronously-generated results if any. */
982  if (node->as_nasyncresults > 0)
983  {
984  --node->as_nasyncresults;
985  *result = node->as_asyncresults[node->as_nasyncresults];
986  return true;
987  }
988 
989  return false;
990 }
991 
992 /* ----------------------------------------------------------------
993  * ExecAppendAsyncEventWait
994  *
995  * Wait or poll for file descriptor events and fire callbacks.
996  * ----------------------------------------------------------------
997  */
998 static void
999 ExecAppendAsyncEventWait(AppendState *node)
1000 {
1001  long timeout = node->as_syncdone ? -1 : 0;
1002  WaitEvent occurred_event[EVENT_BUFFER_SIZE];
1003  int noccurred;
1004  int nevents;
1005  int i;
1006 
1007  /* We should never be called when there are no valid async subplans. */
1008  Assert(node->as_nasyncremain > 0);
1009 
1010  node->as_eventset = CreateWaitEventSet(CurrentMemoryContext,
1011  node->as_nasyncplans + 1);
1012  AddWaitEventToSet(node->as_eventset, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
1013  NULL, NULL);
1014 
1015  /* Give each waiting subplan a chance to add an event. */
1016  i = -1;
1017  while ((i = bms_next_member(node->as_asyncplans, i)) >= 0)
1018  {
1019  AsyncRequest *areq = node->as_asyncrequests[i];
1020 
1021  if (areq->callback_pending)
1022  ExecAsyncConfigureWait(areq);
1023  }
1024 
1025  /* Wait for at least one event to occur. */
1026  nevents = Min(node->as_nasyncplans + 1, EVENT_BUFFER_SIZE);
1027  noccurred = WaitEventSetWait(node->as_eventset, timeout, occurred_event,
1028  nevents, WAIT_EVENT_APPEND_READY);
1029  FreeWaitEventSet(node->as_eventset);
1030  node->as_eventset = NULL;
1031  if (noccurred == 0)
1032  return;
1033 
1034  /* Deliver notifications. */
1035  for (i = 0; i < noccurred; i++)
1036  {
1037  WaitEvent *w = &occurred_event[i];
1038 
1039  /*
1040  * Each waiting subplan should have registered its wait event with
1041  * user_data pointing back to its AsyncRequest.
1042  */
1043  if ((w->events & WL_SOCKET_READABLE) != 0)
1044  {
1045  AsyncRequest *areq = (AsyncRequest *) w->user_data;
1046 
1047  /*
1048  * Mark it as no longer needing a callback. We must do this
1049  * before dispatching the callback in case the callback resets
1050  * the flag.
1051  */
1052  Assert(areq->callback_pending);
1053  areq->callback_pending = false;
1054 
1055  /* Do the actual work. */
1056  ExecAsyncNotify(areq);
1057  }
1058  }
1059 }
1060 
1061 /* ----------------------------------------------------------------
1062  * ExecAsyncAppendResponse
1063  *
1064  * Receive a response from an asynchronous request we made.
1065  * ----------------------------------------------------------------
1066  */
1067 void
1068 ExecAsyncAppendResponse(AsyncRequest *areq)
1069 {
1070  AppendState *node = (AppendState *) areq->requestor;
1071  TupleTableSlot *slot = areq->result;
1072 
1073  /* The result should be a TupleTableSlot or NULL. */
1074  Assert(slot == NULL || IsA(slot, TupleTableSlot));
1075 
1076  /* Nothing to do if the request is pending. */
1077  if (!areq->request_complete)
1078  {
1079  /* The request would have been pending for a callback */
1080  Assert(areq->callback_pending);
1081  return;
1082  }
1083 
1084  /* If the result is NULL or an empty slot, there's nothing more to do. */
1085  if (TupIsNull(slot))
1086  {
1087  /* The ending subplan wouldn't have been pending for a callback. */
1088  Assert(!areq->callback_pending);
1089  --node->as_nasyncremain;
1090  return;
1091  }
1092 
1093  /* Save result so we can return it. */
1094  Assert(node->as_nasyncresults < node->as_nasyncplans);
1095  node->as_asyncresults[node->as_nasyncresults++] = slot;
1096 
1097  /*
1098  * Mark the subplan that returned a result as ready for a new request. We
1099  * don't launch another one here immediately because it might complete.
1100  */
1101  node->as_needrequest = bms_add_member(node->as_needrequest,
1102  areq->request_index);
1103 }
1104 
1105 /* ----------------------------------------------------------------
1106  * classify_matching_subplans
1107  *
1108  * Classify the node's as_valid_subplans into sync ones and
1109  * async ones, adjust it to contain sync ones only, and save
1110  * async ones in the node's as_valid_asyncplans.
1111  * ----------------------------------------------------------------
1112  */
1113 static void
1114 classify_matching_subplans(AppendState *node)
1115 {
1116  Bitmapset *valid_asyncplans;
1117 
1118  Assert(node->as_valid_asyncplans == NULL);
1119 
1120  /* Nothing to do if there are no valid subplans. */
1121  if (bms_is_empty(node->as_valid_subplans))
1122  {
1123  node->as_syncdone = true;
1124  node->as_nasyncremain = 0;
1125  return;
1126  }
1127 
1128  /* Nothing to do if there are no valid async subplans. */
1129  if (!bms_overlap(node->as_valid_subplans, node->as_asyncplans))
1130  {
1131  node->as_nasyncremain = 0;
1132  return;
1133  }
1134 
1135  /* Get valid async subplans. */
1136  valid_asyncplans = bms_copy(node->as_asyncplans);
1137  valid_asyncplans = bms_int_members(valid_asyncplans,
1138  node->as_valid_subplans);
1139 
1140  /* Adjust the valid subplans to contain sync subplans only. */
1141  node->as_valid_subplans = bms_del_members(node->as_valid_subplans,
1142  valid_asyncplans);
1144 
1145  /* Save valid async subplans. */
1146  node->as_valid_asyncplans = valid_asyncplans;
1147  node->as_nasyncremain = bms_num_members(valid_asyncplans);
1148 }