PostgreSQL Source Code  git master
prepunion.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * prepunion.c
4  * Routines to plan set-operation queries. The filename is a leftover
5  * from a time when only UNIONs were implemented.
6  *
7  * There are two code paths in the planner for set-operation queries.
8  * If a subquery consists entirely of simple UNION ALL operations, it
9  * is converted into an "append relation". Otherwise, it is handled
10  * by the general code in this module (plan_set_operations and its
11  * subroutines). There is some support code here for the append-relation
12  * case, but most of the heavy lifting for that is done elsewhere,
13  * notably in prepjointree.c and allpaths.c.
14  *
15  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16  * Portions Copyright (c) 1994, Regents of the University of California
17  *
18  *
19  * IDENTIFICATION
20  * src/backend/optimizer/prep/prepunion.c
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25 
26 #include "access/htup_details.h"
27 #include "catalog/pg_type.h"
28 #include "miscadmin.h"
29 #include "nodes/makefuncs.h"
30 #include "nodes/nodeFuncs.h"
31 #include "optimizer/cost.h"
32 #include "optimizer/pathnode.h"
33 #include "optimizer/paths.h"
34 #include "optimizer/planner.h"
35 #include "optimizer/prep.h"
36 #include "optimizer/tlist.h"
37 #include "parser/parse_coerce.h"
38 #include "utils/selfuncs.h"
39 
40 
42  List *colTypes, List *colCollations,
43  bool junkOK,
44  int flag, List *refnames_tlist,
45  List **pTargetList,
46  bool *istrivial_tlist);
49  List *refnames_tlist,
50  List **pTargetList);
52  bool trivial_tlist, List *child_tlist,
53  List *interesting_pathkeys,
54  double *pNumGroups);
56  List *refnames_tlist,
57  List **pTargetList);
59  List *refnames_tlist,
60  List **pTargetList);
62  SetOperationStmt *top_union,
63  List *refnames_tlist,
64  List **tlist_list,
65  List **istrivial_tlist);
67 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
68  Path *input_path,
69  double dNumGroups, double dNumOutputRows,
70  const char *construct);
71 static List *generate_setop_tlist(List *colTypes, List *colCollations,
72  int flag,
73  Index varno,
74  bool hack_constants,
75  List *input_tlist,
76  List *refnames_tlist,
77  bool *trivial_tlist);
78 static List *generate_append_tlist(List *colTypes, List *colCollations,
79  bool flag,
80  List *input_tlists,
81  List *refnames_tlist);
82 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
83 
84 
85 /*
86  * plan_set_operations
87  *
88  * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
89  *
90  * This routine only deals with the setOperations tree of the given query.
91  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
92  * when we return to grouping_planner; likewise for LIMIT.
93  *
94  * What we return is an "upperrel" RelOptInfo containing at least one Path
95  * that implements the set-operation tree. In addition, root->processed_tlist
96  * receives a targetlist representing the output of the topmost setop node.
97  */
98 RelOptInfo *
100 {
101  Query *parse = root->parse;
102  SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
103  Node *node;
104  RangeTblEntry *leftmostRTE;
105  Query *leftmostQuery;
106  RelOptInfo *setop_rel;
107  List *top_tlist;
108 
109  Assert(topop);
110 
111  /* check for unsupported stuff */
112  Assert(parse->jointree->fromlist == NIL);
113  Assert(parse->jointree->quals == NULL);
114  Assert(parse->groupClause == NIL);
115  Assert(parse->havingQual == NULL);
116  Assert(parse->windowClause == NIL);
117  Assert(parse->distinctClause == NIL);
118 
119  /*
120  * In the outer query level, equivalence classes are limited to classes
121  * which define that the top-level target entry is equivalent to the
122  * corresponding child target entry. There won't be any equivalence class
123  * merging. Mark that merging is complete to allow us to make pathkeys.
124  */
125  Assert(root->eq_classes == NIL);
126  root->ec_merging_done = true;
127 
128  /*
129  * We'll need to build RelOptInfos for each of the leaf subqueries, which
130  * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
131  * arrays for those, and for AppendRelInfos in case they're needed.
132  */
134 
135  /*
136  * Find the leftmost component Query. We need to use its column names for
137  * all generated tlists (else SELECT INTO won't work right).
138  */
139  node = topop->larg;
140  while (node && IsA(node, SetOperationStmt))
141  node = ((SetOperationStmt *) node)->larg;
142  Assert(node && IsA(node, RangeTblRef));
143  leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
144  leftmostQuery = leftmostRTE->subquery;
145  Assert(leftmostQuery != NULL);
146 
147  /*
148  * If the topmost node is a recursive union, it needs special processing.
149  */
150  if (root->hasRecursion)
151  {
152  setop_rel = generate_recursion_path(topop, root,
153  leftmostQuery->targetList,
154  &top_tlist);
155  }
156  else
157  {
158  bool trivial_tlist;
159 
160  /*
161  * Recurse on setOperations tree to generate paths for set ops. The
162  * final output paths should have just the column types shown as the
163  * output from the top-level node, plus possibly resjunk working
164  * columns (we can rely on upper-level nodes to deal with that).
165  */
166  setop_rel = recurse_set_operations((Node *) topop, root,
167  topop->colTypes, topop->colCollations,
168  true, -1,
169  leftmostQuery->targetList,
170  &top_tlist,
171  &trivial_tlist);
172  }
173 
174  /* Must return the built tlist into root->processed_tlist. */
175  root->processed_tlist = top_tlist;
176 
177  return setop_rel;
178 }
179 
180 /*
181  * set_operation_ordered_results_useful
182  * Return true if the given SetOperationStmt can be executed by utilizing
183  * paths that provide sorted input according to the setop's targetlist.
184  * Returns false when sorted paths are not any more useful then unsorted
185  * ones.
186  */
187 bool
189 {
190  /*
191  * Paths sorted by the targetlist are useful for UNION as we can opt to
192  * MergeAppend the sorted paths then Unique them. Ordered paths are no
193  * more useful than unordered ones for UNION ALL.
194  */
195  if (!setop->all && setop->op == SETOP_UNION)
196  return true;
197 
198  /*
199  * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
200  * correctly sorted input paths.
201  */
202  return false;
203 }
204 
205 /*
206  * recurse_set_operations
207  * Recursively handle one step in a tree of set operations
208  *
209  * colTypes: OID list of set-op's result column datatypes
210  * colCollations: OID list of set-op's result column collations
211  * junkOK: if true, child resjunk columns may be left in the result
212  * flag: if >= 0, add a resjunk output column indicating value of flag
213  * refnames_tlist: targetlist to take column names from
214  *
215  * Returns a RelOptInfo for the subtree, as well as these output parameters:
216  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
217  * *istrivial_tlist: true if, and only if, datatypes between parent and child
218  * match.
219  *
220  * The pTargetList output parameter is mostly redundant with the pathtarget
221  * of the returned RelOptInfo, but for the moment we need it because much of
222  * the logic in this file depends on flag columns being marked resjunk.
223  * Pending a redesign of how that works, this is the easy way out.
224  *
225  * We don't have to care about typmods here: the only allowed difference
226  * between set-op input and output typmods is input is a specific typmod
227  * and output is -1, and that does not require a coercion.
228  */
229 static RelOptInfo *
231  List *colTypes, List *colCollations,
232  bool junkOK,
233  int flag, List *refnames_tlist,
234  List **pTargetList,
235  bool *istrivial_tlist)
236 {
237  RelOptInfo *rel;
238 
239  *istrivial_tlist = true; /* for now */
240 
241  /* Guard against stack overflow due to overly complex setop nests */
243 
244  if (IsA(setOp, RangeTblRef))
245  {
246  RangeTblRef *rtr = (RangeTblRef *) setOp;
247  RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
248  SetOperationStmt *setops;
249  Query *subquery = rte->subquery;
250  PlannerInfo *subroot;
251  List *tlist;
252  bool trivial_tlist;
253 
254  Assert(subquery != NULL);
255 
256  /* Build a RelOptInfo for this leaf subquery. */
257  rel = build_simple_rel(root, rtr->rtindex, NULL);
258 
259  /* plan_params should not be in use in current query level */
260  Assert(root->plan_params == NIL);
261 
262  /*
263  * Pass the set operation details to the subquery_planner to have it
264  * consider generating Paths correctly ordered for the set operation.
265  */
266  setops = castNode(SetOperationStmt, root->parse->setOperations);
267 
268  /* Generate a subroot and Paths for the subquery */
269  subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
270  false, root->tuple_fraction,
271  setops);
272 
273  /*
274  * It should not be possible for the primitive query to contain any
275  * cross-references to other primitive queries in the setop tree.
276  */
277  if (root->plan_params)
278  elog(ERROR, "unexpected outer reference in set operation subquery");
279 
280  /* Figure out the appropriate target list for this subquery. */
281  tlist = generate_setop_tlist(colTypes, colCollations,
282  flag,
283  rtr->rtindex,
284  true,
285  subroot->processed_tlist,
286  refnames_tlist,
287  &trivial_tlist);
288  rel->reltarget = create_pathtarget(root, tlist);
289 
290  /* Return the fully-fledged tlist to caller, too */
291  *pTargetList = tlist;
292  *istrivial_tlist = trivial_tlist;
293  }
294  else if (IsA(setOp, SetOperationStmt))
295  {
296  SetOperationStmt *op = (SetOperationStmt *) setOp;
297 
298  /* UNIONs are much different from INTERSECT/EXCEPT */
299  if (op->op == SETOP_UNION)
300  rel = generate_union_paths(op, root,
301  refnames_tlist,
302  pTargetList);
303  else
304  rel = generate_nonunion_paths(op, root,
305  refnames_tlist,
306  pTargetList);
307 
308  /*
309  * If necessary, add a Result node to project the caller-requested
310  * output columns.
311  *
312  * XXX you don't really want to know about this: setrefs.c will apply
313  * fix_upper_expr() to the Result node's tlist. This would fail if the
314  * Vars generated by generate_setop_tlist() were not exactly equal()
315  * to the corresponding tlist entries of the subplan. However, since
316  * the subplan was generated by generate_union_paths() or
317  * generate_nonunion_paths(), and hence its tlist was generated by
318  * generate_append_tlist(), this will work. We just tell
319  * generate_setop_tlist() to use varno 0.
320  */
321  if (flag >= 0 ||
322  !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
323  !tlist_same_collations(*pTargetList, colCollations, junkOK))
324  {
325  PathTarget *target;
326  bool trivial_tlist;
327  ListCell *lc;
328 
329  *pTargetList = generate_setop_tlist(colTypes, colCollations,
330  flag,
331  0,
332  false,
333  *pTargetList,
334  refnames_tlist,
335  &trivial_tlist);
336  *istrivial_tlist = trivial_tlist;
337  target = create_pathtarget(root, *pTargetList);
338 
339  /* Apply projection to each path */
340  foreach(lc, rel->pathlist)
341  {
342  Path *subpath = (Path *) lfirst(lc);
343  Path *path;
344 
345  Assert(subpath->param_info == NULL);
346  path = apply_projection_to_path(root, subpath->parent,
347  subpath, target);
348  /* If we had to add a Result, path is different from subpath */
349  if (path != subpath)
350  lfirst(lc) = path;
351  }
352 
353  /* Apply projection to each partial path */
354  foreach(lc, rel->partial_pathlist)
355  {
356  Path *subpath = (Path *) lfirst(lc);
357  Path *path;
358 
359  Assert(subpath->param_info == NULL);
360 
361  /* avoid apply_projection_to_path, in case of multiple refs */
362  path = (Path *) create_projection_path(root, subpath->parent,
363  subpath, target);
364  lfirst(lc) = path;
365  }
366  }
368  }
369  else
370  {
371  elog(ERROR, "unrecognized node type: %d",
372  (int) nodeTag(setOp));
373  *pTargetList = NIL;
374  rel = NULL; /* keep compiler quiet */
375  }
376 
377  return rel;
378 }
379 
380 /*
381  * Generate paths for a recursive UNION node
382  */
383 static RelOptInfo *
385  List *refnames_tlist,
386  List **pTargetList)
387 {
388  RelOptInfo *result_rel;
389  Path *path;
390  RelOptInfo *lrel,
391  *rrel;
392  Path *lpath;
393  Path *rpath;
394  List *lpath_tlist;
395  bool lpath_trivial_tlist;
396  List *rpath_tlist;
397  bool rpath_trivial_tlist;
398  List *tlist;
399  List *groupList;
400  double dNumGroups;
401 
402  /* Parser should have rejected other cases */
403  if (setOp->op != SETOP_UNION)
404  elog(ERROR, "only UNION queries can be recursive");
405  /* Worktable ID should be assigned */
406  Assert(root->wt_param_id >= 0);
407 
408  /*
409  * Unlike a regular UNION node, process the left and right inputs
410  * separately without any intention of combining them into one Append.
411  */
412  lrel = recurse_set_operations(setOp->larg, root,
413  setOp->colTypes, setOp->colCollations,
414  false, -1,
415  refnames_tlist,
416  &lpath_tlist,
417  &lpath_trivial_tlist);
418  if (lrel->rtekind == RTE_SUBQUERY)
419  build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
420  NIL, NULL);
421  lpath = lrel->cheapest_total_path;
422  /* The right path will want to look at the left one ... */
423  root->non_recursive_path = lpath;
424  rrel = recurse_set_operations(setOp->rarg, root,
425  setOp->colTypes, setOp->colCollations,
426  false, -1,
427  refnames_tlist,
428  &rpath_tlist,
429  &rpath_trivial_tlist);
430  if (rrel->rtekind == RTE_SUBQUERY)
431  build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
432  NIL, NULL);
433  rpath = rrel->cheapest_total_path;
434  root->non_recursive_path = NULL;
435 
436  /*
437  * Generate tlist for RecursiveUnion path node --- same as in Append cases
438  */
439  tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
440  list_make2(lpath_tlist, rpath_tlist),
441  refnames_tlist);
442 
443  *pTargetList = tlist;
444 
445  /* Build result relation. */
446  result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
447  bms_union(lrel->relids, rrel->relids));
448  result_rel->reltarget = create_pathtarget(root, tlist);
449 
450  /*
451  * If UNION, identify the grouping operators
452  */
453  if (setOp->all)
454  {
455  groupList = NIL;
456  dNumGroups = 0;
457  }
458  else
459  {
460  /* Identify the grouping semantics */
461  groupList = generate_setop_grouplist(setOp, tlist);
462 
463  /* We only support hashing here */
464  if (!grouping_is_hashable(groupList))
465  ereport(ERROR,
466  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
467  errmsg("could not implement recursive UNION"),
468  errdetail("All column datatypes must be hashable.")));
469 
470  /*
471  * For the moment, take the number of distinct groups as equal to the
472  * total input size, ie, the worst case.
473  */
474  dNumGroups = lpath->rows + rpath->rows * 10;
475  }
476 
477  /*
478  * And make the path node.
479  */
481  result_rel,
482  lpath,
483  rpath,
484  result_rel->reltarget,
485  groupList,
486  root->wt_param_id,
487  dNumGroups);
488 
489  add_path(result_rel, path);
490  postprocess_setop_rel(root, result_rel);
491  return result_rel;
492 }
493 
494 /*
495  * build_setop_child_paths
496  * Build paths for the set op child relation denoted by 'rel'.
497  *
498  * interesting_pathkeys: if not NIL, also include paths that suit these
499  * pathkeys, sorting any unsorted paths as required.
500  * *pNumGroups: if not NULL, we estimate the number of distinct groups
501  * in the result, and store it there
502  */
503 static void
505  bool trivial_tlist, List *child_tlist,
506  List *interesting_pathkeys, double *pNumGroups)
507 {
508  RelOptInfo *final_rel;
509  List *setop_pathkeys = rel->subroot->setop_pathkeys;
510  ListCell *lc;
511 
512  /* it can't be a set op child rel if it's not a subquery */
513  Assert(rel->rtekind == RTE_SUBQUERY);
514 
515  /* when sorting is needed, add child rel equivalences */
516  if (interesting_pathkeys != NIL)
518  rel,
519  child_tlist,
520  interesting_pathkeys);
521 
522  /*
523  * Mark rel with estimated output rows, width, etc. Note that we have to
524  * do this before generating outer-query paths, else cost_subqueryscan is
525  * not happy.
526  */
528 
529  /*
530  * Since we may want to add a partial path to this relation, we must set
531  * its consider_parallel flag correctly.
532  */
533  final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
534  rel->consider_parallel = final_rel->consider_parallel;
535 
536  /* Generate subquery scan paths for any interesting path in final_rel */
537  foreach(lc, final_rel->pathlist)
538  {
539  Path *subpath = (Path *) lfirst(lc);
540  List *pathkeys;
541  Path *cheapest_input_path = final_rel->cheapest_total_path;
542  bool is_sorted;
543  int presorted_keys;
544 
545  /*
546  * Include the cheapest path as-is so that the set operation can be
547  * cheaply implemented using a method which does not require the input
548  * to be sorted.
549  */
550  if (subpath == cheapest_input_path)
551  {
552  /* Convert subpath's pathkeys to outer representation */
553  pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
554  make_tlist_from_pathtarget(subpath->pathtarget));
555 
556  /* Generate outer path using this subpath */
558  rel,
559  subpath,
560  trivial_tlist,
561  pathkeys,
562  NULL));
563  }
564 
565  /* skip dealing with sorted paths if the setop doesn't need them */
566  if (interesting_pathkeys == NIL)
567  continue;
568 
569  /*
570  * Create paths to suit final sort order required for setop_pathkeys.
571  * Here we'll sort the cheapest input path (if not sorted already) and
572  * incremental sort any paths which are partially sorted.
573  */
574  is_sorted = pathkeys_count_contained_in(setop_pathkeys,
575  subpath->pathkeys,
576  &presorted_keys);
577 
578  if (!is_sorted)
579  {
580  double limittuples = rel->subroot->limit_tuples;
581 
582  /*
583  * Try at least sorting the cheapest path and also try
584  * incrementally sorting any path which is partially sorted
585  * already (no need to deal with paths which have presorted keys
586  * when incremental sort is disabled unless it's the cheapest
587  * input path).
588  */
589  if (subpath != cheapest_input_path &&
590  (presorted_keys == 0 || !enable_incremental_sort))
591  continue;
592 
593  /*
594  * We've no need to consider both a sort and incremental sort.
595  * We'll just do a sort if there are no presorted keys and an
596  * incremental sort when there are presorted keys.
597  */
598  if (presorted_keys == 0 || !enable_incremental_sort)
600  final_rel,
601  subpath,
602  setop_pathkeys,
603  limittuples);
604  else
606  final_rel,
607  subpath,
608  setop_pathkeys,
609  presorted_keys,
610  limittuples);
611  }
612 
613  /*
614  * subpath is now sorted, so add it to the pathlist. We already added
615  * the cheapest_input_path above, so don't add it again unless we just
616  * sorted it.
617  */
618  if (subpath != cheapest_input_path)
619  {
620  /* Convert subpath's pathkeys to outer representation */
621  pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
622  make_tlist_from_pathtarget(subpath->pathtarget));
623 
624  /* Generate outer path using this subpath */
626  rel,
627  subpath,
628  trivial_tlist,
629  pathkeys,
630  NULL));
631  }
632  }
633 
634  /* if consider_parallel is false, there should be no partial paths */
635  Assert(final_rel->consider_parallel ||
636  final_rel->partial_pathlist == NIL);
637 
638  /*
639  * If we have a partial path for the child relation, we can use that to
640  * build a partial path for this relation. But there's no point in
641  * considering any path but the cheapest.
642  */
643  if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
644  final_rel->partial_pathlist != NIL)
645  {
646  Path *partial_subpath;
647  Path *partial_path;
648 
649  partial_subpath = linitial(final_rel->partial_pathlist);
650  partial_path = (Path *)
651  create_subqueryscan_path(root, rel, partial_subpath,
652  trivial_tlist,
653  NIL, NULL);
654  add_partial_path(rel, partial_path);
655  }
656 
658 
659  /*
660  * Estimate number of groups if caller wants it. If the subquery used
661  * grouping or aggregation, its output is probably mostly unique anyway;
662  * otherwise do statistical estimation.
663  *
664  * XXX you don't really want to know about this: we do the estimation
665  * using the subroot->parse's original targetlist expressions, not the
666  * subroot->processed_tlist which might seem more appropriate. The reason
667  * is that if the subquery is itself a setop, it may return a
668  * processed_tlist containing "varno 0" Vars generated by
669  * generate_append_tlist, and those would confuse estimate_num_groups
670  * mightily. We ought to get rid of the "varno 0" hack, but that requires
671  * a redesign of the parsetree representation of setops, so that there can
672  * be an RTE corresponding to each setop's output. Note, we use this not
673  * subquery's targetlist but subroot->parse's targetlist, because it was
674  * revised by self-join removal. subquery's targetlist might contain the
675  * references to the removed relids.
676  */
677  if (pNumGroups)
678  {
679  PlannerInfo *subroot = rel->subroot;
680  Query *subquery = subroot->parse;
681 
682  if (subquery->groupClause || subquery->groupingSets ||
683  subquery->distinctClause || subroot->hasHavingQual ||
684  subquery->hasAggs)
685  *pNumGroups = rel->cheapest_total_path->rows;
686  else
687  *pNumGroups = estimate_num_groups(subroot,
688  get_tlist_exprs(subroot->parse->targetList, false),
690  NULL,
691  NULL);
692  }
693 }
694 
695 /*
696  * Generate paths for a UNION or UNION ALL node
697  */
698 static RelOptInfo *
700  List *refnames_tlist,
701  List **pTargetList)
702 {
703  Relids relids = NULL;
704  RelOptInfo *result_rel;
705  ListCell *lc;
706  ListCell *lc2;
707  ListCell *lc3;
708  List *cheapest_pathlist = NIL;
709  List *ordered_pathlist = NIL;
710  List *partial_pathlist = NIL;
711  bool partial_paths_valid = true;
712  bool consider_parallel = true;
713  List *rellist;
714  List *tlist_list;
715  List *trivial_tlist_list;
716  List *tlist;
717  List *groupList = NIL;
718  Path *apath;
719  Path *gpath = NULL;
720  bool try_sorted;
721  List *union_pathkeys = NIL;
722 
723  /*
724  * If any of my children are identical UNION nodes (same op, all-flag, and
725  * colTypes) then they can be merged into this node so that we generate
726  * only one Append/MergeAppend and unique-ification for the lot. Recurse
727  * to find such nodes.
728  */
729  rellist = plan_union_children(root,
730  op,
731  refnames_tlist,
732  &tlist_list,
733  &trivial_tlist_list);
734 
735  /*
736  * Generate tlist for Append/MergeAppend plan node.
737  *
738  * The tlist for an Append plan isn't important as far as the Append is
739  * concerned, but we must make it look real anyway for the benefit of the
740  * next plan level up.
741  */
742  tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
743  tlist_list, refnames_tlist);
744  *pTargetList = tlist;
745 
746  /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
747  try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
748 
749  if (try_sorted)
750  {
751  /* Identify the grouping semantics */
752  groupList = generate_setop_grouplist(op, tlist);
753 
754  /* Determine the pathkeys for sorting by the whole target list */
755  union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
756 
757  root->query_pathkeys = union_pathkeys;
758  }
759 
760  /*
761  * Now that we've got the append target list, we can build the union child
762  * paths.
763  */
764  forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
765  {
766  RelOptInfo *rel = lfirst(lc);
767  bool trivial_tlist = lfirst_int(lc2);
768  List *child_tlist = lfirst_node(List, lc3);
769 
770  /* only build paths for the union children */
771  if (rel->rtekind == RTE_SUBQUERY)
772  build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
773  union_pathkeys, NULL);
774  }
775 
776  /* Build path lists and relid set. */
777  foreach(lc, rellist)
778  {
779  RelOptInfo *rel = lfirst(lc);
780  Path *ordered_path;
781 
782  cheapest_pathlist = lappend(cheapest_pathlist,
783  rel->cheapest_total_path);
784 
785  if (try_sorted)
786  {
787  ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
788  union_pathkeys,
789  NULL,
790  TOTAL_COST,
791  false);
792 
793  if (ordered_path != NULL)
794  ordered_pathlist = lappend(ordered_pathlist, ordered_path);
795  else
796  {
797  /*
798  * If we can't find a sorted path, just give up trying to
799  * generate a list of correctly sorted child paths. This can
800  * happen when type coercion was added to the targetlist due
801  * to mismatching types from the union children.
802  */
803  try_sorted = false;
804  }
805  }
806 
807  if (consider_parallel)
808  {
809  if (!rel->consider_parallel)
810  {
811  consider_parallel = false;
812  partial_paths_valid = false;
813  }
814  else if (rel->partial_pathlist == NIL)
815  partial_paths_valid = false;
816  else
817  partial_pathlist = lappend(partial_pathlist,
818  linitial(rel->partial_pathlist));
819  }
820 
821  relids = bms_union(relids, rel->relids);
822  }
823 
824  /* Build result relation. */
825  result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
826  result_rel->reltarget = create_pathtarget(root, tlist);
827  result_rel->consider_parallel = consider_parallel;
828  result_rel->consider_startup = (root->tuple_fraction > 0);
829 
830  /*
831  * Append the child results together using the cheapest paths from each
832  * union child.
833  */
834  apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
835  NIL, NIL, NULL, 0, false, -1);
836 
837  /*
838  * Estimate number of groups. For now we just assume the output is unique
839  * --- this is certainly true for the UNION case, and we want worst-case
840  * estimates anyway.
841  */
842  result_rel->rows = apath->rows;
843 
844  /*
845  * Now consider doing the same thing using the partial paths plus Append
846  * plus Gather.
847  */
848  if (partial_paths_valid)
849  {
850  Path *papath;
851  int parallel_workers = 0;
852 
853  /* Find the highest number of workers requested for any subpath. */
854  foreach(lc, partial_pathlist)
855  {
856  Path *subpath = lfirst(lc);
857 
858  parallel_workers = Max(parallel_workers,
859  subpath->parallel_workers);
860  }
861  Assert(parallel_workers > 0);
862 
863  /*
864  * If the use of parallel append is permitted, always request at least
865  * log2(# of children) paths. We assume it can be useful to have
866  * extra workers in this case because they will be spread out across
867  * the children. The precise formula is just a guess; see
868  * add_paths_to_append_rel.
869  */
871  {
872  parallel_workers = Max(parallel_workers,
873  pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
874  parallel_workers = Min(parallel_workers,
876  }
877  Assert(parallel_workers > 0);
878 
879  papath = (Path *)
880  create_append_path(root, result_rel, NIL, partial_pathlist,
881  NIL, NULL, parallel_workers,
883  gpath = (Path *)
884  create_gather_path(root, result_rel, papath,
885  result_rel->reltarget, NULL, NULL);
886  }
887 
888  if (!op->all)
889  {
890  double dNumGroups;
891  bool can_sort = grouping_is_sortable(groupList);
892  bool can_hash = grouping_is_hashable(groupList);
893 
894  /*
895  * XXX for the moment, take the number of distinct groups as equal to
896  * the total input size, i.e., the worst case. This is too
897  * conservative, but it's not clear how to get a decent estimate of
898  * the true size. One should note as well the propensity of novices
899  * to write UNION rather than UNION ALL even when they don't expect
900  * any duplicates...
901  */
902  dNumGroups = apath->rows;
903 
904  if (can_hash)
905  {
906  Path *path;
907 
908  /*
909  * Try a hash aggregate plan on 'apath'. This is the cheapest
910  * available path containing each append child.
911  */
912  path = (Path *) create_agg_path(root,
913  result_rel,
914  apath,
915  create_pathtarget(root, tlist),
916  AGG_HASHED,
918  groupList,
919  NIL,
920  NULL,
921  dNumGroups);
922  add_path(result_rel, path);
923 
924  /* Try hash aggregate on the Gather path, if valid */
925  if (gpath != NULL)
926  {
927  /* Hashed aggregate plan --- no sort needed */
928  path = (Path *) create_agg_path(root,
929  result_rel,
930  gpath,
931  create_pathtarget(root, tlist),
932  AGG_HASHED,
934  groupList,
935  NIL,
936  NULL,
937  dNumGroups);
938  add_path(result_rel, path);
939  }
940  }
941 
942  if (can_sort)
943  {
944  Path *path = apath;
945 
946  /* Try Sort -> Unique on the Append path */
947  if (groupList != NIL)
948  path = (Path *) create_sort_path(root, result_rel, path,
949  make_pathkeys_for_sortclauses(root, groupList, tlist),
950  -1.0);
951 
953  result_rel,
954  path,
955  list_length(path->pathkeys),
956  dNumGroups);
957 
958  add_path(result_rel, path);
959 
960  /* Try Sort -> Unique on the Gather path, if set */
961  if (gpath != NULL)
962  {
963  path = gpath;
964 
965  path = (Path *) create_sort_path(root, result_rel, path,
966  make_pathkeys_for_sortclauses(root, groupList, tlist),
967  -1.0);
968 
970  result_rel,
971  path,
972  list_length(path->pathkeys),
973  dNumGroups);
974  add_path(result_rel, path);
975  }
976  }
977 
978  /*
979  * Try making a MergeAppend path if we managed to find a path with the
980  * correct pathkeys in each union child query.
981  */
982  if (try_sorted && groupList != NIL)
983  {
984  Path *path;
985 
987  result_rel,
988  ordered_pathlist,
989  union_pathkeys,
990  NULL);
991 
992  /* and make the MergeAppend unique */
994  result_rel,
995  path,
996  list_length(tlist),
997  dNumGroups);
998 
999  add_path(result_rel, path);
1000  }
1001  }
1002  else
1003  {
1004  /* UNION ALL */
1005  add_path(result_rel, apath);
1006 
1007  if (gpath != NULL)
1008  add_path(result_rel, gpath);
1009  }
1010 
1011  return result_rel;
1012 }
1013 
1014 /*
1015  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1016  */
1017 static RelOptInfo *
1019  List *refnames_tlist,
1020  List **pTargetList)
1021 {
1022  RelOptInfo *result_rel;
1023  RelOptInfo *lrel,
1024  *rrel;
1025  double save_fraction = root->tuple_fraction;
1026  Path *lpath,
1027  *rpath,
1028  *path;
1029  List *lpath_tlist,
1030  *rpath_tlist,
1031  *tlist_list,
1032  *tlist,
1033  *groupList,
1034  *pathlist;
1035  bool lpath_trivial_tlist,
1036  rpath_trivial_tlist;
1037  double dLeftGroups,
1038  dRightGroups,
1039  dNumGroups,
1040  dNumOutputRows;
1041  bool use_hash;
1042  SetOpCmd cmd;
1043  int firstFlag;
1044 
1045  /*
1046  * Tell children to fetch all tuples.
1047  */
1048  root->tuple_fraction = 0.0;
1049 
1050  /* Recurse on children, ensuring their outputs are marked */
1051  lrel = recurse_set_operations(op->larg, root,
1052  op->colTypes, op->colCollations,
1053  false, 0,
1054  refnames_tlist,
1055  &lpath_tlist,
1056  &lpath_trivial_tlist);
1057  if (lrel->rtekind == RTE_SUBQUERY)
1058  build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1059  NIL, &dLeftGroups);
1060  else
1061  dLeftGroups = lrel->rows;
1062 
1063  lpath = lrel->cheapest_total_path;
1064  rrel = recurse_set_operations(op->rarg, root,
1065  op->colTypes, op->colCollations,
1066  false, 1,
1067  refnames_tlist,
1068  &rpath_tlist,
1069  &rpath_trivial_tlist);
1070  if (rrel->rtekind == RTE_SUBQUERY)
1071  build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1072  NIL, &dRightGroups);
1073  else
1074  dRightGroups = rrel->rows;
1075 
1076  rpath = rrel->cheapest_total_path;
1077 
1078  /* Undo effects of forcing tuple_fraction to 0 */
1079  root->tuple_fraction = save_fraction;
1080 
1081  /*
1082  * For EXCEPT, we must put the left input first. For INTERSECT, either
1083  * order should give the same results, and we prefer to put the smaller
1084  * input first in order to minimize the size of the hash table in the
1085  * hashing case. "Smaller" means the one with the fewer groups.
1086  */
1087  if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
1088  {
1089  pathlist = list_make2(lpath, rpath);
1090  tlist_list = list_make2(lpath_tlist, rpath_tlist);
1091  firstFlag = 0;
1092  }
1093  else
1094  {
1095  pathlist = list_make2(rpath, lpath);
1096  tlist_list = list_make2(rpath_tlist, lpath_tlist);
1097  firstFlag = 1;
1098  }
1099 
1100  /*
1101  * Generate tlist for Append plan node.
1102  *
1103  * The tlist for an Append plan isn't important as far as the Append is
1104  * concerned, but we must make it look real anyway for the benefit of the
1105  * next plan level up. In fact, it has to be real enough that the flag
1106  * column is shown as a variable not a constant, else setrefs.c will get
1107  * confused.
1108  */
1109  tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
1110  tlist_list, refnames_tlist);
1111 
1112  *pTargetList = tlist;
1113 
1114  /* Build result relation. */
1115  result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1116  bms_union(lrel->relids, rrel->relids));
1117  result_rel->reltarget = create_pathtarget(root, tlist);
1118 
1119  /*
1120  * Append the child results together.
1121  */
1122  path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
1123  NIL, NULL, 0, false, -1);
1124 
1125  /* Identify the grouping semantics */
1126  groupList = generate_setop_grouplist(op, tlist);
1127 
1128  /*
1129  * Estimate number of distinct groups that we'll need hashtable entries
1130  * for; this is the size of the left-hand input for EXCEPT, or the smaller
1131  * input for INTERSECT. Also estimate the number of eventual output rows.
1132  * In non-ALL cases, we estimate each group produces one output row; in
1133  * ALL cases use the relevant relation size. These are worst-case
1134  * estimates, of course, but we need to be conservative.
1135  */
1136  if (op->op == SETOP_EXCEPT)
1137  {
1138  dNumGroups = dLeftGroups;
1139  dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1140  }
1141  else
1142  {
1143  dNumGroups = Min(dLeftGroups, dRightGroups);
1144  dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1145  }
1146 
1147  /*
1148  * Decide whether to hash or sort, and add a sort node if needed.
1149  */
1150  use_hash = choose_hashed_setop(root, groupList, path,
1151  dNumGroups, dNumOutputRows,
1152  (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
1153 
1154  if (groupList && !use_hash)
1155  path = (Path *) create_sort_path(root,
1156  result_rel,
1157  path,
1159  groupList,
1160  tlist),
1161  -1.0);
1162 
1163  /*
1164  * Finally, add a SetOp path node to generate the correct output.
1165  */
1166  switch (op->op)
1167  {
1168  case SETOP_INTERSECT:
1170  break;
1171  case SETOP_EXCEPT:
1172  cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1173  break;
1174  default:
1175  elog(ERROR, "unrecognized set op: %d", (int) op->op);
1176  cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1177  break;
1178  }
1179  path = (Path *) create_setop_path(root,
1180  result_rel,
1181  path,
1182  cmd,
1183  use_hash ? SETOP_HASHED : SETOP_SORTED,
1184  groupList,
1185  list_length(op->colTypes) + 1,
1186  use_hash ? firstFlag : -1,
1187  dNumGroups,
1188  dNumOutputRows);
1189 
1190  result_rel->rows = path->rows;
1191  add_path(result_rel, path);
1192  return result_rel;
1193 }
1194 
1195 /*
1196  * Pull up children of a UNION node that are identically-propertied UNIONs.
1197  *
1198  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1199  * output rows will be lost anyway.
1200  *
1201  * NOTE: currently, we ignore collations while determining if a child has
1202  * the same properties. This is semantically sound only so long as all
1203  * collations have the same notion of equality. It is valid from an
1204  * implementation standpoint because we don't care about the ordering of
1205  * a UNION child's result: UNION ALL results are always unordered, and
1206  * generate_union_paths will force a fresh sort if the top level is a UNION.
1207  */
1208 static List *
1210  SetOperationStmt *top_union,
1211  List *refnames_tlist,
1212  List **tlist_list,
1213  List **istrivial_tlist)
1214 {
1215  List *pending_rels = list_make1(top_union);
1216  List *result = NIL;
1217  List *child_tlist;
1218  bool trivial_tlist;
1219 
1220  *tlist_list = NIL;
1221  *istrivial_tlist = NIL;
1222 
1223  while (pending_rels != NIL)
1224  {
1225  Node *setOp = linitial(pending_rels);
1226 
1227  pending_rels = list_delete_first(pending_rels);
1228 
1229  if (IsA(setOp, SetOperationStmt))
1230  {
1231  SetOperationStmt *op = (SetOperationStmt *) setOp;
1232 
1233  if (op->op == top_union->op &&
1234  (op->all == top_union->all || op->all) &&
1235  equal(op->colTypes, top_union->colTypes))
1236  {
1237  /* Same UNION, so fold children into parent */
1238  pending_rels = lcons(op->rarg, pending_rels);
1239  pending_rels = lcons(op->larg, pending_rels);
1240  continue;
1241  }
1242  }
1243 
1244  /*
1245  * Not same, so plan this child separately.
1246  *
1247  * Note we disallow any resjunk columns in child results. This is
1248  * necessary since the Append node that implements the union won't do
1249  * any projection, and upper levels will get confused if some of our
1250  * output tuples have junk and some don't. This case only arises when
1251  * we have an EXCEPT or INTERSECT as child, else there won't be
1252  * resjunk anyway.
1253  */
1254  result = lappend(result, recurse_set_operations(setOp, root,
1255  top_union->colTypes,
1256  top_union->colCollations,
1257  false, -1,
1258  refnames_tlist,
1259  &child_tlist,
1260  &trivial_tlist));
1261  *tlist_list = lappend(*tlist_list, child_tlist);
1262  *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1263  }
1264 
1265  return result;
1266 }
1267 
1268 /*
1269  * postprocess_setop_rel - perform steps required after adding paths
1270  */
1271 static void
1273 {
1274  /*
1275  * We don't currently worry about allowing FDWs to contribute paths to
1276  * this relation, but give extensions a chance.
1277  */
1279  (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1280  NULL, rel, NULL);
1281 
1282  /* Select cheapest path */
1283  set_cheapest(rel);
1284 }
1285 
1286 /*
1287  * choose_hashed_setop - should we use hashing for a set operation?
1288  */
1289 static bool
1291  Path *input_path,
1292  double dNumGroups, double dNumOutputRows,
1293  const char *construct)
1294 {
1295  int numGroupCols = list_length(groupClauses);
1296  Size hash_mem_limit = get_hash_memory_limit();
1297  bool can_sort;
1298  bool can_hash;
1299  Size hashentrysize;
1300  Path hashed_p;
1301  Path sorted_p;
1302  double tuple_fraction;
1303 
1304  /* Check whether the operators support sorting or hashing */
1305  can_sort = grouping_is_sortable(groupClauses);
1306  can_hash = grouping_is_hashable(groupClauses);
1307  if (can_hash && can_sort)
1308  {
1309  /* we have a meaningful choice to make, continue ... */
1310  }
1311  else if (can_hash)
1312  return true;
1313  else if (can_sort)
1314  return false;
1315  else
1316  ereport(ERROR,
1317  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1318  /* translator: %s is UNION, INTERSECT, or EXCEPT */
1319  errmsg("could not implement %s", construct),
1320  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1321 
1322  /* Prefer sorting when enable_hashagg is off */
1323  if (!enable_hashagg)
1324  return false;
1325 
1326  /*
1327  * Don't do it if it doesn't look like the hashtable will fit into
1328  * hash_mem.
1329  */
1330  hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1331 
1332  if (hashentrysize * dNumGroups > hash_mem_limit)
1333  return false;
1334 
1335  /*
1336  * See if the estimated cost is no more than doing it the other way.
1337  *
1338  * We need to consider input_plan + hashagg versus input_plan + sort +
1339  * group. Note that the actual result plan might involve a SetOp or
1340  * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1341  * should be close enough for our purposes here.
1342  *
1343  * These path variables are dummies that just hold cost fields; we don't
1344  * make actual Paths for these steps.
1345  */
1346  cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1347  numGroupCols, dNumGroups,
1348  NIL,
1349  input_path->startup_cost, input_path->total_cost,
1350  input_path->rows, input_path->pathtarget->width);
1351 
1352  /*
1353  * Now for the sorted case. Note that the input is *always* unsorted,
1354  * since it was made by appending unrelated sub-relations together.
1355  */
1356  sorted_p.startup_cost = input_path->startup_cost;
1357  sorted_p.total_cost = input_path->total_cost;
1358  /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1359  cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1360  input_path->rows, input_path->pathtarget->width,
1361  0.0, work_mem, -1.0);
1362  cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1363  NIL,
1364  sorted_p.startup_cost, sorted_p.total_cost,
1365  input_path->rows);
1366 
1367  /*
1368  * Now make the decision using the top-level tuple fraction. First we
1369  * have to convert an absolute count (LIMIT) into fractional form.
1370  */
1371  tuple_fraction = root->tuple_fraction;
1372  if (tuple_fraction >= 1.0)
1373  tuple_fraction /= dNumOutputRows;
1374 
1375  if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1376  tuple_fraction) < 0)
1377  {
1378  /* Hashed is cheaper, so use it */
1379  return true;
1380  }
1381  return false;
1382 }
1383 
1384 /*
1385  * Generate targetlist for a set-operation plan node
1386  *
1387  * colTypes: OID list of set-op's result column datatypes
1388  * colCollations: OID list of set-op's result column collations
1389  * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1390  * varno: varno to use in generated Vars
1391  * hack_constants: true to copy up constants (see comments in code)
1392  * input_tlist: targetlist of this node's input node
1393  * refnames_tlist: targetlist to take column names from
1394  * trivial_tlist: output parameter, set to true if targetlist is trivial
1395  */
1396 static List *
1397 generate_setop_tlist(List *colTypes, List *colCollations,
1398  int flag,
1399  Index varno,
1400  bool hack_constants,
1401  List *input_tlist,
1402  List *refnames_tlist,
1403  bool *trivial_tlist)
1404 {
1405  List *tlist = NIL;
1406  int resno = 1;
1407  ListCell *ctlc,
1408  *cclc,
1409  *itlc,
1410  *rtlc;
1411  TargetEntry *tle;
1412  Node *expr;
1413 
1414  *trivial_tlist = true; /* until proven differently */
1415 
1416  forfour(ctlc, colTypes, cclc, colCollations,
1417  itlc, input_tlist, rtlc, refnames_tlist)
1418  {
1419  Oid colType = lfirst_oid(ctlc);
1420  Oid colColl = lfirst_oid(cclc);
1421  TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1422  TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1423 
1424  Assert(inputtle->resno == resno);
1425  Assert(reftle->resno == resno);
1426  Assert(!inputtle->resjunk);
1427  Assert(!reftle->resjunk);
1428 
1429  /*
1430  * Generate columns referencing input columns and having appropriate
1431  * data types and column names. Insert datatype coercions where
1432  * necessary.
1433  *
1434  * HACK: constants in the input's targetlist are copied up as-is
1435  * rather than being referenced as subquery outputs. This is mainly
1436  * to ensure that when we try to coerce them to the output column's
1437  * datatype, the right things happen for UNKNOWN constants. But do
1438  * this only at the first level of subquery-scan plans; we don't want
1439  * phony constants appearing in the output tlists of upper-level
1440  * nodes!
1441  *
1442  * Note that copying a constant doesn't in itself require us to mark
1443  * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1444  */
1445  if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1446  expr = (Node *) inputtle->expr;
1447  else
1448  expr = (Node *) makeVar(varno,
1449  inputtle->resno,
1450  exprType((Node *) inputtle->expr),
1451  exprTypmod((Node *) inputtle->expr),
1452  exprCollation((Node *) inputtle->expr),
1453  0);
1454 
1455  if (exprType(expr) != colType)
1456  {
1457  /*
1458  * Note: it's not really cool to be applying coerce_to_common_type
1459  * here; one notable point is that assign_expr_collations never
1460  * gets run on any generated nodes. For the moment that's not a
1461  * problem because we force the correct exposed collation below.
1462  * It would likely be best to make the parser generate the correct
1463  * output tlist for every set-op to begin with, though.
1464  */
1465  expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1466  expr,
1467  colType,
1468  "UNION/INTERSECT/EXCEPT");
1469  *trivial_tlist = false; /* the coercion makes it not trivial */
1470  }
1471 
1472  /*
1473  * Ensure the tlist entry's exposed collation matches the set-op. This
1474  * is necessary because plan_set_operations() reports the result
1475  * ordering as a list of SortGroupClauses, which don't carry collation
1476  * themselves but just refer to tlist entries. If we don't show the
1477  * right collation then planner.c might do the wrong thing in
1478  * higher-level queries.
1479  *
1480  * Note we use RelabelType, not CollateExpr, since this expression
1481  * will reach the executor without any further processing.
1482  */
1483  if (exprCollation(expr) != colColl)
1484  {
1485  expr = applyRelabelType(expr,
1486  exprType(expr), exprTypmod(expr), colColl,
1487  COERCE_IMPLICIT_CAST, -1, false);
1488  *trivial_tlist = false; /* the relabel makes it not trivial */
1489  }
1490 
1491  tle = makeTargetEntry((Expr *) expr,
1492  (AttrNumber) resno++,
1493  pstrdup(reftle->resname),
1494  false);
1495 
1496  /*
1497  * By convention, all non-resjunk columns in a setop tree have
1498  * ressortgroupref equal to their resno. In some cases the ref isn't
1499  * needed, but this is a cleaner way than modifying the tlist later.
1500  */
1501  tle->ressortgroupref = tle->resno;
1502 
1503  tlist = lappend(tlist, tle);
1504  }
1505 
1506  if (flag >= 0)
1507  {
1508  /* Add a resjunk flag column */
1509  /* flag value is the given constant */
1510  expr = (Node *) makeConst(INT4OID,
1511  -1,
1512  InvalidOid,
1513  sizeof(int32),
1515  false,
1516  true);
1517  tle = makeTargetEntry((Expr *) expr,
1518  (AttrNumber) resno++,
1519  pstrdup("flag"),
1520  true);
1521  tlist = lappend(tlist, tle);
1522  *trivial_tlist = false; /* the extra entry makes it not trivial */
1523  }
1524 
1525  return tlist;
1526 }
1527 
1528 /*
1529  * Generate targetlist for a set-operation Append node
1530  *
1531  * colTypes: OID list of set-op's result column datatypes
1532  * colCollations: OID list of set-op's result column collations
1533  * flag: true to create a flag column copied up from subplans
1534  * input_tlists: list of tlists for sub-plans of the Append
1535  * refnames_tlist: targetlist to take column names from
1536  *
1537  * The entries in the Append's targetlist should always be simple Vars;
1538  * we just have to make sure they have the right datatypes/typmods/collations.
1539  * The Vars are always generated with varno 0.
1540  *
1541  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1542  * cannot figure out a realistic width for the tlist we make here. But we
1543  * ought to refactor this code to produce a PathTarget directly, anyway.
1544  */
1545 static List *
1546 generate_append_tlist(List *colTypes, List *colCollations,
1547  bool flag,
1548  List *input_tlists,
1549  List *refnames_tlist)
1550 {
1551  List *tlist = NIL;
1552  int resno = 1;
1553  ListCell *curColType;
1554  ListCell *curColCollation;
1555  ListCell *ref_tl_item;
1556  int colindex;
1557  TargetEntry *tle;
1558  Node *expr;
1559  ListCell *tlistl;
1560  int32 *colTypmods;
1561 
1562  /*
1563  * First extract typmods to use.
1564  *
1565  * If the inputs all agree on type and typmod of a particular column, use
1566  * that typmod; else use -1.
1567  */
1568  colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1569 
1570  foreach(tlistl, input_tlists)
1571  {
1572  List *subtlist = (List *) lfirst(tlistl);
1573  ListCell *subtlistl;
1574 
1575  curColType = list_head(colTypes);
1576  colindex = 0;
1577  foreach(subtlistl, subtlist)
1578  {
1579  TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1580 
1581  if (subtle->resjunk)
1582  continue;
1583  Assert(curColType != NULL);
1584  if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1585  {
1586  /* If first subplan, copy the typmod; else compare */
1587  int32 subtypmod = exprTypmod((Node *) subtle->expr);
1588 
1589  if (tlistl == list_head(input_tlists))
1590  colTypmods[colindex] = subtypmod;
1591  else if (subtypmod != colTypmods[colindex])
1592  colTypmods[colindex] = -1;
1593  }
1594  else
1595  {
1596  /* types disagree, so force typmod to -1 */
1597  colTypmods[colindex] = -1;
1598  }
1599  curColType = lnext(colTypes, curColType);
1600  colindex++;
1601  }
1602  Assert(curColType == NULL);
1603  }
1604 
1605  /*
1606  * Now we can build the tlist for the Append.
1607  */
1608  colindex = 0;
1609  forthree(curColType, colTypes, curColCollation, colCollations,
1610  ref_tl_item, refnames_tlist)
1611  {
1612  Oid colType = lfirst_oid(curColType);
1613  int32 colTypmod = colTypmods[colindex++];
1614  Oid colColl = lfirst_oid(curColCollation);
1615  TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1616 
1617  Assert(reftle->resno == resno);
1618  Assert(!reftle->resjunk);
1619  expr = (Node *) makeVar(0,
1620  resno,
1621  colType,
1622  colTypmod,
1623  colColl,
1624  0);
1625  tle = makeTargetEntry((Expr *) expr,
1626  (AttrNumber) resno++,
1627  pstrdup(reftle->resname),
1628  false);
1629 
1630  /*
1631  * By convention, all non-resjunk columns in a setop tree have
1632  * ressortgroupref equal to their resno. In some cases the ref isn't
1633  * needed, but this is a cleaner way than modifying the tlist later.
1634  */
1635  tle->ressortgroupref = tle->resno;
1636 
1637  tlist = lappend(tlist, tle);
1638  }
1639 
1640  if (flag)
1641  {
1642  /* Add a resjunk flag column */
1643  /* flag value is shown as copied up from subplan */
1644  expr = (Node *) makeVar(0,
1645  resno,
1646  INT4OID,
1647  -1,
1648  InvalidOid,
1649  0);
1650  tle = makeTargetEntry((Expr *) expr,
1651  (AttrNumber) resno++,
1652  pstrdup("flag"),
1653  true);
1654  tlist = lappend(tlist, tle);
1655  }
1656 
1657  pfree(colTypmods);
1658 
1659  return tlist;
1660 }
1661 
1662 /*
1663  * generate_setop_grouplist
1664  * Build a SortGroupClause list defining the sort/grouping properties
1665  * of the setop's output columns.
1666  *
1667  * Parse analysis already determined the properties and built a suitable
1668  * list, except that the entries do not have sortgrouprefs set because
1669  * the parser output representation doesn't include a tlist for each
1670  * setop. So what we need to do here is copy that list and install
1671  * proper sortgrouprefs into it (copying those from the targetlist).
1672  */
1673 static List *
1675 {
1676  List *grouplist = copyObject(op->groupClauses);
1677  ListCell *lg;
1678  ListCell *lt;
1679 
1680  lg = list_head(grouplist);
1681  foreach(lt, targetlist)
1682  {
1683  TargetEntry *tle = (TargetEntry *) lfirst(lt);
1684  SortGroupClause *sgc;
1685 
1686  if (tle->resjunk)
1687  {
1688  /* resjunk columns should not have sortgrouprefs */
1689  Assert(tle->ressortgroupref == 0);
1690  continue; /* ignore resjunk columns */
1691  }
1692 
1693  /* non-resjunk columns should have sortgroupref = resno */
1694  Assert(tle->ressortgroupref == tle->resno);
1695 
1696  /* non-resjunk columns should have grouping clauses */
1697  Assert(lg != NULL);
1698  sgc = (SortGroupClause *) lfirst(lg);
1699  lg = lnext(grouplist, lg);
1700  Assert(sgc->tleSortGroupRef == 0);
1701 
1702  sgc->tleSortGroupRef = tle->ressortgroupref;
1703  }
1704  Assert(lg == NULL);
1705  return grouplist;
1706 }
int16 AttrNumber
Definition: attnum.h:21
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:251
#define bms_is_empty(a)
Definition: bitmapset.h:118
#define Min(x, y)
Definition: c.h:1004
#define MAXALIGN(LEN)
Definition: c.h:811
signed int int32
Definition: c.h:494
#define Max(x, y)
Definition: c.h:998
#define Assert(condition)
Definition: c.h:858
unsigned int Index
Definition: c.h:614
size_t Size
Definition: c.h:605
int max_parallel_workers_per_gather
Definition: costsize.c:132
void cost_agg(Path *path, PlannerInfo *root, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, double numGroups, List *quals, Cost input_startup_cost, Cost input_total_cost, double input_tuples, double input_width)
Definition: costsize.c:2650
void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
Definition: costsize.c:5792
bool enable_parallel_append
Definition: costsize.c:150
bool enable_hashagg
Definition: costsize.c:141
void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, Cost input_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples)
Definition: costsize.c:2124
void cost_group(Path *path, PlannerInfo *root, int numGroupCols, double numGroups, List *quals, Cost input_startup_cost, Cost input_total_cost, double input_tuples)
Definition: costsize.c:3163
bool enable_incremental_sort
Definition: costsize.c:140
int errdetail(const char *fmt,...)
Definition: elog.c:1205
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
void add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel, List *child_tlist, List *setop_pathkeys)
Definition: equivclass.c:2899
int work_mem
Definition: globals.c:128
#define SizeofMinimalTupleHeader
Definition: htup_details.h:647
List * lappend(List *list, void *datum)
Definition: list.c:339
List * lappend_int(List *list, int datum)
Definition: list.c:357
List * list_delete_first(List *list)
Definition: list.c:943
List * lcons(void *datum, List *list)
Definition: list.c:495
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:310
TargetEntry * makeTargetEntry(Expr *expr, AttrNumber resno, char *resname, bool resjunk)
Definition: makefuncs.c:240
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:301
Var * makeVar(int varno, AttrNumber varattno, Oid vartype, int32 vartypmod, Oid varcollid, Index varlevelsup)
Definition: makefuncs.c:66
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:298
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816
Node * applyRelabelType(Node *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat, int rlocation, bool overwrite_ok)
Definition: nodeFuncs.c:631
size_t get_hash_memory_limit(void)
Definition: nodeHash.c:3595
SetOpCmd
Definition: nodes.h:396
@ SETOPCMD_EXCEPT
Definition: nodes.h:399
@ SETOPCMD_EXCEPT_ALL
Definition: nodes.h:400
@ SETOPCMD_INTERSECT_ALL
Definition: nodes.h:398
@ SETOPCMD_INTERSECT
Definition: nodes.h:397
@ SETOP_HASHED
Definition: nodes.h:406
@ SETOP_SORTED
Definition: nodes.h:405
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:224
#define nodeTag(nodeptr)
Definition: nodes.h:133
@ AGG_HASHED
Definition: nodes.h:355
@ AGGSPLIT_SIMPLE
Definition: nodes.h:376
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
Node * coerce_to_common_type(ParseState *pstate, Node *node, Oid targetTypeId, const char *context)
@ SETOP_INTERSECT
Definition: parsenodes.h:2117
@ SETOP_UNION
Definition: parsenodes.h:2116
@ SETOP_EXCEPT
Definition: parsenodes.h:2118
@ RTE_SUBQUERY
Definition: parsenodes.h:1029
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:635
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:573
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1347
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1069
AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, double rows)
Definition: pathnode.c:1244
SortPath * create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, double limit_tuples)
Definition: pathnode.c:3000
ProjectionPath * create_projection_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target)
Definition: pathnode.c:2685
UpperUniquePath * create_upper_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, int numCols, double numGroups)
Definition: pathnode.c:3103
GatherPath * create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, Relids required_outer, double *rows)
Definition: pathnode.c:1972
MergeAppendPath * create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *pathkeys, Relids required_outer)
Definition: pathnode.c:1415
void set_cheapest(RelOptInfo *parent_rel)
Definition: pathnode.c:242
SubqueryScanPath * create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, bool trivial_pathtarget, List *pathkeys, Relids required_outer)
Definition: pathnode.c:2016
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:747
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:115
SetOpPath * create_setop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SetOpCmd cmd, SetOpStrategy strategy, List *distinctList, AttrNumber flagColIdx, int firstFlag, double numGroups, double outputRows)
Definition: pathnode.c:3552
RecursiveUnionPath * create_recursiveunion_path(PlannerInfo *root, RelOptInfo *rel, Path *leftpath, Path *rightpath, PathTarget *target, List *distinctList, int wtParam, double numGroups)
Definition: pathnode.c:3614
IncrementalSortPath * create_incremental_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, int presorted_keys, double limit_tuples)
Definition: pathnode.c:2951
AggPath * create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, AggStrategy aggstrategy, AggSplit aggsplit, List *groupClause, List *qual, const AggClauseCosts *aggcosts, double numGroups)
Definition: pathnode.c:3155
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:420
Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, PathTarget *target)
Definition: pathnode.c:2793
@ TOTAL_COST
Definition: pathnodes.h:38
@ UPPERREL_SETOP
Definition: pathnodes.h:71
@ UPPERREL_FINAL
Definition: pathnodes.h:79
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:41
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
#define list_make1(x1)
Definition: pg_list.h:212
#define forthree(cell1, list1, cell2, list2, cell3, list3)
Definition: pg_list.h:563
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define linitial(l)
Definition: pg_list.h:178
#define forfour(cell1, list1, cell2, list2, cell3, list3, cell4, list4)
Definition: pg_list.h:575
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define list_make2(x1, x2)
Definition: pg_list.h:214
PlannerInfo * subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, bool hasRecursion, double tuple_fraction, SetOperationStmt *setops)
Definition: planner.c:628
create_upper_paths_hook_type create_upper_paths_hook
Definition: planner.c:74
void check_stack_depth(void)
Definition: postgres.c:3531
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static List * generate_setop_tlist(List *colTypes, List *colCollations, int flag, Index varno, bool hack_constants, List *input_tlist, List *refnames_tlist, bool *trivial_tlist)
Definition: prepunion.c:1397
static List * generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
Definition: prepunion.c:1674
static RelOptInfo * generate_union_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:699
static RelOptInfo * generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:1018
static RelOptInfo * recurse_set_operations(Node *setOp, PlannerInfo *root, List *colTypes, List *colCollations, bool junkOK, int flag, List *refnames_tlist, List **pTargetList, bool *istrivial_tlist)
Definition: prepunion.c:230
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
Definition: prepunion.c:1272
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, Path *input_path, double dNumGroups, double dNumOutputRows, const char *construct)
Definition: prepunion.c:1290
static List * generate_append_tlist(List *colTypes, List *colCollations, bool flag, List *input_tlists, List *refnames_tlist)
Definition: prepunion.c:1546
static RelOptInfo * generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:384
bool set_operation_ordered_results_useful(SetOperationStmt *setop)
Definition: prepunion.c:188
RelOptInfo * plan_set_operations(PlannerInfo *root)
Definition: prepunion.c:99
static List * plan_union_children(PlannerInfo *root, SetOperationStmt *top_union, List *refnames_tlist, List **tlist_list, List **istrivial_tlist)
Definition: prepunion.c:1209
static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, bool trivial_tlist, List *child_tlist, List *interesting_pathkeys, double *pNumGroups)
Definition: prepunion.c:504
@ COERCE_IMPLICIT_CAST
Definition: primnodes.h:706
tree ctl root
Definition: radixtree.h:1884
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
void setup_simple_rel_arrays(PlannerInfo *root)
Definition: relnode.c:94
RelOptInfo * build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
Definition: relnode.c:192
RelOptInfo * fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
Definition: relnode.c:1470
double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
Definition: selfuncs.c:3416
Definition: pg_list.h:54
Definition: nodes.h:129
List * pathkeys
Definition: pathnodes.h:1654
Cardinality rows
Definition: pathnodes.h:1649
Cost startup_cost
Definition: pathnodes.h:1650
Cost total_cost
Definition: pathnodes.h:1651
List * processed_tlist
Definition: pathnodes.h:458
bool hasHavingQual
Definition: pathnodes.h:498
Query * parse
Definition: pathnodes.h:202
Cardinality limit_tuples
Definition: pathnodes.h:485
List * setop_pathkeys
Definition: pathnodes.h:404
List * groupClause
Definition: parsenodes.h:200
List * targetList
Definition: parsenodes.h:191
List * groupingSets
Definition: parsenodes.h:203
List * distinctClause
Definition: parsenodes.h:209
Query * subquery
Definition: parsenodes.h:1114
Relids relids
Definition: pathnodes.h:861
struct PathTarget * reltarget
Definition: pathnodes.h:883
bool consider_parallel
Definition: pathnodes.h:877
Relids lateral_relids
Definition: pathnodes.h:903
List * pathlist
Definition: pathnodes.h:888
struct Path * cheapest_total_path
Definition: pathnodes.h:892
bool consider_startup
Definition: pathnodes.h:873
List * partial_pathlist
Definition: pathnodes.h:890
PlannerInfo * subroot
Definition: pathnodes.h:943
Cardinality rows
Definition: pathnodes.h:867
RTEKind rtekind
Definition: pathnodes.h:912
SetOperation op
Definition: parsenodes.h:2193
Index tleSortGroupRef
Definition: parsenodes.h:1442
Expr * expr
Definition: primnodes.h:2162
AttrNumber resno
Definition: primnodes.h:2164
Index ressortgroupref
Definition: primnodes.h:2168
char * flag(int b)
Definition: test-ctype.c:33
bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
Definition: tlist.c:282
List * get_tlist_exprs(List *tlist, bool includeJunk)
Definition: tlist.c:163
bool grouping_is_sortable(List *groupClause)
Definition: tlist.c:540
List * make_tlist_from_pathtarget(PathTarget *target)
Definition: tlist.c:624
bool grouping_is_hashable(List *groupClause)
Definition: tlist.c:560
bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
Definition: tlist.c:248
#define create_pathtarget(root, tlist)
Definition: tlist.h:53