PostgreSQL Source Code  git master
prepunion.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * prepunion.c
4  * Routines to plan set-operation queries. The filename is a leftover
5  * from a time when only UNIONs were implemented.
6  *
7  * There are two code paths in the planner for set-operation queries.
8  * If a subquery consists entirely of simple UNION ALL operations, it
9  * is converted into an "append relation". Otherwise, it is handled
10  * by the general code in this module (plan_set_operations and its
11  * subroutines). There is some support code here for the append-relation
12  * case, but most of the heavy lifting for that is done elsewhere,
13  * notably in prepjointree.c and allpaths.c.
14  *
15  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16  * Portions Copyright (c) 1994, Regents of the University of California
17  *
18  *
19  * IDENTIFICATION
20  * src/backend/optimizer/prep/prepunion.c
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25 
26 #include "access/htup_details.h"
27 #include "catalog/pg_type.h"
28 #include "miscadmin.h"
29 #include "nodes/makefuncs.h"
30 #include "nodes/nodeFuncs.h"
31 #include "optimizer/cost.h"
32 #include "optimizer/pathnode.h"
33 #include "optimizer/paths.h"
34 #include "optimizer/planner.h"
35 #include "optimizer/prep.h"
36 #include "optimizer/tlist.h"
37 #include "parser/parse_coerce.h"
38 #include "utils/selfuncs.h"
39 
40 
42  List *colTypes, List *colCollations,
43  bool junkOK,
44  int flag, List *refnames_tlist,
45  List **pTargetList,
46  bool *istrivial_tlist);
49  List *refnames_tlist,
50  List **pTargetList);
52  bool trivial_tlist, List *child_tlist,
53  List *interesting_pathkeys,
54  double *pNumGroups);
56  List *refnames_tlist,
57  List **pTargetList);
59  List *refnames_tlist,
60  List **pTargetList);
62  SetOperationStmt *top_union,
63  List *refnames_tlist,
64  List **tlist_list,
65  List **istrivial_tlist);
67 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
68  Path *input_path,
69  double dNumGroups, double dNumOutputRows,
70  const char *construct);
71 static List *generate_setop_tlist(List *colTypes, List *colCollations,
72  int flag,
73  Index varno,
74  bool hack_constants,
75  List *input_tlist,
76  List *refnames_tlist,
77  bool *trivial_tlist);
78 static List *generate_append_tlist(List *colTypes, List *colCollations,
79  bool flag,
80  List *input_tlists,
81  List *refnames_tlist);
82 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
83 
84 
85 /*
86  * plan_set_operations
87  *
88  * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
89  *
90  * This routine only deals with the setOperations tree of the given query.
91  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
92  * when we return to grouping_planner; likewise for LIMIT.
93  *
94  * What we return is an "upperrel" RelOptInfo containing at least one Path
95  * that implements the set-operation tree. In addition, root->processed_tlist
96  * receives a targetlist representing the output of the topmost setop node.
97  */
98 RelOptInfo *
100 {
101  Query *parse = root->parse;
102  SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
103  Node *node;
104  RangeTblEntry *leftmostRTE;
105  Query *leftmostQuery;
106  RelOptInfo *setop_rel;
107  List *top_tlist;
108 
109  Assert(topop);
110 
111  /* check for unsupported stuff */
112  Assert(parse->jointree->fromlist == NIL);
113  Assert(parse->jointree->quals == NULL);
114  Assert(parse->groupClause == NIL);
115  Assert(parse->havingQual == NULL);
116  Assert(parse->windowClause == NIL);
117  Assert(parse->distinctClause == NIL);
118 
119  /*
120  * In the outer query level, equivalence classes are limited to classes
121  * which define that the top-level target entry is equivalent to the
122  * corresponding child target entry. There won't be any equivalence class
123  * merging. Mark that merging is complete to allow us to make pathkeys.
124  */
125  Assert(root->eq_classes == NIL);
126  root->ec_merging_done = true;
127 
128  /*
129  * We'll need to build RelOptInfos for each of the leaf subqueries, which
130  * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
131  * arrays for those, and for AppendRelInfos in case they're needed.
132  */
134 
135  /*
136  * Find the leftmost component Query. We need to use its column names for
137  * all generated tlists (else SELECT INTO won't work right).
138  */
139  node = topop->larg;
140  while (node && IsA(node, SetOperationStmt))
141  node = ((SetOperationStmt *) node)->larg;
142  Assert(node && IsA(node, RangeTblRef));
143  leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
144  leftmostQuery = leftmostRTE->subquery;
145  Assert(leftmostQuery != NULL);
146 
147  /*
148  * If the topmost node is a recursive union, it needs special processing.
149  */
150  if (root->hasRecursion)
151  {
152  setop_rel = generate_recursion_path(topop, root,
153  leftmostQuery->targetList,
154  &top_tlist);
155  }
156  else
157  {
158  bool trivial_tlist;
159 
160  /*
161  * Recurse on setOperations tree to generate paths for set ops. The
162  * final output paths should have just the column types shown as the
163  * output from the top-level node, plus possibly resjunk working
164  * columns (we can rely on upper-level nodes to deal with that).
165  */
166  setop_rel = recurse_set_operations((Node *) topop, root,
167  topop->colTypes, topop->colCollations,
168  true, -1,
169  leftmostQuery->targetList,
170  &top_tlist,
171  &trivial_tlist);
172  }
173 
174  /* Must return the built tlist into root->processed_tlist. */
175  root->processed_tlist = top_tlist;
176 
177  return setop_rel;
178 }
179 
180 /*
181  * set_operation_ordered_results_useful
182  * Return true if the given SetOperationStmt can be executed by utilizing
183  * paths that provide sorted input according to the setop's targetlist.
184  * Returns false when sorted paths are not any more useful then unsorted
185  * ones.
186  */
187 bool
189 {
190  /*
191  * Paths sorted by the targetlist are useful for UNION as we can opt to
192  * MergeAppend the sorted paths then Unique them. Ordered paths are no
193  * more useful than unordered ones for UNION ALL.
194  */
195  if (!setop->all && setop->op == SETOP_UNION)
196  return true;
197 
198  /*
199  * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
200  * correctly sorted input paths.
201  */
202  return false;
203 }
204 
205 /*
206  * recurse_set_operations
207  * Recursively handle one step in a tree of set operations
208  *
209  * colTypes: OID list of set-op's result column datatypes
210  * colCollations: OID list of set-op's result column collations
211  * junkOK: if true, child resjunk columns may be left in the result
212  * flag: if >= 0, add a resjunk output column indicating value of flag
213  * refnames_tlist: targetlist to take column names from
214  *
215  * Returns a RelOptInfo for the subtree, as well as these output parameters:
216  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
217  * *istrivial_tlist: true if, and only if, datatypes between parent and child
218  * match.
219  *
220  * The pTargetList output parameter is mostly redundant with the pathtarget
221  * of the returned RelOptInfo, but for the moment we need it because much of
222  * the logic in this file depends on flag columns being marked resjunk.
223  * Pending a redesign of how that works, this is the easy way out.
224  *
225  * We don't have to care about typmods here: the only allowed difference
226  * between set-op input and output typmods is input is a specific typmod
227  * and output is -1, and that does not require a coercion.
228  */
229 static RelOptInfo *
231  List *colTypes, List *colCollations,
232  bool junkOK,
233  int flag, List *refnames_tlist,
234  List **pTargetList,
235  bool *istrivial_tlist)
236 {
237  RelOptInfo *rel;
238 
239  *istrivial_tlist = true; /* for now */
240 
241  /* Guard against stack overflow due to overly complex setop nests */
243 
244  if (IsA(setOp, RangeTblRef))
245  {
246  RangeTblRef *rtr = (RangeTblRef *) setOp;
247  RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
248  SetOperationStmt *setops;
249  Query *subquery = rte->subquery;
250  PlannerInfo *subroot;
251  List *tlist;
252  bool trivial_tlist;
253 
254  Assert(subquery != NULL);
255 
256  /* Build a RelOptInfo for this leaf subquery. */
257  rel = build_simple_rel(root, rtr->rtindex, NULL);
258 
259  /* plan_params should not be in use in current query level */
260  Assert(root->plan_params == NIL);
261 
262  /*
263  * Pass the set operation details to the subquery_planner to have it
264  * consider generating Paths correctly ordered for the set operation.
265  */
266  setops = castNode(SetOperationStmt, root->parse->setOperations);
267 
268  /* Generate a subroot and Paths for the subquery */
269  subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
270  false, root->tuple_fraction,
271  setops);
272 
273  /*
274  * It should not be possible for the primitive query to contain any
275  * cross-references to other primitive queries in the setop tree.
276  */
277  if (root->plan_params)
278  elog(ERROR, "unexpected outer reference in set operation subquery");
279 
280  /* Figure out the appropriate target list for this subquery. */
281  tlist = generate_setop_tlist(colTypes, colCollations,
282  flag,
283  rtr->rtindex,
284  true,
285  subroot->processed_tlist,
286  refnames_tlist,
287  &trivial_tlist);
288  rel->reltarget = create_pathtarget(root, tlist);
289 
290  /* Return the fully-fledged tlist to caller, too */
291  *pTargetList = tlist;
292  *istrivial_tlist = trivial_tlist;
293  }
294  else if (IsA(setOp, SetOperationStmt))
295  {
296  SetOperationStmt *op = (SetOperationStmt *) setOp;
297 
298  /* UNIONs are much different from INTERSECT/EXCEPT */
299  if (op->op == SETOP_UNION)
300  rel = generate_union_paths(op, root,
301  refnames_tlist,
302  pTargetList);
303  else
304  rel = generate_nonunion_paths(op, root,
305  refnames_tlist,
306  pTargetList);
307 
308  /*
309  * If necessary, add a Result node to project the caller-requested
310  * output columns.
311  *
312  * XXX you don't really want to know about this: setrefs.c will apply
313  * fix_upper_expr() to the Result node's tlist. This would fail if the
314  * Vars generated by generate_setop_tlist() were not exactly equal()
315  * to the corresponding tlist entries of the subplan. However, since
316  * the subplan was generated by generate_union_paths() or
317  * generate_nonunion_paths(), and hence its tlist was generated by
318  * generate_append_tlist(), this will work. We just tell
319  * generate_setop_tlist() to use varno 0.
320  */
321  if (flag >= 0 ||
322  !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
323  !tlist_same_collations(*pTargetList, colCollations, junkOK))
324  {
325  PathTarget *target;
326  bool trivial_tlist;
327  ListCell *lc;
328 
329  *pTargetList = generate_setop_tlist(colTypes, colCollations,
330  flag,
331  0,
332  false,
333  *pTargetList,
334  refnames_tlist,
335  &trivial_tlist);
336  *istrivial_tlist = trivial_tlist;
337  target = create_pathtarget(root, *pTargetList);
338 
339  /* Apply projection to each path */
340  foreach(lc, rel->pathlist)
341  {
342  Path *subpath = (Path *) lfirst(lc);
343  Path *path;
344 
345  Assert(subpath->param_info == NULL);
346  path = apply_projection_to_path(root, subpath->parent,
347  subpath, target);
348  /* If we had to add a Result, path is different from subpath */
349  if (path != subpath)
350  lfirst(lc) = path;
351  }
352 
353  /* Apply projection to each partial path */
354  foreach(lc, rel->partial_pathlist)
355  {
356  Path *subpath = (Path *) lfirst(lc);
357  Path *path;
358 
359  Assert(subpath->param_info == NULL);
360 
361  /* avoid apply_projection_to_path, in case of multiple refs */
362  path = (Path *) create_projection_path(root, subpath->parent,
363  subpath, target);
364  lfirst(lc) = path;
365  }
366  }
368  }
369  else
370  {
371  elog(ERROR, "unrecognized node type: %d",
372  (int) nodeTag(setOp));
373  *pTargetList = NIL;
374  rel = NULL; /* keep compiler quiet */
375  }
376 
377  return rel;
378 }
379 
380 /*
381  * Generate paths for a recursive UNION node
382  */
383 static RelOptInfo *
385  List *refnames_tlist,
386  List **pTargetList)
387 {
388  RelOptInfo *result_rel;
389  Path *path;
390  RelOptInfo *lrel,
391  *rrel;
392  Path *lpath;
393  Path *rpath;
394  List *lpath_tlist;
395  bool lpath_trivial_tlist;
396  List *rpath_tlist;
397  bool rpath_trivial_tlist;
398  List *tlist;
399  List *groupList;
400  double dNumGroups;
401 
402  /* Parser should have rejected other cases */
403  if (setOp->op != SETOP_UNION)
404  elog(ERROR, "only UNION queries can be recursive");
405  /* Worktable ID should be assigned */
406  Assert(root->wt_param_id >= 0);
407 
408  /*
409  * Unlike a regular UNION node, process the left and right inputs
410  * separately without any intention of combining them into one Append.
411  */
412  lrel = recurse_set_operations(setOp->larg, root,
413  setOp->colTypes, setOp->colCollations,
414  false, -1,
415  refnames_tlist,
416  &lpath_tlist,
417  &lpath_trivial_tlist);
418  if (lrel->rtekind == RTE_SUBQUERY)
419  build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
420  NIL, NULL);
421  lpath = lrel->cheapest_total_path;
422  /* The right path will want to look at the left one ... */
423  root->non_recursive_path = lpath;
424  rrel = recurse_set_operations(setOp->rarg, root,
425  setOp->colTypes, setOp->colCollations,
426  false, -1,
427  refnames_tlist,
428  &rpath_tlist,
429  &rpath_trivial_tlist);
430  if (rrel->rtekind == RTE_SUBQUERY)
431  build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
432  NIL, NULL);
433  rpath = rrel->cheapest_total_path;
434  root->non_recursive_path = NULL;
435 
436  /*
437  * Generate tlist for RecursiveUnion path node --- same as in Append cases
438  */
439  tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
440  list_make2(lpath_tlist, rpath_tlist),
441  refnames_tlist);
442 
443  *pTargetList = tlist;
444 
445  /* Build result relation. */
446  result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
447  bms_union(lrel->relids, rrel->relids));
448  result_rel->reltarget = create_pathtarget(root, tlist);
449 
450  /*
451  * If UNION, identify the grouping operators
452  */
453  if (setOp->all)
454  {
455  groupList = NIL;
456  dNumGroups = 0;
457  }
458  else
459  {
460  /* Identify the grouping semantics */
461  groupList = generate_setop_grouplist(setOp, tlist);
462 
463  /* We only support hashing here */
464  if (!grouping_is_hashable(groupList))
465  ereport(ERROR,
466  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
467  errmsg("could not implement recursive UNION"),
468  errdetail("All column datatypes must be hashable.")));
469 
470  /*
471  * For the moment, take the number of distinct groups as equal to the
472  * total input size, ie, the worst case.
473  */
474  dNumGroups = lpath->rows + rpath->rows * 10;
475  }
476 
477  /*
478  * And make the path node.
479  */
481  result_rel,
482  lpath,
483  rpath,
484  result_rel->reltarget,
485  groupList,
486  root->wt_param_id,
487  dNumGroups);
488 
489  add_path(result_rel, path);
490  postprocess_setop_rel(root, result_rel);
491  return result_rel;
492 }
493 
494 /*
495  * build_setop_child_paths
496  * Build paths for the set op child relation denoted by 'rel'.
497  *
498  * interesting_pathkeys: if not NIL, also include paths that suit these
499  * pathkeys, sorting any unsorted paths as required.
500  * *pNumGroups: if not NULL, we estimate the number of distinct groups
501  * in the result, and store it there
502  */
503 static void
505  bool trivial_tlist, List *child_tlist,
506  List *interesting_pathkeys, double *pNumGroups)
507 {
508  RelOptInfo *final_rel;
509  List *setop_pathkeys = rel->subroot->setop_pathkeys;
510  ListCell *lc;
511 
512  /* it can't be a set op child rel if it's not a subquery */
513  Assert(rel->rtekind == RTE_SUBQUERY);
514 
515  /* when sorting is needed, add child rel equivalences */
516  if (interesting_pathkeys != NIL)
518  rel,
519  child_tlist,
520  interesting_pathkeys);
521 
522  /*
523  * Mark rel with estimated output rows, width, etc. Note that we have to
524  * do this before generating outer-query paths, else cost_subqueryscan is
525  * not happy.
526  */
528 
529  /*
530  * Since we may want to add a partial path to this relation, we must set
531  * its consider_parallel flag correctly.
532  */
533  final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
534  rel->consider_parallel = final_rel->consider_parallel;
535 
536  /* Generate subquery scan paths for any interesting path in final_rel */
537  foreach(lc, final_rel->pathlist)
538  {
539  Path *subpath = (Path *) lfirst(lc);
540  List *pathkeys;
541  Path *cheapest_input_path = final_rel->cheapest_total_path;
542  bool is_sorted;
543  int presorted_keys;
544 
545  /*
546  * Include the cheapest path as-is so that the set operation can be
547  * cheaply implemented using a method which does not require the input
548  * to be sorted.
549  */
550  if (subpath == cheapest_input_path)
551  {
552  /* Convert subpath's pathkeys to outer representation */
553  pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
554  make_tlist_from_pathtarget(subpath->pathtarget));
555 
556  /* Generate outer path using this subpath */
558  rel,
559  subpath,
560  trivial_tlist,
561  pathkeys,
562  NULL));
563  }
564 
565  /* skip dealing with sorted paths if the setop doesn't need them */
566  if (interesting_pathkeys == NIL)
567  continue;
568 
569  /*
570  * Create paths to suit final sort order required for setop_pathkeys.
571  * Here we'll sort the cheapest input path (if not sorted already) and
572  * incremental sort any paths which are partially sorted.
573  */
574  is_sorted = pathkeys_count_contained_in(setop_pathkeys,
575  subpath->pathkeys,
576  &presorted_keys);
577 
578  if (!is_sorted)
579  {
580  double limittuples = rel->subroot->limit_tuples;
581 
582  /*
583  * Try at least sorting the cheapest path and also try
584  * incrementally sorting any path which is partially sorted
585  * already (no need to deal with paths which have presorted keys
586  * when incremental sort is disabled unless it's the cheapest
587  * input path).
588  */
589  if (subpath != cheapest_input_path &&
590  (presorted_keys == 0 || !enable_incremental_sort))
591  continue;
592 
593  /*
594  * We've no need to consider both a sort and incremental sort.
595  * We'll just do a sort if there are no presorted keys and an
596  * incremental sort when there are presorted keys.
597  */
598  if (presorted_keys == 0 || !enable_incremental_sort)
600  final_rel,
601  subpath,
602  setop_pathkeys,
603  limittuples);
604  else
606  final_rel,
607  subpath,
608  setop_pathkeys,
609  presorted_keys,
610  limittuples);
611  }
612 
613  /*
614  * subpath is now sorted, so add it to the pathlist. We already added
615  * the cheapest_input_path above, so don't add it again unless we just
616  * sorted it.
617  */
618  if (subpath != cheapest_input_path)
619  {
620  /* Convert subpath's pathkeys to outer representation */
621  pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
622  make_tlist_from_pathtarget(subpath->pathtarget));
623 
624  /* Generate outer path using this subpath */
626  rel,
627  subpath,
628  trivial_tlist,
629  pathkeys,
630  NULL));
631  }
632  }
633 
634  /* if consider_parallel is false, there should be no partial paths */
635  Assert(final_rel->consider_parallel ||
636  final_rel->partial_pathlist == NIL);
637 
638  /*
639  * If we have a partial path for the child relation, we can use that to
640  * build a partial path for this relation. But there's no point in
641  * considering any path but the cheapest.
642  */
643  if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
644  final_rel->partial_pathlist != NIL)
645  {
646  Path *partial_subpath;
647  Path *partial_path;
648 
649  partial_subpath = linitial(final_rel->partial_pathlist);
650  partial_path = (Path *)
651  create_subqueryscan_path(root, rel, partial_subpath,
652  trivial_tlist,
653  NIL, NULL);
654  add_partial_path(rel, partial_path);
655  }
656 
658 
659  /*
660  * Estimate number of groups if caller wants it. If the subquery used
661  * grouping or aggregation, its output is probably mostly unique anyway;
662  * otherwise do statistical estimation.
663  *
664  * XXX you don't really want to know about this: we do the estimation
665  * using the subquery's original targetlist expressions, not the
666  * subroot->processed_tlist which might seem more appropriate. The reason
667  * is that if the subquery is itself a setop, it may return a
668  * processed_tlist containing "varno 0" Vars generated by
669  * generate_append_tlist, and those would confuse estimate_num_groups
670  * mightily. We ought to get rid of the "varno 0" hack, but that requires
671  * a redesign of the parsetree representation of setops, so that there can
672  * be an RTE corresponding to each setop's output.
673  */
674  if (pNumGroups)
675  {
676  PlannerInfo *subroot = rel->subroot;
677  Query *subquery = subroot->parse;
678 
679  if (subquery->groupClause || subquery->groupingSets ||
680  subquery->distinctClause || subroot->hasHavingQual ||
681  subquery->hasAggs)
682  *pNumGroups = rel->cheapest_total_path->rows;
683  else
684  *pNumGroups = estimate_num_groups(subroot,
685  get_tlist_exprs(subquery->targetList, false),
687  NULL,
688  NULL);
689  }
690 }
691 
692 /*
693  * Generate paths for a UNION or UNION ALL node
694  */
695 static RelOptInfo *
697  List *refnames_tlist,
698  List **pTargetList)
699 {
700  Relids relids = NULL;
701  RelOptInfo *result_rel;
702  ListCell *lc;
703  ListCell *lc2;
704  ListCell *lc3;
705  List *cheapest_pathlist = NIL;
706  List *ordered_pathlist = NIL;
707  List *partial_pathlist = NIL;
708  bool partial_paths_valid = true;
709  bool consider_parallel = true;
710  List *rellist;
711  List *tlist_list;
712  List *trivial_tlist_list;
713  List *tlist;
714  List *groupList = NIL;
715  Path *apath;
716  Path *gpath = NULL;
717  bool try_sorted;
718  List *union_pathkeys = NIL;
719 
720  /*
721  * If any of my children are identical UNION nodes (same op, all-flag, and
722  * colTypes) then they can be merged into this node so that we generate
723  * only one Append/MergeAppend and unique-ification for the lot. Recurse
724  * to find such nodes.
725  */
726  rellist = plan_union_children(root,
727  op,
728  refnames_tlist,
729  &tlist_list,
730  &trivial_tlist_list);
731 
732  /*
733  * Generate tlist for Append/MergeAppend plan node.
734  *
735  * The tlist for an Append plan isn't important as far as the Append is
736  * concerned, but we must make it look real anyway for the benefit of the
737  * next plan level up.
738  */
739  tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
740  tlist_list, refnames_tlist);
741  *pTargetList = tlist;
742 
743  /* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
744  try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
745 
746  if (try_sorted)
747  {
748  /* Identify the grouping semantics */
749  groupList = generate_setop_grouplist(op, tlist);
750 
751  /* Determine the pathkeys for sorting by the whole target list */
752  union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
753 
754  root->query_pathkeys = union_pathkeys;
755  }
756 
757  /*
758  * Now that we've got the append target list, we can build the union child
759  * paths.
760  */
761  forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
762  {
763  RelOptInfo *rel = lfirst(lc);
764  bool trivial_tlist = lfirst_int(lc2);
765  List *child_tlist = lfirst_node(List, lc3);
766 
767  /* only build paths for the union children */
768  if (rel->rtekind == RTE_SUBQUERY)
769  build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
770  union_pathkeys, NULL);
771  }
772 
773  /* Build path lists and relid set. */
774  foreach(lc, rellist)
775  {
776  RelOptInfo *rel = lfirst(lc);
777  Path *ordered_path;
778 
779  cheapest_pathlist = lappend(cheapest_pathlist,
780  rel->cheapest_total_path);
781 
782  if (try_sorted)
783  {
784  ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
785  union_pathkeys,
786  NULL,
787  TOTAL_COST,
788  false);
789 
790  if (ordered_path != NULL)
791  ordered_pathlist = lappend(ordered_pathlist, ordered_path);
792  else
793  {
794  /*
795  * If we can't find a sorted path, just give up trying to
796  * generate a list of correctly sorted child paths. This can
797  * happen when type coercion was added to the targetlist due
798  * to mismatching types from the union children.
799  */
800  try_sorted = false;
801  }
802  }
803 
804  if (consider_parallel)
805  {
806  if (!rel->consider_parallel)
807  {
808  consider_parallel = false;
809  partial_paths_valid = false;
810  }
811  else if (rel->partial_pathlist == NIL)
812  partial_paths_valid = false;
813  else
814  partial_pathlist = lappend(partial_pathlist,
815  linitial(rel->partial_pathlist));
816  }
817 
818  relids = bms_union(relids, rel->relids);
819  }
820 
821  /* Build result relation. */
822  result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
823  result_rel->reltarget = create_pathtarget(root, tlist);
824  result_rel->consider_parallel = consider_parallel;
825  result_rel->consider_startup = (root->tuple_fraction > 0);
826 
827  /*
828  * Append the child results together using the cheapest paths from each
829  * union child.
830  */
831  apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
832  NIL, NIL, NULL, 0, false, -1);
833 
834  /*
835  * Estimate number of groups. For now we just assume the output is unique
836  * --- this is certainly true for the UNION case, and we want worst-case
837  * estimates anyway.
838  */
839  result_rel->rows = apath->rows;
840 
841  /*
842  * Now consider doing the same thing using the partial paths plus Append
843  * plus Gather.
844  */
845  if (partial_paths_valid)
846  {
847  Path *papath;
848  int parallel_workers = 0;
849 
850  /* Find the highest number of workers requested for any subpath. */
851  foreach(lc, partial_pathlist)
852  {
853  Path *subpath = lfirst(lc);
854 
855  parallel_workers = Max(parallel_workers,
856  subpath->parallel_workers);
857  }
858  Assert(parallel_workers > 0);
859 
860  /*
861  * If the use of parallel append is permitted, always request at least
862  * log2(# of children) paths. We assume it can be useful to have
863  * extra workers in this case because they will be spread out across
864  * the children. The precise formula is just a guess; see
865  * add_paths_to_append_rel.
866  */
868  {
869  parallel_workers = Max(parallel_workers,
870  pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
871  parallel_workers = Min(parallel_workers,
873  }
874  Assert(parallel_workers > 0);
875 
876  papath = (Path *)
877  create_append_path(root, result_rel, NIL, partial_pathlist,
878  NIL, NULL, parallel_workers,
880  gpath = (Path *)
881  create_gather_path(root, result_rel, papath,
882  result_rel->reltarget, NULL, NULL);
883  }
884 
885  if (!op->all)
886  {
887  double dNumGroups;
888  bool can_sort = grouping_is_sortable(groupList);
889  bool can_hash = grouping_is_hashable(groupList);
890 
891  /*
892  * XXX for the moment, take the number of distinct groups as equal to
893  * the total input size, i.e., the worst case. This is too
894  * conservative, but it's not clear how to get a decent estimate of
895  * the true size. One should note as well the propensity of novices
896  * to write UNION rather than UNION ALL even when they don't expect
897  * any duplicates...
898  */
899  dNumGroups = apath->rows;
900 
901  if (can_hash)
902  {
903  Path *path;
904 
905  /*
906  * Try a hash aggregate plan on 'apath'. This is the cheapest
907  * available path containing each append child.
908  */
909  path = (Path *) create_agg_path(root,
910  result_rel,
911  apath,
912  create_pathtarget(root, tlist),
913  AGG_HASHED,
915  groupList,
916  NIL,
917  NULL,
918  dNumGroups);
919  add_path(result_rel, path);
920 
921  /* Try hash aggregate on the Gather path, if valid */
922  if (gpath != NULL)
923  {
924  /* Hashed aggregate plan --- no sort needed */
925  path = (Path *) create_agg_path(root,
926  result_rel,
927  gpath,
928  create_pathtarget(root, tlist),
929  AGG_HASHED,
931  groupList,
932  NIL,
933  NULL,
934  dNumGroups);
935  add_path(result_rel, path);
936  }
937  }
938 
939  if (can_sort)
940  {
941  Path *path = apath;
942 
943  /* Try Sort -> Unique on the Append path */
944  if (groupList != NIL)
945  path = (Path *) create_sort_path(root, result_rel, path,
946  make_pathkeys_for_sortclauses(root, groupList, tlist),
947  -1.0);
948 
950  result_rel,
951  path,
952  list_length(path->pathkeys),
953  dNumGroups);
954 
955  add_path(result_rel, path);
956 
957  /* Try Sort -> Unique on the Gather path, if set */
958  if (gpath != NULL)
959  {
960  path = gpath;
961 
962  path = (Path *) create_sort_path(root, result_rel, path,
963  make_pathkeys_for_sortclauses(root, groupList, tlist),
964  -1.0);
965 
967  result_rel,
968  path,
969  list_length(path->pathkeys),
970  dNumGroups);
971  add_path(result_rel, path);
972  }
973  }
974 
975  /*
976  * Try making a MergeAppend path if we managed to find a path with the
977  * correct pathkeys in each union child query.
978  */
979  if (try_sorted && groupList != NIL)
980  {
981  Path *path;
982 
984  result_rel,
985  ordered_pathlist,
986  union_pathkeys,
987  NULL);
988 
989  /* and make the MergeAppend unique */
991  result_rel,
992  path,
993  list_length(tlist),
994  dNumGroups);
995 
996  add_path(result_rel, path);
997  }
998  }
999  else
1000  {
1001  /* UNION ALL */
1002  add_path(result_rel, apath);
1003 
1004  if (gpath != NULL)
1005  add_path(result_rel, gpath);
1006  }
1007 
1008  return result_rel;
1009 }
1010 
1011 /*
1012  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1013  */
1014 static RelOptInfo *
1016  List *refnames_tlist,
1017  List **pTargetList)
1018 {
1019  RelOptInfo *result_rel;
1020  RelOptInfo *lrel,
1021  *rrel;
1022  double save_fraction = root->tuple_fraction;
1023  Path *lpath,
1024  *rpath,
1025  *path;
1026  List *lpath_tlist,
1027  *rpath_tlist,
1028  *tlist_list,
1029  *tlist,
1030  *groupList,
1031  *pathlist;
1032  bool lpath_trivial_tlist,
1033  rpath_trivial_tlist;
1034  double dLeftGroups,
1035  dRightGroups,
1036  dNumGroups,
1037  dNumOutputRows;
1038  bool use_hash;
1039  SetOpCmd cmd;
1040  int firstFlag;
1041 
1042  /*
1043  * Tell children to fetch all tuples.
1044  */
1045  root->tuple_fraction = 0.0;
1046 
1047  /* Recurse on children, ensuring their outputs are marked */
1048  lrel = recurse_set_operations(op->larg, root,
1049  op->colTypes, op->colCollations,
1050  false, 0,
1051  refnames_tlist,
1052  &lpath_tlist,
1053  &lpath_trivial_tlist);
1054  if (lrel->rtekind == RTE_SUBQUERY)
1055  build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1056  NIL, &dLeftGroups);
1057  else
1058  dLeftGroups = lrel->rows;
1059 
1060  lpath = lrel->cheapest_total_path;
1061  rrel = recurse_set_operations(op->rarg, root,
1062  op->colTypes, op->colCollations,
1063  false, 1,
1064  refnames_tlist,
1065  &rpath_tlist,
1066  &rpath_trivial_tlist);
1067  if (rrel->rtekind == RTE_SUBQUERY)
1068  build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1069  NIL, &dRightGroups);
1070  else
1071  dRightGroups = rrel->rows;
1072 
1073  rpath = rrel->cheapest_total_path;
1074 
1075  /* Undo effects of forcing tuple_fraction to 0 */
1076  root->tuple_fraction = save_fraction;
1077 
1078  /*
1079  * For EXCEPT, we must put the left input first. For INTERSECT, either
1080  * order should give the same results, and we prefer to put the smaller
1081  * input first in order to minimize the size of the hash table in the
1082  * hashing case. "Smaller" means the one with the fewer groups.
1083  */
1084  if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
1085  {
1086  pathlist = list_make2(lpath, rpath);
1087  tlist_list = list_make2(lpath_tlist, rpath_tlist);
1088  firstFlag = 0;
1089  }
1090  else
1091  {
1092  pathlist = list_make2(rpath, lpath);
1093  tlist_list = list_make2(rpath_tlist, lpath_tlist);
1094  firstFlag = 1;
1095  }
1096 
1097  /*
1098  * Generate tlist for Append plan node.
1099  *
1100  * The tlist for an Append plan isn't important as far as the Append is
1101  * concerned, but we must make it look real anyway for the benefit of the
1102  * next plan level up. In fact, it has to be real enough that the flag
1103  * column is shown as a variable not a constant, else setrefs.c will get
1104  * confused.
1105  */
1106  tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
1107  tlist_list, refnames_tlist);
1108 
1109  *pTargetList = tlist;
1110 
1111  /* Build result relation. */
1112  result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1113  bms_union(lrel->relids, rrel->relids));
1114  result_rel->reltarget = create_pathtarget(root, tlist);
1115 
1116  /*
1117  * Append the child results together.
1118  */
1119  path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
1120  NIL, NULL, 0, false, -1);
1121 
1122  /* Identify the grouping semantics */
1123  groupList = generate_setop_grouplist(op, tlist);
1124 
1125  /*
1126  * Estimate number of distinct groups that we'll need hashtable entries
1127  * for; this is the size of the left-hand input for EXCEPT, or the smaller
1128  * input for INTERSECT. Also estimate the number of eventual output rows.
1129  * In non-ALL cases, we estimate each group produces one output row; in
1130  * ALL cases use the relevant relation size. These are worst-case
1131  * estimates, of course, but we need to be conservative.
1132  */
1133  if (op->op == SETOP_EXCEPT)
1134  {
1135  dNumGroups = dLeftGroups;
1136  dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1137  }
1138  else
1139  {
1140  dNumGroups = Min(dLeftGroups, dRightGroups);
1141  dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1142  }
1143 
1144  /*
1145  * Decide whether to hash or sort, and add a sort node if needed.
1146  */
1147  use_hash = choose_hashed_setop(root, groupList, path,
1148  dNumGroups, dNumOutputRows,
1149  (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
1150 
1151  if (groupList && !use_hash)
1152  path = (Path *) create_sort_path(root,
1153  result_rel,
1154  path,
1156  groupList,
1157  tlist),
1158  -1.0);
1159 
1160  /*
1161  * Finally, add a SetOp path node to generate the correct output.
1162  */
1163  switch (op->op)
1164  {
1165  case SETOP_INTERSECT:
1167  break;
1168  case SETOP_EXCEPT:
1169  cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1170  break;
1171  default:
1172  elog(ERROR, "unrecognized set op: %d", (int) op->op);
1173  cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1174  break;
1175  }
1176  path = (Path *) create_setop_path(root,
1177  result_rel,
1178  path,
1179  cmd,
1180  use_hash ? SETOP_HASHED : SETOP_SORTED,
1181  groupList,
1182  list_length(op->colTypes) + 1,
1183  use_hash ? firstFlag : -1,
1184  dNumGroups,
1185  dNumOutputRows);
1186 
1187  result_rel->rows = path->rows;
1188  add_path(result_rel, path);
1189  return result_rel;
1190 }
1191 
1192 /*
1193  * Pull up children of a UNION node that are identically-propertied UNIONs.
1194  *
1195  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1196  * output rows will be lost anyway.
1197  *
1198  * NOTE: currently, we ignore collations while determining if a child has
1199  * the same properties. This is semantically sound only so long as all
1200  * collations have the same notion of equality. It is valid from an
1201  * implementation standpoint because we don't care about the ordering of
1202  * a UNION child's result: UNION ALL results are always unordered, and
1203  * generate_union_paths will force a fresh sort if the top level is a UNION.
1204  */
1205 static List *
1207  SetOperationStmt *top_union,
1208  List *refnames_tlist,
1209  List **tlist_list,
1210  List **istrivial_tlist)
1211 {
1212  List *pending_rels = list_make1(top_union);
1213  List *result = NIL;
1214  List *child_tlist;
1215  bool trivial_tlist;
1216 
1217  *tlist_list = NIL;
1218  *istrivial_tlist = NIL;
1219 
1220  while (pending_rels != NIL)
1221  {
1222  Node *setOp = linitial(pending_rels);
1223 
1224  pending_rels = list_delete_first(pending_rels);
1225 
1226  if (IsA(setOp, SetOperationStmt))
1227  {
1228  SetOperationStmt *op = (SetOperationStmt *) setOp;
1229 
1230  if (op->op == top_union->op &&
1231  (op->all == top_union->all || op->all) &&
1232  equal(op->colTypes, top_union->colTypes))
1233  {
1234  /* Same UNION, so fold children into parent */
1235  pending_rels = lcons(op->rarg, pending_rels);
1236  pending_rels = lcons(op->larg, pending_rels);
1237  continue;
1238  }
1239  }
1240 
1241  /*
1242  * Not same, so plan this child separately.
1243  *
1244  * Note we disallow any resjunk columns in child results. This is
1245  * necessary since the Append node that implements the union won't do
1246  * any projection, and upper levels will get confused if some of our
1247  * output tuples have junk and some don't. This case only arises when
1248  * we have an EXCEPT or INTERSECT as child, else there won't be
1249  * resjunk anyway.
1250  */
1251  result = lappend(result, recurse_set_operations(setOp, root,
1252  top_union->colTypes,
1253  top_union->colCollations,
1254  false, -1,
1255  refnames_tlist,
1256  &child_tlist,
1257  &trivial_tlist));
1258  *tlist_list = lappend(*tlist_list, child_tlist);
1259  *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1260  }
1261 
1262  return result;
1263 }
1264 
1265 /*
1266  * postprocess_setop_rel - perform steps required after adding paths
1267  */
1268 static void
1270 {
1271  /*
1272  * We don't currently worry about allowing FDWs to contribute paths to
1273  * this relation, but give extensions a chance.
1274  */
1276  (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1277  NULL, rel, NULL);
1278 
1279  /* Select cheapest path */
1280  set_cheapest(rel);
1281 }
1282 
1283 /*
1284  * choose_hashed_setop - should we use hashing for a set operation?
1285  */
1286 static bool
1288  Path *input_path,
1289  double dNumGroups, double dNumOutputRows,
1290  const char *construct)
1291 {
1292  int numGroupCols = list_length(groupClauses);
1293  Size hash_mem_limit = get_hash_memory_limit();
1294  bool can_sort;
1295  bool can_hash;
1296  Size hashentrysize;
1297  Path hashed_p;
1298  Path sorted_p;
1299  double tuple_fraction;
1300 
1301  /* Check whether the operators support sorting or hashing */
1302  can_sort = grouping_is_sortable(groupClauses);
1303  can_hash = grouping_is_hashable(groupClauses);
1304  if (can_hash && can_sort)
1305  {
1306  /* we have a meaningful choice to make, continue ... */
1307  }
1308  else if (can_hash)
1309  return true;
1310  else if (can_sort)
1311  return false;
1312  else
1313  ereport(ERROR,
1314  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1315  /* translator: %s is UNION, INTERSECT, or EXCEPT */
1316  errmsg("could not implement %s", construct),
1317  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1318 
1319  /* Prefer sorting when enable_hashagg is off */
1320  if (!enable_hashagg)
1321  return false;
1322 
1323  /*
1324  * Don't do it if it doesn't look like the hashtable will fit into
1325  * hash_mem.
1326  */
1327  hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1328 
1329  if (hashentrysize * dNumGroups > hash_mem_limit)
1330  return false;
1331 
1332  /*
1333  * See if the estimated cost is no more than doing it the other way.
1334  *
1335  * We need to consider input_plan + hashagg versus input_plan + sort +
1336  * group. Note that the actual result plan might involve a SetOp or
1337  * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1338  * should be close enough for our purposes here.
1339  *
1340  * These path variables are dummies that just hold cost fields; we don't
1341  * make actual Paths for these steps.
1342  */
1343  cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1344  numGroupCols, dNumGroups,
1345  NIL,
1346  input_path->startup_cost, input_path->total_cost,
1347  input_path->rows, input_path->pathtarget->width);
1348 
1349  /*
1350  * Now for the sorted case. Note that the input is *always* unsorted,
1351  * since it was made by appending unrelated sub-relations together.
1352  */
1353  sorted_p.startup_cost = input_path->startup_cost;
1354  sorted_p.total_cost = input_path->total_cost;
1355  /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1356  cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1357  input_path->rows, input_path->pathtarget->width,
1358  0.0, work_mem, -1.0);
1359  cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1360  NIL,
1361  sorted_p.startup_cost, sorted_p.total_cost,
1362  input_path->rows);
1363 
1364  /*
1365  * Now make the decision using the top-level tuple fraction. First we
1366  * have to convert an absolute count (LIMIT) into fractional form.
1367  */
1368  tuple_fraction = root->tuple_fraction;
1369  if (tuple_fraction >= 1.0)
1370  tuple_fraction /= dNumOutputRows;
1371 
1372  if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1373  tuple_fraction) < 0)
1374  {
1375  /* Hashed is cheaper, so use it */
1376  return true;
1377  }
1378  return false;
1379 }
1380 
1381 /*
1382  * Generate targetlist for a set-operation plan node
1383  *
1384  * colTypes: OID list of set-op's result column datatypes
1385  * colCollations: OID list of set-op's result column collations
1386  * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1387  * varno: varno to use in generated Vars
1388  * hack_constants: true to copy up constants (see comments in code)
1389  * input_tlist: targetlist of this node's input node
1390  * refnames_tlist: targetlist to take column names from
1391  * trivial_tlist: output parameter, set to true if targetlist is trivial
1392  */
1393 static List *
1394 generate_setop_tlist(List *colTypes, List *colCollations,
1395  int flag,
1396  Index varno,
1397  bool hack_constants,
1398  List *input_tlist,
1399  List *refnames_tlist,
1400  bool *trivial_tlist)
1401 {
1402  List *tlist = NIL;
1403  int resno = 1;
1404  ListCell *ctlc,
1405  *cclc,
1406  *itlc,
1407  *rtlc;
1408  TargetEntry *tle;
1409  Node *expr;
1410 
1411  *trivial_tlist = true; /* until proven differently */
1412 
1413  forfour(ctlc, colTypes, cclc, colCollations,
1414  itlc, input_tlist, rtlc, refnames_tlist)
1415  {
1416  Oid colType = lfirst_oid(ctlc);
1417  Oid colColl = lfirst_oid(cclc);
1418  TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1419  TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1420 
1421  Assert(inputtle->resno == resno);
1422  Assert(reftle->resno == resno);
1423  Assert(!inputtle->resjunk);
1424  Assert(!reftle->resjunk);
1425 
1426  /*
1427  * Generate columns referencing input columns and having appropriate
1428  * data types and column names. Insert datatype coercions where
1429  * necessary.
1430  *
1431  * HACK: constants in the input's targetlist are copied up as-is
1432  * rather than being referenced as subquery outputs. This is mainly
1433  * to ensure that when we try to coerce them to the output column's
1434  * datatype, the right things happen for UNKNOWN constants. But do
1435  * this only at the first level of subquery-scan plans; we don't want
1436  * phony constants appearing in the output tlists of upper-level
1437  * nodes!
1438  *
1439  * Note that copying a constant doesn't in itself require us to mark
1440  * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1441  */
1442  if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1443  expr = (Node *) inputtle->expr;
1444  else
1445  expr = (Node *) makeVar(varno,
1446  inputtle->resno,
1447  exprType((Node *) inputtle->expr),
1448  exprTypmod((Node *) inputtle->expr),
1449  exprCollation((Node *) inputtle->expr),
1450  0);
1451 
1452  if (exprType(expr) != colType)
1453  {
1454  /*
1455  * Note: it's not really cool to be applying coerce_to_common_type
1456  * here; one notable point is that assign_expr_collations never
1457  * gets run on any generated nodes. For the moment that's not a
1458  * problem because we force the correct exposed collation below.
1459  * It would likely be best to make the parser generate the correct
1460  * output tlist for every set-op to begin with, though.
1461  */
1462  expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1463  expr,
1464  colType,
1465  "UNION/INTERSECT/EXCEPT");
1466  *trivial_tlist = false; /* the coercion makes it not trivial */
1467  }
1468 
1469  /*
1470  * Ensure the tlist entry's exposed collation matches the set-op. This
1471  * is necessary because plan_set_operations() reports the result
1472  * ordering as a list of SortGroupClauses, which don't carry collation
1473  * themselves but just refer to tlist entries. If we don't show the
1474  * right collation then planner.c might do the wrong thing in
1475  * higher-level queries.
1476  *
1477  * Note we use RelabelType, not CollateExpr, since this expression
1478  * will reach the executor without any further processing.
1479  */
1480  if (exprCollation(expr) != colColl)
1481  {
1482  expr = applyRelabelType(expr,
1483  exprType(expr), exprTypmod(expr), colColl,
1484  COERCE_IMPLICIT_CAST, -1, false);
1485  *trivial_tlist = false; /* the relabel makes it not trivial */
1486  }
1487 
1488  tle = makeTargetEntry((Expr *) expr,
1489  (AttrNumber) resno++,
1490  pstrdup(reftle->resname),
1491  false);
1492 
1493  /*
1494  * By convention, all non-resjunk columns in a setop tree have
1495  * ressortgroupref equal to their resno. In some cases the ref isn't
1496  * needed, but this is a cleaner way than modifying the tlist later.
1497  */
1498  tle->ressortgroupref = tle->resno;
1499 
1500  tlist = lappend(tlist, tle);
1501  }
1502 
1503  if (flag >= 0)
1504  {
1505  /* Add a resjunk flag column */
1506  /* flag value is the given constant */
1507  expr = (Node *) makeConst(INT4OID,
1508  -1,
1509  InvalidOid,
1510  sizeof(int32),
1512  false,
1513  true);
1514  tle = makeTargetEntry((Expr *) expr,
1515  (AttrNumber) resno++,
1516  pstrdup("flag"),
1517  true);
1518  tlist = lappend(tlist, tle);
1519  *trivial_tlist = false; /* the extra entry makes it not trivial */
1520  }
1521 
1522  return tlist;
1523 }
1524 
1525 /*
1526  * Generate targetlist for a set-operation Append node
1527  *
1528  * colTypes: OID list of set-op's result column datatypes
1529  * colCollations: OID list of set-op's result column collations
1530  * flag: true to create a flag column copied up from subplans
1531  * input_tlists: list of tlists for sub-plans of the Append
1532  * refnames_tlist: targetlist to take column names from
1533  *
1534  * The entries in the Append's targetlist should always be simple Vars;
1535  * we just have to make sure they have the right datatypes/typmods/collations.
1536  * The Vars are always generated with varno 0.
1537  *
1538  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1539  * cannot figure out a realistic width for the tlist we make here. But we
1540  * ought to refactor this code to produce a PathTarget directly, anyway.
1541  */
1542 static List *
1543 generate_append_tlist(List *colTypes, List *colCollations,
1544  bool flag,
1545  List *input_tlists,
1546  List *refnames_tlist)
1547 {
1548  List *tlist = NIL;
1549  int resno = 1;
1550  ListCell *curColType;
1551  ListCell *curColCollation;
1552  ListCell *ref_tl_item;
1553  int colindex;
1554  TargetEntry *tle;
1555  Node *expr;
1556  ListCell *tlistl;
1557  int32 *colTypmods;
1558 
1559  /*
1560  * First extract typmods to use.
1561  *
1562  * If the inputs all agree on type and typmod of a particular column, use
1563  * that typmod; else use -1.
1564  */
1565  colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1566 
1567  foreach(tlistl, input_tlists)
1568  {
1569  List *subtlist = (List *) lfirst(tlistl);
1570  ListCell *subtlistl;
1571 
1572  curColType = list_head(colTypes);
1573  colindex = 0;
1574  foreach(subtlistl, subtlist)
1575  {
1576  TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1577 
1578  if (subtle->resjunk)
1579  continue;
1580  Assert(curColType != NULL);
1581  if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1582  {
1583  /* If first subplan, copy the typmod; else compare */
1584  int32 subtypmod = exprTypmod((Node *) subtle->expr);
1585 
1586  if (tlistl == list_head(input_tlists))
1587  colTypmods[colindex] = subtypmod;
1588  else if (subtypmod != colTypmods[colindex])
1589  colTypmods[colindex] = -1;
1590  }
1591  else
1592  {
1593  /* types disagree, so force typmod to -1 */
1594  colTypmods[colindex] = -1;
1595  }
1596  curColType = lnext(colTypes, curColType);
1597  colindex++;
1598  }
1599  Assert(curColType == NULL);
1600  }
1601 
1602  /*
1603  * Now we can build the tlist for the Append.
1604  */
1605  colindex = 0;
1606  forthree(curColType, colTypes, curColCollation, colCollations,
1607  ref_tl_item, refnames_tlist)
1608  {
1609  Oid colType = lfirst_oid(curColType);
1610  int32 colTypmod = colTypmods[colindex++];
1611  Oid colColl = lfirst_oid(curColCollation);
1612  TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1613 
1614  Assert(reftle->resno == resno);
1615  Assert(!reftle->resjunk);
1616  expr = (Node *) makeVar(0,
1617  resno,
1618  colType,
1619  colTypmod,
1620  colColl,
1621  0);
1622  tle = makeTargetEntry((Expr *) expr,
1623  (AttrNumber) resno++,
1624  pstrdup(reftle->resname),
1625  false);
1626 
1627  /*
1628  * By convention, all non-resjunk columns in a setop tree have
1629  * ressortgroupref equal to their resno. In some cases the ref isn't
1630  * needed, but this is a cleaner way than modifying the tlist later.
1631  */
1632  tle->ressortgroupref = tle->resno;
1633 
1634  tlist = lappend(tlist, tle);
1635  }
1636 
1637  if (flag)
1638  {
1639  /* Add a resjunk flag column */
1640  /* flag value is shown as copied up from subplan */
1641  expr = (Node *) makeVar(0,
1642  resno,
1643  INT4OID,
1644  -1,
1645  InvalidOid,
1646  0);
1647  tle = makeTargetEntry((Expr *) expr,
1648  (AttrNumber) resno++,
1649  pstrdup("flag"),
1650  true);
1651  tlist = lappend(tlist, tle);
1652  }
1653 
1654  pfree(colTypmods);
1655 
1656  return tlist;
1657 }
1658 
1659 /*
1660  * generate_setop_grouplist
1661  * Build a SortGroupClause list defining the sort/grouping properties
1662  * of the setop's output columns.
1663  *
1664  * Parse analysis already determined the properties and built a suitable
1665  * list, except that the entries do not have sortgrouprefs set because
1666  * the parser output representation doesn't include a tlist for each
1667  * setop. So what we need to do here is copy that list and install
1668  * proper sortgrouprefs into it (copying those from the targetlist).
1669  */
1670 static List *
1672 {
1673  List *grouplist = copyObject(op->groupClauses);
1674  ListCell *lg;
1675  ListCell *lt;
1676 
1677  lg = list_head(grouplist);
1678  foreach(lt, targetlist)
1679  {
1680  TargetEntry *tle = (TargetEntry *) lfirst(lt);
1681  SortGroupClause *sgc;
1682 
1683  if (tle->resjunk)
1684  {
1685  /* resjunk columns should not have sortgrouprefs */
1686  Assert(tle->ressortgroupref == 0);
1687  continue; /* ignore resjunk columns */
1688  }
1689 
1690  /* non-resjunk columns should have sortgroupref = resno */
1691  Assert(tle->ressortgroupref == tle->resno);
1692 
1693  /* non-resjunk columns should have grouping clauses */
1694  Assert(lg != NULL);
1695  sgc = (SortGroupClause *) lfirst(lg);
1696  lg = lnext(grouplist, lg);
1697  Assert(sgc->tleSortGroupRef == 0);
1698 
1699  sgc->tleSortGroupRef = tle->ressortgroupref;
1700  }
1701  Assert(lg == NULL);
1702  return grouplist;
1703 }
int16 AttrNumber
Definition: attnum.h:21
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:251
#define bms_is_empty(a)
Definition: bitmapset.h:118
#define Min(x, y)
Definition: c.h:1004
#define MAXALIGN(LEN)
Definition: c.h:811
signed int int32
Definition: c.h:494
#define Max(x, y)
Definition: c.h:998
#define Assert(condition)
Definition: c.h:858
unsigned int Index
Definition: c.h:614
size_t Size
Definition: c.h:605
int max_parallel_workers_per_gather
Definition: costsize.c:132
void cost_agg(Path *path, PlannerInfo *root, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, double numGroups, List *quals, Cost input_startup_cost, Cost input_total_cost, double input_tuples, double input_width)
Definition: costsize.c:2650
void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
Definition: costsize.c:5792
bool enable_parallel_append
Definition: costsize.c:150
bool enable_hashagg
Definition: costsize.c:141
void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, Cost input_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples)
Definition: costsize.c:2124
void cost_group(Path *path, PlannerInfo *root, int numGroupCols, double numGroups, List *quals, Cost input_startup_cost, Cost input_total_cost, double input_tuples)
Definition: costsize.c:3163
bool enable_incremental_sort
Definition: costsize.c:140
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:857
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
void add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel, List *child_tlist, List *setop_pathkeys)
Definition: equivclass.c:2899
int work_mem
Definition: globals.c:128
#define SizeofMinimalTupleHeader
Definition: htup_details.h:647
List * lappend(List *list, void *datum)
Definition: list.c:339
List * lappend_int(List *list, int datum)
Definition: list.c:357
List * list_delete_first(List *list)
Definition: list.c:943
List * lcons(void *datum, List *list)
Definition: list.c:495
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:310
TargetEntry * makeTargetEntry(Expr *expr, AttrNumber resno, char *resname, bool resjunk)
Definition: makefuncs.c:240
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:301
Var * makeVar(int varno, AttrNumber varattno, Oid vartype, int32 vartypmod, Oid varcollid, Index varlevelsup)
Definition: makefuncs.c:66
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:298
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816
Node * applyRelabelType(Node *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat, int rlocation, bool overwrite_ok)
Definition: nodeFuncs.c:631
size_t get_hash_memory_limit(void)
Definition: nodeHash.c:3595
SetOpCmd
Definition: nodes.h:396
@ SETOPCMD_EXCEPT
Definition: nodes.h:399
@ SETOPCMD_EXCEPT_ALL
Definition: nodes.h:400
@ SETOPCMD_INTERSECT_ALL
Definition: nodes.h:398
@ SETOPCMD_INTERSECT
Definition: nodes.h:397
@ SETOP_HASHED
Definition: nodes.h:406
@ SETOP_SORTED
Definition: nodes.h:405
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:224
#define nodeTag(nodeptr)
Definition: nodes.h:133
@ AGG_HASHED
Definition: nodes.h:355
@ AGGSPLIT_SIMPLE
Definition: nodes.h:376
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
Node * coerce_to_common_type(ParseState *pstate, Node *node, Oid targetTypeId, const char *context)
@ SETOP_INTERSECT
Definition: parsenodes.h:2115
@ SETOP_UNION
Definition: parsenodes.h:2114
@ SETOP_EXCEPT
Definition: parsenodes.h:2116
@ RTE_SUBQUERY
Definition: parsenodes.h:1029
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:635
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:573
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1347
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1069
AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, double rows)
Definition: pathnode.c:1244
SortPath * create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, double limit_tuples)
Definition: pathnode.c:3000
ProjectionPath * create_projection_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target)
Definition: pathnode.c:2685
UpperUniquePath * create_upper_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, int numCols, double numGroups)
Definition: pathnode.c:3103
GatherPath * create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, Relids required_outer, double *rows)
Definition: pathnode.c:1972
MergeAppendPath * create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *pathkeys, Relids required_outer)
Definition: pathnode.c:1415
void set_cheapest(RelOptInfo *parent_rel)
Definition: pathnode.c:242
SubqueryScanPath * create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, bool trivial_pathtarget, List *pathkeys, Relids required_outer)
Definition: pathnode.c:2016
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:747
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:115
SetOpPath * create_setop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SetOpCmd cmd, SetOpStrategy strategy, List *distinctList, AttrNumber flagColIdx, int firstFlag, double numGroups, double outputRows)
Definition: pathnode.c:3555
RecursiveUnionPath * create_recursiveunion_path(PlannerInfo *root, RelOptInfo *rel, Path *leftpath, Path *rightpath, PathTarget *target, List *distinctList, int wtParam, double numGroups)
Definition: pathnode.c:3617
IncrementalSortPath * create_incremental_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, int presorted_keys, double limit_tuples)
Definition: pathnode.c:2951
AggPath * create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, AggStrategy aggstrategy, AggSplit aggsplit, List *groupClause, List *qual, const AggClauseCosts *aggcosts, double numGroups)
Definition: pathnode.c:3155
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:420
Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, PathTarget *target)
Definition: pathnode.c:2793
@ TOTAL_COST
Definition: pathnodes.h:38
@ UPPERREL_SETOP
Definition: pathnodes.h:71
@ UPPERREL_FINAL
Definition: pathnodes.h:79
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:41
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
#define list_make1(x1)
Definition: pg_list.h:212
#define forthree(cell1, list1, cell2, list2, cell3, list3)
Definition: pg_list.h:563
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define linitial(l)
Definition: pg_list.h:178
#define forfour(cell1, list1, cell2, list2, cell3, list3, cell4, list4)
Definition: pg_list.h:575
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define list_make2(x1, x2)
Definition: pg_list.h:214
PlannerInfo * subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, bool hasRecursion, double tuple_fraction, SetOperationStmt *setops)
Definition: planner.c:628
create_upper_paths_hook_type create_upper_paths_hook
Definition: planner.c:74
void check_stack_depth(void)
Definition: postgres.c:3531
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static List * generate_setop_tlist(List *colTypes, List *colCollations, int flag, Index varno, bool hack_constants, List *input_tlist, List *refnames_tlist, bool *trivial_tlist)
Definition: prepunion.c:1394
static List * generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
Definition: prepunion.c:1671
static RelOptInfo * generate_union_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:696
static RelOptInfo * generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:1015
static RelOptInfo * recurse_set_operations(Node *setOp, PlannerInfo *root, List *colTypes, List *colCollations, bool junkOK, int flag, List *refnames_tlist, List **pTargetList, bool *istrivial_tlist)
Definition: prepunion.c:230
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
Definition: prepunion.c:1269
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, Path *input_path, double dNumGroups, double dNumOutputRows, const char *construct)
Definition: prepunion.c:1287
static List * generate_append_tlist(List *colTypes, List *colCollations, bool flag, List *input_tlists, List *refnames_tlist)
Definition: prepunion.c:1543
static RelOptInfo * generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:384
bool set_operation_ordered_results_useful(SetOperationStmt *setop)
Definition: prepunion.c:188
RelOptInfo * plan_set_operations(PlannerInfo *root)
Definition: prepunion.c:99
static List * plan_union_children(PlannerInfo *root, SetOperationStmt *top_union, List *refnames_tlist, List **tlist_list, List **istrivial_tlist)
Definition: prepunion.c:1206
static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, bool trivial_tlist, List *child_tlist, List *interesting_pathkeys, double *pNumGroups)
Definition: prepunion.c:504
@ COERCE_IMPLICIT_CAST
Definition: primnodes.h:736
tree ctl root
Definition: radixtree.h:1884
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
void setup_simple_rel_arrays(PlannerInfo *root)
Definition: relnode.c:94
RelOptInfo * build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
Definition: relnode.c:192
RelOptInfo * fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
Definition: relnode.c:1470
double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
Definition: selfuncs.c:3416
Definition: pg_list.h:54
Definition: nodes.h:129
List * pathkeys
Definition: pathnodes.h:1654
Cardinality rows
Definition: pathnodes.h:1649
Cost startup_cost
Definition: pathnodes.h:1650
Cost total_cost
Definition: pathnodes.h:1651
List * processed_tlist
Definition: pathnodes.h:458
bool hasHavingQual
Definition: pathnodes.h:498
Query * parse
Definition: pathnodes.h:202
Cardinality limit_tuples
Definition: pathnodes.h:485
List * setop_pathkeys
Definition: pathnodes.h:404
List * groupClause
Definition: parsenodes.h:200
List * targetList
Definition: parsenodes.h:191
List * groupingSets
Definition: parsenodes.h:203
List * distinctClause
Definition: parsenodes.h:209
Query * subquery
Definition: parsenodes.h:1114
Relids relids
Definition: pathnodes.h:861
struct PathTarget * reltarget
Definition: pathnodes.h:883
bool consider_parallel
Definition: pathnodes.h:877
Relids lateral_relids
Definition: pathnodes.h:903
List * pathlist
Definition: pathnodes.h:888
struct Path * cheapest_total_path
Definition: pathnodes.h:892
bool consider_startup
Definition: pathnodes.h:873
List * partial_pathlist
Definition: pathnodes.h:890
PlannerInfo * subroot
Definition: pathnodes.h:943
Cardinality rows
Definition: pathnodes.h:867
RTEKind rtekind
Definition: pathnodes.h:912
SetOperation op
Definition: parsenodes.h:2191
Index tleSortGroupRef
Definition: parsenodes.h:1442
Expr * expr
Definition: primnodes.h:2192
AttrNumber resno
Definition: primnodes.h:2194
Index ressortgroupref
Definition: primnodes.h:2198
char * flag(int b)
Definition: test-ctype.c:33
bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
Definition: tlist.c:282
List * get_tlist_exprs(List *tlist, bool includeJunk)
Definition: tlist.c:163
bool grouping_is_sortable(List *groupClause)
Definition: tlist.c:540
List * make_tlist_from_pathtarget(PathTarget *target)
Definition: tlist.c:624
bool grouping_is_hashable(List *groupClause)
Definition: tlist.c:560
bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
Definition: tlist.c:248
#define create_pathtarget(root, tlist)
Definition: tlist.h:53