PostgreSQL Source Code  git master
prepunion.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * prepunion.c
4  * Routines to plan set-operation queries. The filename is a leftover
5  * from a time when only UNIONs were implemented.
6  *
7  * There are two code paths in the planner for set-operation queries.
8  * If a subquery consists entirely of simple UNION ALL operations, it
9  * is converted into an "append relation". Otherwise, it is handled
10  * by the general code in this module (plan_set_operations and its
11  * subroutines). There is some support code here for the append-relation
12  * case, but most of the heavy lifting for that is done elsewhere,
13  * notably in prepjointree.c and allpaths.c.
14  *
15  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16  * Portions Copyright (c) 1994, Regents of the University of California
17  *
18  *
19  * IDENTIFICATION
20  * src/backend/optimizer/prep/prepunion.c
21  *
22  *-------------------------------------------------------------------------
23  */
24 #include "postgres.h"
25 
26 #include "access/htup_details.h"
27 #include "catalog/pg_type.h"
28 #include "miscadmin.h"
29 #include "nodes/makefuncs.h"
30 #include "nodes/nodeFuncs.h"
31 #include "optimizer/cost.h"
32 #include "optimizer/pathnode.h"
33 #include "optimizer/paths.h"
34 #include "optimizer/planner.h"
35 #include "optimizer/prep.h"
36 #include "optimizer/tlist.h"
37 #include "parser/parse_coerce.h"
38 #include "utils/selfuncs.h"
39 
40 
42  List *colTypes, List *colCollations,
43  bool junkOK,
44  int flag, List *refnames_tlist,
45  List **pTargetList,
46  bool *istrivial_tlist);
49  List *refnames_tlist,
50  List **pTargetList);
52  bool trivial_tlist, List *child_tlist,
53  List *interesting_pathkeys,
54  double *pNumGroups);
56  List *refnames_tlist,
57  List **pTargetList);
59  List *refnames_tlist,
60  List **pTargetList);
62  SetOperationStmt *top_union,
63  List *refnames_tlist,
64  List **tlist_list,
65  List **istrivial_tlist);
67 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
68  Path *input_path,
69  double dNumGroups, double dNumOutputRows,
70  const char *construct);
71 static List *generate_setop_tlist(List *colTypes, List *colCollations,
72  int flag,
73  Index varno,
74  bool hack_constants,
75  List *input_tlist,
76  List *refnames_tlist,
77  bool *trivial_tlist);
78 static List *generate_append_tlist(List *colTypes, List *colCollations,
79  bool flag,
80  List *input_tlists,
81  List *refnames_tlist);
82 static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
83 
84 
85 /*
86  * plan_set_operations
87  *
88  * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
89  *
90  * This routine only deals with the setOperations tree of the given query.
91  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
92  * when we return to grouping_planner; likewise for LIMIT.
93  *
94  * What we return is an "upperrel" RelOptInfo containing at least one Path
95  * that implements the set-operation tree. In addition, root->processed_tlist
96  * receives a targetlist representing the output of the topmost setop node.
97  */
98 RelOptInfo *
100 {
101  Query *parse = root->parse;
102  SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
103  Node *node;
104  RangeTblEntry *leftmostRTE;
105  Query *leftmostQuery;
106  RelOptInfo *setop_rel;
107  List *top_tlist;
108 
109  Assert(topop);
110 
111  /* check for unsupported stuff */
112  Assert(parse->jointree->fromlist == NIL);
113  Assert(parse->jointree->quals == NULL);
114  Assert(parse->groupClause == NIL);
115  Assert(parse->havingQual == NULL);
116  Assert(parse->windowClause == NIL);
117  Assert(parse->distinctClause == NIL);
118 
119  /*
120  * In the outer query level, equivalence classes are limited to classes
121  * which define that the top-level target entry is equivalent to the
122  * corresponding child target entry. There won't be any equivalence class
123  * merging. Mark that merging is complete to allow us to make pathkeys.
124  */
125  Assert(root->eq_classes == NIL);
126  root->ec_merging_done = true;
127 
128  /*
129  * We'll need to build RelOptInfos for each of the leaf subqueries, which
130  * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
131  * arrays for those, and for AppendRelInfos in case they're needed.
132  */
134 
135  /*
136  * Find the leftmost component Query. We need to use its column names for
137  * all generated tlists (else SELECT INTO won't work right).
138  */
139  node = topop->larg;
140  while (node && IsA(node, SetOperationStmt))
141  node = ((SetOperationStmt *) node)->larg;
142  Assert(node && IsA(node, RangeTblRef));
143  leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
144  leftmostQuery = leftmostRTE->subquery;
145  Assert(leftmostQuery != NULL);
146 
147  /*
148  * If the topmost node is a recursive union, it needs special processing.
149  */
150  if (root->hasRecursion)
151  {
152  setop_rel = generate_recursion_path(topop, root,
153  leftmostQuery->targetList,
154  &top_tlist);
155  }
156  else
157  {
158  bool trivial_tlist;
159 
160  /*
161  * Recurse on setOperations tree to generate paths for set ops. The
162  * final output paths should have just the column types shown as the
163  * output from the top-level node, plus possibly resjunk working
164  * columns (we can rely on upper-level nodes to deal with that).
165  */
166  setop_rel = recurse_set_operations((Node *) topop, root,
167  topop->colTypes, topop->colCollations,
168  true, -1,
169  leftmostQuery->targetList,
170  &top_tlist,
171  &trivial_tlist);
172  }
173 
174  /* Must return the built tlist into root->processed_tlist. */
175  root->processed_tlist = top_tlist;
176 
177  return setop_rel;
178 }
179 
180 /*
181  * set_operation_ordered_results_useful
182  * Return true if the given SetOperationStmt can be executed by utilizing
183  * paths that provide sorted input according to the setop's targetlist.
184  * Returns false when sorted paths are not any more useful then unsorted
185  * ones.
186  */
187 bool
189 {
190  /*
191  * Paths sorted by the targetlist are useful for UNION as we can opt to
192  * MergeAppend the sorted paths then Unique them. Ordered paths are no
193  * more useful than unordered ones for UNION ALL.
194  */
195  if (!setop->all && setop->op == SETOP_UNION)
196  return true;
197 
198  /*
199  * EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
200  * correctly sorted input paths.
201  */
202  return false;
203 }
204 
205 /*
206  * recurse_set_operations
207  * Recursively handle one step in a tree of set operations
208  *
209  * colTypes: OID list of set-op's result column datatypes
210  * colCollations: OID list of set-op's result column collations
211  * junkOK: if true, child resjunk columns may be left in the result
212  * flag: if >= 0, add a resjunk output column indicating value of flag
213  * refnames_tlist: targetlist to take column names from
214  *
215  * Returns a RelOptInfo for the subtree, as well as these output parameters:
216  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
217  * *istrivial_tlist: true if, and only if, datatypes between parent and child
218  * match.
219  *
220  * The pTargetList output parameter is mostly redundant with the pathtarget
221  * of the returned RelOptInfo, but for the moment we need it because much of
222  * the logic in this file depends on flag columns being marked resjunk.
223  * Pending a redesign of how that works, this is the easy way out.
224  *
225  * We don't have to care about typmods here: the only allowed difference
226  * between set-op input and output typmods is input is a specific typmod
227  * and output is -1, and that does not require a coercion.
228  */
229 static RelOptInfo *
231  List *colTypes, List *colCollations,
232  bool junkOK,
233  int flag, List *refnames_tlist,
234  List **pTargetList,
235  bool *istrivial_tlist)
236 {
237  RelOptInfo *rel;
238 
239  *istrivial_tlist = true; /* for now */
240 
241  /* Guard against stack overflow due to overly complex setop nests */
243 
244  if (IsA(setOp, RangeTblRef))
245  {
246  RangeTblRef *rtr = (RangeTblRef *) setOp;
247  RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
248  SetOperationStmt *setops;
249  Query *subquery = rte->subquery;
250  PlannerInfo *subroot;
251  List *tlist;
252  bool trivial_tlist;
253 
254  Assert(subquery != NULL);
255 
256  /* Build a RelOptInfo for this leaf subquery. */
257  rel = build_simple_rel(root, rtr->rtindex, NULL);
258 
259  /* plan_params should not be in use in current query level */
260  Assert(root->plan_params == NIL);
261 
262  /*
263  * Pass the set operation details to the subquery_planner to have it
264  * consider generating Paths correctly ordered for the set operation.
265  */
266  setops = castNode(SetOperationStmt, root->parse->setOperations);
267 
268  /* Generate a subroot and Paths for the subquery */
269  subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
270  false, root->tuple_fraction,
271  setops);
272 
273  /*
274  * It should not be possible for the primitive query to contain any
275  * cross-references to other primitive queries in the setop tree.
276  */
277  if (root->plan_params)
278  elog(ERROR, "unexpected outer reference in set operation subquery");
279 
280  /* Figure out the appropriate target list for this subquery. */
281  tlist = generate_setop_tlist(colTypes, colCollations,
282  flag,
283  rtr->rtindex,
284  true,
285  subroot->processed_tlist,
286  refnames_tlist,
287  &trivial_tlist);
288  rel->reltarget = create_pathtarget(root, tlist);
289 
290  /* Return the fully-fledged tlist to caller, too */
291  *pTargetList = tlist;
292  *istrivial_tlist = trivial_tlist;
293  }
294  else if (IsA(setOp, SetOperationStmt))
295  {
296  SetOperationStmt *op = (SetOperationStmt *) setOp;
297 
298  /* UNIONs are much different from INTERSECT/EXCEPT */
299  if (op->op == SETOP_UNION)
300  rel = generate_union_paths(op, root,
301  refnames_tlist,
302  pTargetList);
303  else
304  rel = generate_nonunion_paths(op, root,
305  refnames_tlist,
306  pTargetList);
307 
308  /*
309  * If necessary, add a Result node to project the caller-requested
310  * output columns.
311  *
312  * XXX you don't really want to know about this: setrefs.c will apply
313  * fix_upper_expr() to the Result node's tlist. This would fail if the
314  * Vars generated by generate_setop_tlist() were not exactly equal()
315  * to the corresponding tlist entries of the subplan. However, since
316  * the subplan was generated by generate_union_paths() or
317  * generate_nonunion_paths(), and hence its tlist was generated by
318  * generate_append_tlist(), this will work. We just tell
319  * generate_setop_tlist() to use varno 0.
320  */
321  if (flag >= 0 ||
322  !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
323  !tlist_same_collations(*pTargetList, colCollations, junkOK))
324  {
325  PathTarget *target;
326  bool trivial_tlist;
327  ListCell *lc;
328 
329  *pTargetList = generate_setop_tlist(colTypes, colCollations,
330  flag,
331  0,
332  false,
333  *pTargetList,
334  refnames_tlist,
335  &trivial_tlist);
336  *istrivial_tlist = trivial_tlist;
337  target = create_pathtarget(root, *pTargetList);
338 
339  /* Apply projection to each path */
340  foreach(lc, rel->pathlist)
341  {
342  Path *subpath = (Path *) lfirst(lc);
343  Path *path;
344 
345  Assert(subpath->param_info == NULL);
346  path = apply_projection_to_path(root, subpath->parent,
347  subpath, target);
348  /* If we had to add a Result, path is different from subpath */
349  if (path != subpath)
350  lfirst(lc) = path;
351  }
352 
353  /* Apply projection to each partial path */
354  foreach(lc, rel->partial_pathlist)
355  {
356  Path *subpath = (Path *) lfirst(lc);
357  Path *path;
358 
359  Assert(subpath->param_info == NULL);
360 
361  /* avoid apply_projection_to_path, in case of multiple refs */
362  path = (Path *) create_projection_path(root, subpath->parent,
363  subpath, target);
364  lfirst(lc) = path;
365  }
366  }
368  }
369  else
370  {
371  elog(ERROR, "unrecognized node type: %d",
372  (int) nodeTag(setOp));
373  *pTargetList = NIL;
374  rel = NULL; /* keep compiler quiet */
375  }
376 
377  return rel;
378 }
379 
380 /*
381  * Generate paths for a recursive UNION node
382  */
383 static RelOptInfo *
385  List *refnames_tlist,
386  List **pTargetList)
387 {
388  RelOptInfo *result_rel;
389  Path *path;
390  RelOptInfo *lrel,
391  *rrel;
392  Path *lpath;
393  Path *rpath;
394  List *lpath_tlist;
395  bool lpath_trivial_tlist;
396  List *rpath_tlist;
397  bool rpath_trivial_tlist;
398  List *tlist;
399  List *groupList;
400  double dNumGroups;
401 
402  /* Parser should have rejected other cases */
403  if (setOp->op != SETOP_UNION)
404  elog(ERROR, "only UNION queries can be recursive");
405  /* Worktable ID should be assigned */
406  Assert(root->wt_param_id >= 0);
407 
408  /*
409  * Unlike a regular UNION node, process the left and right inputs
410  * separately without any intention of combining them into one Append.
411  */
412  lrel = recurse_set_operations(setOp->larg, root,
413  setOp->colTypes, setOp->colCollations,
414  false, -1,
415  refnames_tlist,
416  &lpath_tlist,
417  &lpath_trivial_tlist);
418  if (lrel->rtekind == RTE_SUBQUERY)
419  build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
420  NIL, NULL);
421  lpath = lrel->cheapest_total_path;
422  /* The right path will want to look at the left one ... */
423  root->non_recursive_path = lpath;
424  rrel = recurse_set_operations(setOp->rarg, root,
425  setOp->colTypes, setOp->colCollations,
426  false, -1,
427  refnames_tlist,
428  &rpath_tlist,
429  &rpath_trivial_tlist);
430  if (rrel->rtekind == RTE_SUBQUERY)
431  build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
432  NIL, NULL);
433  rpath = rrel->cheapest_total_path;
434  root->non_recursive_path = NULL;
435 
436  /*
437  * Generate tlist for RecursiveUnion path node --- same as in Append cases
438  */
439  tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
440  list_make2(lpath_tlist, rpath_tlist),
441  refnames_tlist);
442 
443  *pTargetList = tlist;
444 
445  /* Build result relation. */
446  result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
447  bms_union(lrel->relids, rrel->relids));
448  result_rel->reltarget = create_pathtarget(root, tlist);
449 
450  /*
451  * If UNION, identify the grouping operators
452  */
453  if (setOp->all)
454  {
455  groupList = NIL;
456  dNumGroups = 0;
457  }
458  else
459  {
460  /* Identify the grouping semantics */
461  groupList = generate_setop_grouplist(setOp, tlist);
462 
463  /* We only support hashing here */
464  if (!grouping_is_hashable(groupList))
465  ereport(ERROR,
466  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
467  errmsg("could not implement recursive UNION"),
468  errdetail("All column datatypes must be hashable.")));
469 
470  /*
471  * For the moment, take the number of distinct groups as equal to the
472  * total input size, ie, the worst case.
473  */
474  dNumGroups = lpath->rows + rpath->rows * 10;
475  }
476 
477  /*
478  * And make the path node.
479  */
481  result_rel,
482  lpath,
483  rpath,
484  result_rel->reltarget,
485  groupList,
486  root->wt_param_id,
487  dNumGroups);
488 
489  add_path(result_rel, path);
490  postprocess_setop_rel(root, result_rel);
491  return result_rel;
492 }
493 
494 /*
495  * build_setop_child_paths
496  * Build paths for the set op child relation denoted by 'rel'.
497  *
498  * interesting_pathkeys: if not NIL, also include paths that suit these
499  * pathkeys, sorting any unsorted paths as required.
500  * *pNumGroups: if not NULL, we estimate the number of distinct groups
501  * in the result, and store it there.
502  */
503 static void
505  bool trivial_tlist, List *child_tlist,
506  List *interesting_pathkeys, double *pNumGroups)
507 {
508  RelOptInfo *final_rel;
509  List *setop_pathkeys = rel->subroot->setop_pathkeys;
510  ListCell *lc;
511 
512  /* it can't be a set op child rel if it's not a subquery */
513  Assert(rel->rtekind == RTE_SUBQUERY);
514 
515  /* when sorting is needed, add child rel equivalences */
516  if (interesting_pathkeys != NIL)
518  rel,
519  child_tlist,
520  interesting_pathkeys);
521 
522  /*
523  * Mark rel with estimated output rows, width, etc. Note that we have to
524  * do this before generating outer-query paths, else cost_subqueryscan is
525  * not happy.
526  */
528 
529  /*
530  * Since we may want to add a partial path to this relation, we must set
531  * its consider_parallel flag correctly.
532  */
533  final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
534  rel->consider_parallel = final_rel->consider_parallel;
535 
536  /* Generate subquery scan paths for any interesting path in final_rel */
537  foreach(lc, final_rel->pathlist)
538  {
539  Path *subpath = (Path *) lfirst(lc);
540  List *pathkeys;
541  Path *cheapest_input_path = final_rel->cheapest_total_path;
542  bool is_sorted;
543  int presorted_keys;
544 
545  /*
546  * Include the cheapest path as-is so that the set operation can be
547  * cheaply implemented using a method which does not require the input
548  * to be sorted.
549  */
550  if (subpath == cheapest_input_path)
551  {
552  /* Convert subpath's pathkeys to outer representation */
553  pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
554  make_tlist_from_pathtarget(subpath->pathtarget));
555 
556  /* Generate outer path using this subpath */
558  rel,
559  subpath,
560  trivial_tlist,
561  pathkeys,
562  NULL));
563  }
564 
565  /* skip dealing with sorted paths if the setop doesn't need them */
566  if (interesting_pathkeys == NIL)
567  continue;
568 
569  /*
570  * Create paths to suit final sort order required for setop_pathkeys.
571  * Here we'll sort the cheapest input path (if not sorted already) and
572  * incremental sort any paths which are partially sorted.
573  */
574  is_sorted = pathkeys_count_contained_in(setop_pathkeys,
575  subpath->pathkeys,
576  &presorted_keys);
577 
578  if (!is_sorted)
579  {
580  double limittuples = rel->subroot->limit_tuples;
581 
582  /*
583  * Try at least sorting the cheapest path and also try
584  * incrementally sorting any path which is partially sorted
585  * already (no need to deal with paths which have presorted keys
586  * when incremental sort is disabled unless it's the cheapest
587  * input path).
588  */
589  if (subpath != cheapest_input_path &&
590  (presorted_keys == 0 || !enable_incremental_sort))
591  continue;
592 
593  /*
594  * We've no need to consider both a sort and incremental sort.
595  * We'll just do a sort if there are no presorted keys and an
596  * incremental sort when there are presorted keys.
597  */
598  if (presorted_keys == 0 || !enable_incremental_sort)
600  final_rel,
601  subpath,
602  setop_pathkeys,
603  limittuples);
604  else
606  final_rel,
607  subpath,
608  setop_pathkeys,
609  presorted_keys,
610  limittuples);
611  }
612 
613  /*
614  * subpath is now sorted, so add it to the pathlist. We already added
615  * the cheapest_input_path above, so don't add it again unless we just
616  * sorted it.
617  */
618  if (subpath != cheapest_input_path)
619  {
620  /* Convert subpath's pathkeys to outer representation */
621  pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
622  make_tlist_from_pathtarget(subpath->pathtarget));
623 
624  /* Generate outer path using this subpath */
626  rel,
627  subpath,
628  trivial_tlist,
629  pathkeys,
630  NULL));
631  }
632  }
633 
634  /* if consider_parallel is false, there should be no partial paths */
635  Assert(final_rel->consider_parallel ||
636  final_rel->partial_pathlist == NIL);
637 
638  /*
639  * If we have a partial path for the child relation, we can use that to
640  * build a partial path for this relation. But there's no point in
641  * considering any path but the cheapest.
642  */
643  if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
644  final_rel->partial_pathlist != NIL)
645  {
646  Path *partial_subpath;
647  Path *partial_path;
648 
649  partial_subpath = linitial(final_rel->partial_pathlist);
650  partial_path = (Path *)
651  create_subqueryscan_path(root, rel, partial_subpath,
652  trivial_tlist,
653  NIL, NULL);
654  add_partial_path(rel, partial_path);
655  }
656 
658 
659  /*
660  * Estimate number of groups if caller wants it. If the subquery used
661  * grouping or aggregation, its output is probably mostly unique anyway;
662  * otherwise do statistical estimation.
663  *
664  * XXX you don't really want to know about this: we do the estimation
665  * using the subquery's original targetlist expressions, not the
666  * subroot->processed_tlist which might seem more appropriate. The reason
667  * is that if the subquery is itself a setop, it may return a
668  * processed_tlist containing "varno 0" Vars generated by
669  * generate_append_tlist, and those would confuse estimate_num_groups
670  * mightily. We ought to get rid of the "varno 0" hack, but that requires
671  * a redesign of the parsetree representation of setops, so that there can
672  * be an RTE corresponding to each setop's output.
673  */
674  if (pNumGroups)
675  {
676  PlannerInfo *subroot = rel->subroot;
677  Query *subquery = subroot->parse;
678 
679  if (subquery->groupClause || subquery->groupingSets ||
680  subquery->distinctClause || subroot->hasHavingQual ||
681  subquery->hasAggs)
682  *pNumGroups = rel->cheapest_total_path->rows;
683  else
684  *pNumGroups = estimate_num_groups(subroot,
685  get_tlist_exprs(subquery->targetList, false),
687  NULL,
688  NULL);
689  }
690 }
691 
692 /*
693  * Generate paths for a UNION or UNION ALL node
694  */
695 static RelOptInfo *
697  List *refnames_tlist,
698  List **pTargetList)
699 {
700  Relids relids = NULL;
701  RelOptInfo *result_rel;
702  ListCell *lc;
703  ListCell *lc2;
704  ListCell *lc3;
705  List *cheapest_pathlist = NIL;
706  List *ordered_pathlist = NIL;
707  List *partial_pathlist = NIL;
708  bool partial_paths_valid = true;
709  bool consider_parallel = true;
710  List *rellist;
711  List *tlist_list;
712  List *trivial_tlist_list;
713  List *tlist;
714  List *groupList = NIL;
715  Path *apath;
716  Path *gpath = NULL;
717  bool try_sorted = false;
718  List *union_pathkeys = NIL;
719 
720  /*
721  * If any of my children are identical UNION nodes (same op, all-flag, and
722  * colTypes) then they can be merged into this node so that we generate
723  * only one Append/MergeAppend and unique-ification for the lot. Recurse
724  * to find such nodes.
725  */
726  rellist = plan_union_children(root,
727  op,
728  refnames_tlist,
729  &tlist_list,
730  &trivial_tlist_list);
731 
732  /*
733  * Generate tlist for Append/MergeAppend plan node.
734  *
735  * The tlist for an Append plan isn't important as far as the Append is
736  * concerned, but we must make it look real anyway for the benefit of the
737  * next plan level up.
738  */
739  tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
740  tlist_list, refnames_tlist);
741  *pTargetList = tlist;
742 
743  /* For UNIONs (not UNION ALL), try sorting, if sorting is possible */
744  if (!op->all)
745  {
746  /* Identify the grouping semantics */
747  groupList = generate_setop_grouplist(op, tlist);
748 
749  if (grouping_is_sortable(op->groupClauses))
750  {
751  try_sorted = true;
752  /* Determine the pathkeys for sorting by the whole target list */
753  union_pathkeys = make_pathkeys_for_sortclauses(root, groupList,
754  tlist);
755 
756  root->query_pathkeys = union_pathkeys;
757  }
758  }
759 
760  /*
761  * Now that we've got the append target list, we can build the union child
762  * paths.
763  */
764  forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
765  {
766  RelOptInfo *rel = lfirst(lc);
767  bool trivial_tlist = lfirst_int(lc2);
768  List *child_tlist = lfirst_node(List, lc3);
769 
770  /* only build paths for the union children */
771  if (rel->rtekind == RTE_SUBQUERY)
772  build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
773  union_pathkeys, NULL);
774  }
775 
776  /* Build path lists and relid set. */
777  foreach(lc, rellist)
778  {
779  RelOptInfo *rel = lfirst(lc);
780  Path *ordered_path;
781 
782  cheapest_pathlist = lappend(cheapest_pathlist,
783  rel->cheapest_total_path);
784 
785  if (try_sorted)
786  {
787  ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
788  union_pathkeys,
789  NULL,
790  TOTAL_COST,
791  false);
792 
793  if (ordered_path != NULL)
794  ordered_pathlist = lappend(ordered_pathlist, ordered_path);
795  else
796  {
797  /*
798  * If we can't find a sorted path, just give up trying to
799  * generate a list of correctly sorted child paths. This can
800  * happen when type coercion was added to the targetlist due
801  * to mismatching types from the union children.
802  */
803  try_sorted = false;
804  }
805  }
806 
807  if (consider_parallel)
808  {
809  if (!rel->consider_parallel)
810  {
811  consider_parallel = false;
812  partial_paths_valid = false;
813  }
814  else if (rel->partial_pathlist == NIL)
815  partial_paths_valid = false;
816  else
817  partial_pathlist = lappend(partial_pathlist,
818  linitial(rel->partial_pathlist));
819  }
820 
821  relids = bms_union(relids, rel->relids);
822  }
823 
824  /* Build result relation. */
825  result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
826  result_rel->reltarget = create_pathtarget(root, tlist);
827  result_rel->consider_parallel = consider_parallel;
828  result_rel->consider_startup = (root->tuple_fraction > 0);
829 
830  /*
831  * Append the child results together using the cheapest paths from each
832  * union child.
833  */
834  apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
835  NIL, NIL, NULL, 0, false, -1);
836 
837  /*
838  * Estimate number of groups. For now we just assume the output is unique
839  * --- this is certainly true for the UNION case, and we want worst-case
840  * estimates anyway.
841  */
842  result_rel->rows = apath->rows;
843 
844  /*
845  * Now consider doing the same thing using the partial paths plus Append
846  * plus Gather.
847  */
848  if (partial_paths_valid)
849  {
850  Path *papath;
851  int parallel_workers = 0;
852 
853  /* Find the highest number of workers requested for any subpath. */
854  foreach(lc, partial_pathlist)
855  {
856  Path *subpath = lfirst(lc);
857 
858  parallel_workers = Max(parallel_workers,
859  subpath->parallel_workers);
860  }
861  Assert(parallel_workers > 0);
862 
863  /*
864  * If the use of parallel append is permitted, always request at least
865  * log2(# of children) paths. We assume it can be useful to have
866  * extra workers in this case because they will be spread out across
867  * the children. The precise formula is just a guess; see
868  * add_paths_to_append_rel.
869  */
871  {
872  parallel_workers = Max(parallel_workers,
873  pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
874  parallel_workers = Min(parallel_workers,
876  }
877  Assert(parallel_workers > 0);
878 
879  papath = (Path *)
880  create_append_path(root, result_rel, NIL, partial_pathlist,
881  NIL, NULL, parallel_workers,
883  gpath = (Path *)
884  create_gather_path(root, result_rel, papath,
885  result_rel->reltarget, NULL, NULL);
886  }
887 
888  if (!op->all)
889  {
890  double dNumGroups;
891  bool can_sort = grouping_is_sortable(groupList);
892  bool can_hash = grouping_is_hashable(groupList);
893 
894  /*
895  * XXX for the moment, take the number of distinct groups as equal to
896  * the total input size, i.e., the worst case. This is too
897  * conservative, but it's not clear how to get a decent estimate of
898  * the true size. One should note as well the propensity of novices
899  * to write UNION rather than UNION ALL even when they don't expect
900  * any duplicates...
901  */
902  dNumGroups = apath->rows;
903 
904  if (can_hash)
905  {
906  Path *path;
907 
908  /*
909  * Try a hash aggregate plan on 'apath'. This is the cheapest
910  * available path containing each append child.
911  */
912  path = (Path *) create_agg_path(root,
913  result_rel,
914  apath,
915  create_pathtarget(root, tlist),
916  AGG_HASHED,
918  groupList,
919  NIL,
920  NULL,
921  dNumGroups);
922  add_path(result_rel, path);
923 
924  /* Try hash aggregate on the Gather path, if valid */
925  if (gpath != NULL)
926  {
927  /* Hashed aggregate plan --- no sort needed */
928  path = (Path *) create_agg_path(root,
929  result_rel,
930  gpath,
931  create_pathtarget(root, tlist),
932  AGG_HASHED,
934  groupList,
935  NIL,
936  NULL,
937  dNumGroups);
938  add_path(result_rel, path);
939  }
940  }
941 
942  if (can_sort)
943  {
944  Path *path = apath;
945 
946  /* Try Sort -> Unique on the Append path */
947  if (groupList != NIL)
948  path = (Path *) create_sort_path(root, result_rel, path,
949  make_pathkeys_for_sortclauses(root, groupList, tlist),
950  -1.0);
951 
953  result_rel,
954  path,
955  list_length(path->pathkeys),
956  dNumGroups);
957 
958  add_path(result_rel, path);
959 
960  /* Try Sort -> Unique on the Gather path, if set */
961  if (gpath != NULL)
962  {
963  path = gpath;
964 
965  path = (Path *) create_sort_path(root, result_rel, path,
966  make_pathkeys_for_sortclauses(root, groupList, tlist),
967  -1.0);
968 
970  result_rel,
971  path,
972  list_length(path->pathkeys),
973  dNumGroups);
974  add_path(result_rel, path);
975  }
976  }
977 
978  /*
979  * Try making a MergeAppend path if we managed to find a path with the
980  * correct pathkeys in each union child query.
981  */
982  if (try_sorted && groupList != NIL)
983  {
984  Path *path;
985 
987  result_rel,
988  ordered_pathlist,
989  union_pathkeys,
990  NULL);
991 
992  /* and make the MergeAppend unique */
994  result_rel,
995  path,
996  list_length(tlist),
997  dNumGroups);
998 
999  add_path(result_rel, path);
1000  }
1001  }
1002  else
1003  {
1004  /* UNION ALL */
1005  add_path(result_rel, apath);
1006 
1007  if (gpath != NULL)
1008  add_path(result_rel, gpath);
1009  }
1010 
1011  return result_rel;
1012 }
1013 
1014 /*
1015  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
1016  */
1017 static RelOptInfo *
1019  List *refnames_tlist,
1020  List **pTargetList)
1021 {
1022  RelOptInfo *result_rel;
1023  RelOptInfo *lrel,
1024  *rrel;
1025  double save_fraction = root->tuple_fraction;
1026  Path *lpath,
1027  *rpath,
1028  *path;
1029  List *lpath_tlist,
1030  *rpath_tlist,
1031  *tlist_list,
1032  *tlist,
1033  *groupList,
1034  *pathlist;
1035  bool lpath_trivial_tlist,
1036  rpath_trivial_tlist;
1037  double dLeftGroups,
1038  dRightGroups,
1039  dNumGroups,
1040  dNumOutputRows;
1041  bool use_hash;
1042  SetOpCmd cmd;
1043  int firstFlag;
1044 
1045  /*
1046  * Tell children to fetch all tuples.
1047  */
1048  root->tuple_fraction = 0.0;
1049 
1050  /* Recurse on children, ensuring their outputs are marked */
1051  lrel = recurse_set_operations(op->larg, root,
1052  op->colTypes, op->colCollations,
1053  false, 0,
1054  refnames_tlist,
1055  &lpath_tlist,
1056  &lpath_trivial_tlist);
1057  if (lrel->rtekind == RTE_SUBQUERY)
1058  build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
1059  NIL, &dLeftGroups);
1060  else
1061  dLeftGroups = lrel->rows;
1062 
1063  lpath = lrel->cheapest_total_path;
1064  rrel = recurse_set_operations(op->rarg, root,
1065  op->colTypes, op->colCollations,
1066  false, 1,
1067  refnames_tlist,
1068  &rpath_tlist,
1069  &rpath_trivial_tlist);
1070  if (rrel->rtekind == RTE_SUBQUERY)
1071  build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
1072  NIL, &dRightGroups);
1073  else
1074  dRightGroups = rrel->rows;
1075 
1076  rpath = rrel->cheapest_total_path;
1077 
1078  /* Undo effects of forcing tuple_fraction to 0 */
1079  root->tuple_fraction = save_fraction;
1080 
1081  /*
1082  * For EXCEPT, we must put the left input first. For INTERSECT, either
1083  * order should give the same results, and we prefer to put the smaller
1084  * input first in order to minimize the size of the hash table in the
1085  * hashing case. "Smaller" means the one with the fewer groups.
1086  */
1087  if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
1088  {
1089  pathlist = list_make2(lpath, rpath);
1090  tlist_list = list_make2(lpath_tlist, rpath_tlist);
1091  firstFlag = 0;
1092  }
1093  else
1094  {
1095  pathlist = list_make2(rpath, lpath);
1096  tlist_list = list_make2(rpath_tlist, lpath_tlist);
1097  firstFlag = 1;
1098  }
1099 
1100  /*
1101  * Generate tlist for Append plan node.
1102  *
1103  * The tlist for an Append plan isn't important as far as the Append is
1104  * concerned, but we must make it look real anyway for the benefit of the
1105  * next plan level up. In fact, it has to be real enough that the flag
1106  * column is shown as a variable not a constant, else setrefs.c will get
1107  * confused.
1108  */
1109  tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
1110  tlist_list, refnames_tlist);
1111 
1112  *pTargetList = tlist;
1113 
1114  /* Build result relation. */
1115  result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1116  bms_union(lrel->relids, rrel->relids));
1117  result_rel->reltarget = create_pathtarget(root, tlist);
1118 
1119  /*
1120  * Append the child results together.
1121  */
1122  path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
1123  NIL, NULL, 0, false, -1);
1124 
1125  /* Identify the grouping semantics */
1126  groupList = generate_setop_grouplist(op, tlist);
1127 
1128  /*
1129  * Estimate number of distinct groups that we'll need hashtable entries
1130  * for; this is the size of the left-hand input for EXCEPT, or the smaller
1131  * input for INTERSECT. Also estimate the number of eventual output rows.
1132  * In non-ALL cases, we estimate each group produces one output row; in
1133  * ALL cases use the relevant relation size. These are worst-case
1134  * estimates, of course, but we need to be conservative.
1135  */
1136  if (op->op == SETOP_EXCEPT)
1137  {
1138  dNumGroups = dLeftGroups;
1139  dNumOutputRows = op->all ? lpath->rows : dNumGroups;
1140  }
1141  else
1142  {
1143  dNumGroups = Min(dLeftGroups, dRightGroups);
1144  dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
1145  }
1146 
1147  /*
1148  * Decide whether to hash or sort, and add a sort node if needed.
1149  */
1150  use_hash = choose_hashed_setop(root, groupList, path,
1151  dNumGroups, dNumOutputRows,
1152  (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
1153 
1154  if (groupList && !use_hash)
1155  path = (Path *) create_sort_path(root,
1156  result_rel,
1157  path,
1159  groupList,
1160  tlist),
1161  -1.0);
1162 
1163  /*
1164  * Finally, add a SetOp path node to generate the correct output.
1165  */
1166  switch (op->op)
1167  {
1168  case SETOP_INTERSECT:
1170  break;
1171  case SETOP_EXCEPT:
1172  cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
1173  break;
1174  default:
1175  elog(ERROR, "unrecognized set op: %d", (int) op->op);
1176  cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
1177  break;
1178  }
1179  path = (Path *) create_setop_path(root,
1180  result_rel,
1181  path,
1182  cmd,
1183  use_hash ? SETOP_HASHED : SETOP_SORTED,
1184  groupList,
1185  list_length(op->colTypes) + 1,
1186  use_hash ? firstFlag : -1,
1187  dNumGroups,
1188  dNumOutputRows);
1189 
1190  result_rel->rows = path->rows;
1191  add_path(result_rel, path);
1192  return result_rel;
1193 }
1194 
1195 /*
1196  * Pull up children of a UNION node that are identically-propertied UNIONs.
1197  *
1198  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
1199  * output rows will be lost anyway.
1200  *
1201  * NOTE: currently, we ignore collations while determining if a child has
1202  * the same properties. This is semantically sound only so long as all
1203  * collations have the same notion of equality. It is valid from an
1204  * implementation standpoint because we don't care about the ordering of
1205  * a UNION child's result: UNION ALL results are always unordered, and
1206  * generate_union_paths will force a fresh sort if the top level is a UNION.
1207  */
1208 static List *
1210  SetOperationStmt *top_union,
1211  List *refnames_tlist,
1212  List **tlist_list,
1213  List **istrivial_tlist)
1214 {
1215  List *pending_rels = list_make1(top_union);
1216  List *result = NIL;
1217  List *child_tlist;
1218  bool trivial_tlist;
1219 
1220  *tlist_list = NIL;
1221  *istrivial_tlist = NIL;
1222 
1223  while (pending_rels != NIL)
1224  {
1225  Node *setOp = linitial(pending_rels);
1226 
1227  pending_rels = list_delete_first(pending_rels);
1228 
1229  if (IsA(setOp, SetOperationStmt))
1230  {
1231  SetOperationStmt *op = (SetOperationStmt *) setOp;
1232 
1233  if (op->op == top_union->op &&
1234  (op->all == top_union->all || op->all) &&
1235  equal(op->colTypes, top_union->colTypes))
1236  {
1237  /* Same UNION, so fold children into parent */
1238  pending_rels = lcons(op->rarg, pending_rels);
1239  pending_rels = lcons(op->larg, pending_rels);
1240  continue;
1241  }
1242  }
1243 
1244  /*
1245  * Not same, so plan this child separately.
1246  *
1247  * Note we disallow any resjunk columns in child results. This is
1248  * necessary since the Append node that implements the union won't do
1249  * any projection, and upper levels will get confused if some of our
1250  * output tuples have junk and some don't. This case only arises when
1251  * we have an EXCEPT or INTERSECT as child, else there won't be
1252  * resjunk anyway.
1253  */
1254  result = lappend(result, recurse_set_operations(setOp, root,
1255  top_union->colTypes,
1256  top_union->colCollations,
1257  false, -1,
1258  refnames_tlist,
1259  &child_tlist,
1260  &trivial_tlist));
1261  *tlist_list = lappend(*tlist_list, child_tlist);
1262  *istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
1263  }
1264 
1265  return result;
1266 }
1267 
1268 /*
1269  * postprocess_setop_rel - perform steps required after adding paths
1270  */
1271 static void
1273 {
1274  /*
1275  * We don't currently worry about allowing FDWs to contribute paths to
1276  * this relation, but give extensions a chance.
1277  */
1279  (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1280  NULL, rel, NULL);
1281 
1282  /* Select cheapest path */
1283  set_cheapest(rel);
1284 }
1285 
1286 /*
1287  * choose_hashed_setop - should we use hashing for a set operation?
1288  */
1289 static bool
1291  Path *input_path,
1292  double dNumGroups, double dNumOutputRows,
1293  const char *construct)
1294 {
1295  int numGroupCols = list_length(groupClauses);
1296  Size hash_mem_limit = get_hash_memory_limit();
1297  bool can_sort;
1298  bool can_hash;
1299  Size hashentrysize;
1300  Path hashed_p;
1301  Path sorted_p;
1302  double tuple_fraction;
1303 
1304  /* Check whether the operators support sorting or hashing */
1305  can_sort = grouping_is_sortable(groupClauses);
1306  can_hash = grouping_is_hashable(groupClauses);
1307  if (can_hash && can_sort)
1308  {
1309  /* we have a meaningful choice to make, continue ... */
1310  }
1311  else if (can_hash)
1312  return true;
1313  else if (can_sort)
1314  return false;
1315  else
1316  ereport(ERROR,
1317  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1318  /* translator: %s is UNION, INTERSECT, or EXCEPT */
1319  errmsg("could not implement %s", construct),
1320  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1321 
1322  /* Prefer sorting when enable_hashagg is off */
1323  if (!enable_hashagg)
1324  return false;
1325 
1326  /*
1327  * Don't do it if it doesn't look like the hashtable will fit into
1328  * hash_mem.
1329  */
1330  hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1331 
1332  if (hashentrysize * dNumGroups > hash_mem_limit)
1333  return false;
1334 
1335  /*
1336  * See if the estimated cost is no more than doing it the other way.
1337  *
1338  * We need to consider input_plan + hashagg versus input_plan + sort +
1339  * group. Note that the actual result plan might involve a SetOp or
1340  * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1341  * should be close enough for our purposes here.
1342  *
1343  * These path variables are dummies that just hold cost fields; we don't
1344  * make actual Paths for these steps.
1345  */
1346  cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1347  numGroupCols, dNumGroups,
1348  NIL,
1349  input_path->disabled_nodes,
1350  input_path->startup_cost, input_path->total_cost,
1351  input_path->rows, input_path->pathtarget->width);
1352 
1353  /*
1354  * Now for the sorted case. Note that the input is *always* unsorted,
1355  * since it was made by appending unrelated sub-relations together.
1356  */
1357  sorted_p.disabled_nodes = input_path->disabled_nodes;
1358  sorted_p.startup_cost = input_path->startup_cost;
1359  sorted_p.total_cost = input_path->total_cost;
1360  /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1361  cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
1362  sorted_p.total_cost,
1363  input_path->rows, input_path->pathtarget->width,
1364  0.0, work_mem, -1.0);
1365  cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1366  NIL,
1367  sorted_p.disabled_nodes,
1368  sorted_p.startup_cost, sorted_p.total_cost,
1369  input_path->rows);
1370 
1371  /*
1372  * Now make the decision using the top-level tuple fraction. First we
1373  * have to convert an absolute count (LIMIT) into fractional form.
1374  */
1375  tuple_fraction = root->tuple_fraction;
1376  if (tuple_fraction >= 1.0)
1377  tuple_fraction /= dNumOutputRows;
1378 
1379  if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1380  tuple_fraction) < 0)
1381  {
1382  /* Hashed is cheaper, so use it */
1383  return true;
1384  }
1385  return false;
1386 }
1387 
1388 /*
1389  * Generate targetlist for a set-operation plan node
1390  *
1391  * colTypes: OID list of set-op's result column datatypes
1392  * colCollations: OID list of set-op's result column collations
1393  * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1394  * varno: varno to use in generated Vars
1395  * hack_constants: true to copy up constants (see comments in code)
1396  * input_tlist: targetlist of this node's input node
1397  * refnames_tlist: targetlist to take column names from
1398  * trivial_tlist: output parameter, set to true if targetlist is trivial
1399  */
1400 static List *
1401 generate_setop_tlist(List *colTypes, List *colCollations,
1402  int flag,
1403  Index varno,
1404  bool hack_constants,
1405  List *input_tlist,
1406  List *refnames_tlist,
1407  bool *trivial_tlist)
1408 {
1409  List *tlist = NIL;
1410  int resno = 1;
1411  ListCell *ctlc,
1412  *cclc,
1413  *itlc,
1414  *rtlc;
1415  TargetEntry *tle;
1416  Node *expr;
1417 
1418  *trivial_tlist = true; /* until proven differently */
1419 
1420  forfour(ctlc, colTypes, cclc, colCollations,
1421  itlc, input_tlist, rtlc, refnames_tlist)
1422  {
1423  Oid colType = lfirst_oid(ctlc);
1424  Oid colColl = lfirst_oid(cclc);
1425  TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1426  TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1427 
1428  Assert(inputtle->resno == resno);
1429  Assert(reftle->resno == resno);
1430  Assert(!inputtle->resjunk);
1431  Assert(!reftle->resjunk);
1432 
1433  /*
1434  * Generate columns referencing input columns and having appropriate
1435  * data types and column names. Insert datatype coercions where
1436  * necessary.
1437  *
1438  * HACK: constants in the input's targetlist are copied up as-is
1439  * rather than being referenced as subquery outputs. This is mainly
1440  * to ensure that when we try to coerce them to the output column's
1441  * datatype, the right things happen for UNKNOWN constants. But do
1442  * this only at the first level of subquery-scan plans; we don't want
1443  * phony constants appearing in the output tlists of upper-level
1444  * nodes!
1445  *
1446  * Note that copying a constant doesn't in itself require us to mark
1447  * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1448  */
1449  if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1450  expr = (Node *) inputtle->expr;
1451  else
1452  expr = (Node *) makeVar(varno,
1453  inputtle->resno,
1454  exprType((Node *) inputtle->expr),
1455  exprTypmod((Node *) inputtle->expr),
1456  exprCollation((Node *) inputtle->expr),
1457  0);
1458 
1459  if (exprType(expr) != colType)
1460  {
1461  /*
1462  * Note: it's not really cool to be applying coerce_to_common_type
1463  * here; one notable point is that assign_expr_collations never
1464  * gets run on any generated nodes. For the moment that's not a
1465  * problem because we force the correct exposed collation below.
1466  * It would likely be best to make the parser generate the correct
1467  * output tlist for every set-op to begin with, though.
1468  */
1469  expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1470  expr,
1471  colType,
1472  "UNION/INTERSECT/EXCEPT");
1473  *trivial_tlist = false; /* the coercion makes it not trivial */
1474  }
1475 
1476  /*
1477  * Ensure the tlist entry's exposed collation matches the set-op. This
1478  * is necessary because plan_set_operations() reports the result
1479  * ordering as a list of SortGroupClauses, which don't carry collation
1480  * themselves but just refer to tlist entries. If we don't show the
1481  * right collation then planner.c might do the wrong thing in
1482  * higher-level queries.
1483  *
1484  * Note we use RelabelType, not CollateExpr, since this expression
1485  * will reach the executor without any further processing.
1486  */
1487  if (exprCollation(expr) != colColl)
1488  {
1489  expr = applyRelabelType(expr,
1490  exprType(expr), exprTypmod(expr), colColl,
1491  COERCE_IMPLICIT_CAST, -1, false);
1492  *trivial_tlist = false; /* the relabel makes it not trivial */
1493  }
1494 
1495  tle = makeTargetEntry((Expr *) expr,
1496  (AttrNumber) resno++,
1497  pstrdup(reftle->resname),
1498  false);
1499 
1500  /*
1501  * By convention, all non-resjunk columns in a setop tree have
1502  * ressortgroupref equal to their resno. In some cases the ref isn't
1503  * needed, but this is a cleaner way than modifying the tlist later.
1504  */
1505  tle->ressortgroupref = tle->resno;
1506 
1507  tlist = lappend(tlist, tle);
1508  }
1509 
1510  if (flag >= 0)
1511  {
1512  /* Add a resjunk flag column */
1513  /* flag value is the given constant */
1514  expr = (Node *) makeConst(INT4OID,
1515  -1,
1516  InvalidOid,
1517  sizeof(int32),
1519  false,
1520  true);
1521  tle = makeTargetEntry((Expr *) expr,
1522  (AttrNumber) resno++,
1523  pstrdup("flag"),
1524  true);
1525  tlist = lappend(tlist, tle);
1526  *trivial_tlist = false; /* the extra entry makes it not trivial */
1527  }
1528 
1529  return tlist;
1530 }
1531 
1532 /*
1533  * Generate targetlist for a set-operation Append node
1534  *
1535  * colTypes: OID list of set-op's result column datatypes
1536  * colCollations: OID list of set-op's result column collations
1537  * flag: true to create a flag column copied up from subplans
1538  * input_tlists: list of tlists for sub-plans of the Append
1539  * refnames_tlist: targetlist to take column names from
1540  *
1541  * The entries in the Append's targetlist should always be simple Vars;
1542  * we just have to make sure they have the right datatypes/typmods/collations.
1543  * The Vars are always generated with varno 0.
1544  *
1545  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1546  * cannot figure out a realistic width for the tlist we make here. But we
1547  * ought to refactor this code to produce a PathTarget directly, anyway.
1548  */
1549 static List *
1550 generate_append_tlist(List *colTypes, List *colCollations,
1551  bool flag,
1552  List *input_tlists,
1553  List *refnames_tlist)
1554 {
1555  List *tlist = NIL;
1556  int resno = 1;
1557  ListCell *curColType;
1558  ListCell *curColCollation;
1559  ListCell *ref_tl_item;
1560  int colindex;
1561  TargetEntry *tle;
1562  Node *expr;
1563  ListCell *tlistl;
1564  int32 *colTypmods;
1565 
1566  /*
1567  * First extract typmods to use.
1568  *
1569  * If the inputs all agree on type and typmod of a particular column, use
1570  * that typmod; else use -1.
1571  */
1572  colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1573 
1574  foreach(tlistl, input_tlists)
1575  {
1576  List *subtlist = (List *) lfirst(tlistl);
1577  ListCell *subtlistl;
1578 
1579  curColType = list_head(colTypes);
1580  colindex = 0;
1581  foreach(subtlistl, subtlist)
1582  {
1583  TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1584 
1585  if (subtle->resjunk)
1586  continue;
1587  Assert(curColType != NULL);
1588  if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1589  {
1590  /* If first subplan, copy the typmod; else compare */
1591  int32 subtypmod = exprTypmod((Node *) subtle->expr);
1592 
1593  if (tlistl == list_head(input_tlists))
1594  colTypmods[colindex] = subtypmod;
1595  else if (subtypmod != colTypmods[colindex])
1596  colTypmods[colindex] = -1;
1597  }
1598  else
1599  {
1600  /* types disagree, so force typmod to -1 */
1601  colTypmods[colindex] = -1;
1602  }
1603  curColType = lnext(colTypes, curColType);
1604  colindex++;
1605  }
1606  Assert(curColType == NULL);
1607  }
1608 
1609  /*
1610  * Now we can build the tlist for the Append.
1611  */
1612  colindex = 0;
1613  forthree(curColType, colTypes, curColCollation, colCollations,
1614  ref_tl_item, refnames_tlist)
1615  {
1616  Oid colType = lfirst_oid(curColType);
1617  int32 colTypmod = colTypmods[colindex++];
1618  Oid colColl = lfirst_oid(curColCollation);
1619  TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1620 
1621  Assert(reftle->resno == resno);
1622  Assert(!reftle->resjunk);
1623  expr = (Node *) makeVar(0,
1624  resno,
1625  colType,
1626  colTypmod,
1627  colColl,
1628  0);
1629  tle = makeTargetEntry((Expr *) expr,
1630  (AttrNumber) resno++,
1631  pstrdup(reftle->resname),
1632  false);
1633 
1634  /*
1635  * By convention, all non-resjunk columns in a setop tree have
1636  * ressortgroupref equal to their resno. In some cases the ref isn't
1637  * needed, but this is a cleaner way than modifying the tlist later.
1638  */
1639  tle->ressortgroupref = tle->resno;
1640 
1641  tlist = lappend(tlist, tle);
1642  }
1643 
1644  if (flag)
1645  {
1646  /* Add a resjunk flag column */
1647  /* flag value is shown as copied up from subplan */
1648  expr = (Node *) makeVar(0,
1649  resno,
1650  INT4OID,
1651  -1,
1652  InvalidOid,
1653  0);
1654  tle = makeTargetEntry((Expr *) expr,
1655  (AttrNumber) resno++,
1656  pstrdup("flag"),
1657  true);
1658  tlist = lappend(tlist, tle);
1659  }
1660 
1661  pfree(colTypmods);
1662 
1663  return tlist;
1664 }
1665 
1666 /*
1667  * generate_setop_grouplist
1668  * Build a SortGroupClause list defining the sort/grouping properties
1669  * of the setop's output columns.
1670  *
1671  * Parse analysis already determined the properties and built a suitable
1672  * list, except that the entries do not have sortgrouprefs set because
1673  * the parser output representation doesn't include a tlist for each
1674  * setop. So what we need to do here is copy that list and install
1675  * proper sortgrouprefs into it (copying those from the targetlist).
1676  */
1677 static List *
1679 {
1680  List *grouplist = copyObject(op->groupClauses);
1681  ListCell *lg;
1682  ListCell *lt;
1683 
1684  lg = list_head(grouplist);
1685  foreach(lt, targetlist)
1686  {
1687  TargetEntry *tle = (TargetEntry *) lfirst(lt);
1688  SortGroupClause *sgc;
1689 
1690  if (tle->resjunk)
1691  {
1692  /* resjunk columns should not have sortgrouprefs */
1693  Assert(tle->ressortgroupref == 0);
1694  continue; /* ignore resjunk columns */
1695  }
1696 
1697  /* non-resjunk columns should have sortgroupref = resno */
1698  Assert(tle->ressortgroupref == tle->resno);
1699 
1700  /* non-resjunk columns should have grouping clauses */
1701  Assert(lg != NULL);
1702  sgc = (SortGroupClause *) lfirst(lg);
1703  lg = lnext(grouplist, lg);
1704  Assert(sgc->tleSortGroupRef == 0);
1705 
1706  sgc->tleSortGroupRef = tle->ressortgroupref;
1707  }
1708  Assert(lg == NULL);
1709  return grouplist;
1710 }
int16 AttrNumber
Definition: attnum.h:21
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:251
#define bms_is_empty(a)
Definition: bitmapset.h:118
#define Min(x, y)
Definition: c.h:1007
#define MAXALIGN(LEN)
Definition: c.h:814
signed int int32
Definition: c.h:497
#define Max(x, y)
Definition: c.h:1001
#define Assert(condition)
Definition: c.h:861
unsigned int Index
Definition: c.h:617
size_t Size
Definition: c.h:608
int max_parallel_workers_per_gather
Definition: costsize.c:143
void cost_agg(Path *path, PlannerInfo *root, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, double numGroups, List *quals, int disabled_nodes, Cost input_startup_cost, Cost input_total_cost, double input_tuples, double input_width)
Definition: costsize.c:2682
void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, int input_disabled_nodes, Cost input_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples)
Definition: costsize.c:2144
void cost_group(Path *path, PlannerInfo *root, int numGroupCols, double numGroups, List *quals, int input_disabled_nodes, Cost input_startup_cost, Cost input_total_cost, double input_tuples)
Definition: costsize.c:3196
void set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
Definition: costsize.c:5831
bool enable_parallel_append
Definition: costsize.c:161
bool enable_hashagg
Definition: costsize.c:152
bool enable_incremental_sort
Definition: costsize.c:151
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
void add_setop_child_rel_equivalences(PlannerInfo *root, RelOptInfo *child_rel, List *child_tlist, List *setop_pathkeys)
Definition: equivclass.c:2947
int work_mem
Definition: globals.c:130
#define SizeofMinimalTupleHeader
Definition: htup_details.h:647
List * lappend(List *list, void *datum)
Definition: list.c:339
List * lappend_int(List *list, int datum)
Definition: list.c:357
List * list_delete_first(List *list)
Definition: list.c:943
List * lcons(void *datum, List *list)
Definition: list.c:495
Datum subpath(PG_FUNCTION_ARGS)
Definition: ltree_op.c:310
TargetEntry * makeTargetEntry(Expr *expr, AttrNumber resno, char *resname, bool resjunk)
Definition: makefuncs.c:240
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:301
Var * makeVar(int varno, AttrNumber varattno, Oid vartype, int32 vartypmod, Oid varcollid, Index varlevelsup)
Definition: makefuncs.c:66
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:298
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816
Node * applyRelabelType(Node *arg, Oid rtype, int32 rtypmod, Oid rcollid, CoercionForm rformat, int rlocation, bool overwrite_ok)
Definition: nodeFuncs.c:631
size_t get_hash_memory_limit(void)
Definition: nodeHash.c:3481
SetOpCmd
Definition: nodes.h:397
@ SETOPCMD_EXCEPT
Definition: nodes.h:400
@ SETOPCMD_EXCEPT_ALL
Definition: nodes.h:401
@ SETOPCMD_INTERSECT_ALL
Definition: nodes.h:399
@ SETOPCMD_INTERSECT
Definition: nodes.h:398
@ SETOP_HASHED
Definition: nodes.h:407
@ SETOP_SORTED
Definition: nodes.h:406
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:224
#define nodeTag(nodeptr)
Definition: nodes.h:133
@ AGG_HASHED
Definition: nodes.h:356
@ AGGSPLIT_SIMPLE
Definition: nodes.h:377
#define castNode(_type_, nodeptr)
Definition: nodes.h:176
Node * coerce_to_common_type(ParseState *pstate, Node *node, Oid targetTypeId, const char *context)
@ SETOP_INTERSECT
Definition: parsenodes.h:2111
@ SETOP_UNION
Definition: parsenodes.h:2110
@ SETOP_EXCEPT
Definition: parsenodes.h:2112
@ RTE_SUBQUERY
Definition: parsenodes.h:1018
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:619
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:557
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1334
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1053
AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, double rows)
Definition: pathnode.c:1300
SortPath * create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, double limit_tuples)
Definition: pathnode.c:3082
ProjectionPath * create_projection_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target)
Definition: pathnode.c:2763
UpperUniquePath * create_upper_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, int numCols, double numGroups)
Definition: pathnode.c:3187
GatherPath * create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, Relids required_outer, double *rows)
Definition: pathnode.c:2044
MergeAppendPath * create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *pathkeys, Relids required_outer)
Definition: pathnode.c:1471
void set_cheapest(RelOptInfo *parent_rel)
Definition: pathnode.c:269
SubqueryScanPath * create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, bool trivial_pathtarget, List *pathkeys, Relids required_outer)
Definition: pathnode.c:2088
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:795
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:124
SetOpPath * create_setop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SetOpCmd cmd, SetOpStrategy strategy, List *distinctList, AttrNumber flagColIdx, int firstFlag, double numGroups, double outputRows)
Definition: pathnode.c:3648
RecursiveUnionPath * create_recursiveunion_path(PlannerInfo *root, RelOptInfo *rel, Path *leftpath, Path *rightpath, PathTarget *target, List *distinctList, int wtParam, double numGroups)
Definition: pathnode.c:3711
IncrementalSortPath * create_incremental_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *pathkeys, int presorted_keys, double limit_tuples)
Definition: pathnode.c:3032
AggPath * create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, PathTarget *target, AggStrategy aggstrategy, AggSplit aggsplit, List *groupClause, List *qual, const AggClauseCosts *aggcosts, double numGroups)
Definition: pathnode.c:3240
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:461
Path * apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, PathTarget *target)
Definition: pathnode.c:2873
@ TOTAL_COST
Definition: pathnodes.h:38
@ UPPERREL_SETOP
Definition: pathnodes.h:71
@ UPPERREL_FINAL
Definition: pathnodes.h:79
static int pg_leftmost_one_pos32(uint32 word)
Definition: pg_bitutils.h:41
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define lfirst_int(lc)
Definition: pg_list.h:173
#define list_make1(x1)
Definition: pg_list.h:212
#define forthree(cell1, list1, cell2, list2, cell3, list3)
Definition: pg_list.h:563
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define linitial(l)
Definition: pg_list.h:178
#define forfour(cell1, list1, cell2, list2, cell3, list3, cell4, list4)
Definition: pg_list.h:575
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define lfirst_oid(lc)
Definition: pg_list.h:174
#define list_make2(x1, x2)
Definition: pg_list.h:214
PlannerInfo * subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, bool hasRecursion, double tuple_fraction, SetOperationStmt *setops)
Definition: planner.c:636
create_upper_paths_hook_type create_upper_paths_hook
Definition: planner.c:75
void check_stack_depth(void)
Definition: postgres.c:3564
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
static List * generate_setop_tlist(List *colTypes, List *colCollations, int flag, Index varno, bool hack_constants, List *input_tlist, List *refnames_tlist, bool *trivial_tlist)
Definition: prepunion.c:1401
static List * generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
Definition: prepunion.c:1678
static RelOptInfo * generate_union_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:696
static RelOptInfo * generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:1018
static RelOptInfo * recurse_set_operations(Node *setOp, PlannerInfo *root, List *colTypes, List *colCollations, bool junkOK, int flag, List *refnames_tlist, List **pTargetList, bool *istrivial_tlist)
Definition: prepunion.c:230
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
Definition: prepunion.c:1272
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, Path *input_path, double dNumGroups, double dNumOutputRows, const char *construct)
Definition: prepunion.c:1290
static List * generate_append_tlist(List *colTypes, List *colCollations, bool flag, List *input_tlists, List *refnames_tlist)
Definition: prepunion.c:1550
static RelOptInfo * generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root, List *refnames_tlist, List **pTargetList)
Definition: prepunion.c:384
bool set_operation_ordered_results_useful(SetOperationStmt *setop)
Definition: prepunion.c:188
RelOptInfo * plan_set_operations(PlannerInfo *root)
Definition: prepunion.c:99
static List * plan_union_children(PlannerInfo *root, SetOperationStmt *top_union, List *refnames_tlist, List **tlist_list, List **istrivial_tlist)
Definition: prepunion.c:1209
static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, bool trivial_tlist, List *child_tlist, List *interesting_pathkeys, double *pNumGroups)
Definition: prepunion.c:504
@ COERCE_IMPLICIT_CAST
Definition: primnodes.h:736
tree ctl root
Definition: radixtree.h:1886
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
void setup_simple_rel_arrays(PlannerInfo *root)
Definition: relnode.c:94
RelOptInfo * build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
Definition: relnode.c:192
RelOptInfo * fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
Definition: relnode.c:1458
double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
Definition: selfuncs.c:3417
Definition: pg_list.h:54
Definition: nodes.h:129
List * pathkeys
Definition: pathnodes.h:1672
Cardinality rows
Definition: pathnodes.h:1666
Cost startup_cost
Definition: pathnodes.h:1668
int disabled_nodes
Definition: pathnodes.h:1667
Cost total_cost
Definition: pathnodes.h:1669
List * processed_tlist
Definition: pathnodes.h:462
bool hasHavingQual
Definition: pathnodes.h:502
Query * parse
Definition: pathnodes.h:202
Cardinality limit_tuples
Definition: pathnodes.h:489
List * setop_pathkeys
Definition: pathnodes.h:404
List * groupClause
Definition: parsenodes.h:202
List * targetList
Definition: parsenodes.h:193
List * groupingSets
Definition: parsenodes.h:205
List * distinctClause
Definition: parsenodes.h:211
Query * subquery
Definition: parsenodes.h:1104
Relids relids
Definition: pathnodes.h:871
struct PathTarget * reltarget
Definition: pathnodes.h:893
bool consider_parallel
Definition: pathnodes.h:887
Relids lateral_relids
Definition: pathnodes.h:913
List * pathlist
Definition: pathnodes.h:898
struct Path * cheapest_total_path
Definition: pathnodes.h:902
bool consider_startup
Definition: pathnodes.h:883
List * partial_pathlist
Definition: pathnodes.h:900
PlannerInfo * subroot
Definition: pathnodes.h:953
Cardinality rows
Definition: pathnodes.h:877
RTEKind rtekind
Definition: pathnodes.h:922
SetOperation op
Definition: parsenodes.h:2187
Index tleSortGroupRef
Definition: parsenodes.h:1438
Expr * expr
Definition: primnodes.h:2186
AttrNumber resno
Definition: primnodes.h:2188
Index ressortgroupref
Definition: primnodes.h:2192
char * flag(int b)
Definition: test-ctype.c:33
bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
Definition: tlist.c:282
List * get_tlist_exprs(List *tlist, bool includeJunk)
Definition: tlist.c:163
bool grouping_is_sortable(List *groupClause)
Definition: tlist.c:540
List * make_tlist_from_pathtarget(PathTarget *target)
Definition: tlist.c:624
bool grouping_is_hashable(List *groupClause)
Definition: tlist.c:560
bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
Definition: tlist.c:248
#define create_pathtarget(root, tlist)
Definition: tlist.h:53