PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
planner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/htup_details.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/xact.h"
26 #include "catalog/pg_proc.h"
27 #include "catalog/pg_type.h"
28 #include "executor/executor.h"
29 #include "executor/nodeAgg.h"
30 #include "foreign/fdwapi.h"
31 #include "miscadmin.h"
32 #include "lib/bipartite_match.h"
33 #include "lib/knapsack.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #ifdef OPTIMIZER_DEBUG
37 #include "nodes/print.h"
38 #endif
39 #include "optimizer/clauses.h"
40 #include "optimizer/cost.h"
41 #include "optimizer/pathnode.h"
42 #include "optimizer/paths.h"
43 #include "optimizer/plancat.h"
44 #include "optimizer/planmain.h"
45 #include "optimizer/planner.h"
46 #include "optimizer/prep.h"
47 #include "optimizer/subselect.h"
48 #include "optimizer/tlist.h"
49 #include "optimizer/var.h"
50 #include "parser/analyze.h"
51 #include "parser/parsetree.h"
52 #include "parser/parse_agg.h"
53 #include "rewrite/rewriteManip.h"
54 #include "storage/dsm_impl.h"
55 #include "utils/rel.h"
56 #include "utils/selfuncs.h"
57 #include "utils/lsyscache.h"
58 #include "utils/syscache.h"
59 
60 
61 /* GUC parameters */
64 
65 /* Hook for plugins to get control in planner() */
67 
68 /* Hook for plugins to get control when grouping_planner() plans upper rels */
70 
71 
/*
 * Expression kind codes for preprocess_expression
 *
 * These tell preprocess_expression() what sort of expression it is looking
 * at, which determines whether join-alias flattening can be skipped and
 * whether qual canonicalization / implicit-AND conversion is applied.
 */
#define EXPRKIND_QUAL				0
#define EXPRKIND_TARGET				1
#define EXPRKIND_RTFUNC				2
#define EXPRKIND_RTFUNC_LATERAL		3
#define EXPRKIND_VALUES				4
#define EXPRKIND_VALUES_LATERAL		5
#define EXPRKIND_LIMIT				6
#define EXPRKIND_APPINFO			7
#define EXPRKIND_PHV				8
#define EXPRKIND_TABLESAMPLE		9
#define EXPRKIND_ARBITER_ELEM		10
#define EXPRKIND_TABLEFUNC			11
#define EXPRKIND_TABLEFUNC_LATERAL	12
86 
87 /* Passthrough data for standard_qp_callback */
88 typedef struct
89 {
90  List *tlist; /* preprocessed query targetlist */
91  List *activeWindows; /* active windows, if any */
92  List *groupClause; /* overrides parse->groupClause */
94 
95 /*
96  * Data specific to grouping sets
97  */
98 
99 typedef struct
100 {
110 
111 /* Local functions */
112 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
113 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
114 static void inheritance_planner(PlannerInfo *root);
115 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
116  double tuple_fraction);
118 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
119  int *tleref_to_colnum_map);
120 static void preprocess_rowmarks(PlannerInfo *root);
121 static double preprocess_limit(PlannerInfo *root,
122  double tuple_fraction,
123  int64 *offset_est, int64 *count_est);
124 static bool limit_needed(Query *parse);
126 static List *preprocess_groupclause(PlannerInfo *root, List *force);
127 static List *extract_rollup_sets(List *groupingSets);
128 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
129 static void standard_qp_callback(PlannerInfo *root, void *extra);
130 static double get_number_of_groups(PlannerInfo *root,
131  double path_rows,
132  grouping_sets_data *gd);
134  const AggClauseCosts *agg_costs,
135  double dNumGroups);
137  RelOptInfo *input_rel,
138  PathTarget *target,
139  const AggClauseCosts *agg_costs,
140  grouping_sets_data *gd);
141 static void consider_groupingsets_paths(PlannerInfo *root,
142  RelOptInfo *grouped_rel,
143  Path *path,
144  bool is_sorted,
145  bool can_hash,
146  PathTarget *target,
147  grouping_sets_data *gd,
148  const AggClauseCosts *agg_costs,
149  double dNumGroups);
151  RelOptInfo *input_rel,
152  PathTarget *input_target,
153  PathTarget *output_target,
154  List *tlist,
155  WindowFuncLists *wflists,
156  List *activeWindows);
157 static void create_one_window_path(PlannerInfo *root,
158  RelOptInfo *window_rel,
159  Path *path,
160  PathTarget *input_target,
161  PathTarget *output_target,
162  List *tlist,
163  WindowFuncLists *wflists,
164  List *activeWindows);
166  RelOptInfo *input_rel);
168  RelOptInfo *input_rel,
169  PathTarget *target,
170  double limit_tuples);
172  PathTarget *final_target);
174  PathTarget *grouping_target);
175 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
176 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
178  PathTarget *final_target,
179  List *activeWindows);
181  List *tlist);
183  PathTarget *final_target,
184  bool *have_postponed_srfs);
185 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
186  List *targets, List *targets_contain_srfs);
187 
188 
189 /*****************************************************************************
190  *
191  * Query optimizer entry point
192  *
193  * To support loadable plugins that monitor or modify planner behavior,
194  * we provide a hook variable that lets a plugin get control before and
195  * after the standard planning process. The plugin would normally call
196  * standard_planner().
197  *
198  * Note to plugin authors: standard_planner() scribbles on its Query input,
199  * so you'd better copy that data structure if you want to plan more than once.
200  *
201  *****************************************************************************/
202 PlannedStmt *
203 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
204 {
206 
207  if (planner_hook)
208  result = (*planner_hook) (parse, cursorOptions, boundParams);
209  else
210  result = standard_planner(parse, cursorOptions, boundParams);
211  return result;
212 }
213 
214 PlannedStmt *
215 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
216 {
218  PlannerGlobal *glob;
219  double tuple_fraction;
220  PlannerInfo *root;
221  RelOptInfo *final_rel;
222  Path *best_path;
223  Plan *top_plan;
224  ListCell *lp,
225  *lr;
226 
227  /*
228  * Set up global state for this planner invocation. This data is needed
229  * across all levels of sub-Query that might exist in the given command,
230  * so we keep it in a separate struct that's linked to by each per-Query
231  * PlannerInfo.
232  */
233  glob = makeNode(PlannerGlobal);
234 
235  glob->boundParams = boundParams;
236  glob->subplans = NIL;
237  glob->subroots = NIL;
238  glob->rewindPlanIDs = NULL;
239  glob->finalrtable = NIL;
240  glob->finalrowmarks = NIL;
241  glob->resultRelations = NIL;
242  glob->nonleafResultRelations = NIL;
243  glob->relationOids = NIL;
244  glob->invalItems = NIL;
245  glob->nParamExec = 0;
246  glob->lastPHId = 0;
247  glob->lastRowMarkId = 0;
248  glob->lastPlanNodeId = 0;
249  glob->transientPlan = false;
250  glob->dependsOnRole = false;
251 
252  /*
253  * Assess whether it's feasible to use parallel mode for this query. We
254  * can't do this in a standalone backend, or if the command will try to
255  * modify any data, or if this is a cursor operation, or if GUCs are set
256  * to values that don't permit parallelism, or if parallel-unsafe
257  * functions are present in the query tree.
258  *
259  * For now, we don't try to use parallel mode if we're running inside a
260  * parallel worker. We might eventually be able to relax this
261  * restriction, but for now it seems best not to have parallel workers
262  * trying to create their own parallel workers.
263  *
264  * We can't use parallelism in serializable mode because the predicate
265  * locking code is not parallel-aware. It's not catastrophic if someone
266  * tries to run a parallel plan in serializable mode; it just won't get
267  * any workers and will run serially. But it seems like a good heuristic
268  * to assume that the same serialization level will be in effect at plan
269  * time and execution time, so don't generate a parallel plan if we're in
270  * serializable mode.
271  */
272  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
275  parse->commandType == CMD_SELECT &&
276  !parse->hasModifyingCTE &&
278  !IsParallelWorker() &&
280  {
281  /* all the cheap tests pass, so scan the query tree */
282  glob->maxParallelHazard = max_parallel_hazard(parse);
284  }
285  else
286  {
287  /* skip the query tree scan, just assume it's unsafe */
289  glob->parallelModeOK = false;
290  }
291 
292  /*
293  * glob->parallelModeNeeded should tell us whether it's necessary to
294  * impose the parallel mode restrictions, but we don't actually want to
295  * impose them unless we choose a parallel plan, so it is normally set
296  * only if a parallel plan is chosen (see create_gather_plan). That way,
297  * people who mislabel their functions but don't use parallelism anyway
298  * aren't harmed. But when force_parallel_mode is set, we enable the
299  * restrictions whenever possible for testing purposes.
300  */
301  glob->parallelModeNeeded = glob->parallelModeOK &&
303 
304  /* Determine what fraction of the plan is likely to be scanned */
305  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
306  {
307  /*
308  * We have no real idea how many tuples the user will ultimately FETCH
309  * from a cursor, but it is often the case that he doesn't want 'em
310  * all, or would prefer a fast-start plan anyway so that he can
311  * process some of the tuples sooner. Use a GUC parameter to decide
312  * what fraction to optimize for.
313  */
314  tuple_fraction = cursor_tuple_fraction;
315 
316  /*
317  * We document cursor_tuple_fraction as simply being a fraction, which
318  * means the edge cases 0 and 1 have to be treated specially here. We
319  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
320  */
321  if (tuple_fraction >= 1.0)
322  tuple_fraction = 0.0;
323  else if (tuple_fraction <= 0.0)
324  tuple_fraction = 1e-10;
325  }
326  else
327  {
328  /* Default assumption is we need all the tuples */
329  tuple_fraction = 0.0;
330  }
331 
332  /* primary planning entry point (may recurse for subqueries) */
333  root = subquery_planner(glob, parse, NULL,
334  false, tuple_fraction);
335 
336  /* Select best Path and turn it into a Plan */
337  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
338  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
339 
340  top_plan = create_plan(root, best_path);
341 
342  /*
343  * If creating a plan for a scrollable cursor, make sure it can run
344  * backwards on demand. Add a Material node at the top at need.
345  */
346  if (cursorOptions & CURSOR_OPT_SCROLL)
347  {
348  if (!ExecSupportsBackwardScan(top_plan))
349  top_plan = materialize_finished_plan(top_plan);
350  }
351 
352  /*
353  * Optionally add a Gather node for testing purposes, provided this is
354  * actually a safe thing to do. (Note: we assume adding a Material node
355  * above did not change the parallel safety of the plan, so we can still
356  * rely on best_path->parallel_safe.)
357  */
359  {
360  Gather *gather = makeNode(Gather);
361 
362  gather->plan.targetlist = top_plan->targetlist;
363  gather->plan.qual = NIL;
364  gather->plan.lefttree = top_plan;
365  gather->plan.righttree = NULL;
366  gather->num_workers = 1;
367  gather->single_copy = true;
369 
370  /*
371  * Ideally we'd use cost_gather here, but setting up dummy path data
372  * to satisfy it doesn't seem much cleaner than knowing what it does.
373  */
374  gather->plan.startup_cost = top_plan->startup_cost +
376  gather->plan.total_cost = top_plan->total_cost +
378  gather->plan.plan_rows = top_plan->plan_rows;
379  gather->plan.plan_width = top_plan->plan_width;
380  gather->plan.parallel_aware = false;
381 
382  /* use parallel mode for parallel plans. */
383  root->glob->parallelModeNeeded = true;
384 
385  top_plan = &gather->plan;
386  }
387 
388  /*
389  * If any Params were generated, run through the plan tree and compute
390  * each plan node's extParam/allParam sets. Ideally we'd merge this into
391  * set_plan_references' tree traversal, but for now it has to be separate
392  * because we need to visit subplans before not after main plan.
393  */
394  if (glob->nParamExec > 0)
395  {
396  Assert(list_length(glob->subplans) == list_length(glob->subroots));
397  forboth(lp, glob->subplans, lr, glob->subroots)
398  {
399  Plan *subplan = (Plan *) lfirst(lp);
400  PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
401 
402  SS_finalize_plan(subroot, subplan);
403  }
404  SS_finalize_plan(root, top_plan);
405  }
406 
407  /* final cleanup of the plan */
408  Assert(glob->finalrtable == NIL);
409  Assert(glob->finalrowmarks == NIL);
410  Assert(glob->resultRelations == NIL);
412  top_plan = set_plan_references(root, top_plan);
413  /* ... and the subplans (both regular subplans and initplans) */
414  Assert(list_length(glob->subplans) == list_length(glob->subroots));
415  forboth(lp, glob->subplans, lr, glob->subroots)
416  {
417  Plan *subplan = (Plan *) lfirst(lp);
418  PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
419 
420  lfirst(lp) = set_plan_references(subroot, subplan);
421  }
422 
423  /* build the PlannedStmt result */
424  result = makeNode(PlannedStmt);
425 
426  result->commandType = parse->commandType;
427  result->queryId = parse->queryId;
428  result->hasReturning = (parse->returningList != NIL);
429  result->hasModifyingCTE = parse->hasModifyingCTE;
430  result->canSetTag = parse->canSetTag;
431  result->transientPlan = glob->transientPlan;
432  result->dependsOnRole = glob->dependsOnRole;
433  result->parallelModeNeeded = glob->parallelModeNeeded;
434  result->planTree = top_plan;
435  result->rtable = glob->finalrtable;
436  result->resultRelations = glob->resultRelations;
438  result->subplans = glob->subplans;
439  result->rewindPlanIDs = glob->rewindPlanIDs;
440  result->rowMarks = glob->finalrowmarks;
441  result->relationOids = glob->relationOids;
442  result->invalItems = glob->invalItems;
443  result->nParamExec = glob->nParamExec;
444  /* utilityStmt should be null, but we might as well copy it */
445  result->utilityStmt = parse->utilityStmt;
446  result->stmt_location = parse->stmt_location;
447  result->stmt_len = parse->stmt_len;
448 
449  return result;
450 }
451 
452 
453 /*--------------------
454  * subquery_planner
455  * Invokes the planner on a subquery. We recurse to here for each
456  * sub-SELECT found in the query tree.
457  *
458  * glob is the global state for the current planner run.
459  * parse is the querytree produced by the parser & rewriter.
460  * parent_root is the immediate parent Query's info (NULL at the top level).
461  * hasRecursion is true if this is a recursive WITH query.
462  * tuple_fraction is the fraction of tuples we expect will be retrieved.
463  * tuple_fraction is interpreted as explained for grouping_planner, below.
464  *
465  * Basically, this routine does the stuff that should only be done once
466  * per Query object. It then calls grouping_planner. At one time,
467  * grouping_planner could be invoked recursively on the same Query object;
468  * that's not currently true, but we keep the separation between the two
469  * routines anyway, in case we need it again someday.
470  *
471  * subquery_planner will be called recursively to handle sub-Query nodes
472  * found within the query's expressions and rangetable.
473  *
474  * Returns the PlannerInfo struct ("root") that contains all data generated
475  * while planning the subquery. In particular, the Path(s) attached to
476  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
477  * cheapest way(s) to implement the query. The top level will select the
478  * best Path and pass it through createplan.c to produce a finished Plan.
479  *--------------------
480  */
481 PlannerInfo *
483  PlannerInfo *parent_root,
484  bool hasRecursion, double tuple_fraction)
485 {
486  PlannerInfo *root;
487  List *newWithCheckOptions;
488  List *newHaving;
489  bool hasOuterJoins;
490  RelOptInfo *final_rel;
491  ListCell *l;
492 
493  /* Create a PlannerInfo data structure for this subquery */
494  root = makeNode(PlannerInfo);
495  root->parse = parse;
496  root->glob = glob;
497  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
498  root->parent_root = parent_root;
499  root->plan_params = NIL;
500  root->outer_params = NULL;
502  root->init_plans = NIL;
503  root->cte_plan_ids = NIL;
504  root->multiexpr_params = NIL;
505  root->eq_classes = NIL;
506  root->append_rel_list = NIL;
507  root->pcinfo_list = NIL;
508  root->rowMarks = NIL;
509  memset(root->upper_rels, 0, sizeof(root->upper_rels));
510  memset(root->upper_targets, 0, sizeof(root->upper_targets));
511  root->processed_tlist = NIL;
512  root->grouping_map = NULL;
513  root->minmax_aggs = NIL;
514  root->qual_security_level = 0;
515  root->hasInheritedTarget = false;
516  root->hasRecursion = hasRecursion;
517  if (hasRecursion)
518  root->wt_param_id = SS_assign_special_param(root);
519  else
520  root->wt_param_id = -1;
521  root->non_recursive_path = NULL;
522 
523  /*
524  * If there is a WITH list, process each WITH query and build an initplan
525  * SubPlan structure for it.
526  */
527  if (parse->cteList)
528  SS_process_ctes(root);
529 
530  /*
531  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
532  * to transform them into joins. Note that this step does not descend
533  * into subqueries; if we pull up any subqueries below, their SubLinks are
534  * processed just before pulling them up.
535  */
536  if (parse->hasSubLinks)
537  pull_up_sublinks(root);
538 
539  /*
540  * Scan the rangetable for set-returning functions, and inline them if
541  * possible (producing subqueries that might get pulled up next).
542  * Recursion issues here are handled in the same way as for SubLinks.
543  */
545 
546  /*
547  * Check to see if any subqueries in the jointree can be merged into this
548  * query.
549  */
550  pull_up_subqueries(root);
551 
552  /*
553  * If this is a simple UNION ALL query, flatten it into an appendrel. We
554  * do this now because it requires applying pull_up_subqueries to the leaf
555  * queries of the UNION ALL, which weren't touched above because they
556  * weren't referenced by the jointree (they will be after we do this).
557  */
558  if (parse->setOperations)
560 
561  /*
562  * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
563  * avoid the expense of doing flatten_join_alias_vars(). Also check for
564  * outer joins --- if none, we can skip reduce_outer_joins(). And check
565  * for LATERAL RTEs, too. This must be done after we have done
566  * pull_up_subqueries(), of course.
567  */
568  root->hasJoinRTEs = false;
569  root->hasLateralRTEs = false;
570  hasOuterJoins = false;
571  foreach(l, parse->rtable)
572  {
573  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
574 
575  if (rte->rtekind == RTE_JOIN)
576  {
577  root->hasJoinRTEs = true;
578  if (IS_OUTER_JOIN(rte->jointype))
579  hasOuterJoins = true;
580  }
581  if (rte->lateral)
582  root->hasLateralRTEs = true;
583  }
584 
585  /*
586  * Preprocess RowMark information. We need to do this after subquery
587  * pullup (so that all non-inherited RTEs are present) and before
588  * inheritance expansion (so that the info is available for
589  * expand_inherited_tables to examine and modify).
590  */
591  preprocess_rowmarks(root);
592 
593  /*
594  * Expand any rangetable entries that are inheritance sets into "append
595  * relations". This can add entries to the rangetable, but they must be
596  * plain base relations not joins, so it's OK (and marginally more
597  * efficient) to do it after checking for join RTEs. We must do it after
598  * pulling up subqueries, else we'd fail to handle inherited tables in
599  * subqueries.
600  */
602 
603  /*
604  * Set hasHavingQual to remember if HAVING clause is present. Needed
605  * because preprocess_expression will reduce a constant-true condition to
606  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
607  */
608  root->hasHavingQual = (parse->havingQual != NULL);
609 
610  /* Clear this flag; might get set in distribute_qual_to_rels */
611  root->hasPseudoConstantQuals = false;
612 
613  /*
614  * Do expression preprocessing on targetlist and quals, as well as other
615  * random expressions in the querytree. Note that we do not need to
616  * handle sort/group expressions explicitly, because they are actually
617  * part of the targetlist.
618  */
619  parse->targetList = (List *)
620  preprocess_expression(root, (Node *) parse->targetList,
622 
623  /* Constant-folding might have removed all set-returning functions */
624  if (parse->hasTargetSRFs)
626 
627  newWithCheckOptions = NIL;
628  foreach(l, parse->withCheckOptions)
629  {
630  WithCheckOption *wco = (WithCheckOption *) lfirst(l);
631 
632  wco->qual = preprocess_expression(root, wco->qual,
633  EXPRKIND_QUAL);
634  if (wco->qual != NULL)
635  newWithCheckOptions = lappend(newWithCheckOptions, wco);
636  }
637  parse->withCheckOptions = newWithCheckOptions;
638 
639  parse->returningList = (List *)
640  preprocess_expression(root, (Node *) parse->returningList,
642 
643  preprocess_qual_conditions(root, (Node *) parse->jointree);
644 
645  parse->havingQual = preprocess_expression(root, parse->havingQual,
646  EXPRKIND_QUAL);
647 
648  foreach(l, parse->windowClause)
649  {
650  WindowClause *wc = (WindowClause *) lfirst(l);
651 
652  /* partitionClause/orderClause are sort/group expressions */
655  wc->endOffset = preprocess_expression(root, wc->endOffset,
657  }
658 
659  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
661  parse->limitCount = preprocess_expression(root, parse->limitCount,
663 
664  if (parse->onConflict)
665  {
666  parse->onConflict->arbiterElems = (List *)
668  (Node *) parse->onConflict->arbiterElems,
670  parse->onConflict->arbiterWhere =
672  parse->onConflict->arbiterWhere,
673  EXPRKIND_QUAL);
674  parse->onConflict->onConflictSet = (List *)
676  (Node *) parse->onConflict->onConflictSet,
678  parse->onConflict->onConflictWhere =
680  parse->onConflict->onConflictWhere,
681  EXPRKIND_QUAL);
682  /* exclRelTlist contains only Vars, so no preprocessing needed */
683  }
684 
685  root->append_rel_list = (List *)
688 
689  /* Also need to preprocess expressions within RTEs */
690  foreach(l, parse->rtable)
691  {
692  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
693  int kind;
694  ListCell *lcsq;
695 
696  if (rte->rtekind == RTE_RELATION)
697  {
698  if (rte->tablesample)
699  rte->tablesample = (TableSampleClause *)
701  (Node *) rte->tablesample,
703  }
704  else if (rte->rtekind == RTE_SUBQUERY)
705  {
706  /*
707  * We don't want to do all preprocessing yet on the subquery's
708  * expressions, since that will happen when we plan it. But if it
709  * contains any join aliases of our level, those have to get
710  * expanded now, because planning of the subquery won't do it.
711  * That's only possible if the subquery is LATERAL.
712  */
713  if (rte->lateral && root->hasJoinRTEs)
714  rte->subquery = (Query *)
715  flatten_join_alias_vars(root, (Node *) rte->subquery);
716  }
717  else if (rte->rtekind == RTE_FUNCTION)
718  {
719  /* Preprocess the function expression(s) fully */
721  rte->functions = (List *)
722  preprocess_expression(root, (Node *) rte->functions, kind);
723  }
724  else if (rte->rtekind == RTE_TABLEFUNC)
725  {
726  /* Preprocess the function expression(s) fully */
728  rte->tablefunc = (TableFunc *)
729  preprocess_expression(root, (Node *) rte->tablefunc, kind);
730  }
731  else if (rte->rtekind == RTE_VALUES)
732  {
733  /* Preprocess the values lists fully */
735  rte->values_lists = (List *)
736  preprocess_expression(root, (Node *) rte->values_lists, kind);
737  }
738 
739  /*
740  * Process each element of the securityQuals list as if it were a
741  * separate qual expression (as indeed it is). We need to do it this
742  * way to get proper canonicalization of AND/OR structure. Note that
743  * this converts each element into an implicit-AND sublist.
744  */
745  foreach(lcsq, rte->securityQuals)
746  {
747  lfirst(lcsq) = preprocess_expression(root,
748  (Node *) lfirst(lcsq),
749  EXPRKIND_QUAL);
750  }
751  }
752 
753  /*
754  * In some cases we may want to transfer a HAVING clause into WHERE. We
755  * cannot do so if the HAVING clause contains aggregates (obviously) or
756  * volatile functions (since a HAVING clause is supposed to be executed
757  * only once per group). We also can't do this if there are any nonempty
758  * grouping sets; moving such a clause into WHERE would potentially change
759  * the results, if any referenced column isn't present in all the grouping
760  * sets. (If there are only empty grouping sets, then the HAVING clause
761  * must be degenerate as discussed below.)
762  *
763  * Also, it may be that the clause is so expensive to execute that we're
764  * better off doing it only once per group, despite the loss of
765  * selectivity. This is hard to estimate short of doing the entire
766  * planning process twice, so we use a heuristic: clauses containing
767  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
768  * clause into WHERE, in hopes of eliminating tuples before aggregation
769  * instead of after.
770  *
771  * If the query has explicit grouping then we can simply move such a
772  * clause into WHERE; any group that fails the clause will not be in the
773  * output because none of its tuples will reach the grouping or
774  * aggregation stage. Otherwise we must have a degenerate (variable-free)
775  * HAVING clause, which we put in WHERE so that query_planner() can use it
776  * in a gating Result node, but also keep in HAVING to ensure that we
777  * don't emit a bogus aggregated row. (This could be done better, but it
778  * seems not worth optimizing.)
779  *
780  * Note that both havingQual and parse->jointree->quals are in
781  * implicitly-ANDed-list form at this point, even though they are declared
782  * as Node *.
783  */
784  newHaving = NIL;
785  foreach(l, (List *) parse->havingQual)
786  {
787  Node *havingclause = (Node *) lfirst(l);
788 
789  if ((parse->groupClause && parse->groupingSets) ||
790  contain_agg_clause(havingclause) ||
791  contain_volatile_functions(havingclause) ||
792  contain_subplans(havingclause))
793  {
794  /* keep it in HAVING */
795  newHaving = lappend(newHaving, havingclause);
796  }
797  else if (parse->groupClause && !parse->groupingSets)
798  {
799  /* move it to WHERE */
800  parse->jointree->quals = (Node *)
801  lappend((List *) parse->jointree->quals, havingclause);
802  }
803  else
804  {
805  /* put a copy in WHERE, keep it in HAVING */
806  parse->jointree->quals = (Node *)
807  lappend((List *) parse->jointree->quals,
808  copyObject(havingclause));
809  newHaving = lappend(newHaving, havingclause);
810  }
811  }
812  parse->havingQual = (Node *) newHaving;
813 
814  /* Remove any redundant GROUP BY columns */
816 
817  /*
818  * If we have any outer joins, try to reduce them to plain inner joins.
819  * This step is most easily done after we've done expression
820  * preprocessing.
821  */
822  if (hasOuterJoins)
823  reduce_outer_joins(root);
824 
825  /*
826  * Do the main planning. If we have an inherited target relation, that
827  * needs special processing, else go straight to grouping_planner.
828  */
829  if (parse->resultRelation &&
830  rt_fetch(parse->resultRelation, parse->rtable)->inh)
831  inheritance_planner(root);
832  else
833  grouping_planner(root, false, tuple_fraction);
834 
835  /*
836  * Capture the set of outer-level param IDs we have access to, for use in
837  * extParam/allParam calculations later.
838  */
840 
841  /*
842  * If any initPlans were created in this query level, adjust the surviving
843  * Paths' costs and parallel-safety flags to account for them. The
844  * initPlans won't actually get attached to the plan tree till
845  * create_plan() runs, but we must include their effects now.
846  */
847  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
848  SS_charge_for_initplans(root, final_rel);
849 
850  /*
851  * Make sure we've identified the cheapest Path for the final rel. (By
852  * doing this here not in grouping_planner, we include initPlan costs in
853  * the decision, though it's unlikely that will change anything.)
854  */
855  set_cheapest(final_rel);
856 
857  return root;
858 }
859 
860 /*
861  * preprocess_expression
862  * Do subquery_planner's preprocessing work for an expression,
863  * which can be a targetlist, a WHERE clause (including JOIN/ON
864  * conditions), a HAVING clause, or a few other things.
865  */
866 static Node *
867 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
868 {
869  /*
870  * Fall out quickly if expression is empty. This occurs often enough to
871  * be worth checking. Note that null->null is the correct conversion for
872  * implicit-AND result format, too.
873  */
874  if (expr == NULL)
875  return NULL;
876 
877  /*
878  * If the query has any join RTEs, replace join alias variables with
879  * base-relation variables. We must do this before sublink processing,
880  * else sublinks expanded out from join aliases would not get processed.
881  * We can skip it in non-lateral RTE functions, VALUES lists, and
882  * TABLESAMPLE clauses, however, since they can't contain any Vars of the
883  * current query level.
884  */
885  if (root->hasJoinRTEs &&
886  !(kind == EXPRKIND_RTFUNC ||
887  kind == EXPRKIND_VALUES ||
888  kind == EXPRKIND_TABLESAMPLE ||
889  kind == EXPRKIND_TABLEFUNC))
890  expr = flatten_join_alias_vars(root, expr);
891 
892  /*
893  * Simplify constant expressions.
894  *
895  * Note: an essential effect of this is to convert named-argument function
896  * calls to positional notation and insert the current actual values of
897  * any default arguments for functions. To ensure that happens, we *must*
898  * process all expressions here. Previous PG versions sometimes skipped
899  * const-simplification if it didn't seem worth the trouble, but we can't
900  * do that anymore.
901  *
902  * Note: this also flattens nested AND and OR expressions into N-argument
903  * form. All processing of a qual expression after this point must be
904  * careful to maintain AND/OR flatness --- that is, do not generate a tree
905  * with AND directly under AND, nor OR directly under OR.
906  */
907  expr = eval_const_expressions(root, expr);
908 
909  /*
910  * If it's a qual or havingQual, canonicalize it.
911  */
912  if (kind == EXPRKIND_QUAL)
913  {
914  expr = (Node *) canonicalize_qual((Expr *) expr);
915 
916 #ifdef OPTIMIZER_DEBUG
917  printf("After canonicalize_qual()\n");
918  pprint(expr);
919 #endif
920  }
921 
922  /* Expand SubLinks to SubPlans */
923  if (root->parse->hasSubLinks)
924  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
925 
926  /*
927  * XXX do not insert anything here unless you have grokked the comments in
928  * SS_replace_correlation_vars ...
929  */
930 
931  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
932  if (root->query_level > 1)
933  expr = SS_replace_correlation_vars(root, expr);
934 
935  /*
936  * If it's a qual or havingQual, convert it to implicit-AND format. (We
937  * don't want to do this before eval_const_expressions, since the latter
938  * would be unable to simplify a top-level AND correctly. Also,
939  * SS_process_sublinks expects explicit-AND format.)
940  */
941  if (kind == EXPRKIND_QUAL)
942  expr = (Node *) make_ands_implicit((Expr *) expr);
943 
944  return expr;
945 }
946 
947 /*
948  * preprocess_qual_conditions
949  * Recursively scan the query's jointree and do subquery_planner's
950  * preprocessing work on each qual condition found therein.
951  */
952 static void
954 {
955  if (jtnode == NULL)
956  return;
957  if (IsA(jtnode, RangeTblRef))
958  {
959  /* nothing to do here */
960  }
961  else if (IsA(jtnode, FromExpr))
962  {
963  FromExpr *f = (FromExpr *) jtnode;
964  ListCell *l;
965 
966  foreach(l, f->fromlist)
968 
970  }
971  else if (IsA(jtnode, JoinExpr))
972  {
973  JoinExpr *j = (JoinExpr *) jtnode;
974 
977 
979  }
980  else
981  elog(ERROR, "unrecognized node type: %d",
982  (int) nodeTag(jtnode));
983 }
984 
985 /*
986  * preprocess_phv_expression
987  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
988  *
989  * If a LATERAL subquery references an output of another subquery, and that
990  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
991  * join, then we'll push the PlaceHolderVar expression down into the subquery
992  * and later pull it back up during find_lateral_references, which runs after
993  * subquery_planner has preprocessed all the expressions that were in the
994  * current query level to start with. So we need to preprocess it then.
995  */
996 Expr *
998 {
999  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1000 }
1001 
1002 /*
1003  * inheritance_planner
1004  * Generate Paths in the case where the result relation is an
1005  * inheritance set.
1006  *
1007  * We have to handle this case differently from cases where a source relation
1008  * is an inheritance set. Source inheritance is expanded at the bottom of the
1009  * plan tree (see allpaths.c), but target inheritance has to be expanded at
1010  * the top. The reason is that for UPDATE, each target relation needs a
1011  * different targetlist matching its own column set. Fortunately,
1012  * the UPDATE/DELETE target can never be the nullable side of an outer join,
1013  * so it's OK to generate the plan this way.
1014  *
1015  * Returns nothing; the useful output is in the Paths we attach to
1016  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1017  *
1018  * Note that we have not done set_cheapest() on the final rel; it's convenient
1019  * to leave this to the caller.
1020  */
1021 static void
1023 {
1024  Query *parse = root->parse;
1025  int parentRTindex = parse->resultRelation;
1026  Bitmapset *subqueryRTindexes;
1027  Bitmapset *modifiableARIindexes;
1028  int nominalRelation = -1;
1029  List *final_rtable = NIL;
1030  int save_rel_array_size = 0;
1031  RelOptInfo **save_rel_array = NULL;
1032  List *subpaths = NIL;
1033  List *subroots = NIL;
1034  List *resultRelations = NIL;
1035  List *withCheckOptionLists = NIL;
1036  List *returningLists = NIL;
1037  List *rowMarks;
1038  RelOptInfo *final_rel;
1039  ListCell *lc;
1040  Index rti;
1041  RangeTblEntry *parent_rte;
1042  List *partitioned_rels = NIL;
1043 
1044  Assert(parse->commandType != CMD_INSERT);
1045 
1046  /*
1047  * We generate a modified instance of the original Query for each target
1048  * relation, plan that, and put all the plans into a list that will be
1049  * controlled by a single ModifyTable node. All the instances share the
1050  * same rangetable, but each instance must have its own set of subquery
1051  * RTEs within the finished rangetable because (1) they are likely to get
1052  * scribbled on during planning, and (2) it's not inconceivable that
1053  * subqueries could get planned differently in different cases. We need
1054  * not create duplicate copies of other RTE kinds, in particular not the
1055  * target relations, because they don't have either of those issues. Not
1056  * having to duplicate the target relations is important because doing so
1057  * (1) would result in a rangetable of length O(N^2) for N targets, with
1058  * at least O(N^3) work expended here; and (2) would greatly complicate
1059  * management of the rowMarks list.
1060  *
1061  * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1062  */
1063  subqueryRTindexes = NULL;
1064  rti = 1;
1065  foreach(lc, parse->rtable)
1066  {
1067  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1068 
1069  if (rte->rtekind == RTE_SUBQUERY)
1070  subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1071  rti++;
1072  }
1073 
1074  /*
1075  * Next, we want to identify which AppendRelInfo items contain references
1076  * to any of the aforesaid subquery RTEs. These items will need to be
1077  * copied and modified to adjust their subquery references; whereas the
1078  * other ones need not be touched. It's worth being tense over this
1079  * because we can usually avoid processing most of the AppendRelInfo
1080  * items, thereby saving O(N^2) space and time when the target is a large
1081  * inheritance tree. We can identify AppendRelInfo items by their
1082  * child_relid, since that should be unique within the list.
1083  */
1084  modifiableARIindexes = NULL;
1085  if (subqueryRTindexes != NULL)
1086  {
1087  foreach(lc, root->append_rel_list)
1088  {
1089  AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1090 
1091  if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1092  bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
1094  subqueryRTindexes))
1095  modifiableARIindexes = bms_add_member(modifiableARIindexes,
1096  appinfo->child_relid);
1097  }
1098  }
1099 
1100  /*
1101  * If the parent RTE is a partitioned table, we should use that as the
1102  * nominal relation, because the RTEs added for partitioned tables
1103  * (including the root parent) as child members of the inheritance set
1104  * do not appear anywhere else in the plan. The situation is exactly
1105  * the opposite in the case of non-partitioned inheritance parent as
1106  * described below.
1107  */
1108  parent_rte = rt_fetch(parentRTindex, root->parse->rtable);
1109  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1110  nominalRelation = parentRTindex;
1111 
1112  /*
1113  * And now we can get on with generating a plan for each child table.
1114  */
1115  foreach(lc, root->append_rel_list)
1116  {
1117  AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1118  PlannerInfo *subroot;
1119  RangeTblEntry *child_rte;
1120  RelOptInfo *sub_final_rel;
1121  Path *subpath;
1122 
1123  /* append_rel_list contains all append rels; ignore others */
1124  if (appinfo->parent_relid != parentRTindex)
1125  continue;
1126 
1127  /*
1128  * We need a working copy of the PlannerInfo so that we can control
1129  * propagation of information back to the main copy.
1130  */
1131  subroot = makeNode(PlannerInfo);
1132  memcpy(subroot, root, sizeof(PlannerInfo));
1133 
1134  /*
1135  * Generate modified query with this rel as target. We first apply
1136  * adjust_appendrel_attrs, which copies the Query and changes
1137  * references to the parent RTE to refer to the current child RTE,
1138  * then fool around with subquery RTEs.
1139  */
1140  subroot->parse = (Query *)
1142  (Node *) parse,
1143  appinfo);
1144 
1145  /*
1146  * If there are securityQuals attached to the parent, move them to the
1147  * child rel (they've already been transformed properly for that).
1148  */
1149  parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable);
1150  child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1151  child_rte->securityQuals = parent_rte->securityQuals;
1152  parent_rte->securityQuals = NIL;
1153 
1154  /*
1155  * The rowMarks list might contain references to subquery RTEs, so
1156  * make a copy that we can apply ChangeVarNodes to. (Fortunately, the
1157  * executor doesn't need to see the modified copies --- we can just
1158  * pass it the original rowMarks list.)
1159  */
1160  subroot->rowMarks = (List *) copyObject(root->rowMarks);
1161 
1162  /*
1163  * The append_rel_list likewise might contain references to subquery
1164  * RTEs (if any subqueries were flattenable UNION ALLs). So prepare
1165  * to apply ChangeVarNodes to that, too. As explained above, we only
1166  * want to copy items that actually contain such references; the rest
1167  * can just get linked into the subroot's append_rel_list.
1168  *
1169  * If we know there are no such references, we can just use the outer
1170  * append_rel_list unmodified.
1171  */
1172  if (modifiableARIindexes != NULL)
1173  {
1174  ListCell *lc2;
1175 
1176  subroot->append_rel_list = NIL;
1177  foreach(lc2, root->append_rel_list)
1178  {
1179  AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1180 
1181  if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1182  appinfo2 = (AppendRelInfo *) copyObject(appinfo2);
1183 
1184  subroot->append_rel_list = lappend(subroot->append_rel_list,
1185  appinfo2);
1186  }
1187  }
1188 
1189  /*
1190  * Add placeholders to the child Query's rangetable list to fill the
1191  * RT indexes already reserved for subqueries in previous children.
1192  * These won't be referenced, so there's no need to make them very
1193  * valid-looking.
1194  */
1195  while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1196  subroot->parse->rtable = lappend(subroot->parse->rtable,
1198 
1199  /*
1200  * If this isn't the first child Query, generate duplicates of all
1201  * subquery RTEs, and adjust Var numbering to reference the
1202  * duplicates. To simplify the loop logic, we scan the original rtable
1203  * not the copy just made by adjust_appendrel_attrs; that should be OK
1204  * since subquery RTEs couldn't contain any references to the target
1205  * rel.
1206  */
1207  if (final_rtable != NIL && subqueryRTindexes != NULL)
1208  {
1209  ListCell *lr;
1210 
1211  rti = 1;
1212  foreach(lr, parse->rtable)
1213  {
1214  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);
1215 
1216  if (bms_is_member(rti, subqueryRTindexes))
1217  {
1218  Index newrti;
1219 
1220  /*
1221  * The RTE can't contain any references to its own RT
1222  * index, except in its securityQuals, so we can save a
1223  * few cycles by applying ChangeVarNodes to the rest of
1224  * the rangetable before we append the RTE to it.
1225  */
1226  newrti = list_length(subroot->parse->rtable) + 1;
1227  ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1228  ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1229  /* Skip processing unchanging parts of append_rel_list */
1230  if (modifiableARIindexes != NULL)
1231  {
1232  ListCell *lc2;
1233 
1234  foreach(lc2, subroot->append_rel_list)
1235  {
1236  AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1237 
1238  if (bms_is_member(appinfo2->child_relid,
1239  modifiableARIindexes))
1240  ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1241  }
1242  }
1243  rte = copyObject(rte);
1244  ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1245  subroot->parse->rtable = lappend(subroot->parse->rtable,
1246  rte);
1247  }
1248  rti++;
1249  }
1250  }
1251 
1252  /* There shouldn't be any OJ info to translate, as yet */
1253  Assert(subroot->join_info_list == NIL);
1254  /* and we haven't created PlaceHolderInfos, either */
1255  Assert(subroot->placeholder_list == NIL);
1256  /* hack to mark target relation as an inheritance partition */
1257  subroot->hasInheritedTarget = true;
1258 
1259  /* Generate Path(s) for accessing this result relation */
1260  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1261 
1262  /*
1263  * Set the nomimal target relation of the ModifyTable node if not
1264  * already done. We use the inheritance parent RTE as the nominal
1265  * target relation if it's a partitioned table (see just above this
1266  * loop). In the non-partitioned parent case, we'll use the first
1267  * child relation (even if it's excluded) as the nominal target
1268  * relation. Because of the way expand_inherited_rtentry works, the
1269  * latter should be the RTE representing the parent table in its role
1270  * as a simple member of the inheritance set.
1271  *
1272  * It would be logically cleaner to *always* use the inheritance
1273  * parent RTE as the nominal relation; but that RTE is not otherwise
1274  * referenced in the plan in the non-partitioned inheritance case.
1275  * Instead the duplicate child RTE created by expand_inherited_rtentry
1276  * is used elsewhere in the plan, so using the original parent RTE
1277  * would give rise to confusing use of multiple aliases in EXPLAIN
1278  * output for what the user will think is the "same" table. OTOH,
1279  * it's not a problem in the partitioned inheritance case, because
1280  * the duplicate child RTE added for the parent does not appear
1281  * anywhere else in the plan tree.
1282  */
1283  if (nominalRelation < 0)
1284  nominalRelation = appinfo->child_relid;
1285 
1286  /*
1287  * Select cheapest path in case there's more than one. We always run
1288  * modification queries to conclusion, so we care only for the
1289  * cheapest-total path.
1290  */
1291  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1292  set_cheapest(sub_final_rel);
1293  subpath = sub_final_rel->cheapest_total_path;
1294 
1295  /*
1296  * If this child rel was excluded by constraint exclusion, exclude it
1297  * from the result plan.
1298  */
1299  if (IS_DUMMY_PATH(subpath))
1300  continue;
1301 
1302  /*
1303  * If this is the first non-excluded child, its post-planning rtable
1304  * becomes the initial contents of final_rtable; otherwise, append
1305  * just its modified subquery RTEs to final_rtable.
1306  */
1307  if (final_rtable == NIL)
1308  final_rtable = subroot->parse->rtable;
1309  else
1310  final_rtable = list_concat(final_rtable,
1311  list_copy_tail(subroot->parse->rtable,
1312  list_length(final_rtable)));
1313 
1314  /*
1315  * We need to collect all the RelOptInfos from all child plans into
1316  * the main PlannerInfo, since setrefs.c will need them. We use the
1317  * last child's simple_rel_array (previous ones are too short), so we
1318  * have to propagate forward the RelOptInfos that were already built
1319  * in previous children.
1320  */
1321  Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1322  for (rti = 1; rti < save_rel_array_size; rti++)
1323  {
1324  RelOptInfo *brel = save_rel_array[rti];
1325 
1326  if (brel)
1327  subroot->simple_rel_array[rti] = brel;
1328  }
1329  save_rel_array_size = subroot->simple_rel_array_size;
1330  save_rel_array = subroot->simple_rel_array;
1331 
1332  /* Make sure any initplans from this rel get into the outer list */
1333  root->init_plans = subroot->init_plans;
1334 
1335  /* Build list of sub-paths */
1336  subpaths = lappend(subpaths, subpath);
1337 
1338  /* Build list of modified subroots, too */
1339  subroots = lappend(subroots, subroot);
1340 
1341  /* Build list of target-relation RT indexes */
1342  resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1343 
1344  /* Build lists of per-relation WCO and RETURNING targetlists */
1345  if (parse->withCheckOptions)
1346  withCheckOptionLists = lappend(withCheckOptionLists,
1347  subroot->parse->withCheckOptions);
1348  if (parse->returningList)
1349  returningLists = lappend(returningLists,
1350  subroot->parse->returningList);
1351 
1352  Assert(!parse->onConflict);
1353  }
1354 
1355  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1356  {
1357  partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
1358  /* The root partitioned table is included as a child rel */
1359  Assert(list_length(partitioned_rels) >= 1);
1360  }
1361 
1362  /* Result path must go into outer query's FINAL upperrel */
1363  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1364 
1365  /*
1366  * We don't currently worry about setting final_rel's consider_parallel
1367  * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1368  * to get control here.
1369  */
1370 
1371  /*
1372  * If we managed to exclude every child rel, return a dummy plan; it
1373  * doesn't even need a ModifyTable node.
1374  */
1375  if (subpaths == NIL)
1376  {
1377  set_dummy_rel_pathlist(final_rel);
1378  return;
1379  }
1380 
1381  /*
1382  * Put back the final adjusted rtable into the master copy of the Query.
1383  * (We mustn't do this if we found no non-excluded children.)
1384  */
1385  parse->rtable = final_rtable;
1386  root->simple_rel_array_size = save_rel_array_size;
1387  root->simple_rel_array = save_rel_array;
1388  /* Must reconstruct master's simple_rte_array, too */
1389  root->simple_rte_array = (RangeTblEntry **)
1390  palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1391  rti = 1;
1392  foreach(lc, final_rtable)
1393  {
1394  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1395 
1396  root->simple_rte_array[rti++] = rte;
1397  }
1398 
1399  /*
1400  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1401  * have dealt with fetching non-locked marked rows, else we need to have
1402  * ModifyTable do that.
1403  */
1404  if (parse->rowMarks)
1405  rowMarks = NIL;
1406  else
1407  rowMarks = root->rowMarks;
1408 
1409  /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1410  add_path(final_rel, (Path *)
1411  create_modifytable_path(root, final_rel,
1412  parse->commandType,
1413  parse->canSetTag,
1414  nominalRelation,
1415  partitioned_rels,
1416  resultRelations,
1417  subpaths,
1418  subroots,
1419  withCheckOptionLists,
1420  returningLists,
1421  rowMarks,
1422  NULL,
1423  SS_assign_special_param(root)));
1424 }
1425 
1426 /*--------------------
1427  * grouping_planner
1428  * Perform planning steps related to grouping, aggregation, etc.
1429  *
1430  * This function adds all required top-level processing to the scan/join
1431  * Path(s) produced by query_planner.
1432  *
1433  * If inheritance_update is true, we're being called from inheritance_planner
1434  * and should not include a ModifyTable step in the resulting Path(s).
1435  * (inheritance_planner will create a single ModifyTable node covering all the
1436  * target tables.)
1437  *
1438  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1439  * tuple_fraction is interpreted as follows:
1440  * 0: expect all tuples to be retrieved (normal case)
1441  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1442  * from the plan to be retrieved
1443  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1444  * expected to be retrieved (ie, a LIMIT specification)
1445  *
1446  * Returns nothing; the useful output is in the Paths we attach to the
1447  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1448  * root->processed_tlist contains the final processed targetlist.
1449  *
1450  * Note that we have not done set_cheapest() on the final rel; it's convenient
1451  * to leave this to the caller.
1452  *--------------------
1453  */
1454 static void
1455 grouping_planner(PlannerInfo *root, bool inheritance_update,
1456  double tuple_fraction)
1457 {
1458  Query *parse = root->parse;
1459  List *tlist = parse->targetList;
1460  int64 offset_est = 0;
1461  int64 count_est = 0;
1462  double limit_tuples = -1.0;
1463  bool have_postponed_srfs = false;
1464  PathTarget *final_target;
1465  List *final_targets;
1466  List *final_targets_contain_srfs;
1467  RelOptInfo *current_rel;
1468  RelOptInfo *final_rel;
1469  ListCell *lc;
1470 
1471  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1472  if (parse->limitCount || parse->limitOffset)
1473  {
1474  tuple_fraction = preprocess_limit(root, tuple_fraction,
1475  &offset_est, &count_est);
1476 
1477  /*
1478  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1479  * estimate the effects of using a bounded sort.
1480  */
1481  if (count_est > 0 && offset_est >= 0)
1482  limit_tuples = (double) count_est + (double) offset_est;
1483  }
1484 
1485  /* Make tuple_fraction accessible to lower-level routines */
1486  root->tuple_fraction = tuple_fraction;
1487 
1488  if (parse->setOperations)
1489  {
1490  /*
1491  * If there's a top-level ORDER BY, assume we have to fetch all the
1492  * tuples. This might be too simplistic given all the hackery below
1493  * to possibly avoid the sort; but the odds of accurate estimates here
1494  * are pretty low anyway. XXX try to get rid of this in favor of
1495  * letting plan_set_operations generate both fast-start and
1496  * cheapest-total paths.
1497  */
1498  if (parse->sortClause)
1499  root->tuple_fraction = 0.0;
1500 
1501  /*
1502  * Construct Paths for set operations. The results will not need any
1503  * work except perhaps a top-level sort and/or LIMIT. Note that any
1504  * special work for recursive unions is the responsibility of
1505  * plan_set_operations.
1506  */
1507  current_rel = plan_set_operations(root);
1508 
1509  /*
1510  * We should not need to call preprocess_targetlist, since we must be
1511  * in a SELECT query node. Instead, use the targetlist returned by
1512  * plan_set_operations (since this tells whether it returned any
1513  * resjunk columns!), and transfer any sort key information from the
1514  * original tlist.
1515  */
1516  Assert(parse->commandType == CMD_SELECT);
1517 
1518  tlist = root->processed_tlist; /* from plan_set_operations */
1519 
1520  /* for safety, copy processed_tlist instead of modifying in-place */
1521  tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1522 
1523  /* Save aside the final decorated tlist */
1524  root->processed_tlist = tlist;
1525 
1526  /* Also extract the PathTarget form of the setop result tlist */
1527  final_target = current_rel->cheapest_total_path->pathtarget;
1528 
1529  /* The setop result tlist couldn't contain any SRFs */
1530  Assert(!parse->hasTargetSRFs);
1531  final_targets = final_targets_contain_srfs = NIL;
1532 
1533  /*
1534  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1535  * checked already, but let's make sure).
1536  */
1537  if (parse->rowMarks)
1538  ereport(ERROR,
1539  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1540  /*------
1541  translator: %s is a SQL row locking clause such as FOR UPDATE */
1542  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1544  linitial(parse->rowMarks))->strength))));
1545 
1546  /*
1547  * Calculate pathkeys that represent result ordering requirements
1548  */
1549  Assert(parse->distinctClause == NIL);
1551  parse->sortClause,
1552  tlist);
1553  }
1554  else
1555  {
1556  /* No set operations, do regular planning */
1557  PathTarget *sort_input_target;
1558  List *sort_input_targets;
1559  List *sort_input_targets_contain_srfs;
1560  PathTarget *grouping_target;
1561  List *grouping_targets;
1562  List *grouping_targets_contain_srfs;
1563  PathTarget *scanjoin_target;
1564  List *scanjoin_targets;
1565  List *scanjoin_targets_contain_srfs;
1566  bool have_grouping;
1567  AggClauseCosts agg_costs;
1568  WindowFuncLists *wflists = NULL;
1569  List *activeWindows = NIL;
1570  grouping_sets_data *gset_data = NULL;
1571  standard_qp_extra qp_extra;
1572 
1573  /* A recursive query should always have setOperations */
1574  Assert(!root->hasRecursion);
1575 
1576  /* Preprocess grouping sets and GROUP BY clause, if any */
1577  if (parse->groupingSets)
1578  {
1579  gset_data = preprocess_grouping_sets(root);
1580  }
1581  else
1582  {
1583  /* Preprocess regular GROUP BY clause, if any */
1584  if (parse->groupClause)
1585  parse->groupClause = preprocess_groupclause(root, NIL);
1586  }
1587 
1588  /* Preprocess targetlist */
1589  tlist = preprocess_targetlist(root, tlist);
1590 
1591  if (parse->onConflict)
1592  parse->onConflict->onConflictSet =
1594  parse->resultRelation,
1595  parse->rtable);
1596 
1597  /*
1598  * We are now done hacking up the query's targetlist. Most of the
1599  * remaining planning work will be done with the PathTarget
1600  * representation of tlists, but save aside the full representation so
1601  * that we can transfer its decoration (resnames etc) to the topmost
1602  * tlist of the finished Plan.
1603  */
1604  root->processed_tlist = tlist;
1605 
1606  /*
1607  * Collect statistics about aggregates for estimating costs, and mark
1608  * all the aggregates with resolved aggtranstypes. We must do this
1609  * before slicing and dicing the tlist into various pathtargets, else
1610  * some copies of the Aggref nodes might escape being marked with the
1611  * correct transtypes.
1612  *
1613  * Note: currently, we do not detect duplicate aggregates here. This
1614  * may result in somewhat-overestimated cost, which is fine for our
1615  * purposes since all Paths will get charged the same. But at some
1616  * point we might wish to do that detection in the planner, rather
1617  * than during executor startup.
1618  */
1619  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1620  if (parse->hasAggs)
1621  {
1622  get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1623  &agg_costs);
1625  &agg_costs);
1626  }
1627 
1628  /*
1629  * Locate any window functions in the tlist. (We don't need to look
1630  * anywhere else, since expressions used in ORDER BY will be in there
1631  * too.) Note that they could all have been eliminated by constant
1632  * folding, in which case we don't need to do any more work.
1633  */
1634  if (parse->hasWindowFuncs)
1635  {
1636  wflists = find_window_functions((Node *) tlist,
1637  list_length(parse->windowClause));
1638  if (wflists->numWindowFuncs > 0)
1639  activeWindows = select_active_windows(root, wflists);
1640  else
1641  parse->hasWindowFuncs = false;
1642  }
1643 
1644  /*
1645  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1646  * adding logic between here and the query_planner() call. Anything
1647  * that is needed in MIN/MAX-optimizable cases will have to be
1648  * duplicated in planagg.c.
1649  */
1650  if (parse->hasAggs)
1651  preprocess_minmax_aggregates(root, tlist);
1652 
1653  /*
1654  * Figure out whether there's a hard limit on the number of rows that
1655  * query_planner's result subplan needs to return. Even if we know a
1656  * hard limit overall, it doesn't apply if the query has any
1657  * grouping/aggregation operations, or SRFs in the tlist.
1658  */
1659  if (parse->groupClause ||
1660  parse->groupingSets ||
1661  parse->distinctClause ||
1662  parse->hasAggs ||
1663  parse->hasWindowFuncs ||
1664  parse->hasTargetSRFs ||
1665  root->hasHavingQual)
1666  root->limit_tuples = -1.0;
1667  else
1668  root->limit_tuples = limit_tuples;
1669 
1670  /* Set up data needed by standard_qp_callback */
1671  qp_extra.tlist = tlist;
1672  qp_extra.activeWindows = activeWindows;
1673  qp_extra.groupClause = (gset_data
1674  ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
1675  : parse->groupClause);
1676 
1677  /*
1678  * Generate the best unsorted and presorted paths for the scan/join
1679  * portion of this Query, ie the processing represented by the
1680  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1681  * We also generate (in standard_qp_callback) pathkey representations
1682  * of the query's sort clause, distinct clause, etc.
1683  */
1684  current_rel = query_planner(root, tlist,
1685  standard_qp_callback, &qp_extra);
1686 
1687  /*
1688  * Convert the query's result tlist into PathTarget format.
1689  *
1690  * Note: it's desirable to not do this till after query_planner(),
1691  * because the target width estimates can use per-Var width numbers
1692  * that were obtained within query_planner().
1693  */
1694  final_target = create_pathtarget(root, tlist);
1695 
1696  /*
1697  * If ORDER BY was given, consider whether we should use a post-sort
1698  * projection, and compute the adjusted target for preceding steps if
1699  * so.
1700  */
1701  if (parse->sortClause)
1702  sort_input_target = make_sort_input_target(root,
1703  final_target,
1704  &have_postponed_srfs);
1705  else
1706  sort_input_target = final_target;
1707 
1708  /*
1709  * If we have window functions to deal with, the output from any
1710  * grouping step needs to be what the window functions want;
1711  * otherwise, it should be sort_input_target.
1712  */
1713  if (activeWindows)
1714  grouping_target = make_window_input_target(root,
1715  final_target,
1716  activeWindows);
1717  else
1718  grouping_target = sort_input_target;
1719 
1720  /*
1721  * If we have grouping or aggregation to do, the topmost scan/join
1722  * plan node must emit what the grouping step wants; otherwise, it
1723  * should emit grouping_target.
1724  */
1725  have_grouping = (parse->groupClause || parse->groupingSets ||
1726  parse->hasAggs || root->hasHavingQual);
1727  if (have_grouping)
1728  scanjoin_target = make_group_input_target(root, final_target);
1729  else
1730  scanjoin_target = grouping_target;
1731 
1732  /*
1733  * If there are any SRFs in the targetlist, we must separate each of
1734  * these PathTargets into SRF-computing and SRF-free targets. Replace
1735  * each of the named targets with a SRF-free version, and remember the
1736  * list of additional projection steps we need to add afterwards.
1737  */
1738  if (parse->hasTargetSRFs)
1739  {
1740  /* final_target doesn't recompute any SRFs in sort_input_target */
1741  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1742  &final_targets,
1743  &final_targets_contain_srfs);
1744  final_target = (PathTarget *) linitial(final_targets);
1745  Assert(!linitial_int(final_targets_contain_srfs));
1746  /* likewise for sort_input_target vs. grouping_target */
1747  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1748  &sort_input_targets,
1749  &sort_input_targets_contain_srfs);
1750  sort_input_target = (PathTarget *) linitial(sort_input_targets);
1751  Assert(!linitial_int(sort_input_targets_contain_srfs));
1752  /* likewise for grouping_target vs. scanjoin_target */
1753  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1754  &grouping_targets,
1755  &grouping_targets_contain_srfs);
1756  grouping_target = (PathTarget *) linitial(grouping_targets);
1757  Assert(!linitial_int(grouping_targets_contain_srfs));
1758  /* scanjoin_target will not have any SRFs precomputed for it */
1759  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1760  &scanjoin_targets,
1761  &scanjoin_targets_contain_srfs);
1762  scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
1763  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1764  }
1765  else
1766  {
1767  /* initialize lists, just to keep compiler quiet */
1768  final_targets = final_targets_contain_srfs = NIL;
1769  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1770  grouping_targets = grouping_targets_contain_srfs = NIL;
1771  scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
1772  }
1773 
1774  /*
1775  * Forcibly apply SRF-free scan/join target to all the Paths for the
1776  * scan/join rel.
1777  *
1778  * In principle we should re-run set_cheapest() here to identify the
1779  * cheapest path, but it seems unlikely that adding the same tlist
1780  * eval costs to all the paths would change that, so we don't bother.
1781  * Instead, just assume that the cheapest-startup and cheapest-total
1782  * paths remain so. (There should be no parameterized paths anymore,
1783  * so we needn't worry about updating cheapest_parameterized_paths.)
1784  */
1785  foreach(lc, current_rel->pathlist)
1786  {
1787  Path *subpath = (Path *) lfirst(lc);
1788  Path *path;
1789 
1790  Assert(subpath->param_info == NULL);
1791  path = apply_projection_to_path(root, current_rel,
1792  subpath, scanjoin_target);
1793  /* If we had to add a Result, path is different from subpath */
1794  if (path != subpath)
1795  {
1796  lfirst(lc) = path;
1797  if (subpath == current_rel->cheapest_startup_path)
1798  current_rel->cheapest_startup_path = path;
1799  if (subpath == current_rel->cheapest_total_path)
1800  current_rel->cheapest_total_path = path;
1801  }
1802  }
1803 
1804  /*
1805  * Upper planning steps which make use of the top scan/join rel's
1806  * partial pathlist will expect partial paths for that rel to produce
1807  * the same output as complete paths ... and we just changed the
1808  * output for the complete paths, so we'll need to do the same thing
1809  * for partial paths. But only parallel-safe expressions can be
1810  * computed by partial paths.
1811  */
1812  if (current_rel->partial_pathlist &&
1813  is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1814  {
1815  /* Apply the scan/join target to each partial path */
1816  foreach(lc, current_rel->partial_pathlist)
1817  {
1818  Path *subpath = (Path *) lfirst(lc);
1819  Path *newpath;
1820 
1821  /* Shouldn't have any parameterized paths anymore */
1822  Assert(subpath->param_info == NULL);
1823 
1824  /*
1825  * Don't use apply_projection_to_path() here, because there
1826  * could be other pointers to these paths, and therefore we
1827  * mustn't modify them in place.
1828  */
1829  newpath = (Path *) create_projection_path(root,
1830  current_rel,
1831  subpath,
1832  scanjoin_target);
1833  lfirst(lc) = newpath;
1834  }
1835  }
1836  else
1837  {
1838  /*
1839  * In the unfortunate event that scanjoin_target is not
1840  * parallel-safe, we can't apply it to the partial paths; in that
1841  * case, we'll need to forget about the partial paths, which
1842  * aren't valid input for upper planning steps.
1843  */
1844  current_rel->partial_pathlist = NIL;
1845  }
1846 
1847  /* Now fix things up if scan/join target contains SRFs */
1848  if (parse->hasTargetSRFs)
1849  adjust_paths_for_srfs(root, current_rel,
1850  scanjoin_targets,
1851  scanjoin_targets_contain_srfs);
1852 
1853  /*
1854  * Save the various upper-rel PathTargets we just computed into
1855  * root->upper_targets[]. The core code doesn't use this, but it
1856  * provides a convenient place for extensions to get at the info. For
1857  * consistency, we save all the intermediate targets, even though some
1858  * of the corresponding upperrels might not be needed for this query.
1859  */
1860  root->upper_targets[UPPERREL_FINAL] = final_target;
1861  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1862  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1863 
1864  /*
1865  * If we have grouping and/or aggregation, consider ways to implement
1866  * that. We build a new upperrel representing the output of this
1867  * phase.
1868  */
1869  if (have_grouping)
1870  {
1871  current_rel = create_grouping_paths(root,
1872  current_rel,
1873  grouping_target,
1874  &agg_costs,
1875  gset_data);
1876  /* Fix things up if grouping_target contains SRFs */
1877  if (parse->hasTargetSRFs)
1878  adjust_paths_for_srfs(root, current_rel,
1879  grouping_targets,
1880  grouping_targets_contain_srfs);
1881  }
1882 
1883  /*
1884  * If we have window functions, consider ways to implement those. We
1885  * build a new upperrel representing the output of this phase.
1886  */
1887  if (activeWindows)
1888  {
1889  current_rel = create_window_paths(root,
1890  current_rel,
1891  grouping_target,
1892  sort_input_target,
1893  tlist,
1894  wflists,
1895  activeWindows);
1896  /* Fix things up if sort_input_target contains SRFs */
1897  if (parse->hasTargetSRFs)
1898  adjust_paths_for_srfs(root, current_rel,
1899  sort_input_targets,
1900  sort_input_targets_contain_srfs);
1901  }
1902 
1903  /*
1904  * If there is a DISTINCT clause, consider ways to implement that. We
1905  * build a new upperrel representing the output of this phase.
1906  */
1907  if (parse->distinctClause)
1908  {
1909  current_rel = create_distinct_paths(root,
1910  current_rel);
1911  }
1912  } /* end of if (setOperations) */
1913 
1914  /*
1915  * If ORDER BY was given, consider ways to implement that, and generate a
1916  * new upperrel containing only paths that emit the correct ordering and
1917  * project the correct final_target. We can apply the original
1918  * limit_tuples limit in sort costing here, but only if there are no
1919  * postponed SRFs.
1920  */
1921  if (parse->sortClause)
1922  {
1923  current_rel = create_ordered_paths(root,
1924  current_rel,
1925  final_target,
1926  have_postponed_srfs ? -1.0 :
1927  limit_tuples);
1928  /* Fix things up if final_target contains SRFs */
1929  if (parse->hasTargetSRFs)
1930  adjust_paths_for_srfs(root, current_rel,
1931  final_targets,
1932  final_targets_contain_srfs);
1933  }
1934 
1935  /*
1936  * Now we are prepared to build the final-output upperrel.
1937  */
1938  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1939 
1940  /*
1941  * If the input rel is marked consider_parallel and there's nothing that's
1942  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1943  * consider_parallel as well. Note that if the query has rowMarks or is
1944  * not a SELECT, consider_parallel will be false for every relation in the
1945  * query.
1946  */
1947  if (current_rel->consider_parallel &&
1948  is_parallel_safe(root, parse->limitOffset) &&
1949  is_parallel_safe(root, parse->limitCount))
1950  final_rel->consider_parallel = true;
1951 
1952  /*
1953  * If the current_rel belongs to a single FDW, so does the final_rel.
1954  */
1955  final_rel->serverid = current_rel->serverid;
1956  final_rel->userid = current_rel->userid;
1957  final_rel->useridiscurrent = current_rel->useridiscurrent;
1958  final_rel->fdwroutine = current_rel->fdwroutine;
1959 
1960  /*
1961  * Generate paths for the final_rel. Insert all surviving paths, with
1962  * LockRows, Limit, and/or ModifyTable steps added if needed.
1963  */
1964  foreach(lc, current_rel->pathlist)
1965  {
1966  Path *path = (Path *) lfirst(lc);
1967 
1968  /*
1969  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1970  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1971  * here. If there are only non-locking rowmarks, they should be
1972  * handled by the ModifyTable node instead. However, root->rowMarks
1973  * is what goes into the LockRows node.)
1974  */
1975  if (parse->rowMarks)
1976  {
1977  path = (Path *) create_lockrows_path(root, final_rel, path,
1978  root->rowMarks,
1979  SS_assign_special_param(root));
1980  }
1981 
1982  /*
1983  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1984  */
1985  if (limit_needed(parse))
1986  {
1987  path = (Path *) create_limit_path(root, final_rel, path,
1988  parse->limitOffset,
1989  parse->limitCount,
1990  offset_est, count_est);
1991  }
1992 
1993  /*
1994  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
1995  * inheritance_planner, add the ModifyTable node.
1996  */
1997  if (parse->commandType != CMD_SELECT && !inheritance_update)
1998  {
1999  List *withCheckOptionLists;
2000  List *returningLists;
2001  List *rowMarks;
2002 
2003  /*
2004  * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2005  * needed.
2006  */
2007  if (parse->withCheckOptions)
2008  withCheckOptionLists = list_make1(parse->withCheckOptions);
2009  else
2010  withCheckOptionLists = NIL;
2011 
2012  if (parse->returningList)
2013  returningLists = list_make1(parse->returningList);
2014  else
2015  returningLists = NIL;
2016 
2017  /*
2018  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2019  * will have dealt with fetching non-locked marked rows, else we
2020  * need to have ModifyTable do that.
2021  */
2022  if (parse->rowMarks)
2023  rowMarks = NIL;
2024  else
2025  rowMarks = root->rowMarks;
2026 
2027  path = (Path *)
2028  create_modifytable_path(root, final_rel,
2029  parse->commandType,
2030  parse->canSetTag,
2031  parse->resultRelation,
2032  NIL,
2034  list_make1(path),
2035  list_make1(root),
2036  withCheckOptionLists,
2037  returningLists,
2038  rowMarks,
2039  parse->onConflict,
2040  SS_assign_special_param(root));
2041  }
2042 
2043  /* And shove it into final_rel */
2044  add_path(final_rel, path);
2045  }
2046 
2047  /*
2048  * If there is an FDW that's responsible for all baserels of the query,
2049  * let it consider adding ForeignPaths.
2050  */
2051  if (final_rel->fdwroutine &&
2052  final_rel->fdwroutine->GetForeignUpperPaths)
2054  current_rel, final_rel);
2055 
2056  /* Let extensions possibly add some more paths */
2058  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2059  current_rel, final_rel);
2060 
2061  /* Note: currently, we leave it to callers to do set_cheapest() */
2062 }
2063 
2064 /*
2065  * Do preprocessing for groupingSets clause and related data. This handles the
2066  * preliminary steps of expanding the grouping sets, organizing them into lists
2067  * of rollups, and preparing annotations which will later be filled in with
2068  * size estimates.
      *
      * Side effects: parse->groupingSets is replaced by its expanded form.
      *
      * Result: the workspace "gd". Its rollups list receives one RollupData per
      * list returned by extract_rollup_sets(); unsortable_sets collects the
      * grouping sets that overlap gd->unsortable_refs; any_hashable is set when
      * a rollup is marked hashable or when unsortable_sets is non-empty.
      *
      * Errors: reports ERRCODE_FEATURE_NOT_SUPPORTED when a set that overlaps
      * unsortable_refs also overlaps unhashable_refs.
      *
      * NOTE(review): this listing skips numbered lines 2071, 2078, 2100, 2103,
      * 2126, 2204 and 2232, so the function declarator, the allocation of "gd"
      * and a few statements are not visible here. Confirm against the upstream
      * file before relying on this copy.
2069  */
2070 static grouping_sets_data *
2072 {
2073  Query *parse = root->parse;
2074  List *sets;
2075  int maxref = 0;
2076  ListCell *lc;
2077  ListCell *lc_set;
2079 
2080  parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2081 
2082  gd->any_hashable = false;
2083  gd->unhashable_refs = NULL;
2084  gd->unsortable_refs = NULL;
2085  gd->unsortable_sets = NIL;
2086 
      /*
       * Scan GROUP BY once to find the largest tleSortGroupRef (it sizes the
       * remap array allocated below) and to classify each column.
       */
2087  if (parse->groupClause)
2088  {
2089  ListCell *lc;
2090 
2091  foreach(lc, parse->groupClause)
2092  {
2093  SortGroupClause *gc = lfirst(lc);
2094  Index ref = gc->tleSortGroupRef;
2095 
2096  if (ref > maxref)
2097  maxref = ref;
2098 
2099  if (!gc->hashable)
      /* NOTE(review): the guarded statement (listing line 2100) is missing; presumably it adds ref to gd->unhashable_refs -- confirm */
2101 
2102  if (!OidIsValid(gc->sortop))
      /* NOTE(review): the guarded statement (listing line 2103) is missing; presumably it adds ref to gd->unsortable_refs -- confirm */
2104  }
2105  }
2106 
2107  /* Allocate workspace array for remapping */
      /* One slot per possible tleSortGroupRef value, hence maxref + 1 entries */
2108  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2109 
2110  /*
2111  * If we have any unsortable sets, we must extract them before trying to
2112  * prepare rollups. Unsortable sets don't go through
2113  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2114  * here.
2115  */
2116  if (!bms_is_empty(gd->unsortable_refs))
2117  {
2118  List *sortable_sets = NIL;
2119 
2120  foreach(lc, parse->groupingSets)
2121  {
2122  List *gset = lfirst(lc);
2123 
2124  if (bms_overlap_list(gd->unsortable_refs, gset))
2125  {
      /* NOTE(review): the declaration of "gs" (listing line 2126) is missing; given the comment above it is presumably a new GroupingSetData node -- confirm */
2127 
2128  gs->set = gset;
2129  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2130 
2131  /*
2132  * We must enforce here that an unsortable set is hashable;
2133  * later code assumes this. Parse analysis only checks that
2134  * every individual column is either hashable or sortable.
2135  *
2136  * Note that passing this test doesn't guarantee we can
2137  * generate a plan; there might be other showstoppers.
2138  */
2139  if (bms_overlap_list(gd->unhashable_refs, gset))
2140  ereport(ERROR,
2141  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2142  errmsg("could not implement GROUP BY"),
2143  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2144  }
2145  else
2146  sortable_sets = lappend(sortable_sets, gset);
2147  }
2148 
      /* Only the sets that can be sorted are organized into rollups */
2149  if (sortable_sets)
2150  sets = extract_rollup_sets(sortable_sets);
2151  else
2152  sets = NIL;
2153  }
2154  else
2155  sets = extract_rollup_sets(parse->groupingSets);
2156 
      /* Build one RollupData for each list of sets produced above */
2157  foreach(lc_set, sets)
2158  {
2159  List *current_sets = (List *) lfirst(lc_set);
2160  RollupData *rollup = makeNode(RollupData);
2161  GroupingSetData *gs;
2162 
2163  /*
2164  * Reorder the current list of grouping sets into correct prefix
2165  * order. If only one aggregation pass is needed, try to make the
2166  * list match the ORDER BY clause; if more than one pass is needed, we
2167  * don't bother with that.
2168  *
2169  * Note that this reorders the sets from smallest-member-first to
2170  * largest-member-first, and applies the GroupingSetData annotations,
2171  * though the data will be filled in later.
2172  */
2173  current_sets = reorder_grouping_sets(current_sets,
2174  (list_length(sets) == 1
2175  ? parse->sortClause
2176  : NIL));
2177 
2178  /*
2179  * Get the initial (and therefore largest) grouping set.
2180  */
2181  gs = linitial(current_sets);
2182 
2183  /*
2184  * Order the groupClause appropriately. If the first grouping set is
2185  * empty, then the groupClause must also be empty; otherwise we have
2186  * to force the groupClause to match that grouping set's order.
2187  *
2188  * (The first grouping set can be empty even though parse->groupClause
2189  * is not empty only if all non-empty grouping sets are unsortable.
2190  * The groupClauses for hashed grouping sets are built later on.)
2191  */
2192  if (gs->set)
2193  rollup->groupClause = preprocess_groupclause(root, gs->set);
2194  else
2195  rollup->groupClause = NIL;
2196 
2197  /*
2198  * Is it hashable? We pretend empty sets are hashable even though we
2199  * actually force them not to be hashed later. But don't bother if
2200  * there's nothing but empty sets (since in that case we can't hash
2201  * anything).
2202  */
2203  if (gs->set &&
      /* NOTE(review): the second half of this condition (listing line 2204) is missing; presumably a test against gd->unhashable_refs -- confirm */
2205  {
2206  rollup->hashable = true;
2207  gd->any_hashable = true;
2208  }
2209 
2210  /*
2211  * Now that we've pinned down an order for the groupClause for this
2212  * list of grouping sets, we need to remap the entries in the grouping
2213  * sets from sortgrouprefs to plain indices (0-based) into the
2214  * groupClause for this collection of grouping sets. We keep the
2215  * original form for later use, though.
2216  */
2217  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2218  current_sets,
2219  gd->tleref_to_colnum_map);
2220  rollup->gsets_data = current_sets;
2221 
2222  gd->rollups = lappend(gd->rollups, rollup);
2223  }
2224 
2225  if (gd->unsortable_sets)
2226  {
2227  /*
2228  * We have not yet pinned down a groupclause for this, but we will
2229  * need index-based lists for estimation purposes. Construct
2230  * hash_sets_idx based on the entire original groupclause for now.
2231  */
      /* NOTE(review): the start of this statement (listing line 2232) is missing; per the comment above it presumably assigns gd->hash_sets_idx from a remap_to_groupclause_idx() call on parse->groupClause -- confirm */
2233  gd->unsortable_sets,
2234  gd->tleref_to_colnum_map);
2235  gd->any_hashable = true;
2236  }
2237 
2238  return gd;
2239 }
2240 
2241 /*
2242  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2243  * (without annotation) mapped to indexes into the given groupclause.
      *
      * gsets is a list of GroupingSetData; each one's "set" member is read as
      * an integer list of sortgrouprefs.
      *
      * tleref_to_colnum_map is caller-supplied scratch space indexed by
      * tleSortGroupRef. It is overwritten here with the 0-based position of
      * each groupClause entry. No bounds check is made, so it must be large
      * enough for every ref that appears.
      *
      * The result is a list of integer lists, one per entry of gsets and in the
      * same order.
      *
      * NOTE(review): map slots for refs that do not occur in groupClause are not
      * reset here, so callers are presumably expected to pass only sets whose
      * refs all appear in groupClause -- confirm.
      *
      * NOTE(review): listing line 2246 (the declarator carrying the function
      * name and its first parameter, used below as "groupClause") is missing
      * from this copy.
2244  */
2245 static List *
2247  List *gsets,
2248  int *tleref_to_colnum_map)
2249 {
2250  int ref = 0;
2251  List *result = NIL;
2252  ListCell *lc;
2253 
      /* Pass 1: record each sortgroupref's position within groupClause */
2254  foreach(lc, groupClause)
2255  {
2256  SortGroupClause *gc = lfirst(lc);
2257 
2258  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2259  }
2260 
      /* Pass 2: translate every set from sortgrouprefs to those positions */
2261  foreach(lc, gsets)
2262  {
2263  List *set = NIL;
2264  ListCell *lc2;
2265  GroupingSetData *gs = lfirst(lc);
2266 
2267  foreach(lc2, gs->set)
2268  {
2269  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2270  }
2271 
2272  result = lappend(result, set);
2273  }
2274 
2275  return result;
2276 }
2277 
2278 
2279 
2280 /*
2281  * Detect whether a plan node is a "dummy" plan created when a relation
2282  * is deemed not to need scanning due to constraint exclusion.
2283  *
2284  * Currently, such dummy plans are Result nodes with constant FALSE
2285  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2286  *
2287  * XXX this probably ought to be somewhere else, but not clear where.
      *
      * Returns true only when "plan" is a Result node whose resconstantqual is a
      * one-element list holding a Const that is not null and whose value is
      * boolean false. Every other input yields false.
      *
      * NOTE(review): listing line 2290 (the declarator, which names the function
      * and its "plan" parameter) is missing from this copy.
2288  */
2289 bool
2291 {
2292  if (IsA(plan, Result))
2293  {
2294  List *rcqual = (List *) ((Result *) plan)->resconstantqual;
2295 
      /* Exactly one qual is required; an empty or longer list is not a dummy */
2296  if (list_length(rcqual) == 1)
2297  {
2298  Const *constqual = (Const *) linitial(rcqual);
2299 
      /* The cast above is unchecked, so verify the node type before use */
2300  if (constqual && IsA(constqual, Const))
2301  {
2302  if (!constqual->constisnull &&
2303  !DatumGetBool(constqual->constvalue))
2304  return true;
2305  }
2306  }
2307  }
2308  return false;
2309 }
2310 
2311 /*
2312  * preprocess_rowmarks - set up PlanRowMarks if needed
      *
      * Builds a PlanRowMark for each RowMarkClause whose range-table entry is a
      * plain relation, then one more for every remaining base relation in the
      * jointree other than the result relation. The finished list is stored in
      * root->rowMarks.
      *
      * When the query has no rowMarks and is neither UPDATE nor DELETE, the
      * function returns early without assigning root->rowMarks.
      *
      * NOTE(review): listing line 2315 (the declarator; the body uses a
      * parameter named "root") is missing from this copy.
2313  */
2314 static void
2316 {
2317  Query *parse = root->parse;
2318  Bitmapset *rels;
2319  List *prowmarks;
2320  ListCell *l;
2321  int i;
2322 
2323  if (parse->rowMarks)
2324  {
2325  /*
2326  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2327  * grouping, since grouping renders a reference to individual tuple
2328  * CTIDs invalid. This is also checked at parse time, but that's
2329  * insufficient because of rule substitution, query pullup, etc.
2330  */
      /* Only the first clause's strength is passed to the check */
2331  CheckSelectLocking(parse, ((RowMarkClause *)
2332  linitial(parse->rowMarks))->strength);
2333  }
2334  else
2335  {
2336  /*
2337  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2338  * UPDATE/SHARE.
2339  */
2340  if (parse->commandType != CMD_UPDATE &&
2341  parse->commandType != CMD_DELETE)
2342  return;
2343  }
2344 
2345  /*
2346  * We need to have rowmarks for all base relations except the target. We
2347  * make a bitmapset of all base rels and then remove the items we don't
2348  * need or have FOR [KEY] UPDATE/SHARE marks for.
2349  */
2350  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2351  if (parse->resultRelation)
2352  rels = bms_del_member(rels, parse->resultRelation);
2353 
2354  /*
2355  * Convert RowMarkClauses to PlanRowMark representation.
2356  */
2357  prowmarks = NIL;
2358  foreach(l, parse->rowMarks)
2359  {
2360  RowMarkClause *rc = (RowMarkClause *) lfirst(l);
2361  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2362  PlanRowMark *newrc;
2363 
2364  /*
2365  * Currently, it is syntactically impossible to have FOR UPDATE et al
2366  * applied to an update/delete target rel. If that ever becomes
2367  * possible, we should drop the target from the PlanRowMark list.
2368  */
2369  Assert(rc->rti != parse->resultRelation);
2370 
2371  /*
2372  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2373  * can't support true locking. Subqueries that got flattened into the
2374  * main query should be ignored completely. Any that didn't will get
2375  * ROW_MARK_COPY items in the next loop.
2376  */
2377  if (rte->rtekind != RTE_RELATION)
2378  continue;
2379 
      /* This rel is handled here, so keep it out of the second loop below */
2380  rels = bms_del_member(rels, rc->rti);
2381 
2382  newrc = makeNode(PlanRowMark);
2383  newrc->rti = newrc->prti = rc->rti;
2384  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2385  newrc->markType = select_rowmark_type(rte, rc->strength);
      /* Bitmask with only this mark type's bit set */
2386  newrc->allMarkTypes = (1 << newrc->markType);
2387  newrc->strength = rc->strength;
2388  newrc->waitPolicy = rc->waitPolicy;
2389  newrc->isParent = false;
2390 
2391  prowmarks = lappend(prowmarks, newrc);
2392  }
2393 
2394  /*
2395  * Now, add rowmarks for any non-target, non-locked base relations.
2396  */
2397  i = 0;
2398  foreach(l, parse->rtable)
2399  {
2400  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
2401  PlanRowMark *newrc;
2402 
      /* i is bumped before use, so the first rtable entry is index 1 */
2403  i++;
2404  if (!bms_is_member(i, rels))
2405  continue;
2406 
2407  newrc = makeNode(PlanRowMark);
2408  newrc->rti = newrc->prti = i;
2409  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2410  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2411  newrc->allMarkTypes = (1 << newrc->markType);
2412  newrc->strength = LCS_NONE;
2413  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2414  newrc->isParent = false;
2415 
2416  prowmarks = lappend(prowmarks, newrc);
2417  }
2418 
2419  root->rowMarks = prowmarks;
2420 }
2421 
2422 /*
2423  * Select RowMarkType to use for a given table
      *
      * "rte" is the range-table entry being marked and "strength" is the
      * requested lock strength (LCS_NONE when no locking clause applies).
      *
      * Result:
      *   - ROW_MARK_COPY for anything that is not an RTE_RELATION entry;
      *   - for a foreign table, whatever the FDW's GetForeignRowMarkType
      *     callback returns, or ROW_MARK_COPY if the FDW has no such callback;
      *   - for any other relation, the ROW_MARK_* value matching "strength".
      *
      * An unrecognized strength is reported with elog(ERROR).
      *
      * NOTE(review): listing lines 2425-2426 (return type and declarator) are
      * missing from this copy.
2424  */
2427 {
2428  if (rte->rtekind != RTE_RELATION)
2429  {
2430  /* If it's not a table at all, use ROW_MARK_COPY */
2431  return ROW_MARK_COPY;
2432  }
2433  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2434  {
2435  /* Let the FDW select the rowmark type, if it wants to */
2436  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2437 
2438  if (fdwroutine->GetForeignRowMarkType != NULL)
2439  return fdwroutine->GetForeignRowMarkType(rte, strength);
2440  /* Otherwise, use ROW_MARK_COPY by default */
2441  return ROW_MARK_COPY;
2442  }
2443  else
2444  {
2445  /* Regular table, apply the appropriate lock type */
      /* No default case: an unlisted value falls out of the switch to elog */
2446  switch (strength)
2447  {
2448  case LCS_NONE:
2449 
2450  /*
2451  * We don't need a tuple lock, only the ability to re-fetch
2452  * the row.
2453  */
2454  return ROW_MARK_REFERENCE;
2455  break;
2456  case LCS_FORKEYSHARE:
2457  return ROW_MARK_KEYSHARE;
2458  break;
2459  case LCS_FORSHARE:
2460  return ROW_MARK_SHARE;
2461  break;
2462  case LCS_FORNOKEYUPDATE:
2463  return ROW_MARK_NOKEYEXCLUSIVE;
2464  break;
2465  case LCS_FORUPDATE:
2466  return ROW_MARK_EXCLUSIVE;
2467  break;
2468  }
2469  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2470  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2471  }
2472 }
2473 
2474 /*
2475  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2476  *
2477  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2478  * results back in *count_est and *offset_est. These variables are set to
2479  * 0 if the corresponding clause is not present, and -1 if it's present
2480  * but we couldn't estimate the value for it. (The "0" convention is OK
2481  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2482  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2483  * usual practice of never estimating less than one row.) These values will
2484  * be passed to create_limit_path, which see if you change this code.
2485  *
2486  * The return value is the suitably adjusted tuple_fraction to use for
2487  * planning the query. This adjustment is not overridable, since it reflects
2488  * plan actions that grouping_planner() will certainly take, not assumptions
2489  * about context.
2490  */
2491 static double
2492 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2493  int64 *offset_est, int64 *count_est)
2494 {
2495  Query *parse = root->parse;
2496  Node *est;
2497  double limit_fraction;
2498 
2499  /* Should not be called unless LIMIT or OFFSET */
2500  Assert(parse->limitCount || parse->limitOffset);
2501 
2502  /*
2503  * Try to obtain the clause values. We use estimate_expression_value
2504  * primarily because it can sometimes do something useful with Params.
2505  */
2506  if (parse->limitCount)
2507  {
2508  est = estimate_expression_value(root, parse->limitCount);
2509  if (est && IsA(est, Const))
2510  {
2511  if (((Const *) est)->constisnull)
2512  {
2513  /* NULL indicates LIMIT ALL, ie, no limit */
2514  *count_est = 0; /* treat as not present */
2515  }
2516  else
2517  {
2518  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2519  if (*count_est <= 0)
2520  *count_est = 1; /* force to at least 1 */
2521  }
2522  }
2523  else
2524  *count_est = -1; /* can't estimate */
2525  }
2526  else
2527  *count_est = 0; /* not present */
2528 
2529  if (parse->limitOffset)
2530  {
2531  est = estimate_expression_value(root, parse->limitOffset);
2532  if (est && IsA(est, Const))
2533  {
2534  if (((Const *) est)->constisnull)
2535  {
2536  /* Treat NULL as no offset; the executor will too */
2537  *offset_est = 0; /* treat as not present */
2538  }
2539  else
2540  {
2541  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2542  if (*offset_est < 0)
2543  *offset_est = 0; /* treat as not present */
2544  }
2545  }
2546  else
2547  *offset_est = -1; /* can't estimate */
2548  }
2549  else
2550  *offset_est = 0; /* not present */
2551 
2552  if (*count_est != 0)
2553  {
2554  /*
2555  * A LIMIT clause limits the absolute number of tuples returned.
2556  * However, if it's not a constant LIMIT then we have to guess; for
2557  * lack of a better idea, assume 10% of the plan's result is wanted.
2558  */
2559  if (*count_est < 0 || *offset_est < 0)
2560  {
2561  /* LIMIT or OFFSET is an expression ... punt ... */
2562  limit_fraction = 0.10;
2563  }
2564  else
2565  {
2566  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2567  limit_fraction = (double) *count_est + (double) *offset_est;
2568  }
2569 
2570  /*
2571  * If we have absolute limits from both caller and LIMIT, use the
2572  * smaller value; likewise if they are both fractional. If one is
2573  * fractional and the other absolute, we can't easily determine which
2574  * is smaller, but we use the heuristic that the absolute will usually
2575  * be smaller.
2576  */
2577  if (tuple_fraction >= 1.0)
2578  {
2579  if (limit_fraction >= 1.0)
2580  {
2581  /* both absolute */
2582  tuple_fraction = Min(tuple_fraction, limit_fraction);
2583  }
2584  else
2585  {
2586  /* caller absolute, limit fractional; use caller's value */
2587  }
2588  }
2589  else if (tuple_fraction > 0.0)
2590  {
2591  if (limit_fraction >= 1.0)
2592  {
2593  /* caller fractional, limit absolute; use limit */
2594  tuple_fraction = limit_fraction;
2595  }
2596  else
2597  {
2598  /* both fractional */
2599  tuple_fraction = Min(tuple_fraction, limit_fraction);
2600  }
2601  }
2602  else
2603  {
2604  /* no info from caller, just use limit */
2605  tuple_fraction = limit_fraction;
2606  }
2607  }
2608  else if (*offset_est != 0 && tuple_fraction > 0.0)
2609  {
2610  /*
2611  * We have an OFFSET but no LIMIT. This acts entirely differently
2612  * from the LIMIT case: here, we need to increase rather than decrease
2613  * the caller's tuple_fraction, because the OFFSET acts to cause more
2614  * tuples to be fetched instead of fewer. This only matters if we got
2615  * a tuple_fraction > 0, however.
2616  *
2617  * As above, use 10% if OFFSET is present but unestimatable.
2618  */
2619  if (*offset_est < 0)
2620  limit_fraction = 0.10;
2621  else
2622  limit_fraction = (double) *offset_est;
2623 
2624  /*
2625  * If we have absolute counts from both caller and OFFSET, add them
2626  * together; likewise if they are both fractional. If one is
2627  * fractional and the other absolute, we want to take the larger, and
2628  * we heuristically assume that's the fractional one.
2629  */
2630  if (tuple_fraction >= 1.0)
2631  {
2632  if (limit_fraction >= 1.0)
2633  {
2634  /* both absolute, so add them together */
2635  tuple_fraction += limit_fraction;
2636  }
2637  else
2638  {
2639  /* caller absolute, limit fractional; use limit */
2640  tuple_fraction = limit_fraction;
2641  }
2642  }
2643  else
2644  {
2645  if (limit_fraction >= 1.0)
2646  {
2647  /* caller fractional, limit absolute; use caller's value */
2648  }
2649  else
2650  {
2651  /* both fractional, so add them together */
2652  tuple_fraction += limit_fraction;
2653  if (tuple_fraction >= 1.0)
2654  tuple_fraction = 0.0; /* assume fetch all */
2655  }
2656  }
2657  }
2658 
2659  return tuple_fraction;
2660 }
2661 
2662 /*
2663  * limit_needed - do we actually need a Limit plan node?
2664  *
2665  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2666  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2667  * locution for an optimization fence. (Because other places in the planner
2668  * merely check whether parse->limitOffset isn't NULL, it will still work as
2669  * an optimization fence --- we're just suppressing unnecessary run-time
2670  * overhead.)
2671  *
2672  * This might look like it could be merged into preprocess_limit, but there's
2673  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2674  * in preprocess_limit it's good enough to consider estimated values.
      *
      * Returns true when parse->limitCount is a non-Const expression or a
      * non-null Const, or when parse->limitOffset is a non-Const expression or a
      * non-null Const whose value is nonzero. Returns false otherwise, which
      * includes both clauses being absent.
      *
      * NOTE(review): listing line 2677 (the declarator; the body uses a
      * parameter named "parse") is missing from this copy.
2675  */
2676 static bool
2678 {
2679  Node *node;
2680 
2681  node = parse->limitCount;
2682  if (node)
2683  {
2684  if (IsA(node, Const))
2685  {
2686  /* NULL indicates LIMIT ALL, ie, no limit */
2687  if (!((Const *) node)->constisnull)
2688  return true; /* LIMIT with a constant value */
2689  }
2690  else
2691  return true; /* non-constant LIMIT */
2692  }
2693 
      /* Reaching here means LIMIT is absent or a NULL constant; now look at OFFSET */
2694  node = parse->limitOffset;
2695  if (node)
2696  {
2697  if (IsA(node, Const))
2698  {
2699  /* Treat NULL as no offset; the executor would too */
2700  if (!((Const *) node)->constisnull)
2701  {
2702  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2703 
2704  if (offset != 0)
2705  return true; /* OFFSET with a nonzero value */
2706  }
2707  }
2708  else
2709  return true; /* non-constant OFFSET */
2710  }
2711 
2712  return false; /* don't need a Limit plan node */
2713 }
2714 
2715 
2716 /*
2717  * remove_useless_groupby_columns
2718  * Remove any columns in the GROUP BY clause that are redundant due to
2719  * being functionally dependent on other GROUP BY columns.
2720  *
2721  * Since some other DBMSes do not allow references to ungrouped columns, it's
2722  * not unusual to find all columns listed in GROUP BY even though listing the
2723  * primary-key columns would be sufficient. Deleting such excess columns
2724  * avoids redundant sorting work, so it's worth doing. When we do this, we
2725  * must mark the plan as dependent on the pkey constraint (compare the
2726  * parser's check_ungrouped_columns() and check_functional_grouping()).
2727  *
2728  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2729  * index as the determining columns. But as with check_functional_grouping(),
2730  * there's currently no way to represent dependency on a NOT NULL constraint,
2731  * so we consider only the pkey for now.
      *
      * Side effects: when something is removable, parse->groupClause is replaced
      * by a new list and the primary-key constraint OID is appended to
      * parse->constraintDeps. The function does nothing when GROUP BY has fewer
      * than two items or when the query uses grouping sets.
      *
      * NOTE(review): listing lines 2734 (the declarator; the body uses a
      * parameter named "root"), 2779 and 2860 are missing from this copy.
2732  */
2733 static void
2735 {
2736  Query *parse = root->parse;
2737  Bitmapset **groupbyattnos;
2738  Bitmapset **surplusvars;
2739  ListCell *lc;
2740  int relid;
2741 
2742  /* No chance to do anything if there are less than two GROUP BY items */
2743  if (list_length(parse->groupClause) < 2)
2744  return;
2745 
2746  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2747  if (parse->groupingSets)
2748  return;
2749 
2750  /*
2751  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2752  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2753  * that are GROUP BY items.
2754  */
      /* Sized list_length + 1 so the array can be indexed directly by varno */
2755  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2756  (list_length(parse->rtable) + 1));
2757  foreach(lc, parse->groupClause)
2758  {
2759  SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2760  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2761  Var *var = (Var *) tle->expr;
2762 
2763  /*
2764  * Ignore non-Vars and Vars from other query levels.
2765  *
2766  * XXX in principle, stable expressions containing Vars could also be
2767  * removed, if all the Vars are functionally dependent on other GROUP
2768  * BY items. But it's not clear that such cases occur often enough to
2769  * be worth troubling over.
2770  */
2771  if (!IsA(var, Var) ||
2772  var->varlevelsup > 0)
2773  continue;
2774 
2775  /* OK, remember we have this Var */
2776  relid = var->varno;
2777  Assert(relid <= list_length(parse->rtable));
2778  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
      /* NOTE(review): the second argument of this call (listing line 2779) is missing; presumably a member number derived from the Var's attno -- confirm */
2780  }
2781 
2782  /*
2783  * Consider each relation and see if it is possible to remove some of its
2784  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2785  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2786  * of the column attnos of RTE k that are removable GROUP BY items.
2787  */
2788  surplusvars = NULL; /* don't allocate array unless required */
2789  relid = 0;
2790  foreach(lc, parse->rtable)
2791  {
2792  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
2793  Bitmapset *relattnos;
2794  Bitmapset *pkattnos;
2795  Oid constraintOid;
2796 
      /* Bumped before use, so the first rtable entry is relid 1 */
2797  relid++;
2798 
2799  /* Only plain relations could have primary-key constraints */
2800  if (rte->rtekind != RTE_RELATION)
2801  continue;
2802 
2803  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2804  relattnos = groupbyattnos[relid];
2805  if (bms_membership(relattnos) != BMS_MULTIPLE)
2806  continue;
2807 
2808  /*
2809  * Can't remove any columns for this rel if there is no suitable
2810  * (i.e., nondeferrable) primary key constraint.
2811  */
2812  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2813  if (pkattnos == NULL)
2814  continue;
2815 
2816  /*
2817  * If the primary key is a proper subset of relattnos then we have
2818  * some items in the GROUP BY that can be removed.
2819  */
2820  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2821  {
2822  /*
2823  * To easily remember whether we've found anything to do, we don't
2824  * allocate the surplusvars[] array until we find something.
2825  */
2826  if (surplusvars == NULL)
2827  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2828  (list_length(parse->rtable) + 1));
2829 
2830  /* Remember the attnos of the removable columns */
2831  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2832 
2833  /* Also, mark the resulting plan as dependent on this constraint */
2834  parse->constraintDeps = lappend_oid(parse->constraintDeps,
2835  constraintOid);
2836  }
2837  }
2838 
2839  /*
2840  * If we found any surplus Vars, build a new GROUP BY clause without them.
2841  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2842  * markings, but that's harmless.)
2843  */
2844  if (surplusvars != NULL)
2845  {
2846  List *new_groupby = NIL;
2847 
2848  foreach(lc, parse->groupClause)
2849  {
2850  SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2851  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2852  Var *var = (Var *) tle->expr;
2853 
2854  /*
2855  * New list must include non-Vars, outer Vars, and anything not
2856  * marked as surplus.
2857  */
2858  if (!IsA(var, Var) ||
2859  var->varlevelsup > 0 ||
      /* NOTE(review): the start of the third test (listing line 2860) is missing; presumably a negated membership test of the Var's attno against the set below -- confirm */
2861  surplusvars[var->varno]))
2862  new_groupby = lappend(new_groupby, sgc);
2863  }
2864 
2865  parse->groupClause = new_groupby;
2866  }
2867 }
2868 
2869 /*
2870  * preprocess_groupclause - do preparatory work on GROUP BY clause
2871  *
2872  * The idea here is to adjust the ordering of the GROUP BY elements
2873  * (which in itself is semantically insignificant) to match ORDER BY,
2874  * thereby allowing a single sort operation to both implement the ORDER BY
2875  * requirement and set up for a Unique step that implements GROUP BY.
2876  *
2877  * In principle it might be interesting to consider other orderings of the
2878  * GROUP BY elements, which could match the sort ordering of other
2879  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2880  * bother with that, though. Hashed grouping will frequently win anyway.
2881  *
2882  * Note: we need no comparable processing of the distinctClause because
2883  * the parser already enforced that that matches ORDER BY.
2884  *
2885  * For grouping sets, the order of items is instead forced to agree with that
2886  * of the grouping set (and items not in the grouping set are skipped). The
2887  * work of sorting the order of grouping set elements to match the ORDER BY if
2888  * possible is done elsewhere.
2889  */
2890 static List *
2892 {
2893  Query *parse = root->parse;
2894  List *new_groupclause = NIL;
2895  bool partial_match;
2896  ListCell *sl;
2897  ListCell *gl;
2898 
2899  /* For grouping sets, we need to force the ordering */
2900  if (force)
2901  {
2902  foreach(sl, force)
2903  {
2904  Index ref = lfirst_int(sl);
2906 
2907  new_groupclause = lappend(new_groupclause, cl);
2908  }
2909 
2910  return new_groupclause;
2911  }
2912 
2913  /* If no ORDER BY, nothing useful to do here */
2914  if (parse->sortClause == NIL)
2915  return parse->groupClause;
2916 
2917  /*
2918  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2919  * items, but only as far as we can make a matching prefix.
2920  *
2921  * This code assumes that the sortClause contains no duplicate items.
2922  */
2923  foreach(sl, parse->sortClause)
2924  {
2925  SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2926 
2927  foreach(gl, parse->groupClause)
2928  {
2929  SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2930 
2931  if (equal(gc, sc))
2932  {
2933  new_groupclause = lappend(new_groupclause, gc);
2934  break;
2935  }
2936  }
2937  if (gl == NULL)
2938  break; /* no match, so stop scanning */
2939  }
2940 
2941  /* Did we match all of the ORDER BY list, or just some of it? */
2942  partial_match = (sl != NULL);
2943 
2944  /* If no match at all, no point in reordering GROUP BY */
2945  if (new_groupclause == NIL)
2946  return parse->groupClause;
2947 
2948  /*
2949  * Add any remaining GROUP BY items to the new list, but only if we were
2950  * able to make a complete match. In other words, we only rearrange the
2951  * GROUP BY list if the result is that one list is a prefix of the other
2952  * --- otherwise there's no possibility of a common sort. Also, give up
2953  * if there are any non-sortable GROUP BY items, since then there's no
2954  * hope anyway.
2955  */
2956  foreach(gl, parse->groupClause)
2957  {
2958  SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2959 
2960  if (list_member_ptr(new_groupclause, gc))
2961  continue; /* it matched an ORDER BY item */
2962  if (partial_match)
2963  return parse->groupClause; /* give up, no common sort possible */
2964  if (!OidIsValid(gc->sortop))
2965  return parse->groupClause; /* give up, GROUP BY can't be sorted */
2966  new_groupclause = lappend(new_groupclause, gc);
2967  }
2968 
2969  /* Success --- install the rearranged GROUP BY list */
2970  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2971  return new_groupclause;
2972 }
2973 
2974 /*
2975  * Extract lists of grouping sets that can be implemented using a single
2976  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2977  *
2978  * Input must be sorted with smallest sets first. Result has each sublist
2979  * sorted with smallest sets first.
2980  *
2981  * We want to produce the absolute minimum possible number of lists here to
2982  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2983  * of finding the minimal partition of a partially-ordered set into chains
2984  * (which is what we need, taking the list of grouping sets as a poset ordered
2985  * by set inclusion) can be mapped to the problem of finding the maximum
2986  * cardinality matching on a bipartite graph, which is solvable in polynomial
2987  * time with a worst case of no worse than O(n^2.5) and usually much
2988  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2989  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2990  * half a second on my modest system even with optimization off and assertions
2991  * on.)
2992  */
2993 static List *
2995 {
2996  int num_sets_raw = list_length(groupingSets);
2997  int num_empty = 0;
2998  int num_sets = 0; /* distinct sets */
2999  int num_chains = 0;
3000  List *result = NIL;
3001  List **results;
3002  List **orig_sets;
3003  Bitmapset **set_masks;
3004  int *chains;
3005  short **adjacency;
3006  short *adjacency_buf;
3008  int i;
3009  int j;
3010  int j_size;
3011  ListCell *lc1 = list_head(groupingSets);
3012  ListCell *lc;
3013 
3014  /*
3015  * Start by stripping out empty sets. The algorithm doesn't require this,
3016  * but the planner currently needs all empty sets to be returned in the
3017  * first list, so we strip them here and add them back after.
3018  */
3019  while (lc1 && lfirst(lc1) == NIL)
3020  {
3021  ++num_empty;
3022  lc1 = lnext(lc1);
3023  }
3024 
3025  /* bail out now if it turns out that all we had were empty sets. */
3026  if (!lc1)
3027  return list_make1(groupingSets);
3028 
3029  /*----------
3030  * We don't strictly need to remove duplicate sets here, but if we don't,
3031  * they tend to become scattered through the result, which is a bit
3032  * confusing (and irritating if we ever decide to optimize them out).
3033  * So we remove them here and add them back after.
3034  *
3035  * For each non-duplicate set, we fill in the following:
3036  *
3037  * orig_sets[i] = list of the original set lists
3038  * set_masks[i] = bitmapset for testing inclusion
3039  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3040  *
3041  * chains[i] will be the result group this set is assigned to.
3042  *
3043  * We index all of these from 1 rather than 0 because it is convenient
3044  * to leave 0 free for the NIL node in the graph algorithm.
3045  *----------
3046  */
3047  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3048  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3049  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3050  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3051 
3052  j_size = 0;
3053  j = 0;
3054  i = 1;
3055 
3056  for_each_cell(lc, lc1)
3057  {
3058  List *candidate = lfirst(lc);
3059  Bitmapset *candidate_set = NULL;
3060  ListCell *lc2;
3061  int dup_of = 0;
3062 
3063  foreach(lc2, candidate)
3064  {
3065  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3066  }
3067 
3068  /* we can only be a dup if we're the same length as a previous set */
3069  if (j_size == list_length(candidate))
3070  {
3071  int k;
3072 
3073  for (k = j; k < i; ++k)
3074  {
3075  if (bms_equal(set_masks[k], candidate_set))
3076  {
3077  dup_of = k;
3078  break;
3079  }
3080  }
3081  }
3082  else if (j_size < list_length(candidate))
3083  {
3084  j_size = list_length(candidate);
3085  j = i;
3086  }
3087 
3088  if (dup_of > 0)
3089  {
3090  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3091  bms_free(candidate_set);
3092  }
3093  else
3094  {
3095  int k;
3096  int n_adj = 0;
3097 
3098  orig_sets[i] = list_make1(candidate);
3099  set_masks[i] = candidate_set;
3100 
3101  /* fill in adjacency list; no need to compare equal-size sets */
3102 
3103  for (k = j - 1; k > 0; --k)
3104  {
3105  if (bms_is_subset(set_masks[k], candidate_set))
3106  adjacency_buf[++n_adj] = k;
3107  }
3108 
3109  if (n_adj > 0)
3110  {
3111  adjacency_buf[0] = n_adj;
3112  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3113  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3114  }
3115  else
3116  adjacency[i] = NULL;
3117 
3118  ++i;
3119  }
3120  }
3121 
3122  num_sets = i - 1;
3123 
3124  /*
3125  * Apply the graph matching algorithm to do the work.
3126  */
3127  state = BipartiteMatch(num_sets, num_sets, adjacency);
3128 
3129  /*
3130  * Now, the state->pair* fields have the info we need to assign sets to
3131  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3132  * pair_vu[v] = u (both will be true, but we check both so that we can do
3133  * it in one pass)
3134  */
3135  chains = palloc0((num_sets + 1) * sizeof(int));
3136 
3137  for (i = 1; i <= num_sets; ++i)
3138  {
3139  int u = state->pair_vu[i];
3140  int v = state->pair_uv[i];
3141 
3142  if (u > 0 && u < i)
3143  chains[i] = chains[u];
3144  else if (v > 0 && v < i)
3145  chains[i] = chains[v];
3146  else
3147  chains[i] = ++num_chains;
3148  }
3149 
3150  /* build result lists. */
3151  results = palloc0((num_chains + 1) * sizeof(List *));
3152 
3153  for (i = 1; i <= num_sets; ++i)
3154  {
3155  int c = chains[i];
3156 
3157  Assert(c > 0);
3158 
3159  results[c] = list_concat(results[c], orig_sets[i]);
3160  }
3161 
3162  /* push any empty sets back on the first list. */
3163  while (num_empty-- > 0)
3164  results[1] = lcons(NIL, results[1]);
3165 
3166  /* make result list */
3167  for (i = 1; i <= num_chains; ++i)
3168  result = lappend(result, results[i]);
3169 
3170  /*
3171  * Free all the things.
3172  *
3173  * (This is over-fussy for small sets but for large sets we could have
3174  * tied up a nontrivial amount of memory.)
3175  */
3176  BipartiteMatchFree(state);
3177  pfree(results);
3178  pfree(chains);
3179  for (i = 1; i <= num_sets; ++i)
3180  if (adjacency[i])
3181  pfree(adjacency[i]);
3182  pfree(adjacency);
3183  pfree(adjacency_buf);
3184  pfree(orig_sets);
3185  for (i = 1; i <= num_sets; ++i)
3186  bms_free(set_masks[i]);
3187  pfree(set_masks);
3188 
3189  return result;
3190 }
3191 
3192 /*
3193  * Reorder the elements of a list of grouping sets such that they have correct
3194  * prefix relationships. Also inserts the GroupingSetData annotations.
3195  *
3196  * The input must be ordered with smallest sets first; the result is returned
3197  * with largest sets first. Note that the result shares no list substructure
3198  * with the input, so it's safe for the caller to modify it later.
3199  *
3200  * If we're passed in a sortclause, we follow its order of columns to the
3201  * extent possible, to minimize the chance that we add unnecessary sorts.
3202  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3203  * gets implemented in one pass.)
3204  */
3205 static List *
3206 reorder_grouping_sets(List *groupingsets, List *sortclause)
3207 {
3208  ListCell *lc;
3209  ListCell *lc2;
3210  List *previous = NIL;
3211  List *result = NIL;
3212 
3213  foreach(lc, groupingsets)
3214  {
3215  List *candidate = lfirst(lc);
3216  List *new_elems = list_difference_int(candidate, previous);
3218 
3219  if (list_length(new_elems) > 0)
3220  {
3221  while (list_length(sortclause) > list_length(previous))
3222  {
3223  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3224  int ref = sc->tleSortGroupRef;
3225 
3226  if (list_member_int(new_elems, ref))
3227  {
3228  previous = lappend_int(previous, ref);
3229  new_elems = list_delete_int(new_elems, ref);
3230  }
3231  else
3232  {
3233  /* diverged from the sortclause; give up on it */
3234  sortclause = NIL;
3235  break;
3236  }
3237  }
3238 
3239  foreach(lc2, new_elems)
3240  {
3241  previous = lappend_int(previous, lfirst_int(lc2));
3242  }
3243  }
3244 
3245  gs->set = list_copy(previous);
3246  result = lcons(gs, result);
3247  list_free(new_elems);
3248  }
3249 
3250  list_free(previous);
3251 
3252  return result;
3253 }
3254 
3255 /*
3256  * Compute query_pathkeys and other pathkeys during plan generation
3257  */
3258 static void
3260 {
3261  Query *parse = root->parse;
3262  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3263  List *tlist = qp_extra->tlist;
3264  List *activeWindows = qp_extra->activeWindows;
3265 
3266  /*
3267  * Calculate pathkeys that represent grouping/ordering requirements. The
3268  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3269  * be, in which case we just leave their pathkeys empty.
3270  */
3271  if (qp_extra->groupClause &&
3272  grouping_is_sortable(qp_extra->groupClause))
3273  root->group_pathkeys =
3275  qp_extra->groupClause,
3276  tlist);
3277  else
3278  root->group_pathkeys = NIL;
3279 
3280  /* We consider only the first (bottom) window in pathkeys logic */
3281  if (activeWindows != NIL)
3282  {
3283  WindowClause *wc = (WindowClause *) linitial(activeWindows);
3284 
3286  wc,
3287  tlist);
3288  }
3289  else
3290  root->window_pathkeys = NIL;
3291 
3292  if (parse->distinctClause &&
3294  root->distinct_pathkeys =
3296  parse->distinctClause,
3297  tlist);
3298  else
3299  root->distinct_pathkeys = NIL;
3300 
3301  root->sort_pathkeys =
3303  parse->sortClause,
3304  tlist);
3305 
3306  /*
3307  * Figure out whether we want a sorted result from query_planner.
3308  *
3309  * If we have a sortable GROUP BY clause, then we want a result sorted
3310  * properly for grouping. Otherwise, if we have window functions to
3311  * evaluate, we try to sort for the first window. Otherwise, if there's a
3312  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3313  * we try to produce output that's sufficiently well sorted for the
3314  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3315  * by the ORDER BY clause.
3316  *
3317  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3318  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3319  * that might just leave us failing to exploit an available sort order at
3320  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3321  * much easier, since we know that the parser ensured that one is a
3322  * superset of the other.
3323  */
3324  if (root->group_pathkeys)
3325  root->query_pathkeys = root->group_pathkeys;
3326  else if (root->window_pathkeys)
3327  root->query_pathkeys = root->window_pathkeys;
3328  else if (list_length(root->distinct_pathkeys) >
3329  list_length(root->sort_pathkeys))
3330  root->query_pathkeys = root->distinct_pathkeys;
3331  else if (root->sort_pathkeys)
3332  root->query_pathkeys = root->sort_pathkeys;
3333  else
3334  root->query_pathkeys = NIL;
3335 }
3336 
3337 /*
3338  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3339  *
3340  * path_rows: number of output rows from scan/join step
3341  * gsets: grouping set data, or NULL if not doing grouping sets
3342  *
3343  * If doing grouping sets, we also annotate the gsets data with the estimates
3344  * for each set and each individual rollup list, with a view to later
3345  * determining whether some combination of them could be hashed instead.
3346  */
3347 static double
3349  double path_rows,
3350  grouping_sets_data *gd)
3351 {
3352  Query *parse = root->parse;
3353  double dNumGroups;
3354 
3355  if (parse->groupClause)
3356  {
3357  List *groupExprs;
3358 
3359  if (parse->groupingSets)
3360  {
3361  /* Add up the estimates for each grouping set */
3362  ListCell *lc;
3363  ListCell *lc2;
3364 
3365  dNumGroups = 0;
3366 
3367  foreach(lc, gd->rollups)
3368  {
3369  RollupData *rollup = lfirst(lc);
3370  ListCell *lc;
3371 
3372  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3373  parse->targetList);
3374 
3375  rollup->numGroups = 0.0;
3376 
3377  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3378  {
3379  List *gset = (List *) lfirst(lc);
3380  GroupingSetData *gs = lfirst(lc2);
3381  double numGroups = estimate_num_groups(root,
3382  groupExprs,
3383  path_rows,
3384  &gset);
3385 
3386  gs->numGroups = numGroups;
3387  rollup->numGroups += numGroups;
3388  }
3389 
3390  dNumGroups += rollup->numGroups;
3391  }
3392 
3393  if (gd->hash_sets_idx)
3394  {
3395  ListCell *lc;
3396 
3397  gd->dNumHashGroups = 0;
3398 
3399  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3400  parse->targetList);
3401 
3402  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3403  {
3404  List *gset = (List *) lfirst(lc);
3405  GroupingSetData *gs = lfirst(lc2);
3406  double numGroups = estimate_num_groups(root,
3407  groupExprs,
3408  path_rows,
3409  &gset);
3410 
3411  gs->numGroups = numGroups;
3412  gd->dNumHashGroups += numGroups;
3413  }
3414 
3415  dNumGroups += gd->dNumHashGroups;
3416  }
3417  }
3418  else
3419  {
3420  /* Plain GROUP BY */
3421  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3422  parse->targetList);
3423 
3424  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3425  NULL);
3426  }
3427  }
3428  else if (parse->groupingSets)
3429  {
3430  /* Empty grouping sets ... one result row for each one */
3431  dNumGroups = list_length(parse->groupingSets);
3432  }
3433  else if (parse->hasAggs || root->hasHavingQual)
3434  {
3435  /* Plain aggregation, one result row */
3436  dNumGroups = 1;
3437  }
3438  else
3439  {
3440  /* Not grouping */
3441  dNumGroups = 1;
3442  }
3443 
3444  return dNumGroups;
3445 }
3446 
3447 /*
3448  * estimate_hashagg_tablesize
3449  * estimate the number of bytes that a hash aggregate hashtable will
3450  * require based on the agg_costs, path width and dNumGroups.
3451  *
3452  * XXX this may be over-estimating the size now that hashagg knows to omit
3453  * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3454  * grouping columns not in the hashed set are counted here even though hashagg
3455  * won't store them. Is this a problem?
3456  */
3457 static Size
3459  double dNumGroups)
3460 {
3461  Size hashentrysize;
3462 
3463  /* Estimate per-hash-entry space at tuple width... */
3464  hashentrysize = MAXALIGN(path->pathtarget->width) +
3466 
3467  /* plus space for pass-by-ref transition values... */
3468  hashentrysize += agg_costs->transitionSpace;
3469  /* plus the per-hash-entry overhead */
3470  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3471 
3472  /*
3473  * Note that this disregards the effect of fill-factor and growth policy
3474  * of the hash-table. That's probably ok, given default the default
3475  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3476  * "double-in-size" growth policies here.
3477  */
3478  return hashentrysize * dNumGroups;
3479 }
3480 
3481 /*
3482  * create_grouping_paths
3483  *
3484  * Build a new upperrel containing Paths for grouping and/or aggregation.
3485  *
3486  * input_rel: contains the source-data Paths
3487  * target: the pathtarget for the result Paths to compute
3488  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
3489  * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
3490  * rollup_groupclauses: list of grouping clauses for grouping sets,
3491  * or NIL if not doing grouping sets
3492  *
3493  * Note: all Paths in input_rel are expected to return the target computed
3494  * by make_group_input_target.
3495  *
3496  * We need to consider sorted and hashed aggregation in the same function,
3497  * because otherwise (1) it would be harder to throw an appropriate error
3498  * message if neither way works, and (2) we should not allow hashtable size
3499  * considerations to dissuade us from using hashing if sorting is not possible.
3500  */
3501 static RelOptInfo *
3503  RelOptInfo *input_rel,
3504  PathTarget *target,
3505  const AggClauseCosts *agg_costs,
3506  grouping_sets_data *gd)
3507 {
3508  Query *parse = root->parse;
3509  Path *cheapest_path = input_rel->cheapest_total_path;
3510  RelOptInfo *grouped_rel;
3511  PathTarget *partial_grouping_target = NULL;
3512  AggClauseCosts agg_partial_costs; /* parallel only */
3513  AggClauseCosts agg_final_costs; /* parallel only */
3514  Size hashaggtablesize;
3515  double dNumGroups;
3516  double dNumPartialGroups = 0;
3517  bool can_hash;
3518  bool can_sort;
3519  bool try_parallel_aggregation;
3520 
3521  ListCell *lc;
3522 
3523  /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3524  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3525 
3526  /*
3527  * If the input relation is not parallel-safe, then the grouped relation
3528  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3529  * target list and HAVING quals are parallel-safe.
3530  */
3531  if (input_rel->consider_parallel &&
3532  is_parallel_safe(root, (Node *) target->exprs) &&
3533  is_parallel_safe(root, (Node *) parse->havingQual))
3534  grouped_rel->consider_parallel = true;
3535 
3536  /*
3537  * If the input rel belongs to a single FDW, so does the grouped rel.
3538  */
3539  grouped_rel->serverid = input_rel->serverid;
3540  grouped_rel->userid = input_rel->userid;
3541  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3542  grouped_rel->fdwroutine = input_rel->fdwroutine;
3543 
3544  /*
3545  * Check for degenerate grouping.
3546  */
3547  if ((root->hasHavingQual || parse->groupingSets) &&
3548  !parse->hasAggs && parse->groupClause == NIL)
3549  {
3550  /*
3551  * We have a HAVING qual and/or grouping sets, but no aggregates and
3552  * no GROUP BY (which implies that the grouping sets are all empty).
3553  *
3554  * This is a degenerate case in which we are supposed to emit either
3555  * zero or one row for each grouping set depending on whether HAVING
3556  * succeeds. Furthermore, there cannot be any variables in either
3557  * HAVING or the targetlist, so we actually do not need the FROM table
3558  * at all! We can just throw away the plan-so-far and generate a
3559  * Result node. This is a sufficiently unusual corner case that it's
3560  * not worth contorting the structure of this module to avoid having
3561  * to generate the earlier paths in the first place.
3562  */
3563  int nrows = list_length(parse->groupingSets);
3564  Path *path;
3565 
3566  if (nrows > 1)
3567  {
3568  /*
3569  * Doesn't seem worthwhile writing code to cons up a
3570  * generate_series or a values scan to emit multiple rows. Instead
3571  * just make N clones and append them. (With a volatile HAVING
3572  * clause, this means you might get between 0 and N output rows.
3573  * Offhand I think that's desired.)
3574  */
3575  List *paths = NIL;
3576 
3577  while (--nrows >= 0)
3578  {
3579  path = (Path *)
3580  create_result_path(root, grouped_rel,
3581  target,
3582  (List *) parse->havingQual);
3583  paths = lappend(paths, path);
3584  }
3585  path = (Path *)
3586  create_append_path(grouped_rel,
3587  paths,
3588  NULL,
3589  0,
3590  NIL);
3591  path->pathtarget = target;
3592  }
3593  else
3594  {
3595  /* No grouping sets, or just one, so one output row */
3596  path = (Path *)
3597  create_result_path(root, grouped_rel,
3598  target,
3599  (List *) parse->havingQual);
3600  }
3601 
3602  add_path(grouped_rel, path);
3603 
3604  /* No need to consider any other alternatives. */
3605  set_cheapest(grouped_rel);
3606 
3607  return grouped_rel;
3608  }
3609 
3610  /*
3611  * Estimate number of groups.
3612  */
3613  dNumGroups = get_number_of_groups(root,
3614  cheapest_path->rows,
3615  gd);
3616 
3617  /*
3618  * Determine whether it's possible to perform sort-based implementations
3619  * of grouping. (Note that if groupClause is empty,
3620  * grouping_is_sortable() is trivially true, and all the
3621  * pathkeys_contained_in() tests will succeed too, so that we'll consider
3622  * every surviving input path.)
3623  *
3624  * If we have grouping sets, we might be able to sort some but not all of
3625  * them; in this case, we need can_sort to be true as long as we must
3626  * consider any sorted-input plan.
3627  */
3628  can_sort = (gd && gd->rollups != NIL)
3629  || grouping_is_sortable(parse->groupClause);
3630 
3631  /*
3632  * Determine whether we should consider hash-based implementations of
3633  * grouping.
3634  *
3635  * Hashed aggregation only applies if we're grouping. If we have grouping
3636  * sets, some groups might be hashable but others not; in this case we set
3637  * can_hash true as long as there is nothing globally preventing us from
3638  * hashing (and we should therefore consider plans with hashes).
3639  *
3640  * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3641  * aggregates. (Doing so would imply storing *all* the input values in
3642  * the hash table, and/or running many sorts in parallel, either of which
3643  * seems like a certain loser.) We similarly don't support ordered-set
3644  * aggregates in hashed aggregation, but that case is also included in the
3645  * numOrderedAggs count.
3646  *
3647  * Note: grouping_is_hashable() is much more expensive to check than the
3648  * other gating conditions, so we want to do it last.
3649  */
3650  can_hash = (parse->groupClause != NIL &&
3651  agg_costs->numOrderedAggs == 0 &&
3652  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
3653 
3654  /*
3655  * If grouped_rel->consider_parallel is true, then paths that we generate
3656  * for this grouping relation could be run inside of a worker, but that
3657  * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3658  * execution strategy. Figure that out.
3659  */
3660  if (!grouped_rel->consider_parallel)
3661  {
3662  /* Not even parallel-safe. */
3663  try_parallel_aggregation = false;
3664  }
3665  else if (input_rel->partial_pathlist == NIL)
3666  {
3667  /* Nothing to use as input for partial aggregate. */
3668  try_parallel_aggregation = false;
3669  }
3670  else if (!parse->hasAggs && parse->groupClause == NIL)
3671  {
3672  /*
3673  * We don't know how to do parallel aggregation unless we have either
3674  * some aggregates or a grouping clause.
3675  */
3676  try_parallel_aggregation = false;
3677  }
3678  else if (parse->groupingSets)
3679  {
3680  /* We don't know how to do grouping sets in parallel. */
3681  try_parallel_aggregation = false;
3682  }
3683  else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3684  {
3685  /* Insufficient support for partial mode. */
3686  try_parallel_aggregation = false;
3687  }
3688  else
3689  {
3690  /* Everything looks good. */
3691  try_parallel_aggregation = true;
3692  }
3693 
3694  /*
3695  * Before generating paths for grouped_rel, we first generate any possible
3696  * partial paths; that way, later code can easily consider both parallel
3697  * and non-parallel approaches to grouping. Note that the partial paths
3698  * we generate here are also partially aggregated, so simply pushing a
3699  * Gather node on top is insufficient to create a final path, as would be
3700  * the case for a scan/join rel.
3701  */
3702  if (try_parallel_aggregation)
3703  {
3704  Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3705 
3706  /*
3707  * Build target list for partial aggregate paths. These paths cannot
3708  * just emit the same tlist as regular aggregate paths, because (1) we
3709  * must include Vars and Aggrefs needed in HAVING, which might not
3710  * appear in the result tlist, and (2) the Aggrefs must be set in
3711  * partial mode.
3712  */
3713  partial_grouping_target = make_partial_grouping_target(root, target);
3714 
3715  /* Estimate number of partial groups. */
3716  dNumPartialGroups = get_number_of_groups(root,
3717  cheapest_partial_path->rows,
3718  gd);
3719 
3720  /*
3721  * Collect statistics about aggregates for estimating costs of
3722  * performing aggregation in parallel.
3723  */
3724  MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3725  MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3726  if (parse->hasAggs)
3727  {
3728  /* partial phase */
3729  get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
3731  &agg_partial_costs);
3732 
3733  /* final phase */
3734  get_agg_clause_costs(root, (Node *) target->exprs,
3736  &agg_final_costs);
3737  get_agg_clause_costs(root, parse->havingQual,
3739  &agg_final_costs);
3740  }
3741 
3742  if (can_sort)
3743  {
3744  /* This was checked before setting try_parallel_aggregation */
3745  Assert(parse->hasAggs || parse->groupClause);
3746 
3747  /*
3748  * Use any available suitably-sorted path as input, and also
3749  * consider sorting the cheapest partial path.
3750  */
3751  foreach(lc, input_rel->partial_pathlist)
3752  {
3753  Path *path = (Path *) lfirst(lc);
3754  bool is_sorted;
3755 
3756  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3757  path->pathkeys);
3758  if (path == cheapest_partial_path || is_sorted)
3759  {
3760  /* Sort the cheapest partial path, if it isn't already */
3761  if (!is_sorted)
3762  path = (Path *) create_sort_path(root,
3763  grouped_rel,
3764  path,
3765  root->group_pathkeys,
3766  -1.0);
3767 
3768  if (parse->hasAggs)
3769  add_partial_path(grouped_rel, (Path *)
3770  create_agg_path(root,
3771  grouped_rel,
3772  path,
3773  partial_grouping_target,
3774  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3776  parse->groupClause,
3777  NIL,
3778  &agg_partial_costs,
3779  dNumPartialGroups));
3780  else
3781  add_partial_path(grouped_rel, (Path *)
3782  create_group_path(root,
3783  grouped_rel,
3784  path,
3785  partial_grouping_target,
3786  parse->groupClause,
3787  NIL,
3788  dNumPartialGroups));
3789  }
3790  }
3791  }
3792 
3793  if (can_hash)
3794  {
3795  /* Checked above */
3796  Assert(parse->hasAggs || parse->groupClause);
3797 
3798  hashaggtablesize =
3799  estimate_hashagg_tablesize(cheapest_partial_path,
3800  &agg_partial_costs,
3801  dNumPartialGroups);
3802 
3803  /*
3804  * Tentatively produce a partial HashAgg Path, depending on if it
3805  * looks as if the hash table will fit in work_mem.
3806  */
3807  if (hashaggtablesize < work_mem * 1024L)
3808  {
3809  add_partial_path(grouped_rel, (Path *)
3810  create_agg_path(root,
3811  grouped_rel,
3812  cheapest_partial_path,
3813  partial_grouping_target,
3814  AGG_HASHED,
3816  parse->groupClause,
3817  NIL,
3818  &agg_partial_costs,
3819  dNumPartialGroups));
3820  }
3821  }
3822  }
3823 
3824  /* Build final grouping paths */
3825  if (can_sort)
3826  {
3827  /*
3828  * Use any available suitably-sorted path as input, and also consider
3829  * sorting the cheapest-total path.
3830  */
3831  foreach(lc, input_rel->pathlist)
3832  {
3833  Path *path = (Path *) lfirst(lc);
3834  bool is_sorted;
3835 
3836  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3837  path->pathkeys);
3838  if (path == cheapest_path || is_sorted)
3839  {
3840  /* Sort the cheapest-total path if it isn't already sorted */
3841  if (!is_sorted)
3842  path = (Path *) create_sort_path(root,
3843  grouped_rel,
3844  path,
3845  root->group_pathkeys,
3846  -1.0);
3847 
3848  /* Now decide what to stick atop it */
3849  if (parse->groupingSets)
3850  {
3851  consider_groupingsets_paths(root, grouped_rel,
3852  path, true, can_hash, target,
3853  gd, agg_costs, dNumGroups);
3854  }
3855  else if (parse->hasAggs)
3856  {
3857  /*
3858  * We have aggregation, possibly with plain GROUP BY. Make
3859  * an AggPath.
3860  */
3861  add_path(grouped_rel, (Path *)
3862  create_agg_path(root,
3863  grouped_rel,
3864  path,
3865  target,
3866  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3868  parse->groupClause,
3869  (List *) parse->havingQual,
3870  agg_costs,
3871  dNumGroups));
3872  }
3873  else if (parse->groupClause)
3874  {
3875  /*
3876  * We have GROUP BY without aggregation or grouping sets.
3877  * Make a GroupPath.
3878  */
3879  add_path(grouped_rel, (Path *)
3880  create_group_path(root,
3881  grouped_rel,
3882  path,
3883  target,
3884  parse->groupClause,
3885  (List *) parse->havingQual,
3886  dNumGroups));
3887  }
3888  else
3889  {
3890  /* Other cases should have been handled above */
3891  Assert(false);
3892  }
3893  }
3894  }
3895 
3896  /*
3897  * Now generate a complete GroupAgg Path atop of the cheapest partial
3898  * path. We can do this using either Gather or Gather Merge.
3899  */
3900  if (grouped_rel->partial_pathlist)
3901  {
3902  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
3903  double total_groups = path->rows * path->parallel_workers;
3904 
3905  path = (Path *) create_gather_path(root,
3906  grouped_rel,
3907  path,
3908  partial_grouping_target,
3909  NULL,
3910  &total_groups);
3911 
3912  /*
3913  * Since Gather's output is always unsorted, we'll need to sort,
3914  * unless there's no GROUP BY clause or a degenerate (constant)
3915  * one, in which case there will only be a single group.
3916  */
3917  if (root->group_pathkeys)
3918  path = (Path *) create_sort_path(root,
3919  grouped_rel,
3920  path,
3921  root->group_pathkeys,
3922  -1.0);
3923 
3924  if (parse->hasAggs)
3925  add_path(grouped_rel, (Path *)
3926  create_agg_path(root,
3927  grouped_rel,
3928  path,
3929  target,
3930  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3932  parse->groupClause,
3933  (List *) parse->havingQual,
3934  &agg_final_costs,
3935  dNumGroups));
3936  else
3937  add_path(grouped_rel, (Path *)
3938  create_group_path(root,
3939  grouped_rel,
3940  path,
3941  target,
3942  parse->groupClause,
3943  (List *) parse->havingQual,
3944  dNumGroups));
3945 
3946  /*
3947  * The point of using Gather Merge rather than Gather is that it
3948  * can preserve the ordering of the input path, so there's no
3949  * reason to try it unless (1) it's possible to produce more than
3950  * one output row and (2) we want the output path to be ordered.
3951  */
3952  if (parse->groupClause != NIL && root->group_pathkeys != NIL)
3953  {
3954  foreach(lc, grouped_rel->partial_pathlist)
3955  {
3956  Path *subpath = (Path *) lfirst(lc);
3957  Path *gmpath;
3958  double total_groups;
3959 
3960  /*
3961  * It's useful to consider paths that are already properly
3962  * ordered for Gather Merge, because those don't need a
3963  * sort. It's also useful to consider the cheapest path,
3964  * because sorting it in parallel and then doing Gather
3965  * Merge may be better than doing an unordered Gather
3966  * followed by a sort. But there's no point in
3967  * considering non-cheapest paths that aren't already
3968  * sorted correctly.
3969  */
3970  if (path != subpath &&
3972  subpath->pathkeys))
3973  continue;
3974 
3975  total_groups = subpath->rows * subpath->parallel_workers;
3976 
3977  gmpath = (Path *)
3979  grouped_rel,
3980  subpath,
3981  partial_grouping_target,
3982  root->group_pathkeys,
3983  NULL,
3984  &total_groups);
3985 
3986  if (parse->hasAggs)
3987  add_path(grouped_rel, (Path *)
3988  create_agg_path(root,
3989  grouped_rel,
3990  gmpath,
3991  target,
3992  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3994  parse->groupClause,
3995  (List *) parse->havingQual,
3996  &agg_final_costs,
3997  dNumGroups));
3998  else
3999  add_path(grouped_rel, (Path *)
4000  create_group_path(root,
4001  grouped_rel,
4002  gmpath,
4003  target,
4004  parse->groupClause,
4005  (List *) parse->havingQual,
4006  dNumGroups));
4007  }
4008  }
4009  }
4010  }
4011 
4012  if (can_hash)
4013  {
4014  if (parse->groupingSets)
4015  {
4016  /*
4017  * Try for a hash-only groupingsets path over unsorted input.
4018  */
4019  consider_groupingsets_paths(root, grouped_rel,
4020  cheapest_path, false, true, target,
4021  gd, agg_costs, dNumGroups);
4022  }
4023  else
4024  {
4025  hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
4026  agg_costs,
4027  dNumGroups);
4028 
4029  /*
4030  * Provided that the estimated size of the hashtable does not
4031  * exceed work_mem, we'll generate a HashAgg Path, although if we
4032  * were unable to sort above, then we'd better generate a Path, so
4033  * that we at least have one.
4034  */
4035  if (hashaggtablesize < work_mem * 1024L ||
4036  grouped_rel->pathlist == NIL)
4037  {
4038  /*
4039  * We just need an Agg over the cheapest-total input path,
4040  * since input order won't matter.
4041  */
4042  add_path(grouped_rel, (Path *)
4043  create_agg_path(root, grouped_rel,
4044  cheapest_path,
4045  target,
4046  AGG_HASHED,
4048  parse->groupClause,
4049  (List *) parse->havingQual,
4050  agg_costs,
4051  dNumGroups));
4052  }
4053  }
4054 
4055  /*
4056  * Generate a HashAgg Path atop of the cheapest partial path. Once
4057  * again, we'll only do this if it looks as though the hash table
4058  * won't exceed work_mem.
4059  */
4060  if (grouped_rel->partial_pathlist)
4061  {
4062  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4063 
4064  hashaggtablesize = estimate_hashagg_tablesize(path,
4065  &agg_final_costs,
4066  dNumGroups);
4067 
4068  if (hashaggtablesize < work_mem * 1024L)
4069  {
4070  double total_groups = path->rows * path->parallel_workers;
4071 
4072  path = (Path *) create_gather_path(root,
4073  grouped_rel,
4074  path,
4075  partial_grouping_target,
4076  NULL,
4077  &total_groups);
4078 
4079  add_path(grouped_rel, (Path *)
4080  create_agg_path(root,
4081  grouped_rel,
4082  path,
4083  target,
4084  AGG_HASHED,
4086  parse->groupClause,
4087  (List *) parse->havingQual,
4088  &agg_final_costs,
4089  dNumGroups));
4090  }
4091  }
4092  }
4093 
4094  /* Give a helpful error if we failed to find any implementation */
4095  if (grouped_rel->pathlist == NIL)
4096  ereport(ERROR,
4097  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4098  errmsg("could not implement GROUP BY"),
4099  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4100 
4101  /*
4102  * If there is an FDW that's responsible for all baserels of the query,
4103  * let it consider adding ForeignPaths.
4104  */
4105  if (grouped_rel->fdwroutine &&
4106  grouped_rel->fdwroutine->GetForeignUpperPaths)
4108  input_rel, grouped_rel);
4109 
4110  /* Let extensions possibly add some more paths */
4112  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4113  input_rel, grouped_rel);
4114 
4115  /* Now choose the best path(s) */
4116  set_cheapest(grouped_rel);
4117 
4118  /*
4119  * We've been using the partial pathlist for the grouped relation to hold
4120  * partially aggregated paths, but that's actually a little bit bogus
4121  * because it's unsafe for later planning stages -- like ordered_rel ---
4122  * to get the idea that they can use these partial paths as if they didn't
4123  * need a FinalizeAggregate step. Zap the partial pathlist at this stage
4124  * so we don't get confused.
4125  */
4126  grouped_rel->partial_pathlist = NIL;
4127 
4128  return grouped_rel;
4129 }
4130 
4131 
4132 /*
4133  * For a given input path, consider the possible ways of doing grouping sets on
4134  * it, by combinations of hashing and sorting. This can be called multiple
4135  * times, so it's important that it not scribble on input. No result is
4136  * returned, but any generated paths are added to grouped_rel.
4137  */
4138 static void
4140  RelOptInfo *grouped_rel,
4141  Path *path,
4142  bool is_sorted,
4143  bool can_hash,
4144  PathTarget *target,
4145  grouping_sets_data *gd,
4146  const AggClauseCosts *agg_costs,
4147  double dNumGroups)
4148 {
4149  Query *parse = root->parse;
4150 
4151  /*
4152  * If we're not being offered sorted input, then only consider plans that
4153  * can be done entirely by hashing.
4154  *
4155  * We can hash everything if it looks like it'll fit in work_mem. But if
4156  * the input is actually sorted despite not being advertised as such, we
4157  * prefer to make use of that in order to use less memory.
4158  *
4159  * If none of the grouping sets are sortable, then ignore the work_mem
4160  * limit and generate a path anyway, since otherwise we'll just fail.
4161  */
4162  if (!is_sorted)
4163  {
4164  List *new_rollups = NIL;
4165  RollupData *unhashed_rollup = NULL;
4166  List *sets_data;
4167  List *empty_sets_data = NIL;
4168  List *empty_sets = NIL;
4169  ListCell *lc;
4170  ListCell *l_start = list_head(gd->rollups);
4171  AggStrategy strat = AGG_HASHED;
4172  Size hashsize;
4173  double exclude_groups = 0.0;
4174 
4175  Assert(can_hash);
4176 
4177  if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4178  {
4179  unhashed_rollup = lfirst(l_start);
4180  exclude_groups = unhashed_rollup->numGroups;
4181  l_start = lnext(l_start);
4182  }
4183 
4184  hashsize = estimate_hashagg_tablesize(path,
4185  agg_costs,
4186  dNumGroups - exclude_groups);
4187 
4188  /*
4189  * gd->rollups is empty if we have only unsortable columns to work
4190  * with. Override work_mem in that case; otherwise, we'll rely on the
4191  * sorted-input case to generate usable mixed paths.
4192  */
4193  if (hashsize > work_mem * 1024L && gd->rollups)
4194  return; /* nope, won't fit */
4195 
4196  /*
4197  * We need to burst the existing rollups list into individual grouping
4198  * sets and recompute a groupClause for each set.
4199  */
4200  sets_data = list_copy(gd->unsortable_sets);
4201 
4202  for_each_cell(lc, l_start)
4203  {
4204  RollupData *rollup = lfirst(lc);
4205 
4206  /*
4207  * If we find an unhashable rollup that's not been skipped by the
4208  * "actually sorted" check above, we can't cope; we'd need sorted
4209  * input (with a different sort order) but we can't get that here.
4210  * So bail out; we'll get a valid path from the is_sorted case
4211  * instead.
4212  *
4213  * The mere presence of empty grouping sets doesn't make a rollup
4214  * unhashable (see preprocess_grouping_sets), we handle those
4215  * specially below.
4216  */
4217  if (!rollup->hashable)
4218  return;
4219  else
4220  sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
4221  }
4222  foreach(lc, sets_data)
4223  {
4224  GroupingSetData *gs = lfirst(lc);
4225  List *gset = gs->set;
4226  RollupData *rollup;
4227 
4228  if (gset == NIL)
4229  {
4230  /* Empty grouping sets can't be hashed. */
4231  empty_sets_data = lappend(empty_sets_data, gs);
4232  empty_sets = lappend(empty_sets, NIL);
4233  }
4234  else
4235  {
4236  rollup = makeNode(RollupData);
4237 
4238  rollup->groupClause = preprocess_groupclause(root, gset);
4239  rollup->gsets_data = list_make1(gs);
4240  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4241  rollup->gsets_data,
4242  gd->tleref_to_colnum_map);
4243  rollup->numGroups = gs->numGroups;
4244  rollup->hashable = true;
4245  rollup->is_hashed = true;
4246  new_rollups = lappend(new_rollups, rollup);
4247  }
4248  }
4249 
4250  /*
4251  * If we didn't find anything nonempty to hash, then bail. We'll
4252  * generate a path from the is_sorted case.
4253  */
4254  if (new_rollups == NIL)
4255  return;
4256 
4257  /*
4258  * If there were empty grouping sets they should have been in the
4259  * first rollup.
4260  */
4261  Assert(!unhashed_rollup || !empty_sets);
4262 
4263  if (unhashed_rollup)
4264  {
4265  new_rollups = lappend(new_rollups, unhashed_rollup);
4266  strat = AGG_MIXED;
4267  }
4268  else if (empty_sets)
4269  {
4270  RollupData *rollup = makeNode(RollupData);
4271 
4272  rollup->groupClause = NIL;
4273  rollup->gsets_data = empty_sets_data;
4274  rollup->gsets = empty_sets;
4275  rollup->numGroups = list_length(empty_sets);
4276  rollup->hashable = false;
4277  rollup->is_hashed = false;
4278  new_rollups = lappend(new_rollups, rollup);
4279  strat = AGG_MIXED;
4280  }
4281 
4282  add_path(grouped_rel, (Path *)
4284  grouped_rel,
4285  path,
4286  target,
4287  (List *) parse->havingQual,
4288  strat,
4289  new_rollups,
4290  agg_costs,
4291  dNumGroups));
4292  return;
4293  }
4294 
4295  /*
4296  * If we have sorted input but nothing we can do with it, bail.
4297  */
4298  if (list_length(gd->rollups) == 0)
4299  return;
4300 
4301  /*
4302  * Given sorted input, we try and make two paths: one sorted and one mixed
4303  * sort/hash. (We need to try both because hashagg might be disabled, or
4304  * some columns might not be sortable.)
4305  *
4306  * can_hash is passed in as false if some obstacle elsewhere (such as
4307  * ordered aggs) means that we shouldn't consider hashing at all.
4308  */
4309  if (can_hash && gd->any_hashable)
4310  {
4311  List *rollups = NIL;
4312  List *hash_sets = list_copy(gd->unsortable_sets);
4313  double availspace = (work_mem * 1024.0);
4314  ListCell *lc;
4315 
4316  /*
4317  * Account first for space needed for groups we can't sort at all.
4318  */
4319  availspace -= (double) estimate_hashagg_tablesize(path,
4320  agg_costs,
4321  gd->dNumHashGroups);
4322 
4323  if (availspace > 0 && list_length(gd->rollups) > 1)
4324  {
4325  double scale;
4326  int num_rollups = list_length(gd->rollups);
4327  int k_capacity;
4328  int *k_weights = palloc(num_rollups * sizeof(int));
4329  Bitmapset *hash_items = NULL;
4330  int i;
4331 
4332  /*
4333  * We treat this as a knapsack problem: the knapsack capacity
4334  * represents work_mem, the item weights are the estimated memory
4335  * usage of the hashtables needed to implement a single rollup, and
4336  * we really ought to use the cost saving as the item value;
4337  * however, currently the costs assigned to sort nodes don't
4338  * reflect the comparison costs well, and so we treat all items as
4339  * of equal value (each rollup we hash instead saves us one sort).
4340  *
4341  * To use the discrete knapsack, we need to scale the values to a
4342  * reasonably small bounded range. We choose to allow a 5% error
4343  * margin; we have no more than 4096 rollups in the worst possible
4344  * case, which with a 5% error margin will require a bit over 42MB
4345  * of workspace. (Anyone wanting to plan queries that complex had
4346  * better have the memory for it. In more reasonable cases, with
4347  * no more than a couple of dozen rollups, the memory usage will
4348  * be negligible.)
4349  *
4350  * k_capacity is naturally bounded, but we clamp the values for
4351  * scale and weight (below) to avoid overflows or underflows (or
4352  * uselessly trying to use a scale factor less than 1 byte).
4353  */
4354  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4355  k_capacity = (int) floor(availspace / scale);
4356 
4357  /*
4358  * We leave the first rollup out of consideration since it's the
4359  * one that matches the input sort order. We assign indexes "i"
4360  * to only those entries considered for hashing; the second loop,
4361  * below, must use the same condition.
4362  */
4363  i = 0;
4365  {
4366  RollupData *rollup = lfirst(lc);
4367 
4368  if (rollup->hashable)
4369  {
4370  double sz = estimate_hashagg_tablesize(path,
4371  agg_costs,
4372  rollup->numGroups);
4373 
4374  /*
4375  * If sz is enormous, but work_mem (and hence scale) is
4376  * small, avoid integer overflow here.
4377  */
4378  k_weights[i] = (int) Min(floor(sz / scale),
4379  k_capacity + 1.0);
4380  ++i;
4381  }
4382  }
4383 
4384  /*
4385  * Apply knapsack algorithm; compute the set of items which
4386  * maximizes the value stored (in this case the number of sorts
4387  * saved) while keeping the total size (approximately) within
4388  * capacity.
4389  */
4390  if (i > 0)
4391  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4392 
4393  if (!bms_is_empty(hash_items))
4394  {
4395  rollups = list_make1(linitial(gd->rollups));
4396 
4397  i = 0;
4399  {
4400  RollupData *rollup = lfirst(lc);
4401 
4402  if (rollup->hashable)
4403  {
4404  if (bms_is_member(i, hash_items))
4405  hash_sets = list_concat(hash_sets,
4406  list_copy(rollup->gsets_data));
4407  else
4408  rollups = lappend(rollups, rollup);
4409  ++i;
4410  }
4411  else
4412  rollups = lappend(rollups, rollup);
4413  }
4414  }
4415  }
4416 
4417  if (!rollups && hash_sets)
4418  rollups = list_copy(gd->rollups);
4419 
4420  foreach(lc, hash_sets)
4421  {
4422  GroupingSetData *gs = lfirst(lc);
4423  RollupData *rollup = makeNode(RollupData);
4424 
4425  Assert(gs->set != NIL);
4426 
4427  rollup->groupClause = preprocess_groupclause(root, gs->set);
4428  rollup->gsets_data = list_make1(gs);
4429  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4430  rollup->gsets_data,
4431  gd->tleref_to_colnum_map);
4432  rollup->numGroups = gs->numGroups;
4433  rollup->hashable = true;
4434  rollup->is_hashed = true;
4435  rollups = lcons(rollup, rollups);
4436  }
4437 
4438  if (rollups)
4439  {
4440  add_path(grouped_rel, (Path *)
4442  grouped_rel,
4443  path,
4444  target,
4445  (List *) parse->havingQual,
4446  AGG_MIXED,
4447  rollups,
4448  agg_costs,
4449  dNumGroups));
4450  }
4451  }
4452 
4453  /*
4454  * Now try the simple sorted case.
4455  */
4456  if (!gd->unsortable_sets)
4457  add_path(grouped_rel, (Path *)
4459  grouped_rel,
4460  path,
4461  target,
4462  (List *) parse->havingQual,
4463  AGG_SORTED,
4464  gd->rollups,
4465  agg_costs,
4466  dNumGroups));
4467 }
4468 
4469 /*
4470  * create_window_paths
4471  *
4472  * Build a new upperrel containing Paths for window-function evaluation.
4473  *
4474  * input_rel: contains the source-data Paths
4475  * input_target: result of make_window_input_target
4476  * output_target: what the topmost WindowAggPath should return
4477  * tlist: query's target list (needed to look up pathkeys)
4478  * wflists: result of find_window_functions
4479  * activeWindows: result of select_active_windows
4480  *
4481  * Note: all Paths in input_rel are expected to return input_target.
4482  */
4483 static RelOptInfo *
4485  RelOptInfo *input_rel,
4486  PathTarget *input_target,
4487  PathTarget *output_target,
4488  List *tlist,
4489  WindowFuncLists *wflists,
4490  List *activeWindows)
4491 {
4492  RelOptInfo *window_rel;
4493  ListCell *lc;
4494 
4495  /* For now, do all work in the (WINDOW, NULL) upperrel */
4496  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4497 
4498  /*
4499  * If the input relation is not parallel-safe, then the window relation
4500  * can't be parallel-safe, either. Otherwise, we need to examine the
4501  * target list and active windows for non-parallel-safe constructs.
4502  */
4503  if (input_rel->consider_parallel &&
4504  is_parallel_safe(root, (Node *) output_target->exprs) &&
4505  is_parallel_safe(root, (Node *) activeWindows))
4506  window_rel->consider_parallel = true;
4507 
4508  /*
4509  * If the input rel belongs to a single FDW, so does the window rel.
4510  */
4511  window_rel->serverid = input_rel->serverid;
4512  window_rel->userid = input_rel->userid;
4513  window_rel->useridiscurrent = input_rel->useridiscurrent;
4514  window_rel->fdwroutine = input_rel->fdwroutine;
4515 
4516  /*
4517  * Consider computing window functions starting from the existing
4518  * cheapest-total path (which will likely require a sort) as well as any
4519  * existing paths that satisfy root->window_pathkeys (which won't).
4520  */
4521  foreach(lc, input_rel->pathlist)
4522  {
4523  Path *path = (Path *) lfirst(lc);
4524 
4525  if (path == input_rel->cheapest_total_path ||
4528  window_rel,
4529  path,
4530  input_target,
4531  output_target,
4532  tlist,
4533  wflists,
4534  activeWindows);
4535  }
4536 
4537  /*
4538  * If there is an FDW that's responsible for all baserels of the query,
4539  * let it consider adding ForeignPaths.
4540  */
4541  if (window_rel->fdwroutine &&
4542  window_rel->fdwroutine->GetForeignUpperPaths)
4543  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4544  input_rel, window_rel);
4545 
4546  /* Let extensions possibly add some more paths */
4548  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4549  input_rel, window_rel);
4550 
4551  /* Now choose the best path(s) */
4552  set_cheapest(window_rel);
4553 
4554  return window_rel;
4555 }
4556 
4557 /*
4558  * Stack window-function implementation steps atop the given Path, and
4559  * add the result to window_rel.
4560  *
4561  * window_rel: upperrel to contain result
4562  * path: input Path to use (must return input_target)
4563  * input_target: result of make_window_input_target
4564  * output_target: what the topmost WindowAggPath should return
4565  * tlist: query's target list (needed to look up pathkeys)
4566  * wflists: result of find_window_functions
4567  * activeWindows: result of select_active_windows
4568  */
4569 static void
4571  RelOptInfo *window_rel,
4572  Path *path,
4573  PathTarget *input_target,
4574  PathTarget *output_target,
4575  List *tlist,
4576  WindowFuncLists *wflists,
4577  List *activeWindows)
4578 {
4579  PathTarget *window_target;
4580  ListCell *l;
4581 
4582  /*
4583  * Since each window clause could require a different sort order, we stack
4584  * up a WindowAgg node for each clause, with sort steps between them as
4585  * needed. (We assume that select_active_windows chose a good order for
4586  * executing the clauses in.)
4587  *
4588  * input_target should contain all Vars and Aggs needed for the result.
4589  * (In some cases we wouldn't need to propagate all of these all the way
4590  * to the top, since they might only be needed as inputs to WindowFuncs.
4591  * It's probably not worth trying to optimize that though.) It must also
4592  * contain all window partitioning and sorting expressions, to ensure
4593  * they're computed only once at the bottom of the stack (that's critical
4594  * for volatile functions). As we climb up the stack, we'll add outputs
4595  * for the WindowFuncs computed at each level.
4596  */
4597  window_target = input_target;
4598 
4599  foreach(l, activeWindows)
4600  {
4601  WindowClause *wc = (WindowClause *) lfirst(l);
4602  List *window_pathkeys;
4603 
4604  window_pathkeys = make_pathkeys_for_window(root,
4605  wc,
4606  tlist);
4607 
4608  /* Sort if necessary */
4609  if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4610  {
4611  path = (Path *) create_sort_path(root, window_rel,
4612  path,
4613  window_pathkeys,
4614  -1.0);
4615  }
4616 
4617  if (lnext(l))
4618  {
4619  /*
4620  * Add the current WindowFuncs to the output target for this
4621  * intermediate WindowAggPath. We must copy window_target to
4622  * avoid changing the previous path's target.
4623  *
4624  * Note: a WindowFunc adds nothing to the target's eval costs; but
4625  * we do need to account for the increase in tlist width.
4626  */
4627  ListCell *lc2;
4628 
4629  window_target = copy_pathtarget(window_target);
4630  foreach(lc2, wflists->windowFuncs[wc->winref])
4631  {
4632  WindowFunc *wfunc = castNode(WindowFunc, lfirst(lc2));
4633 
4634  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4635  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4636  }
4637  }
4638  else
4639  {
4640  /* Install the goal target in the topmost WindowAgg */
4641  window_target = output_target;
4642  }
4643 
4644  path = (Path *)
4645  create_windowagg_path(root, window_rel, path, window_target,
4646  wflists->windowFuncs[wc->winref],
4647  wc,
4648  window_pathkeys);
4649  }
4650 
4651  add_path(window_rel, path);
4652 }
4653 
4654 /*
4655  * create_distinct_paths
4656  *
4657  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4658  *
4659  * input_rel: contains the source-data Paths
4660  *
4661  * Note: input paths should already compute the desired pathtarget, since
4662  * Sort/Unique won't project anything.
4663  */
4664 static RelOptInfo *
4666  RelOptInfo *input_rel)
4667 {
4668  Query *parse = root->parse;
4669  Path *cheapest_input_path = input_rel->cheapest_total_path;
4670  RelOptInfo *distinct_rel;
4671  double numDistinctRows;
4672  bool allow_hash;
4673  Path *path;
4674  ListCell *lc;
4675 
4676  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4677  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4678 
4679  /*
4680  * We don't compute anything at this level, so distinct_rel will be
4681  * parallel-safe if the input rel is parallel-safe. In particular, if
4682  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4683  * output those expressions, and will not be parallel-safe unless those
4684  * expressions are parallel-safe.
4685  */
4686  distinct_rel->consider_parallel = input_rel->consider_parallel;
4687 
4688  /*
4689  * If the input rel belongs to a single FDW, so does the distinct_rel.
4690  */
4691  distinct_rel->serverid = input_rel->serverid;
4692  distinct_rel->userid = input_rel->userid;
4693  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4694  distinct_rel->fdwroutine = input_rel->fdwroutine;
4695 
4696  /* Estimate number of distinct rows there will be */
4697  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4698  root->hasHavingQual)
4699  {
4700  /*
4701  * If there was grouping or aggregation, use the number of input rows
4702  * as the estimated number of DISTINCT rows (ie, assume the input is
4703  * already mostly unique).
4704  */
4705  numDistinctRows = cheapest_input_path->rows;
4706  }
4707  else
4708  {
4709  /*
4710  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4711  */
4712  List *distinctExprs;
4713 
4714  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4715  parse->targetList);
4716  numDistinctRows = estimate_num_groups(root, distinctExprs,
4717  cheapest_input_path->rows,
4718  NULL);
4719  }
4720 
4721  /*
4722  * Consider sort-based implementations of DISTINCT, if possible.
4723  */
4725  {
4726  /*
4727  * First, if we have any adequately-presorted paths, just stick a
4728  * Unique node on those. Then consider doing an explicit sort of the
4729  * cheapest input path and Unique'ing that.
4730  *
4731  * When we have DISTINCT ON, we must sort by the more rigorous of
4732  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4733  * Also, if we do have to do an explicit sort, we might as well use
4734  * the more rigorous ordering to avoid a second sort later. (Note
4735  * that the parser will have ensured that one clause is a prefix of
4736  * the other.)
4737  */
4738  List *needed_pathkeys;
4739 
4740  if (parse->hasDistinctOn &&
4742  list_length(root->sort_pathkeys))
4743  needed_pathkeys = root->sort_pathkeys;
4744  else
4745  needed_pathkeys = root->distinct_pathkeys;
4746 
4747  foreach(lc, input_rel->pathlist)
4748  {
4749  Path *path = (Path *) lfirst(lc);
4750 
4751  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4752  {
4753  add_path(distinct_rel, (Path *)
4754  create_upper_unique_path(root, distinct_rel,
4755  path,
4757  numDistinctRows));
4758  }
4759  }
4760 
4761  /* For explicit-sort case, always use the more rigorous clause */
4762  if (list_length(root->distinct_pathkeys) <
4763  list_length(root->sort_pathkeys))
4764  {
4765  needed_pathkeys = root->sort_pathkeys;
4766  /* Assert checks that parser didn't mess up... */
4768  needed_pathkeys));
4769  }
4770  else
4771  needed_pathkeys = root->distinct_pathkeys;
4772 
4773  path = cheapest_input_path;
4774  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4775  path = (Path *) create_sort_path(root, distinct_rel,
4776  path,
4777  needed_pathkeys,
4778  -1.0);
4779 
4780  add_path(distinct_rel, (Path *)
4781  create_upper_unique_path(root, distinct_rel,
4782  path,
4784  numDistinctRows));
4785  }
4786 
4787  /*
4788  * Consider hash-based implementations of DISTINCT, if possible.
4789  *
4790  * If we were not able to make any other types of path, we *must* hash or
4791  * die trying. If we do have other choices, there are several things that
4792  * should prevent selection of hashing: if the query uses DISTINCT ON
4793  * (because it won't really have the expected behavior if we hash), or if
4794  * enable_hashagg is off, or if it looks like the hashtable will exceed
4795  * work_mem.
4796  *
4797  * Note: grouping_is_hashable() is much more expensive to check than the
4798  * other gating conditions, so we want to do it last.
4799  */
4800  if (distinct_rel->pathlist == NIL)
4801  allow_hash = true; /* we have no alternatives */
4802  else if (parse->hasDistinctOn || !enable_hashagg)
4803  allow_hash = false; /* policy-based decision not to hash */
4804  else
4805  {
4806  Size hashentrysize;
4807 
4808  /* Estimate per-hash-entry space at tuple width... */
4809  hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4811  /* plus the per-hash-entry overhead */
4812  hashentrysize += hash_agg_entry_size(0);
4813 
4814  /* Allow hashing only if hashtable is predicted to fit in work_mem */
4815  allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4816  }
4817 
4818  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4819  {
4820  /* Generate hashed aggregate path --- no sort needed */
4821  add_path(distinct_rel, (Path *)
4822  create_agg_path(root,
4823  distinct_rel,
4824  cheapest_input_path,
4825  cheapest_input_path->pathtarget,
4826  AGG_HASHED,
4828  parse->distinctClause,
4829  NIL,
4830  NULL,
4831  numDistinctRows));
4832  }
4833 
4834  /* Give a helpful error if we failed to find any implementation */
4835  if (distinct_rel->pathlist == NIL)
4836  ereport(ERROR,
4837  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4838  errmsg("could not implement DISTINCT"),
4839  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4840 
4841  /*
4842  * If there is an FDW that's responsible for all baserels of the query,
4843  * let it consider adding ForeignPaths.
4844  */
4845  if (distinct_rel->fdwroutine &&
4846  distinct_rel->fdwroutine->GetForeignUpperPaths)
4847  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4848  input_rel, distinct_rel);
4849 
4850  /* Let extensions possibly add some more paths */
4852  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4853  input_rel, distinct_rel);
4854 
4855  /* Now choose the best path(s) */
4856  set_cheapest(distinct_rel);
4857 
4858  return distinct_rel;
4859 }
4860 
4861 /*
4862  * create_ordered_paths
4863  *
4864  * Build a new upperrel containing Paths for ORDER BY evaluation.
4865  *
4866  * All paths in the result must satisfy the ORDER BY ordering.
4867  * The only new path we need consider is an explicit sort on the
4868  * cheapest-total existing path.
4869  *
4870  * input_rel: contains the source-data Paths
4871  * target: the output tlist the result Paths must emit
4872  * limit_tuples: estimated bound on the number of output tuples,
4873  * or -1 if no LIMIT or couldn't estimate
4874  */
4875 static RelOptInfo *
4877  RelOptInfo *input_rel,
4878  PathTarget *target,
4879  double limit_tuples)
4880 {
4881  Path *cheapest_input_path = input_rel->cheapest_total_path;
4882  RelOptInfo *ordered_rel;
4883  ListCell *lc;
4884 
4885  /* For now, do all work in the (ORDERED, NULL) upperrel */
4886  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4887 
4888  /*
4889  * If the input relation is not parallel-safe, then the ordered relation
4890  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4891  * target list is parallel-safe.
4892  */
4893  if (input_rel->consider_parallel &&
4894  is_parallel_safe(root, (Node *) target->exprs))
4895  ordered_rel->consider_parallel = true;
4896 
4897  /*
4898  * If the input rel belongs to a single FDW, so does the ordered_rel.
4899  */
4900  ordered_rel->serverid = input_rel->serverid;
4901  ordered_rel->userid = input_rel->userid;
4902  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4903  ordered_rel->fdwroutine = input_rel->fdwroutine;
4904 
4905  foreach(lc, input_rel->pathlist)
4906  {
4907  Path *path = (Path *) lfirst(lc);
4908  bool is_sorted;
4909 
4910  is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4911  path->pathkeys);
4912  if (path == cheapest_input_path || is_sorted)
4913  {
4914  if (!is_sorted)
4915  {
4916  /* An explicit sort here can take advantage of LIMIT */
4917  path = (Path *) create_sort_path(root,
4918  ordered_rel,
4919  path,
4920  root->sort_pathkeys,
4921  limit_tuples);
4922  }
4923 
4924  /* Add projection step if needed */
4925  if (path->pathtarget != target)
4926  path = apply_projection_to_path(root, ordered_rel,
4927  path, target);
4928 
4929  add_path(ordered_rel, path);
4930  }
4931  }
4932 
4933  /*
4934  * generate_gather_paths() will have already generated a simple Gather
4935  * path for the best parallel path, if any, and the loop above will have
4936  * considered sorting it. Similarly, generate_gather_paths() will also
4937  * have generated order-preserving Gather Merge plans which can be used
4938  * without sorting if they happen to match the sort_pathkeys, and the loop
4939  * above will have handled those as well. However, there's one more
4940  * possibility: it may make sense to sort the cheapest partial path
4941  * according to the required output order and then use Gather Merge.
4942  */
4943  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
4944  input_rel->partial_pathlist != NIL)
4945  {
4946  Path *cheapest_partial_path;
4947 
4948  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4949 
4950  /*
4951  * If cheapest partial path doesn't need a sort, this is redundant
4952  * with what's already been tried.
4953  */
4955  cheapest_partial_path->pathkeys))
4956  {
4957  Path *path;
4958  double total_groups;
4959 
4960  path = (Path *) create_sort_path(root,
4961  ordered_rel,
4962  cheapest_partial_path,
4963  root->sort_pathkeys,
4964  -1.0);
4965 
4966  total_groups = cheapest_partial_path->rows *
4967  cheapest_partial_path->parallel_workers;
4968  path = (Path *)
4969  create_gather_merge_path(root, ordered_rel,
4970  path,
4971  target, root->sort_pathkeys, NULL,
4972  &total_groups);
4973 
4974  /* Add projection step if needed */
4975  if (path->pathtarget != target)
4976  path = apply_projection_to_path(root, ordered_rel,
4977  path, target);
4978 
4979  add_path(ordered_rel, path);
4980  }
4981  }
4982 
4983  /*
4984  * If there is an FDW that's responsible for all baserels of the query,
4985  * let it consider adding ForeignPaths.
4986  */
4987  if (ordered_rel->fdwroutine &&
4988  ordered_rel->fdwroutine->GetForeignUpperPaths)
4989  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
4990  input_rel, ordered_rel);
4991 
4992  /* Let extensions possibly add some more paths */
4994  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
4995  input_rel, ordered_rel);
4996 
4997  /*
4998  * No need to bother with set_cheapest here; grouping_planner does not
4999  * need us to do it.
5000  */
5001  Assert(ordered_rel->pathlist != NIL);
5002 
5003  return ordered_rel;
5004 }
5005 
5006 
5007 /*
5008  * make_group_input_target
5009  * Generate appropriate PathTarget for initial input to grouping nodes.
5010  *
5011  * If there is grouping or aggregation, the scan/join subplan cannot emit
5012  * the query's final targetlist; for example, it certainly can't emit any
5013  * aggregate function calls. This routine generates the correct target
5014  * for the scan/join subplan.
5015  *
5016  * The query target list passed from the parser already contains entries
5017  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5018  * for variables used only in HAVING clauses; so we need to add those
5019  * variables to the subplan target list. Also, we flatten all expressions
5020  * except GROUP BY items into their component variables; other expressions
5021  * will be computed by the upper plan nodes rather than by the subplan.
5022  * For example, given a query like
5023  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5024  * we want to pass this targetlist to the subplan:
5025  * a+b,c,d
5026  * where the a+b target will be used by the Sort/Group steps, and the
5027  * other targets will be used for computing the final results.
5028  *
5029  * 'final_target' is the query's final target list (in PathTarget form)
5030  *
5031  * The result is the PathTarget to be computed by the Paths returned from
5032  * query_planner().
5033  */
5034 static PathTarget *
5036 {
5037  Query *parse = root->parse;
5038  PathTarget *input_target;
5039  List *non_group_cols;
5040  List *non_group_vars;
5041  int i;
5042  ListCell *lc;
5043 
5044  /*
5045  * We must build a target containing all grouping columns, plus any other
5046  * Vars mentioned in the query's targetlist and HAVING qual.
5047  */
5048  input_target = create_empty_pathtarget();
5049  non_group_cols = NIL;
5050 
5051  i = 0;
5052  foreach(lc, final_target->exprs)
5053  {
5054  Expr *expr = (Expr *) lfirst(lc);
5055  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5056 
5057  if (sgref && parse->groupClause &&
5059  {
5060  /*
5061  * It's a grouping column, so add it to the input target as-is.
5062  */
5063  add_column_to_pathtarget(input_target, expr, sgref);
5064  }
5065  else
5066  {
5067  /*
5068  * Non-grouping column, so just remember the expression for later
5069  * call to pull_var_clause.
5070  */
5071  non_group_cols = lappend(non_group_cols, expr);
5072  }
5073 
5074  i++;
5075  }
5076 
5077  /*
5078  * If there's a HAVING clause, we'll need the Vars it uses, too.
5079  */
5080  if (parse->havingQual)
5081  non_group_cols = lappend(non_group_cols, parse->havingQual);
5082 
5083  /*
5084  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5085  * add them to the input target if not already present. (A Var used
5086  * directly as a GROUP BY item will be present already.) Note this
5087  * includes Vars used in resjunk items, so we are covering the needs of
5088  * ORDER BY and window specifications. Vars used within Aggrefs and
5089  * WindowFuncs will be pulled out here, too.
5090  */
5091  non_group_vars = pull_var_clause((Node *) non_group_cols,
5095  add_new_columns_to_pathtarget(input_target, non_group_vars);
5096 
5097  /* clean up cruft */
5098  list_free(non_group_vars);
5099  list_free(non_group_cols);
5100 
5101  /* XXX this causes some redundant cost calculation ... */
5102  return set_pathtarget_cost_width(root, input_target);
5103 }
5104 
5105 /*
5106  * make_partial_grouping_target
5107  * Generate appropriate PathTarget for output of partial aggregate
5108  * (or partial grouping, if there are no aggregates) nodes.
5109  *
5110  * A partial aggregation node needs to emit all the same aggregates that
5111  * a regular aggregation node would, plus any aggregates used in HAVING;
5112  * except that the Aggref nodes should be marked as partial aggregates.
5113  *
5114  * In addition, we'd better emit any Vars and PlaceholderVars that are
5115  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5116  * these would be Vars that are grouped by or used in grouping expressions.)
5117  *
5118  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5119  * We get the HAVING clause out of *root.
5120  */
5121 static PathTarget *
5123 {
5124  Query *parse = root->parse;
5125  PathTarget *partial_target;
5126  List *non_group_cols;
5127  List *non_group_exprs;
5128  int i;
5129  ListCell *lc;
5130 
5131  partial_target = create_empty_pathtarget();
5132  non_group_cols = NIL;
5133 
5134  i = 0;
5135  foreach(lc, grouping_target->exprs)
5136  {
5137  Expr *expr = (Expr *) lfirst(lc);
5138  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5139 
5140  if (sgref && parse->groupClause &&
5142  {
5143  /*
5144  * It's a grouping column, so add it to the partial_target as-is.
5145  * (This allows the upper agg step to repeat the grouping calcs.)
5146  */
5147  add_column_to_pathtarget(partial_target, expr, sgref);
5148  }
5149  else
5150  {
5151  /*
5152  * Non-grouping column, so just remember the expression for later
5153  * call to pull_var_clause.
5154  */
5155  non_group_cols = lappend(non_group_cols, expr);
5156  }
5157 
5158  i++;
5159  }
5160 
5161  /*
5162  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5163  */
5164  if (parse->havingQual)
5165  non_group_cols = lappend(non_group_cols, parse->havingQual);
5166 
5167  /*
5168  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5169  * non-group cols (plus HAVING), and add them to the partial_target if not
5170  * already present. (An expression used directly as a GROUP BY item will
5171  * be present already.) Note this includes Vars used in resjunk items, so
5172  * we are covering the needs of ORDER BY and window specifications.
5173  */
5174  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5178 
5179  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5180 
5181  /*
5182  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5183  * are at the top level of the target list, so we can just scan the list
5184  * rather than recursing through the expression trees.
5185  */
5186  foreach(lc, partial_target->exprs)
5187  {
5188  Aggref *aggref = (Aggref *) lfirst(lc);
5189 
5190  if (IsA(aggref, Aggref))
5191  {
5192  Aggref *newaggref;
5193 
5194  /*
5195  * We shouldn't need to copy the substructure of the Aggref node,
5196  * but flat-copy the node itself to avoid damaging other trees.
5197  */
5198  newaggref = makeNode(Aggref);
5199  memcpy(newaggref, aggref, sizeof(Aggref));
5200 
5201  /* For now, assume serialization is required */
5203 
5204  lfirst(lc) = newaggref;
5205  }
5206  }
5207 
5208  /* clean up cruft */
5209  list_free(non_group_exprs);
5210  list_free(non_group_cols);
5211 
5212  /* XXX this causes some redundant cost calculation ... */
5213  return set_pathtarget_cost_width(root, partial_target);
5214 }
5215 
5216 /*
5217  * mark_partial_aggref
5218  * Adjust an Aggref to make it represent a partial-aggregation step.
5219  *
5220  * The Aggref node is modified in-place; caller must do any copying required.
5221  */
5222 void
5224 {
5225  /* aggtranstype should be computed by this point */
5227  /* ... but aggsplit should still be as the parser left it */
5228  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5229 
5230  /* Mark the Aggref with the intended partial-aggregation mode */
5231  agg->aggsplit = aggsplit;
5232 
5233  /*
5234  * Adjust result type if needed. Normally, a partial aggregate returns
5235  * the aggregate's transition type; but if that's INTERNAL and we're
5236  * serializing, it returns BYTEA instead.
5237  */
5238  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5239  {
5240  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5241  agg->aggtype = BYTEAOID;
5242  else
5243  agg->aggtype = agg->aggtranstype;
5244  }
5245 }
5246 
5247 /*
5248  * postprocess_setop_tlist
5249  * Fix up targetlist returned by plan_set_operations().
5250  *
5251  * We need to transpose sort key info from the orig_tlist into new_tlist.
5252  * NOTE: this would not be good enough if we supported resjunk sort keys
5253  * for results of set operations --- then, we'd need to project a whole
5254  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5255  * find any resjunk columns in orig_tlist.
5256  */
5257 static List *
5258 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5259 {
5260  ListCell *l;
5261  ListCell *orig_tlist_item = list_head(orig_tlist);
5262 
5263  foreach(l, new_tlist)
5264  {
5265  TargetEntry *new_tle = (TargetEntry *) lfirst(l);
5266  TargetEntry *orig_tle;
5267 
5268  /* ignore resjunk columns in setop result */
5269  if (new_tle->resjunk)
5270  continue;
5271 
5272  Assert(orig_tlist_item != NULL);
5273  orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
5274  orig_tlist_item = lnext(orig_tlist_item);
5275  if (orig_tle->resjunk) /* should not happen */
5276  elog(ERROR, "resjunk output columns are not implemented");
5277  Assert(new_tle->resno == orig_tle->resno);
5278  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5279  }
5280  if (orig_tlist_item != NULL)
5281  elog(ERROR, "resjunk output columns are not implemented");
5282  return new_tlist;
5283 }
5284 
5285 /*
5286  * select_active_windows
5287  * Create a list of the "active" window clauses (ie, those referenced
5288  * by non-deleted WindowFuncs) in the order they are to be executed.
5289  */
5290 static List *
5292 {
5293  List *result;
5294  List *actives;
5295  ListCell *lc;
5296 
5297  /* First, make a list of the active windows */
5298  actives = NIL;
5299  foreach(lc, root->parse->windowClause)
5300  {
5301  WindowClause *wc = (WindowClause *) lfirst(lc);
5302 
5303  /* It's only active if wflists shows some related WindowFuncs */
5304  Assert(wc->winref <= wflists->maxWinRef);
5305  if (wflists->windowFuncs[wc->winref] != NIL)
5306  actives = lappend(actives, wc);
5307  }
5308 
5309  /*
5310  * Now, ensure that windows with identical partitioning/ordering clauses
5311  * are adjacent in the list. This is required by the SQL standard, which
5312  * says that only one sort is to be used for such windows, even if they
5313  * are otherwise distinct (eg, different names or framing clauses).
5314  *
5315  * There is room to be much smarter here, for example detecting whether
5316  * one window's sort keys are a prefix of another's (so that sorting for
5317  * the latter would do for the former), or putting windows first that
5318  * match a sort order available for the underlying query. For the moment
5319  * we are content with meeting the spec.
5320  */
5321  result = NIL;
5322  while (actives != NIL)
5323  {
5324  WindowClause *wc = (WindowClause *) linitial(actives);
5325  ListCell *prev;
5326  ListCell *next;
5327 
5328  /* Move wc from actives to result */
5329  actives = list_delete_first(actives);
5330  result = lappend(result, wc);
5331 
5332  /* Now move any matching windows from actives to result */
5333  prev = NULL;
5334  for (lc = list_head(actives); lc; lc = next)
5335  {
5336  WindowClause *wc2 = (WindowClause *) lfirst(lc);
5337 
5338  next = lnext(lc);
5339  /* framing options are NOT to be compared here! */
5340  if (equal(wc->partitionClause, wc2->partitionClause) &&
5341  equal(wc->orderClause, wc2->orderClause))
5342  {
5343  actives = list_delete_cell(actives, lc, prev);
5344  result = lappend(result, wc2);
5345  }
5346  else
5347  prev = lc;
5348  }
5349  }
5350 
5351  return result;
5352 }
5353 
5354 /*
5355  * make_window_input_target
5356  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5357  *
5358  * When the query has window functions, this function computes the desired
5359  * target to be computed by the node just below the first WindowAgg.
5360  * This tlist must contain all values needed to evaluate the window functions,
5361  * compute the final target list, and perform any required final sort step.
5362  * If multiple WindowAggs are needed, each intermediate one adds its window
5363  * function results onto this base tlist; only the topmost WindowAgg computes
5364  * the actual desired target list.
5365  *
5366  * This function is much like make_group_input_target, though not quite enough
5367  * like it to share code. As in that function, we flatten most expressions
5368  * into their component variables. But we do not want to flatten window
5369  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5370  * evaluations of them, which would be bad (possibly even resulting in
5371  * inconsistent answers, if they contain volatile functions).
5372  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5373  * make_group_input_target, because we may no longer have access to the
5374  * individual Vars in them.
5375  *
5376  * Another key difference from make_group_input_target is that we don't
5377  * flatten Aggref expressions, since those are to be computed below the
5378  * window functions and just referenced like Vars above that.
5379  *
5380  * 'final_target' is the query's final target list (in PathTarget form)
5381  * 'activeWindows' is the list of active windows previously identified by
5382  * select_active_windows.
5383  *
5384  * The result is the PathTarget to be computed by the plan node immediately
5385  * below the first WindowAgg node.
5386  */
5387 static PathTarget *
5389  PathTarget *final_target,
5390  List *activeWindows)
5391 {
5392  Query *parse = root->parse;
5393  PathTarget *input_target;
5394  Bitmapset *sgrefs;
5395  List *flattenable_cols;
5396  List *flattenable_vars;
5397  int i;
5398  ListCell *lc;
5399 
5400  Assert(parse->hasWindowFuncs);
5401 
5402  /*
5403  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5404  * into a bitmapset for convenient reference below.
5405  */
5406  sgrefs = NULL;
5407  foreach(lc, activeWindows)
5408  {
5409  WindowClause *wc = (WindowClause *) lfirst(lc);
5410  ListCell *lc2;
5411 
5412  foreach(lc2, wc->partitionClause)
5413  {
5414  SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5415 
5416  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5417  }
5418  foreach(lc2, wc->orderClause)
5419  {
5420  SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5421 
5422  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5423  }
5424  }
5425 
5426  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5427  foreach(lc, parse->groupClause)
5428  {
5429  SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
5430 
5431  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5432  }
5433 
5434  /*
5435  * Construct a target containing all the non-flattenable targetlist items,
5436  * and save aside the others for a moment.
5437  */
5438  input_target = create_empty_pathtarget();
5439  flattenable_cols = NIL;
5440 
5441  i = 0;
5442  foreach(lc, final_target->exprs)
5443  {
5444  Expr *expr = (Expr *) lfirst(lc);
5445  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5446 
5447  /*
5448  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5449  * that such items can't contain window functions, so it's okay to
5450  * compute them below the WindowAgg nodes.)
5451  */
5452  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5453  {
5454  /*
5455  * Don't want to deconstruct this value, so add it to the input
5456  * target as-is.
5457  */
5458  add_column_to_pathtarget(input_target, expr, sgref);
5459  }
5460  else
5461  {
5462  /*
5463  * Column is to be flattened, so just remember the expression for
5464  * later call to pull_var_clause.
5465  */
5466  flattenable_cols = lappend(flattenable_cols, expr);
5467  }
5468 
5469  i++;
5470  }
5471 
5472  /*
5473  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5474  * add them to the input target if not already present. (Some might be
5475  * there already because they're used directly as window/group clauses.)
5476  *
5477  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5478  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5479  * at higher levels. On the other hand, we should recurse into
5480  * WindowFuncs to make sure their input expressions are available.
5481  */
5482  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5486  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5487 
5488  /* clean up cruft */
5489  list_free(flattenable_vars);
5490  list_free(flattenable_cols);
5491 
5492  /* XXX this causes some redundant cost calculation ... */
5493  return set_pathtarget_cost_width(root, input_target);
5494 }
5495 
5496 /*
5497  * make_pathkeys_for_window
5498  * Create a pathkeys list describing the required input ordering
5499  * for the given WindowClause.
5500  *
5501  * The required ordering is first the PARTITION keys, then the ORDER keys.
5502  * In the future we might try to implement windowing using hashing, in which
5503  * case the ordering could be relaxed, but for now we always sort.
5504  *
5505  * Caution: if you change this, see createplan.c's get_column_info_for_window!
5506  */
5507 static List *
5509  List *tlist)
5510 {
5511  List *window_pathkeys;
5512  List *window_sortclauses;
5513 
5514  /* Throw error if can't sort */
5516  ereport(ERROR,
5517  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5518  errmsg("could not implement window PARTITION BY"),
5519  errdetail("Window partitioning columns must be of sortable datatypes.")));
5521  ereport(ERROR,
5522  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5523  errmsg("could not implement window ORDER BY"),
5524  errdetail("Window ordering columns must be of sortable datatypes.")));
5525 
5526  /* Okay, make the combined pathkeys */
5527  window_sortclauses = list_concat(list_copy(wc->partitionClause),
5528  list_copy(wc->orderClause));
5529  window_pathkeys = make_pathkeys_for_sortclauses(root,
5530  window_sortclauses,
5531  tlist);
5532  list_free(window_sortclauses);
5533  return window_pathkeys;
5534 }
5535 
5536 /*
5537  * make_sort_input_target
5538  * Generate appropriate PathTarget for initial input to Sort step.
5539  *
5540  * If the query has ORDER BY, this function chooses the target to be computed
5541  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5542  * project) steps. This might or might not be identical to the query's final
5543  * output target.
5544  *
5545  * The main argument for keeping the sort-input tlist the same as the final
5546  * is that we avoid a separate projection node (which will be needed if
5547  * they're different, because Sort can't project). However, there are also
5548  * advantages to postponing tlist evaluation till after the Sort: it ensures
5549  * a consistent order of evaluation for any volatile functions in the tlist,
5550  * and if there's also a LIMIT, we can stop the query without ever computing
5551  * tlist functions for later rows, which is beneficial for both volatile and
5552  * expensive functions.
5553  *
5554  * Our current policy is to postpone volatile expressions till after the sort
5555  * unconditionally (assuming that that's possible, ie they are in plain tlist
5556  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5557  * postpone set-returning expressions, because running them beforehand would
5558  * bloat the sort dataset, and because it might cause unexpected output order
5559  * if the sort isn't stable. However there's a constraint on that: all SRFs
5560  * in the tlist should be evaluated at the same plan step, so that they can
5561  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5562  * mustn't postpone any SRFs. (Note that in principle that policy should
5563  * probably get applied to the group/window input targetlists too, but we
5564  * have not done that historically.) Lastly, expensive expressions are
5565  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5566  * partial evaluation of the query is possible (if neither is true, we expect
5567  * to have to evaluate the expressions for every row anyway), or if there are
5568  * any volatile or set-returning expressions (since once we've put in a
5569  * projection at all, it won't cost any more to postpone more stuff).
5570  *
5571  * Another issue that could potentially be considered here is that
5572  * evaluating tlist expressions could result in data that's either wider
5573  * or narrower than the input Vars, thus changing the volume of data that
5574  * has to go through the Sort. However, we usually have only a very bad
5575  * idea of the output width of any expression more complex than a Var,
5576  * so for now it seems too risky to try to optimize on that basis.
5577  *
5578  * Note that if we do produce a modified sort-input target, and then the
5579  * query ends up not using an explicit Sort, no particular harm is done:
5580  * we'll initially use the modified target for the preceding path nodes,
5581  * but then change them to the final target with apply_projection_to_path.
5582  * Moreover, in such a case the guarantees about evaluation order of
5583  * volatile functions still hold, since the rows are sorted already.
5584  *
5585  * This function has some things in common with make_group_input_target and
5586  * make_window_input_target, though the detailed rules for what to do are
5587  * different. We never flatten/postpone any grouping or ordering columns;
5588  * those are needed before the sort. If we do flatten a particular
5589  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5590  * computed earlier.
5591  *
5592  * 'final_target' is the query's final target list (in PathTarget form)
5593  * 'have_postponed_srfs' is an output argument, see below
5594  *
5595  * The result is the PathTarget to be computed by the plan node immediately
5596  * below the Sort step (and the Distinct step, if any). This will be
5597  * exactly final_target if we decide a projection step wouldn't be helpful.
5598  *
5599  * In addition, *have_postponed_srfs is set to TRUE if we choose to postpone
5600  * any set-returning functions to after the Sort.
5601  */
5602 static PathTarget *
5604  PathTarget *final_target,
5605  bool *have_postponed_srfs)
5606 {
5607  Query *parse = root->parse;
5608  PathTarget *input_target;
5609  int ncols;
5610  bool *col_is_srf;
5611  bool *postpone_col;
5612  bool have_srf;
5613  bool have_volatile;
5614  bool have_expensive;
5615  bool have_srf_sortcols;
5616  bool postpone_srfs;
5617  List *postponable_cols;
5618  List *postponable_vars;
5619  int i;
5620  ListCell *lc;
5621 
5622  /* Shouldn't get here unless query has ORDER BY */
5623  Assert(parse->sortClause);
5624 
5625  *have_postponed_srfs = false; /* default result */
5626 
5627  /* Inspect tlist and collect per-column information */
5628  ncols = list_length(final_target->exprs);
5629  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5630  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5631  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5632 
5633  i = 0;
5634  foreach(lc, final_target->exprs)
5635  {
5636  Expr *expr = (Expr *) lfirst(lc);
5637 
5638  /*
5639  * If the column has a sortgroupref, assume it has to be evaluated
5640  * before sorting. Generally such columns would be ORDER BY, GROUP
5641  * BY, etc targets. One exception is columns that were removed from
5642  * GROUP BY by remove_useless_groupby_columns() ... but those would
5643  * only be Vars anyway. There don't seem to be any cases where it
5644  * would be worth the trouble to double-check.
5645  */
5646  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5647  {
5648  /*
5649  * Check for SRF or volatile functions. Check the SRF case first
5650  * because we must know whether we have any postponed SRFs.
5651  */
5652  if (parse->hasTargetSRFs &&
5653  expression_returns_set((Node *) expr))
5654  {
5655  /* We'll decide below whether these are postponable */
5656  col_is_srf[i] = true;
5657  have_srf = true;
5658  }
5659  else if (contain_volatile_functions((Node *) expr))
5660  {
5661  /* Unconditionally postpone */
5662  postpone_col[i] = true;
5663  have_volatile = true;
5664  }
5665  else
5666  {
5667  /*
5668  * Else check the cost. XXX it's annoying to have to do this
5669  * when set_pathtarget_cost_width() just did it. Refactor to
5670  * allow sharing the work?
5671  */
5672  QualCost cost;
5673 
5674  cost_qual_eval_node(&cost, (Node *) expr, root);
5675 
5676  /*
5677  * We arbitrarily define "expensive" as "more than 10X
5678  * cpu_operator_cost". Note this will take in any PL function
5679  * with default cost.
5680  */
5681  if (cost.per_tuple > 10 * cpu_operator_cost)
5682  {
5683  postpone_col[i] = true;
5684  have_expensive = true;
5685  }
5686  }
5687  }
5688  else
5689  {
5690  /* For sortgroupref cols, just check if any contain SRFs */
5691  if (!have_srf_sortcols &&
5692  parse->hasTargetSRFs &&
5693  expression_returns_set((Node *) expr))
5694  have_srf_sortcols = true;
5695  }
5696 
5697  i++;
5698  }
5699 
5700  /*
5701  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5702  */
5703  postpone_srfs = (have_srf && !have_srf_sortcols);
5704 
5705  /*
5706  * If we don't need a post-sort projection, just return final_target.
5707  */
5708  if (!(postpone_srfs || have_volatile ||
5709  (have_expensive &&
5710  (parse->limitCount || root->tuple_fraction > 0))))
5711  return final_target;
5712 
5713  /*
5714  * Report whether the post-sort projection will contain set-returning
5715  * functions. This is important because it affects whether the Sort can
5716  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5717  * to return.
5718  */
5719  *have_postponed_srfs = postpone_srfs;
5720 
5721  /*
5722  * Construct the sort-input target, taking all non-postponable columns and
5723  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5724  * the postponable ones.
5725  */
5726  input_target = create_empty_pathtarget();
5727  postponable_cols = NIL;
5728 
5729  i = 0;
5730  foreach(lc, final_target->exprs)
5731  {
5732  Expr *expr = (Expr *) lfirst(lc);
5733 
5734  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5735  postponable_cols = lappend(postponable_cols, expr);
5736  else
5737  add_column_to_pathtarget(input_target, expr,
5738  get_pathtarget_sortgroupref(final_target, i));
5739 
5740  i++;
5741  }
5742 
5743  /*
5744  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5745  * postponable columns, and add them to the sort-input target if not
5746  * already present. (Some might be there already.) We mustn't
5747  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5748  * would be unable to recompute them.
5749  */
5750  postponable_vars = pull_var_clause((Node *) postponable_cols,
5754  add_new_columns_to_pathtarget(input_target, postponable_vars);
5755 
5756  /* clean up cruft */
5757  list_free(postponable_vars);
5758  list_free(postponable_cols);
5759 
5760  /* XXX this represents even more redundant cost calculation ... */
5761  return set_pathtarget_cost_width(root, input_target);
5762 }
5763 
5764 /*
5765  * get_cheapest_fractional_path
5766  * Find the cheapest path for retrieving a specified fraction of all
5767  * the tuples expected to be returned by the given relation.
5768  *
5769  * We interpret tuple_fraction the same way as grouping_planner.
5770  *
5771  * We assume set_cheapest() has been run on the given rel.
5772  */
5773 Path *
5774 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5775 {
5776  Path *best_path = rel->cheapest_total_path;
5777  ListCell *l;
5778 
5779  /* If all tuples will be retrieved, just return the cheapest-total path */
5780  if (tuple_fraction <= 0.0)
5781  return best_path;
5782 
5783  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5784  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5785  tuple_fraction /= best_path->rows;
5786 
5787  foreach(l, rel->pathlist)
5788  {
5789  Path *path = (Path *) lfirst(l);
5790 
5791  if (path == rel->cheapest_total_path ||
5792  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5793  continue;
5794 
5795  best_path = path;
5796  }
5797 
5798  return best_path;
5799 }
5800 
5801 /*
5802  * adjust_paths_for_srfs
5803  * Fix up the Paths of the given upperrel to handle tSRFs properly.
5804  *
5805  * The executor can only handle set-returning functions that appear at the
5806  * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5807  * that are not at top level, we need to split up the evaluation into multiple
5808  * plan levels in which each level satisfies this constraint. This function
5809  * modifies each Path of an upperrel that (might) compute any SRFs in its
5810  * output tlist to insert appropriate projection steps.
5811  *
5812  * The given targets and targets_contain_srfs lists are from
5813  * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5814  * target in targets.
5815  */
5816 static void
5818  List *targets, List *targets_contain_srfs)
5819 {
5820  ListCell *lc;
5821 
5822  Assert(list_length(targets) == list_length(targets_contain_srfs));
5823  Assert(!linitial_int(targets_contain_srfs));
5824 
5825  /* If no SRFs appear at this plan level, nothing to do */
5826  if (list_length(targets) == 1)
5827  return;
5828 
5829  /*
5830  * Stack SRF-evaluation nodes atop each path for the rel.
5831  *
5832  * In principle we should re-run set_cheapest() here to identify the
5833  * cheapest path, but it seems unlikely that adding the same tlist eval
5834  * costs to all the paths would change that, so we don't bother. Instead,
5835  * just assume that the cheapest-startup and cheapest-total paths remain
5836  * so. (There should be no parameterized paths anymore, so we needn't
5837  * worry about updating cheapest_parameterized_paths.)
5838  */
5839  foreach(lc, rel->pathlist)
5840  {
5841  Path *subpath = (Path *) lfirst(lc);
5842  Path *newpath = subpath;
5843  ListCell *lc1,
5844  *lc2;