PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
planner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/htup_details.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/xact.h"
26 #include "catalog/pg_proc.h"
27 #include "catalog/pg_type.h"
28 #include "executor/executor.h"
29 #include "executor/nodeAgg.h"
30 #include "foreign/fdwapi.h"
31 #include "miscadmin.h"
32 #include "lib/bipartite_match.h"
33 #include "lib/knapsack.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #ifdef OPTIMIZER_DEBUG
37 #include "nodes/print.h"
38 #endif
39 #include "optimizer/clauses.h"
40 #include "optimizer/cost.h"
41 #include "optimizer/pathnode.h"
42 #include "optimizer/paths.h"
43 #include "optimizer/plancat.h"
44 #include "optimizer/planmain.h"
45 #include "optimizer/planner.h"
46 #include "optimizer/prep.h"
47 #include "optimizer/subselect.h"
48 #include "optimizer/tlist.h"
49 #include "optimizer/var.h"
50 #include "parser/analyze.h"
51 #include "parser/parsetree.h"
52 #include "parser/parse_agg.h"
53 #include "rewrite/rewriteManip.h"
54 #include "storage/dsm_impl.h"
55 #include "utils/rel.h"
56 #include "utils/selfuncs.h"
57 #include "utils/lsyscache.h"
58 #include "utils/syscache.h"
59 
60 
61 /* GUC parameters */
64 
65 /* Hook for plugins to get control in planner() */
67 
68 /* Hook for plugins to get control when grouping_planner() plans upper rels */
70 
71 
72 /* Expression kind codes for preprocess_expression */
/*
 * One of these codes is passed as preprocess_expression()'s "kind" argument.
 * Kind-specific handling visible in that function:
 *	- EXPRKIND_QUAL: the expression is additionally canonicalized and, as the
 *	  last step, converted to implicit-AND list form.
 *	- EXPRKIND_RTFUNC, EXPRKIND_VALUES, EXPRKIND_TABLESAMPLE and
 *	  EXPRKIND_TABLEFUNC: join alias variable flattening is skipped.
 * preprocess_expression() has no special case for the remaining codes.
 */
73 #define EXPRKIND_QUAL 0
74 #define EXPRKIND_TARGET 1
75 #define EXPRKIND_RTFUNC 2
76 #define EXPRKIND_RTFUNC_LATERAL 3
77 #define EXPRKIND_VALUES 4
78 #define EXPRKIND_VALUES_LATERAL 5
79 #define EXPRKIND_LIMIT 6
80 #define EXPRKIND_APPINFO 7
81 #define EXPRKIND_PHV 8
82 #define EXPRKIND_TABLESAMPLE 9
83 #define EXPRKIND_ARBITER_ELEM 10
84 #define EXPRKIND_TABLEFUNC 11
85 #define EXPRKIND_TABLEFUNC_LATERAL 12
86 
87 /* Passthrough data for standard_qp_callback */
88 typedef struct
89 {
90  List *tlist; /* preprocessed query targetlist */
91  List *activeWindows; /* active windows, if any */
92  List *groupClause; /* overrides parse->groupClause */
94 
95 /*
96  * Data specific to grouping sets
97  */
98 
99 typedef struct
100 {
110 
111 /* Local functions */
112 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
113 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
114 static void inheritance_planner(PlannerInfo *root);
115 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
116  double tuple_fraction);
118 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
119  int *tleref_to_colnum_map);
120 static void preprocess_rowmarks(PlannerInfo *root);
121 static double preprocess_limit(PlannerInfo *root,
122  double tuple_fraction,
123  int64 *offset_est, int64 *count_est);
124 static bool limit_needed(Query *parse);
126 static List *preprocess_groupclause(PlannerInfo *root, List *force);
127 static List *extract_rollup_sets(List *groupingSets);
128 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
129 static void standard_qp_callback(PlannerInfo *root, void *extra);
130 static double get_number_of_groups(PlannerInfo *root,
131  double path_rows,
132  grouping_sets_data *gd);
134  const AggClauseCosts *agg_costs,
135  double dNumGroups);
137  RelOptInfo *input_rel,
138  PathTarget *target,
139  const AggClauseCosts *agg_costs,
140  grouping_sets_data *gd);
141 static void consider_groupingsets_paths(PlannerInfo *root,
142  RelOptInfo *grouped_rel,
143  Path *path,
144  bool is_sorted,
145  bool can_hash,
146  PathTarget *target,
147  grouping_sets_data *gd,
148  const AggClauseCosts *agg_costs,
149  double dNumGroups);
151  RelOptInfo *input_rel,
152  PathTarget *input_target,
153  PathTarget *output_target,
154  List *tlist,
155  WindowFuncLists *wflists,
156  List *activeWindows);
157 static void create_one_window_path(PlannerInfo *root,
158  RelOptInfo *window_rel,
159  Path *path,
160  PathTarget *input_target,
161  PathTarget *output_target,
162  List *tlist,
163  WindowFuncLists *wflists,
164  List *activeWindows);
166  RelOptInfo *input_rel);
168  RelOptInfo *input_rel,
169  PathTarget *target,
170  double limit_tuples);
172  PathTarget *final_target);
174  PathTarget *grouping_target);
175 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
176 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
178  PathTarget *final_target,
179  List *activeWindows);
181  List *tlist);
183  PathTarget *final_target,
184  bool *have_postponed_srfs);
185 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
186  List *targets, List *targets_contain_srfs);
187 
188 
189 /*****************************************************************************
190  *
191  * Query optimizer entry point
192  *
193  * To support loadable plugins that monitor or modify planner behavior,
194  * we provide a hook variable that lets a plugin get control before and
195  * after the standard planning process. The plugin would normally call
196  * standard_planner().
197  *
198  * Note to plugin authors: standard_planner() scribbles on its Query input,
199  * so you'd better copy that data structure if you want to plan more than once.
200  *
201  *****************************************************************************/
202 PlannedStmt *
203 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
204 {
206 
207  if (planner_hook)
208  result = (*planner_hook) (parse, cursorOptions, boundParams);
209  else
210  result = standard_planner(parse, cursorOptions, boundParams);
211  return result;
212 }
213 
214 PlannedStmt *
215 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
216 {
218  PlannerGlobal *glob;
219  double tuple_fraction;
220  PlannerInfo *root;
221  RelOptInfo *final_rel;
222  Path *best_path;
223  Plan *top_plan;
224  ListCell *lp,
225  *lr;
226 
227  /*
228  * Set up global state for this planner invocation. This data is needed
229  * across all levels of sub-Query that might exist in the given command,
230  * so we keep it in a separate struct that's linked to by each per-Query
231  * PlannerInfo.
232  */
233  glob = makeNode(PlannerGlobal);
234 
235  glob->boundParams = boundParams;
236  glob->subplans = NIL;
237  glob->subroots = NIL;
238  glob->rewindPlanIDs = NULL;
239  glob->finalrtable = NIL;
240  glob->finalrowmarks = NIL;
241  glob->resultRelations = NIL;
242  glob->nonleafResultRelations = NIL;
243  glob->rootResultRelations = NIL;
244  glob->relationOids = NIL;
245  glob->invalItems = NIL;
246  glob->nParamExec = 0;
247  glob->lastPHId = 0;
248  glob->lastRowMarkId = 0;
249  glob->lastPlanNodeId = 0;
250  glob->transientPlan = false;
251  glob->dependsOnRole = false;
252 
253  /*
254  * Assess whether it's feasible to use parallel mode for this query. We
255  * can't do this in a standalone backend, or if the command will try to
256  * modify any data, or if this is a cursor operation, or if GUCs are set
257  * to values that don't permit parallelism, or if parallel-unsafe
258  * functions are present in the query tree.
259  *
260  * For now, we don't try to use parallel mode if we're running inside a
261  * parallel worker. We might eventually be able to relax this
262  * restriction, but for now it seems best not to have parallel workers
263  * trying to create their own parallel workers.
264  *
265  * We can't use parallelism in serializable mode because the predicate
266  * locking code is not parallel-aware. It's not catastrophic if someone
267  * tries to run a parallel plan in serializable mode; it just won't get
268  * any workers and will run serially. But it seems like a good heuristic
269  * to assume that the same serialization level will be in effect at plan
270  * time and execution time, so don't generate a parallel plan if we're in
271  * serializable mode.
272  */
273  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
276  parse->commandType == CMD_SELECT &&
277  !parse->hasModifyingCTE &&
279  !IsParallelWorker() &&
281  {
282  /* all the cheap tests pass, so scan the query tree */
283  glob->maxParallelHazard = max_parallel_hazard(parse);
285  }
286  else
287  {
288  /* skip the query tree scan, just assume it's unsafe */
290  glob->parallelModeOK = false;
291  }
292 
293  /*
294  * glob->parallelModeNeeded should tell us whether it's necessary to
295  * impose the parallel mode restrictions, but we don't actually want to
296  * impose them unless we choose a parallel plan, so it is normally set
297  * only if a parallel plan is chosen (see create_gather_plan). That way,
298  * people who mislabel their functions but don't use parallelism anyway
299  * aren't harmed. But when force_parallel_mode is set, we enable the
300  * restrictions whenever possible for testing purposes.
301  */
302  glob->parallelModeNeeded = glob->parallelModeOK &&
304 
305  /* Determine what fraction of the plan is likely to be scanned */
306  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
307  {
308  /*
309  * We have no real idea how many tuples the user will ultimately FETCH
310  * from a cursor, but it is often the case that he doesn't want 'em
311  * all, or would prefer a fast-start plan anyway so that he can
312  * process some of the tuples sooner. Use a GUC parameter to decide
313  * what fraction to optimize for.
314  */
315  tuple_fraction = cursor_tuple_fraction;
316 
317  /*
318  * We document cursor_tuple_fraction as simply being a fraction, which
319  * means the edge cases 0 and 1 have to be treated specially here. We
320  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
321  */
322  if (tuple_fraction >= 1.0)
323  tuple_fraction = 0.0;
324  else if (tuple_fraction <= 0.0)
325  tuple_fraction = 1e-10;
326  }
327  else
328  {
329  /* Default assumption is we need all the tuples */
330  tuple_fraction = 0.0;
331  }
332 
333  /* primary planning entry point (may recurse for subqueries) */
334  root = subquery_planner(glob, parse, NULL,
335  false, tuple_fraction);
336 
337  /* Select best Path and turn it into a Plan */
338  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
339  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
340 
341  top_plan = create_plan(root, best_path);
342 
343  /*
344  * If creating a plan for a scrollable cursor, make sure it can run
345  * backwards on demand. Add a Material node at the top at need.
346  */
347  if (cursorOptions & CURSOR_OPT_SCROLL)
348  {
349  if (!ExecSupportsBackwardScan(top_plan))
350  top_plan = materialize_finished_plan(top_plan);
351  }
352 
353  /*
354  * Optionally add a Gather node for testing purposes, provided this is
355  * actually a safe thing to do.
356  */
358  {
359  Gather *gather = makeNode(Gather);
360 
361  gather->plan.targetlist = top_plan->targetlist;
362  gather->plan.qual = NIL;
363  gather->plan.lefttree = top_plan;
364  gather->plan.righttree = NULL;
365  gather->num_workers = 1;
366  gather->single_copy = true;
368 
369  /*
370  * Ideally we'd use cost_gather here, but setting up dummy path data
371  * to satisfy it doesn't seem much cleaner than knowing what it does.
372  */
373  gather->plan.startup_cost = top_plan->startup_cost +
375  gather->plan.total_cost = top_plan->total_cost +
377  gather->plan.plan_rows = top_plan->plan_rows;
378  gather->plan.plan_width = top_plan->plan_width;
379  gather->plan.parallel_aware = false;
380  gather->plan.parallel_safe = false;
381 
382  /* use parallel mode for parallel plans. */
383  root->glob->parallelModeNeeded = true;
384 
385  top_plan = &gather->plan;
386  }
387 
388  /*
389  * If any Params were generated, run through the plan tree and compute
390  * each plan node's extParam/allParam sets. Ideally we'd merge this into
391  * set_plan_references' tree traversal, but for now it has to be separate
392  * because we need to visit subplans before not after main plan.
393  */
394  if (glob->nParamExec > 0)
395  {
396  Assert(list_length(glob->subplans) == list_length(glob->subroots));
397  forboth(lp, glob->subplans, lr, glob->subroots)
398  {
399  Plan *subplan = (Plan *) lfirst(lp);
400  PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
401 
402  SS_finalize_plan(subroot, subplan);
403  }
404  SS_finalize_plan(root, top_plan);
405  }
406 
407  /* final cleanup of the plan */
408  Assert(glob->finalrtable == NIL);
409  Assert(glob->finalrowmarks == NIL);
410  Assert(glob->resultRelations == NIL);
412  Assert(glob->rootResultRelations == NIL);
413  top_plan = set_plan_references(root, top_plan);
414  /* ... and the subplans (both regular subplans and initplans) */
415  Assert(list_length(glob->subplans) == list_length(glob->subroots));
416  forboth(lp, glob->subplans, lr, glob->subroots)
417  {
418  Plan *subplan = (Plan *) lfirst(lp);
419  PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
420 
421  lfirst(lp) = set_plan_references(subroot, subplan);
422  }
423 
424  /* build the PlannedStmt result */
425  result = makeNode(PlannedStmt);
426 
427  result->commandType = parse->commandType;
428  result->queryId = parse->queryId;
429  result->hasReturning = (parse->returningList != NIL);
430  result->hasModifyingCTE = parse->hasModifyingCTE;
431  result->canSetTag = parse->canSetTag;
432  result->transientPlan = glob->transientPlan;
433  result->dependsOnRole = glob->dependsOnRole;
434  result->parallelModeNeeded = glob->parallelModeNeeded;
435  result->planTree = top_plan;
436  result->rtable = glob->finalrtable;
437  result->resultRelations = glob->resultRelations;
440  result->subplans = glob->subplans;
441  result->rewindPlanIDs = glob->rewindPlanIDs;
442  result->rowMarks = glob->finalrowmarks;
443  result->relationOids = glob->relationOids;
444  result->invalItems = glob->invalItems;
445  result->nParamExec = glob->nParamExec;
446  /* utilityStmt should be null, but we might as well copy it */
447  result->utilityStmt = parse->utilityStmt;
448  result->stmt_location = parse->stmt_location;
449  result->stmt_len = parse->stmt_len;
450 
451  return result;
452 }
453 
454 
455 /*--------------------
456  * subquery_planner
457  * Invokes the planner on a subquery. We recurse to here for each
458  * sub-SELECT found in the query tree.
459  *
460  * glob is the global state for the current planner run.
461  * parse is the querytree produced by the parser & rewriter.
462  * parent_root is the immediate parent Query's info (NULL at the top level).
463  * hasRecursion is true if this is a recursive WITH query.
464  * tuple_fraction is the fraction of tuples we expect will be retrieved.
465  * tuple_fraction is interpreted as explained for grouping_planner, below.
466  *
467  * Basically, this routine does the stuff that should only be done once
468  * per Query object. It then calls grouping_planner. At one time,
469  * grouping_planner could be invoked recursively on the same Query object;
470  * that's not currently true, but we keep the separation between the two
471  * routines anyway, in case we need it again someday.
472  *
473  * subquery_planner will be called recursively to handle sub-Query nodes
474  * found within the query's expressions and rangetable.
475  *
476  * Returns the PlannerInfo struct ("root") that contains all data generated
477  * while planning the subquery. In particular, the Path(s) attached to
478  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
479  * cheapest way(s) to implement the query. The top level will select the
480  * best Path and pass it through createplan.c to produce a finished Plan.
481  *--------------------
482  */
483 PlannerInfo *
485  PlannerInfo *parent_root,
486  bool hasRecursion, double tuple_fraction)
487 {
488  PlannerInfo *root;
489  List *newWithCheckOptions;
490  List *newHaving;
491  bool hasOuterJoins;
492  RelOptInfo *final_rel;
493  ListCell *l;
494 
495  /* Create a PlannerInfo data structure for this subquery */
496  root = makeNode(PlannerInfo);
497  root->parse = parse;
498  root->glob = glob;
499  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
500  root->parent_root = parent_root;
501  root->plan_params = NIL;
502  root->outer_params = NULL;
504  root->init_plans = NIL;
505  root->cte_plan_ids = NIL;
506  root->multiexpr_params = NIL;
507  root->eq_classes = NIL;
508  root->append_rel_list = NIL;
509  root->pcinfo_list = NIL;
510  root->rowMarks = NIL;
511  memset(root->upper_rels, 0, sizeof(root->upper_rels));
512  memset(root->upper_targets, 0, sizeof(root->upper_targets));
513  root->processed_tlist = NIL;
514  root->grouping_map = NULL;
515  root->minmax_aggs = NIL;
516  root->qual_security_level = 0;
517  root->hasInheritedTarget = false;
518  root->hasRecursion = hasRecursion;
519  if (hasRecursion)
520  root->wt_param_id = SS_assign_special_param(root);
521  else
522  root->wt_param_id = -1;
523  root->non_recursive_path = NULL;
524 
525  /*
526  * If there is a WITH list, process each WITH query and build an initplan
527  * SubPlan structure for it.
528  */
529  if (parse->cteList)
530  SS_process_ctes(root);
531 
532  /*
533  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
534  * to transform them into joins. Note that this step does not descend
535  * into subqueries; if we pull up any subqueries below, their SubLinks are
536  * processed just before pulling them up.
537  */
538  if (parse->hasSubLinks)
539  pull_up_sublinks(root);
540 
541  /*
542  * Scan the rangetable for set-returning functions, and inline them if
543  * possible (producing subqueries that might get pulled up next).
544  * Recursion issues here are handled in the same way as for SubLinks.
545  */
547 
548  /*
549  * Check to see if any subqueries in the jointree can be merged into this
550  * query.
551  */
552  pull_up_subqueries(root);
553 
554  /*
555  * If this is a simple UNION ALL query, flatten it into an appendrel. We
556  * do this now because it requires applying pull_up_subqueries to the leaf
557  * queries of the UNION ALL, which weren't touched above because they
558  * weren't referenced by the jointree (they will be after we do this).
559  */
560  if (parse->setOperations)
562 
563  /*
564  * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
565  * avoid the expense of doing flatten_join_alias_vars(). Also check for
566  * outer joins --- if none, we can skip reduce_outer_joins(). And check
567  * for LATERAL RTEs, too. This must be done after we have done
568  * pull_up_subqueries(), of course.
569  */
570  root->hasJoinRTEs = false;
571  root->hasLateralRTEs = false;
572  hasOuterJoins = false;
573  foreach(l, parse->rtable)
574  {
575  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
576 
577  if (rte->rtekind == RTE_JOIN)
578  {
579  root->hasJoinRTEs = true;
580  if (IS_OUTER_JOIN(rte->jointype))
581  hasOuterJoins = true;
582  }
583  if (rte->lateral)
584  root->hasLateralRTEs = true;
585  }
586 
587  /*
588  * Preprocess RowMark information. We need to do this after subquery
589  * pullup (so that all non-inherited RTEs are present) and before
590  * inheritance expansion (so that the info is available for
591  * expand_inherited_tables to examine and modify).
592  */
593  preprocess_rowmarks(root);
594 
595  /*
596  * Expand any rangetable entries that are inheritance sets into "append
597  * relations". This can add entries to the rangetable, but they must be
598  * plain base relations not joins, so it's OK (and marginally more
599  * efficient) to do it after checking for join RTEs. We must do it after
600  * pulling up subqueries, else we'd fail to handle inherited tables in
601  * subqueries.
602  */
604 
605  /*
606  * Set hasHavingQual to remember if HAVING clause is present. Needed
607  * because preprocess_expression will reduce a constant-true condition to
608  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
609  */
610  root->hasHavingQual = (parse->havingQual != NULL);
611 
612  /* Clear this flag; might get set in distribute_qual_to_rels */
613  root->hasPseudoConstantQuals = false;
614 
615  /*
616  * Do expression preprocessing on targetlist and quals, as well as other
617  * random expressions in the querytree. Note that we do not need to
618  * handle sort/group expressions explicitly, because they are actually
619  * part of the targetlist.
620  */
621  parse->targetList = (List *)
622  preprocess_expression(root, (Node *) parse->targetList,
624 
625  /* Constant-folding might have removed all set-returning functions */
626  if (parse->hasTargetSRFs)
628 
629  newWithCheckOptions = NIL;
630  foreach(l, parse->withCheckOptions)
631  {
632  WithCheckOption *wco = (WithCheckOption *) lfirst(l);
633 
634  wco->qual = preprocess_expression(root, wco->qual,
635  EXPRKIND_QUAL);
636  if (wco->qual != NULL)
637  newWithCheckOptions = lappend(newWithCheckOptions, wco);
638  }
639  parse->withCheckOptions = newWithCheckOptions;
640 
641  parse->returningList = (List *)
642  preprocess_expression(root, (Node *) parse->returningList,
644 
645  preprocess_qual_conditions(root, (Node *) parse->jointree);
646 
647  parse->havingQual = preprocess_expression(root, parse->havingQual,
648  EXPRKIND_QUAL);
649 
650  foreach(l, parse->windowClause)
651  {
652  WindowClause *wc = (WindowClause *) lfirst(l);
653 
654  /* partitionClause/orderClause are sort/group expressions */
657  wc->endOffset = preprocess_expression(root, wc->endOffset,
659  }
660 
661  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
663  parse->limitCount = preprocess_expression(root, parse->limitCount,
665 
666  if (parse->onConflict)
667  {
668  parse->onConflict->arbiterElems = (List *)
670  (Node *) parse->onConflict->arbiterElems,
672  parse->onConflict->arbiterWhere =
674  parse->onConflict->arbiterWhere,
675  EXPRKIND_QUAL);
676  parse->onConflict->onConflictSet = (List *)
678  (Node *) parse->onConflict->onConflictSet,
680  parse->onConflict->onConflictWhere =
682  parse->onConflict->onConflictWhere,
683  EXPRKIND_QUAL);
684  /* exclRelTlist contains only Vars, so no preprocessing needed */
685  }
686 
687  root->append_rel_list = (List *)
690 
691  /* Also need to preprocess expressions within RTEs */
692  foreach(l, parse->rtable)
693  {
694  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
695  int kind;
696  ListCell *lcsq;
697 
698  if (rte->rtekind == RTE_RELATION)
699  {
700  if (rte->tablesample)
701  rte->tablesample = (TableSampleClause *)
703  (Node *) rte->tablesample,
705  }
706  else if (rte->rtekind == RTE_SUBQUERY)
707  {
708  /*
709  * We don't want to do all preprocessing yet on the subquery's
710  * expressions, since that will happen when we plan it. But if it
711  * contains any join aliases of our level, those have to get
712  * expanded now, because planning of the subquery won't do it.
713  * That's only possible if the subquery is LATERAL.
714  */
715  if (rte->lateral && root->hasJoinRTEs)
716  rte->subquery = (Query *)
717  flatten_join_alias_vars(root, (Node *) rte->subquery);
718  }
719  else if (rte->rtekind == RTE_FUNCTION)
720  {
721  /* Preprocess the function expression(s) fully */
723  rte->functions = (List *)
724  preprocess_expression(root, (Node *) rte->functions, kind);
725  }
726  else if (rte->rtekind == RTE_TABLEFUNC)
727  {
728  /* Preprocess the function expression(s) fully */
730  rte->tablefunc = (TableFunc *)
731  preprocess_expression(root, (Node *) rte->tablefunc, kind);
732  }
733  else if (rte->rtekind == RTE_VALUES)
734  {
735  /* Preprocess the values lists fully */
737  rte->values_lists = (List *)
738  preprocess_expression(root, (Node *) rte->values_lists, kind);
739  }
740 
741  /*
742  * Process each element of the securityQuals list as if it were a
743  * separate qual expression (as indeed it is). We need to do it this
744  * way to get proper canonicalization of AND/OR structure. Note that
745  * this converts each element into an implicit-AND sublist.
746  */
747  foreach(lcsq, rte->securityQuals)
748  {
749  lfirst(lcsq) = preprocess_expression(root,
750  (Node *) lfirst(lcsq),
751  EXPRKIND_QUAL);
752  }
753  }
754 
755  /*
756  * In some cases we may want to transfer a HAVING clause into WHERE. We
757  * cannot do so if the HAVING clause contains aggregates (obviously) or
758  * volatile functions (since a HAVING clause is supposed to be executed
759  * only once per group). We also can't do this if there are any nonempty
760  * grouping sets; moving such a clause into WHERE would potentially change
761  * the results, if any referenced column isn't present in all the grouping
762  * sets. (If there are only empty grouping sets, then the HAVING clause
763  * must be degenerate as discussed below.)
764  *
765  * Also, it may be that the clause is so expensive to execute that we're
766  * better off doing it only once per group, despite the loss of
767  * selectivity. This is hard to estimate short of doing the entire
768  * planning process twice, so we use a heuristic: clauses containing
769  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
770  * clause into WHERE, in hopes of eliminating tuples before aggregation
771  * instead of after.
772  *
773  * If the query has explicit grouping then we can simply move such a
774  * clause into WHERE; any group that fails the clause will not be in the
775  * output because none of its tuples will reach the grouping or
776  * aggregation stage. Otherwise we must have a degenerate (variable-free)
777  * HAVING clause, which we put in WHERE so that query_planner() can use it
778  * in a gating Result node, but also keep in HAVING to ensure that we
779  * don't emit a bogus aggregated row. (This could be done better, but it
780  * seems not worth optimizing.)
781  *
782  * Note that both havingQual and parse->jointree->quals are in
783  * implicitly-ANDed-list form at this point, even though they are declared
784  * as Node *.
785  */
786  newHaving = NIL;
787  foreach(l, (List *) parse->havingQual)
788  {
789  Node *havingclause = (Node *) lfirst(l);
790 
791  if ((parse->groupClause && parse->groupingSets) ||
792  contain_agg_clause(havingclause) ||
793  contain_volatile_functions(havingclause) ||
794  contain_subplans(havingclause))
795  {
796  /* keep it in HAVING */
797  newHaving = lappend(newHaving, havingclause);
798  }
799  else if (parse->groupClause && !parse->groupingSets)
800  {
801  /* move it to WHERE */
802  parse->jointree->quals = (Node *)
803  lappend((List *) parse->jointree->quals, havingclause);
804  }
805  else
806  {
807  /* put a copy in WHERE, keep it in HAVING */
808  parse->jointree->quals = (Node *)
809  lappend((List *) parse->jointree->quals,
810  copyObject(havingclause));
811  newHaving = lappend(newHaving, havingclause);
812  }
813  }
814  parse->havingQual = (Node *) newHaving;
815 
816  /* Remove any redundant GROUP BY columns */
818 
819  /*
820  * If we have any outer joins, try to reduce them to plain inner joins.
821  * This step is most easily done after we've done expression
822  * preprocessing.
823  */
824  if (hasOuterJoins)
825  reduce_outer_joins(root);
826 
827  /*
828  * Do the main planning. If we have an inherited target relation, that
829  * needs special processing, else go straight to grouping_planner.
830  */
831  if (parse->resultRelation &&
832  rt_fetch(parse->resultRelation, parse->rtable)->inh)
833  inheritance_planner(root);
834  else
835  grouping_planner(root, false, tuple_fraction);
836 
837  /*
838  * Capture the set of outer-level param IDs we have access to, for use in
839  * extParam/allParam calculations later.
840  */
842 
843  /*
844  * If any initPlans were created in this query level, adjust the surviving
845  * Paths' costs and parallel-safety flags to account for them. The
846  * initPlans won't actually get attached to the plan tree till
847  * create_plan() runs, but we must include their effects now.
848  */
849  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
850  SS_charge_for_initplans(root, final_rel);
851 
852  /*
853  * Make sure we've identified the cheapest Path for the final rel. (By
854  * doing this here not in grouping_planner, we include initPlan costs in
855  * the decision, though it's unlikely that will change anything.)
856  */
857  set_cheapest(final_rel);
858 
859  return root;
860 }
861 
862 /*
863  * preprocess_expression
864  * Do subquery_planner's preprocessing work for an expression,
865  * which can be a targetlist, a WHERE clause (including JOIN/ON
866  * conditions), a HAVING clause, or a few other things.
867  */
868 static Node *
869 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
870 {
871  /*
872  * Fall out quickly if expression is empty. This occurs often enough to
873  * be worth checking. Note that null->null is the correct conversion for
874  * implicit-AND result format, too.
875  */
876  if (expr == NULL)
877  return NULL;
878 
879  /*
880  * If the query has any join RTEs, replace join alias variables with
881  * base-relation variables. We must do this before sublink processing,
882  * else sublinks expanded out from join aliases would not get processed.
883  * We can skip it in non-lateral RTE functions, VALUES lists, and
884  * TABLESAMPLE clauses, however, since they can't contain any Vars of the
885  * current query level.
886  */
887  if (root->hasJoinRTEs &&
888  !(kind == EXPRKIND_RTFUNC ||
889  kind == EXPRKIND_VALUES ||
890  kind == EXPRKIND_TABLESAMPLE ||
891  kind == EXPRKIND_TABLEFUNC))
892  expr = flatten_join_alias_vars(root, expr);
893 
894  /*
895  * Simplify constant expressions.
896  *
897  * Note: an essential effect of this is to convert named-argument function
898  * calls to positional notation and insert the current actual values of
899  * any default arguments for functions. To ensure that happens, we *must*
900  * process all expressions here. Previous PG versions sometimes skipped
901  * const-simplification if it didn't seem worth the trouble, but we can't
902  * do that anymore.
903  *
904  * Note: this also flattens nested AND and OR expressions into N-argument
905  * form. All processing of a qual expression after this point must be
906  * careful to maintain AND/OR flatness --- that is, do not generate a tree
907  * with AND directly under AND, nor OR directly under OR.
908  */
909  expr = eval_const_expressions(root, expr);
910 
911  /*
912  * If it's a qual or havingQual, canonicalize it.
913  */
914  if (kind == EXPRKIND_QUAL)
915  {
916  expr = (Node *) canonicalize_qual((Expr *) expr);
917 
918 #ifdef OPTIMIZER_DEBUG
919  printf("After canonicalize_qual()\n");
920  pprint(expr);
921 #endif
922  }
923 
924  /* Expand SubLinks to SubPlans */
925  if (root->parse->hasSubLinks)
926  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
927 
928  /*
929  * XXX do not insert anything here unless you have grokked the comments in
930  * SS_replace_correlation_vars ...
931  */
932 
933  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
934  if (root->query_level > 1)
935  expr = SS_replace_correlation_vars(root, expr);
936 
937  /*
938  * If it's a qual or havingQual, convert it to implicit-AND format. (We
939  * don't want to do this before eval_const_expressions, since the latter
940  * would be unable to simplify a top-level AND correctly. Also,
941  * SS_process_sublinks expects explicit-AND format.)
942  */
943  if (kind == EXPRKIND_QUAL)
944  expr = (Node *) make_ands_implicit((Expr *) expr);
945 
946  return expr;
947 }
948 
949 /*
950  * preprocess_qual_conditions
951  * Recursively scan the query's jointree and do subquery_planner's
952  * preprocessing work on each qual condition found therein.
953  */
954 static void
956 {
957  if (jtnode == NULL)
958  return;
959  if (IsA(jtnode, RangeTblRef))
960  {
961  /* nothing to do here */
962  }
963  else if (IsA(jtnode, FromExpr))
964  {
965  FromExpr *f = (FromExpr *) jtnode;
966  ListCell *l;
967 
968  foreach(l, f->fromlist)
970 
972  }
973  else if (IsA(jtnode, JoinExpr))
974  {
975  JoinExpr *j = (JoinExpr *) jtnode;
976 
979 
981  }
982  else
983  elog(ERROR, "unrecognized node type: %d",
984  (int) nodeTag(jtnode));
985 }
986 
987 /*
988  * preprocess_phv_expression
989  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
990  *
991  * If a LATERAL subquery references an output of another subquery, and that
992  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
993  * join, then we'll push the PlaceHolderVar expression down into the subquery
994  * and later pull it back up during find_lateral_references, which runs after
995  * subquery_planner has preprocessed all the expressions that were in the
996  * current query level to start with. So we need to preprocess it then.
997  */
998 Expr *
1000 {
1001  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1002 }
1003 
1004 /*
1005  * inheritance_planner
1006  * Generate Paths in the case where the result relation is an
1007  * inheritance set.
1008  *
1009  * We have to handle this case differently from cases where a source relation
1010  * is an inheritance set. Source inheritance is expanded at the bottom of the
1011  * plan tree (see allpaths.c), but target inheritance has to be expanded at
1012  * the top. The reason is that for UPDATE, each target relation needs a
1013  * different targetlist matching its own column set. Fortunately,
1014  * the UPDATE/DELETE target can never be the nullable side of an outer join,
1015  * so it's OK to generate the plan this way.
1016  *
1017  * Returns nothing; the useful output is in the Paths we attach to
1018  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1019  *
1020  * Note that we have not done set_cheapest() on the final rel; it's convenient
1021  * to leave this to the caller.
1022  */
1023 static void
1025 {
1026  Query *parse = root->parse;
1027  int parentRTindex = parse->resultRelation;
1028  Bitmapset *subqueryRTindexes;
1029  Bitmapset *modifiableARIindexes;
1030  int nominalRelation = -1;
1031  List *final_rtable = NIL;
1032  int save_rel_array_size = 0;
1033  RelOptInfo **save_rel_array = NULL;
1034  List *subpaths = NIL;
1035  List *subroots = NIL;
1036  List *resultRelations = NIL;
1037  List *withCheckOptionLists = NIL;
1038  List *returningLists = NIL;
1039  List *rowMarks;
1040  RelOptInfo *final_rel;
1041  ListCell *lc;
1042  Index rti;
1043  RangeTblEntry *parent_rte;
1044  List *partitioned_rels = NIL;
1045 
1046  Assert(parse->commandType != CMD_INSERT);
1047 
1048  /*
1049  * We generate a modified instance of the original Query for each target
1050  * relation, plan that, and put all the plans into a list that will be
1051  * controlled by a single ModifyTable node. All the instances share the
1052  * same rangetable, but each instance must have its own set of subquery
1053  * RTEs within the finished rangetable because (1) they are likely to get
1054  * scribbled on during planning, and (2) it's not inconceivable that
1055  * subqueries could get planned differently in different cases. We need
1056  * not create duplicate copies of other RTE kinds, in particular not the
1057  * target relations, because they don't have either of those issues. Not
1058  * having to duplicate the target relations is important because doing so
1059  * (1) would result in a rangetable of length O(N^2) for N targets, with
1060  * at least O(N^3) work expended here; and (2) would greatly complicate
1061  * management of the rowMarks list.
1062  *
1063  * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1064  */
1065  subqueryRTindexes = NULL;
1066  rti = 1;
1067  foreach(lc, parse->rtable)
1068  {
1069  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1070 
1071  if (rte->rtekind == RTE_SUBQUERY)
1072  subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1073  rti++;
1074  }
1075 
1076  /*
1077  * Next, we want to identify which AppendRelInfo items contain references
1078  * to any of the aforesaid subquery RTEs. These items will need to be
1079  * copied and modified to adjust their subquery references; whereas the
1080  * other ones need not be touched. It's worth being tense over this
1081  * because we can usually avoid processing most of the AppendRelInfo
1082  * items, thereby saving O(N^2) space and time when the target is a large
1083  * inheritance tree. We can identify AppendRelInfo items by their
1084  * child_relid, since that should be unique within the list.
1085  */
1086  modifiableARIindexes = NULL;
1087  if (subqueryRTindexes != NULL)
1088  {
1089  foreach(lc, root->append_rel_list)
1090  {
1091  AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1092 
1093  if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1094  bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
1096  subqueryRTindexes))
1097  modifiableARIindexes = bms_add_member(modifiableARIindexes,
1098  appinfo->child_relid);
1099  }
1100  }
1101 
1102  /*
1103  * If the parent RTE is a partitioned table, we should use that as the
1104  * nominal relation, because the RTEs added for partitioned tables
1105  * (including the root parent) as child members of the inheritance set do
1106  * not appear anywhere else in the plan. The situation is exactly the
1107  * opposite in the case of non-partitioned inheritance parent as described
1108  * below.
1109  */
1110  parent_rte = rt_fetch(parentRTindex, root->parse->rtable);
1111  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1112  nominalRelation = parentRTindex;
1113 
1114  /*
1115  * And now we can get on with generating a plan for each child table.
1116  */
1117  foreach(lc, root->append_rel_list)
1118  {
1119  AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1120  PlannerInfo *subroot;
1121  RangeTblEntry *child_rte;
1122  RelOptInfo *sub_final_rel;
1123  Path *subpath;
1124 
1125  /* append_rel_list contains all append rels; ignore others */
1126  if (appinfo->parent_relid != parentRTindex)
1127  continue;
1128 
1129  /*
1130  * We need a working copy of the PlannerInfo so that we can control
1131  * propagation of information back to the main copy.
1132  */
1133  subroot = makeNode(PlannerInfo);
1134  memcpy(subroot, root, sizeof(PlannerInfo));
1135 
1136  /*
1137  * Generate modified query with this rel as target. We first apply
1138  * adjust_appendrel_attrs, which copies the Query and changes
1139  * references to the parent RTE to refer to the current child RTE,
1140  * then fool around with subquery RTEs.
1141  */
1142  subroot->parse = (Query *)
1144  (Node *) parse,
1145  appinfo);
1146 
1147  /*
1148  * If there are securityQuals attached to the parent, move them to the
1149  * child rel (they've already been transformed properly for that).
1150  */
1151  parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable);
1152  child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1153  child_rte->securityQuals = parent_rte->securityQuals;
1154  parent_rte->securityQuals = NIL;
1155 
1156  /*
1157  * The rowMarks list might contain references to subquery RTEs, so
1158  * make a copy that we can apply ChangeVarNodes to. (Fortunately, the
1159  * executor doesn't need to see the modified copies --- we can just
1160  * pass it the original rowMarks list.)
1161  */
1162  subroot->rowMarks = copyObject(root->rowMarks);
1163 
1164  /*
1165  * The append_rel_list likewise might contain references to subquery
1166  * RTEs (if any subqueries were flattenable UNION ALLs). So prepare
1167  * to apply ChangeVarNodes to that, too. As explained above, we only
1168  * want to copy items that actually contain such references; the rest
1169  * can just get linked into the subroot's append_rel_list.
1170  *
1171  * If we know there are no such references, we can just use the outer
1172  * append_rel_list unmodified.
1173  */
1174  if (modifiableARIindexes != NULL)
1175  {
1176  ListCell *lc2;
1177 
1178  subroot->append_rel_list = NIL;
1179  foreach(lc2, root->append_rel_list)
1180  {
1181  AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1182 
1183  if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1184  appinfo2 = copyObject(appinfo2);
1185 
1186  subroot->append_rel_list = lappend(subroot->append_rel_list,
1187  appinfo2);
1188  }
1189  }
1190 
1191  /*
1192  * Add placeholders to the child Query's rangetable list to fill the
1193  * RT indexes already reserved for subqueries in previous children.
1194  * These won't be referenced, so there's no need to make them very
1195  * valid-looking.
1196  */
1197  while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1198  subroot->parse->rtable = lappend(subroot->parse->rtable,
1200 
1201  /*
1202  * If this isn't the first child Query, generate duplicates of all
1203  * subquery RTEs, and adjust Var numbering to reference the
1204  * duplicates. To simplify the loop logic, we scan the original rtable
1205  * not the copy just made by adjust_appendrel_attrs; that should be OK
1206  * since subquery RTEs couldn't contain any references to the target
1207  * rel.
1208  */
1209  if (final_rtable != NIL && subqueryRTindexes != NULL)
1210  {
1211  ListCell *lr;
1212 
1213  rti = 1;
1214  foreach(lr, parse->rtable)
1215  {
1216  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);
1217 
1218  if (bms_is_member(rti, subqueryRTindexes))
1219  {
1220  Index newrti;
1221 
1222  /*
1223  * The RTE can't contain any references to its own RT
1224  * index, except in its securityQuals, so we can save a
1225  * few cycles by applying ChangeVarNodes to the rest of
1226  * the rangetable before we append the RTE to it.
1227  */
1228  newrti = list_length(subroot->parse->rtable) + 1;
1229  ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1230  ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1231  /* Skip processing unchanging parts of append_rel_list */
1232  if (modifiableARIindexes != NULL)
1233  {
1234  ListCell *lc2;
1235 
1236  foreach(lc2, subroot->append_rel_list)
1237  {
1238  AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1239 
1240  if (bms_is_member(appinfo2->child_relid,
1241  modifiableARIindexes))
1242  ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1243  }
1244  }
1245  rte = copyObject(rte);
1246  ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1247  subroot->parse->rtable = lappend(subroot->parse->rtable,
1248  rte);
1249  }
1250  rti++;
1251  }
1252  }
1253 
1254  /* There shouldn't be any OJ info to translate, as yet */
1255  Assert(subroot->join_info_list == NIL);
1256  /* and we haven't created PlaceHolderInfos, either */
1257  Assert(subroot->placeholder_list == NIL);
1258  /* hack to mark target relation as an inheritance partition */
1259  subroot->hasInheritedTarget = true;
1260 
1261  /* Generate Path(s) for accessing this result relation */
1262  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1263 
1264  /*
1265  * Set the nomimal target relation of the ModifyTable node if not
1266  * already done. We use the inheritance parent RTE as the nominal
1267  * target relation if it's a partitioned table (see just above this
1268  * loop). In the non-partitioned parent case, we'll use the first
1269  * child relation (even if it's excluded) as the nominal target
1270  * relation. Because of the way expand_inherited_rtentry works, the
1271  * latter should be the RTE representing the parent table in its role
1272  * as a simple member of the inheritance set.
1273  *
1274  * It would be logically cleaner to *always* use the inheritance
1275  * parent RTE as the nominal relation; but that RTE is not otherwise
1276  * referenced in the plan in the non-partitioned inheritance case.
1277  * Instead the duplicate child RTE created by expand_inherited_rtentry
1278  * is used elsewhere in the plan, so using the original parent RTE
1279  * would give rise to confusing use of multiple aliases in EXPLAIN
1280  * output for what the user will think is the "same" table. OTOH,
1281  * it's not a problem in the partitioned inheritance case, because the
1282  * duplicate child RTE added for the parent does not appear anywhere
1283  * else in the plan tree.
1284  */
1285  if (nominalRelation < 0)
1286  nominalRelation = appinfo->child_relid;
1287 
1288  /*
1289  * Select cheapest path in case there's more than one. We always run
1290  * modification queries to conclusion, so we care only for the
1291  * cheapest-total path.
1292  */
1293  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1294  set_cheapest(sub_final_rel);
1295  subpath = sub_final_rel->cheapest_total_path;
1296 
1297  /*
1298  * If this child rel was excluded by constraint exclusion, exclude it
1299  * from the result plan.
1300  */
1301  if (IS_DUMMY_PATH(subpath))
1302  continue;
1303 
1304  /*
1305  * If this is the first non-excluded child, its post-planning rtable
1306  * becomes the initial contents of final_rtable; otherwise, append
1307  * just its modified subquery RTEs to final_rtable.
1308  */
1309  if (final_rtable == NIL)
1310  final_rtable = subroot->parse->rtable;
1311  else
1312  final_rtable = list_concat(final_rtable,
1313  list_copy_tail(subroot->parse->rtable,
1314  list_length(final_rtable)));
1315 
1316  /*
1317  * We need to collect all the RelOptInfos from all child plans into
1318  * the main PlannerInfo, since setrefs.c will need them. We use the
1319  * last child's simple_rel_array (previous ones are too short), so we
1320  * have to propagate forward the RelOptInfos that were already built
1321  * in previous children.
1322  */
1323  Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1324  for (rti = 1; rti < save_rel_array_size; rti++)
1325  {
1326  RelOptInfo *brel = save_rel_array[rti];
1327 
1328  if (brel)
1329  subroot->simple_rel_array[rti] = brel;
1330  }
1331  save_rel_array_size = subroot->simple_rel_array_size;
1332  save_rel_array = subroot->simple_rel_array;
1333 
1334  /* Make sure any initplans from this rel get into the outer list */
1335  root->init_plans = subroot->init_plans;
1336 
1337  /* Build list of sub-paths */
1338  subpaths = lappend(subpaths, subpath);
1339 
1340  /* Build list of modified subroots, too */
1341  subroots = lappend(subroots, subroot);
1342 
1343  /* Build list of target-relation RT indexes */
1344  resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1345 
1346  /* Build lists of per-relation WCO and RETURNING targetlists */
1347  if (parse->withCheckOptions)
1348  withCheckOptionLists = lappend(withCheckOptionLists,
1349  subroot->parse->withCheckOptions);
1350  if (parse->returningList)
1351  returningLists = lappend(returningLists,
1352  subroot->parse->returningList);
1353 
1354  Assert(!parse->onConflict);
1355  }
1356 
1357  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1358  {
1359  partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
1360  /* The root partitioned table is included as a child rel */
1361  Assert(list_length(partitioned_rels) >= 1);
1362  }
1363 
1364  /* Result path must go into outer query's FINAL upperrel */
1365  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1366 
1367  /*
1368  * We don't currently worry about setting final_rel's consider_parallel
1369  * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1370  * to get control here.
1371  */
1372 
1373  /*
1374  * If we managed to exclude every child rel, return a dummy plan; it
1375  * doesn't even need a ModifyTable node.
1376  */
1377  if (subpaths == NIL)
1378  {
1379  set_dummy_rel_pathlist(final_rel);
1380  return;
1381  }
1382 
1383  /*
1384  * Put back the final adjusted rtable into the master copy of the Query.
1385  * (We mustn't do this if we found no non-excluded children.)
1386  */
1387  parse->rtable = final_rtable;
1388  root->simple_rel_array_size = save_rel_array_size;
1389  root->simple_rel_array = save_rel_array;
1390  /* Must reconstruct master's simple_rte_array, too */
1391  root->simple_rte_array = (RangeTblEntry **)
1392  palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1393  rti = 1;
1394  foreach(lc, final_rtable)
1395  {
1396  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1397 
1398  root->simple_rte_array[rti++] = rte;
1399  }
1400 
1401  /*
1402  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1403  * have dealt with fetching non-locked marked rows, else we need to have
1404  * ModifyTable do that.
1405  */
1406  if (parse->rowMarks)
1407  rowMarks = NIL;
1408  else
1409  rowMarks = root->rowMarks;
1410 
1411  /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1412  add_path(final_rel, (Path *)
1413  create_modifytable_path(root, final_rel,
1414  parse->commandType,
1415  parse->canSetTag,
1416  nominalRelation,
1417  partitioned_rels,
1418  resultRelations,
1419  subpaths,
1420  subroots,
1421  withCheckOptionLists,
1422  returningLists,
1423  rowMarks,
1424  NULL,
1425  SS_assign_special_param(root)));
1426 }
1427 
1428 /*--------------------
1429  * grouping_planner
1430  * Perform planning steps related to grouping, aggregation, etc.
1431  *
1432  * This function adds all required top-level processing to the scan/join
1433  * Path(s) produced by query_planner.
1434  *
1435  * If inheritance_update is true, we're being called from inheritance_planner
1436  * and should not include a ModifyTable step in the resulting Path(s).
1437  * (inheritance_planner will create a single ModifyTable node covering all the
1438  * target tables.)
1439  *
1440  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1441  * tuple_fraction is interpreted as follows:
1442  * 0: expect all tuples to be retrieved (normal case)
1443  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1444  * from the plan to be retrieved
1445  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1446  * expected to be retrieved (ie, a LIMIT specification)
1447  *
1448  * Returns nothing; the useful output is in the Paths we attach to the
1449  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1450  * root->processed_tlist contains the final processed targetlist.
1451  *
1452  * Note that we have not done set_cheapest() on the final rel; it's convenient
1453  * to leave this to the caller.
1454  *--------------------
1455  */
1456 static void
1457 grouping_planner(PlannerInfo *root, bool inheritance_update,
1458  double tuple_fraction)
1459 {
1460  Query *parse = root->parse;
1461  List *tlist = parse->targetList;
1462  int64 offset_est = 0;
1463  int64 count_est = 0;
1464  double limit_tuples = -1.0;
1465  bool have_postponed_srfs = false;
1466  PathTarget *final_target;
1467  List *final_targets;
1468  List *final_targets_contain_srfs;
1469  RelOptInfo *current_rel;
1470  RelOptInfo *final_rel;
1471  ListCell *lc;
1472 
1473  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1474  if (parse->limitCount || parse->limitOffset)
1475  {
1476  tuple_fraction = preprocess_limit(root, tuple_fraction,
1477  &offset_est, &count_est);
1478 
1479  /*
1480  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1481  * estimate the effects of using a bounded sort.
1482  */
1483  if (count_est > 0 && offset_est >= 0)
1484  limit_tuples = (double) count_est + (double) offset_est;
1485  }
1486 
1487  /* Make tuple_fraction accessible to lower-level routines */
1488  root->tuple_fraction = tuple_fraction;
1489 
1490  if (parse->setOperations)
1491  {
1492  /*
1493  * If there's a top-level ORDER BY, assume we have to fetch all the
1494  * tuples. This might be too simplistic given all the hackery below
1495  * to possibly avoid the sort; but the odds of accurate estimates here
1496  * are pretty low anyway. XXX try to get rid of this in favor of
1497  * letting plan_set_operations generate both fast-start and
1498  * cheapest-total paths.
1499  */
1500  if (parse->sortClause)
1501  root->tuple_fraction = 0.0;
1502 
1503  /*
1504  * Construct Paths for set operations. The results will not need any
1505  * work except perhaps a top-level sort and/or LIMIT. Note that any
1506  * special work for recursive unions is the responsibility of
1507  * plan_set_operations.
1508  */
1509  current_rel = plan_set_operations(root);
1510 
1511  /*
1512  * We should not need to call preprocess_targetlist, since we must be
1513  * in a SELECT query node. Instead, use the targetlist returned by
1514  * plan_set_operations (since this tells whether it returned any
1515  * resjunk columns!), and transfer any sort key information from the
1516  * original tlist.
1517  */
1518  Assert(parse->commandType == CMD_SELECT);
1519 
1520  tlist = root->processed_tlist; /* from plan_set_operations */
1521 
1522  /* for safety, copy processed_tlist instead of modifying in-place */
1523  tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1524 
1525  /* Save aside the final decorated tlist */
1526  root->processed_tlist = tlist;
1527 
1528  /* Also extract the PathTarget form of the setop result tlist */
1529  final_target = current_rel->cheapest_total_path->pathtarget;
1530 
1531  /* The setop result tlist couldn't contain any SRFs */
1532  Assert(!parse->hasTargetSRFs);
1533  final_targets = final_targets_contain_srfs = NIL;
1534 
1535  /*
1536  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1537  * checked already, but let's make sure).
1538  */
1539  if (parse->rowMarks)
1540  ereport(ERROR,
1541  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1542  /*------
1543  translator: %s is a SQL row locking clause such as FOR UPDATE */
1544  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1546  linitial(parse->rowMarks))->strength))));
1547 
1548  /*
1549  * Calculate pathkeys that represent result ordering requirements
1550  */
1551  Assert(parse->distinctClause == NIL);
1553  parse->sortClause,
1554  tlist);
1555  }
1556  else
1557  {
1558  /* No set operations, do regular planning */
1559  PathTarget *sort_input_target;
1560  List *sort_input_targets;
1561  List *sort_input_targets_contain_srfs;
1562  PathTarget *grouping_target;
1563  List *grouping_targets;
1564  List *grouping_targets_contain_srfs;
1565  PathTarget *scanjoin_target;
1566  List *scanjoin_targets;
1567  List *scanjoin_targets_contain_srfs;
1568  bool have_grouping;
1569  AggClauseCosts agg_costs;
1570  WindowFuncLists *wflists = NULL;
1571  List *activeWindows = NIL;
1572  grouping_sets_data *gset_data = NULL;
1573  standard_qp_extra qp_extra;
1574 
1575  /* A recursive query should always have setOperations */
1576  Assert(!root->hasRecursion);
1577 
1578  /* Preprocess grouping sets and GROUP BY clause, if any */
1579  if (parse->groupingSets)
1580  {
1581  gset_data = preprocess_grouping_sets(root);
1582  }
1583  else
1584  {
1585  /* Preprocess regular GROUP BY clause, if any */
1586  if (parse->groupClause)
1587  parse->groupClause = preprocess_groupclause(root, NIL);
1588  }
1589 
1590  /* Preprocess targetlist */
1591  tlist = preprocess_targetlist(root, tlist);
1592 
1593  if (parse->onConflict)
1594  parse->onConflict->onConflictSet =
1596  parse->resultRelation,
1597  parse->rtable);
1598 
1599  /*
1600  * We are now done hacking up the query's targetlist. Most of the
1601  * remaining planning work will be done with the PathTarget
1602  * representation of tlists, but save aside the full representation so
1603  * that we can transfer its decoration (resnames etc) to the topmost
1604  * tlist of the finished Plan.
1605  */
1606  root->processed_tlist = tlist;
1607 
1608  /*
1609  * Collect statistics about aggregates for estimating costs, and mark
1610  * all the aggregates with resolved aggtranstypes. We must do this
1611  * before slicing and dicing the tlist into various pathtargets, else
1612  * some copies of the Aggref nodes might escape being marked with the
1613  * correct transtypes.
1614  *
1615  * Note: currently, we do not detect duplicate aggregates here. This
1616  * may result in somewhat-overestimated cost, which is fine for our
1617  * purposes since all Paths will get charged the same. But at some
1618  * point we might wish to do that detection in the planner, rather
1619  * than during executor startup.
1620  */
1621  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1622  if (parse->hasAggs)
1623  {
1624  get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1625  &agg_costs);
1627  &agg_costs);
1628  }
1629 
1630  /*
1631  * Locate any window functions in the tlist. (We don't need to look
1632  * anywhere else, since expressions used in ORDER BY will be in there
1633  * too.) Note that they could all have been eliminated by constant
1634  * folding, in which case we don't need to do any more work.
1635  */
1636  if (parse->hasWindowFuncs)
1637  {
1638  wflists = find_window_functions((Node *) tlist,
1639  list_length(parse->windowClause));
1640  if (wflists->numWindowFuncs > 0)
1641  activeWindows = select_active_windows(root, wflists);
1642  else
1643  parse->hasWindowFuncs = false;
1644  }
1645 
1646  /*
1647  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1648  * adding logic between here and the query_planner() call. Anything
1649  * that is needed in MIN/MAX-optimizable cases will have to be
1650  * duplicated in planagg.c.
1651  */
1652  if (parse->hasAggs)
1653  preprocess_minmax_aggregates(root, tlist);
1654 
1655  /*
1656  * Figure out whether there's a hard limit on the number of rows that
1657  * query_planner's result subplan needs to return. Even if we know a
1658  * hard limit overall, it doesn't apply if the query has any
1659  * grouping/aggregation operations, or SRFs in the tlist.
1660  */
1661  if (parse->groupClause ||
1662  parse->groupingSets ||
1663  parse->distinctClause ||
1664  parse->hasAggs ||
1665  parse->hasWindowFuncs ||
1666  parse->hasTargetSRFs ||
1667  root->hasHavingQual)
1668  root->limit_tuples = -1.0;
1669  else
1670  root->limit_tuples = limit_tuples;
1671 
1672  /* Set up data needed by standard_qp_callback */
1673  qp_extra.tlist = tlist;
1674  qp_extra.activeWindows = activeWindows;
1675  qp_extra.groupClause = (gset_data
1676  ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
1677  : parse->groupClause);
1678 
1679  /*
1680  * Generate the best unsorted and presorted paths for the scan/join
1681  * portion of this Query, ie the processing represented by the
1682  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1683  * We also generate (in standard_qp_callback) pathkey representations
1684  * of the query's sort clause, distinct clause, etc.
1685  */
1686  current_rel = query_planner(root, tlist,
1687  standard_qp_callback, &qp_extra);
1688 
1689  /*
1690  * Convert the query's result tlist into PathTarget format.
1691  *
1692  * Note: it's desirable to not do this till after query_planner(),
1693  * because the target width estimates can use per-Var width numbers
1694  * that were obtained within query_planner().
1695  */
1696  final_target = create_pathtarget(root, tlist);
1697 
1698  /*
1699  * If ORDER BY was given, consider whether we should use a post-sort
1700  * projection, and compute the adjusted target for preceding steps if
1701  * so.
1702  */
1703  if (parse->sortClause)
1704  sort_input_target = make_sort_input_target(root,
1705  final_target,
1706  &have_postponed_srfs);
1707  else
1708  sort_input_target = final_target;
1709 
1710  /*
1711  * If we have window functions to deal with, the output from any
1712  * grouping step needs to be what the window functions want;
1713  * otherwise, it should be sort_input_target.
1714  */
1715  if (activeWindows)
1716  grouping_target = make_window_input_target(root,
1717  final_target,
1718  activeWindows);
1719  else
1720  grouping_target = sort_input_target;
1721 
1722  /*
1723  * If we have grouping or aggregation to do, the topmost scan/join
1724  * plan node must emit what the grouping step wants; otherwise, it
1725  * should emit grouping_target.
1726  */
1727  have_grouping = (parse->groupClause || parse->groupingSets ||
1728  parse->hasAggs || root->hasHavingQual);
1729  if (have_grouping)
1730  scanjoin_target = make_group_input_target(root, final_target);
1731  else
1732  scanjoin_target = grouping_target;
1733 
1734  /*
1735  * If there are any SRFs in the targetlist, we must separate each of
1736  * these PathTargets into SRF-computing and SRF-free targets. Replace
1737  * each of the named targets with a SRF-free version, and remember the
1738  * list of additional projection steps we need to add afterwards.
1739  */
1740  if (parse->hasTargetSRFs)
1741  {
1742  /* final_target doesn't recompute any SRFs in sort_input_target */
1743  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1744  &final_targets,
1745  &final_targets_contain_srfs);
1746  final_target = (PathTarget *) linitial(final_targets);
1747  Assert(!linitial_int(final_targets_contain_srfs));
1748  /* likewise for sort_input_target vs. grouping_target */
1749  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1750  &sort_input_targets,
1751  &sort_input_targets_contain_srfs);
1752  sort_input_target = (PathTarget *) linitial(sort_input_targets);
1753  Assert(!linitial_int(sort_input_targets_contain_srfs));
1754  /* likewise for grouping_target vs. scanjoin_target */
1755  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1756  &grouping_targets,
1757  &grouping_targets_contain_srfs);
1758  grouping_target = (PathTarget *) linitial(grouping_targets);
1759  Assert(!linitial_int(grouping_targets_contain_srfs));
1760  /* scanjoin_target will not have any SRFs precomputed for it */
1761  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1762  &scanjoin_targets,
1763  &scanjoin_targets_contain_srfs);
1764  scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
1765  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1766  }
1767  else
1768  {
1769  /* initialize lists, just to keep compiler quiet */
1770  final_targets = final_targets_contain_srfs = NIL;
1771  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1772  grouping_targets = grouping_targets_contain_srfs = NIL;
1773  scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
1774  }
1775 
1776  /*
1777  * Forcibly apply SRF-free scan/join target to all the Paths for the
1778  * scan/join rel.
1779  *
1780  * In principle we should re-run set_cheapest() here to identify the
1781  * cheapest path, but it seems unlikely that adding the same tlist
1782  * eval costs to all the paths would change that, so we don't bother.
1783  * Instead, just assume that the cheapest-startup and cheapest-total
1784  * paths remain so. (There should be no parameterized paths anymore,
1785  * so we needn't worry about updating cheapest_parameterized_paths.)
1786  */
1787  foreach(lc, current_rel->pathlist)
1788  {
1789  Path *subpath = (Path *) lfirst(lc);
1790  Path *path;
1791 
1792  Assert(subpath->param_info == NULL);
1793  path = apply_projection_to_path(root, current_rel,
1794  subpath, scanjoin_target);
1795  /* If we had to add a Result, path is different from subpath */
1796  if (path != subpath)
1797  {
1798  lfirst(lc) = path;
1799  if (subpath == current_rel->cheapest_startup_path)
1800  current_rel->cheapest_startup_path = path;
1801  if (subpath == current_rel->cheapest_total_path)
1802  current_rel->cheapest_total_path = path;
1803  }
1804  }
1805 
1806  /*
1807  * Upper planning steps which make use of the top scan/join rel's
1808  * partial pathlist will expect partial paths for that rel to produce
1809  * the same output as complete paths ... and we just changed the
1810  * output for the complete paths, so we'll need to do the same thing
1811  * for partial paths. But only parallel-safe expressions can be
1812  * computed by partial paths.
1813  */
1814  if (current_rel->partial_pathlist &&
1815  is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1816  {
1817  /* Apply the scan/join target to each partial path */
1818  foreach(lc, current_rel->partial_pathlist)
1819  {
1820  Path *subpath = (Path *) lfirst(lc);
1821  Path *newpath;
1822 
1823  /* Shouldn't have any parameterized paths anymore */
1824  Assert(subpath->param_info == NULL);
1825 
1826  /*
1827  * Don't use apply_projection_to_path() here, because there
1828  * could be other pointers to these paths, and therefore we
1829  * mustn't modify them in place.
1830  */
1831  newpath = (Path *) create_projection_path(root,
1832  current_rel,
1833  subpath,
1834  scanjoin_target);
1835  lfirst(lc) = newpath;
1836  }
1837  }
1838  else
1839  {
1840  /*
1841  * In the unfortunate event that scanjoin_target is not
1842  * parallel-safe, we can't apply it to the partial paths; in that
1843  * case, we'll need to forget about the partial paths, which
1844  * aren't valid input for upper planning steps.
1845  */
1846  current_rel->partial_pathlist = NIL;
1847  }
1848 
1849  /* Now fix things up if scan/join target contains SRFs */
1850  if (parse->hasTargetSRFs)
1851  adjust_paths_for_srfs(root, current_rel,
1852  scanjoin_targets,
1853  scanjoin_targets_contain_srfs);
1854 
1855  /*
1856  * Save the various upper-rel PathTargets we just computed into
1857  * root->upper_targets[]. The core code doesn't use this, but it
1858  * provides a convenient place for extensions to get at the info. For
1859  * consistency, we save all the intermediate targets, even though some
1860  * of the corresponding upperrels might not be needed for this query.
1861  */
1862  root->upper_targets[UPPERREL_FINAL] = final_target;
1863  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1864  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1865 
1866  /*
1867  * If we have grouping and/or aggregation, consider ways to implement
1868  * that. We build a new upperrel representing the output of this
1869  * phase.
1870  */
1871  if (have_grouping)
1872  {
1873  current_rel = create_grouping_paths(root,
1874  current_rel,
1875  grouping_target,
1876  &agg_costs,
1877  gset_data);
1878  /* Fix things up if grouping_target contains SRFs */
1879  if (parse->hasTargetSRFs)
1880  adjust_paths_for_srfs(root, current_rel,
1881  grouping_targets,
1882  grouping_targets_contain_srfs);
1883  }
1884 
1885  /*
1886  * If we have window functions, consider ways to implement those. We
1887  * build a new upperrel representing the output of this phase.
1888  */
1889  if (activeWindows)
1890  {
1891  current_rel = create_window_paths(root,
1892  current_rel,
1893  grouping_target,
1894  sort_input_target,
1895  tlist,
1896  wflists,
1897  activeWindows);
1898  /* Fix things up if sort_input_target contains SRFs */
1899  if (parse->hasTargetSRFs)
1900  adjust_paths_for_srfs(root, current_rel,
1901  sort_input_targets,
1902  sort_input_targets_contain_srfs);
1903  }
1904 
1905  /*
1906  * If there is a DISTINCT clause, consider ways to implement that. We
1907  * build a new upperrel representing the output of this phase.
1908  */
1909  if (parse->distinctClause)
1910  {
1911  current_rel = create_distinct_paths(root,
1912  current_rel);
1913  }
1914  } /* end of if (setOperations) */
1915 
1916  /*
1917  * If ORDER BY was given, consider ways to implement that, and generate a
1918  * new upperrel containing only paths that emit the correct ordering and
1919  * project the correct final_target. We can apply the original
1920  * limit_tuples limit in sort costing here, but only if there are no
1921  * postponed SRFs.
1922  */
1923  if (parse->sortClause)
1924  {
1925  current_rel = create_ordered_paths(root,
1926  current_rel,
1927  final_target,
1928  have_postponed_srfs ? -1.0 :
1929  limit_tuples);
1930  /* Fix things up if final_target contains SRFs */
1931  if (parse->hasTargetSRFs)
1932  adjust_paths_for_srfs(root, current_rel,
1933  final_targets,
1934  final_targets_contain_srfs);
1935  }
1936 
1937  /*
1938  * Now we are prepared to build the final-output upperrel.
1939  */
1940  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1941 
1942  /*
1943  * If the input rel is marked consider_parallel and there's nothing that's
1944  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1945  * consider_parallel as well. Note that if the query has rowMarks or is
1946  * not a SELECT, consider_parallel will be false for every relation in the
1947  * query.
1948  */
1949  if (current_rel->consider_parallel &&
1950  is_parallel_safe(root, parse->limitOffset) &&
1951  is_parallel_safe(root, parse->limitCount))
1952  final_rel->consider_parallel = true;
1953 
1954  /*
1955  * If the current_rel belongs to a single FDW, so does the final_rel.
1956  */
1957  final_rel->serverid = current_rel->serverid;
1958  final_rel->userid = current_rel->userid;
1959  final_rel->useridiscurrent = current_rel->useridiscurrent;
1960  final_rel->fdwroutine = current_rel->fdwroutine;
1961 
1962  /*
1963  * Generate paths for the final_rel. Insert all surviving paths, with
1964  * LockRows, Limit, and/or ModifyTable steps added if needed.
1965  */
1966  foreach(lc, current_rel->pathlist)
1967  {
1968  Path *path = (Path *) lfirst(lc);
1969 
1970  /*
1971  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1972  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1973  * here. If there are only non-locking rowmarks, they should be
1974  * handled by the ModifyTable node instead. However, root->rowMarks
1975  * is what goes into the LockRows node.)
1976  */
1977  if (parse->rowMarks)
1978  {
1979  path = (Path *) create_lockrows_path(root, final_rel, path,
1980  root->rowMarks,
1981  SS_assign_special_param(root));
1982  }
1983 
1984  /*
1985  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1986  */
1987  if (limit_needed(parse))
1988  {
1989  path = (Path *) create_limit_path(root, final_rel, path,
1990  parse->limitOffset,
1991  parse->limitCount,
1992  offset_est, count_est);
1993  }
1994 
1995  /*
1996  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
1997  * inheritance_planner, add the ModifyTable node.
1998  */
1999  if (parse->commandType != CMD_SELECT && !inheritance_update)
2000  {
2001  List *withCheckOptionLists;
2002  List *returningLists;
2003  List *rowMarks;
2004 
2005  /*
2006  * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2007  * needed.
2008  */
2009  if (parse->withCheckOptions)
2010  withCheckOptionLists = list_make1(parse->withCheckOptions);
2011  else
2012  withCheckOptionLists = NIL;
2013 
2014  if (parse->returningList)
2015  returningLists = list_make1(parse->returningList);
2016  else
2017  returningLists = NIL;
2018 
2019  /*
2020  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2021  * will have dealt with fetching non-locked marked rows, else we
2022  * need to have ModifyTable do that.
2023  */
2024  if (parse->rowMarks)
2025  rowMarks = NIL;
2026  else
2027  rowMarks = root->rowMarks;
2028 
2029  path = (Path *)
2030  create_modifytable_path(root, final_rel,
2031  parse->commandType,
2032  parse->canSetTag,
2033  parse->resultRelation,
2034  NIL,
2036  list_make1(path),
2037  list_make1(root),
2038  withCheckOptionLists,
2039  returningLists,
2040  rowMarks,
2041  parse->onConflict,
2042  SS_assign_special_param(root));
2043  }
2044 
2045  /* And shove it into final_rel */
2046  add_path(final_rel, path);
2047  }
2048 
2049  /*
2050  * If there is an FDW that's responsible for all baserels of the query,
2051  * let it consider adding ForeignPaths.
2052  */
2053  if (final_rel->fdwroutine &&
2054  final_rel->fdwroutine->GetForeignUpperPaths)
2056  current_rel, final_rel);
2057 
2058  /* Let extensions possibly add some more paths */
2060  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2061  current_rel, final_rel);
2062 
2063  /* Note: currently, we leave it to callers to do set_cheapest() */
2064 }
2065 
2066 /*
2067  * Do preprocessing for groupingSets clause and related data. This handles the
2068  * preliminary steps of expanding the grouping sets, organizing them into lists
2069  * of rollups, and preparing annotations which will later be filled in with
2070  * size estimates.
2071  */
2072 static grouping_sets_data *
2074 {
2075  Query *parse = root->parse;
2076  List *sets;
2077  int maxref = 0;
2078  ListCell *lc;
2079  ListCell *lc_set;
2081 
2082  parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2083 
2084  gd->any_hashable = false;
2085  gd->unhashable_refs = NULL;
2086  gd->unsortable_refs = NULL;
2087  gd->unsortable_sets = NIL;
2088 
2089  if (parse->groupClause)
2090  {
2091  ListCell *lc;
2092 
2093  foreach(lc, parse->groupClause)
2094  {
2095  SortGroupClause *gc = lfirst(lc);
2096  Index ref = gc->tleSortGroupRef;
2097 
2098  if (ref > maxref)
2099  maxref = ref;
2100 
2101  if (!gc->hashable)
2103 
2104  if (!OidIsValid(gc->sortop))
2106  }
2107  }
2108 
2109  /* Allocate workspace array for remapping */
2110  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2111 
2112  /*
2113  * If we have any unsortable sets, we must extract them before trying to
2114  * prepare rollups. Unsortable sets don't go through
2115  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2116  * here.
2117  */
2118  if (!bms_is_empty(gd->unsortable_refs))
2119  {
2120  List *sortable_sets = NIL;
2121 
2122  foreach(lc, parse->groupingSets)
2123  {
2124  List *gset = lfirst(lc);
2125 
2126  if (bms_overlap_list(gd->unsortable_refs, gset))
2127  {
2129 
2130  gs->set = gset;
2131  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2132 
2133  /*
2134  * We must enforce here that an unsortable set is hashable;
2135  * later code assumes this. Parse analysis only checks that
2136  * every individual column is either hashable or sortable.
2137  *
2138  * Note that passing this test doesn't guarantee we can
2139  * generate a plan; there might be other showstoppers.
2140  */
2141  if (bms_overlap_list(gd->unhashable_refs, gset))
2142  ereport(ERROR,
2143  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2144  errmsg("could not implement GROUP BY"),
2145  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2146  }
2147  else
2148  sortable_sets = lappend(sortable_sets, gset);
2149  }
2150 
2151  if (sortable_sets)
2152  sets = extract_rollup_sets(sortable_sets);
2153  else
2154  sets = NIL;
2155  }
2156  else
2157  sets = extract_rollup_sets(parse->groupingSets);
2158 
2159  foreach(lc_set, sets)
2160  {
2161  List *current_sets = (List *) lfirst(lc_set);
2162  RollupData *rollup = makeNode(RollupData);
2163  GroupingSetData *gs;
2164 
2165  /*
2166  * Reorder the current list of grouping sets into correct prefix
2167  * order. If only one aggregation pass is needed, try to make the
2168  * list match the ORDER BY clause; if more than one pass is needed, we
2169  * don't bother with that.
2170  *
2171  * Note that this reorders the sets from smallest-member-first to
2172  * largest-member-first, and applies the GroupingSetData annotations,
2173  * though the data will be filled in later.
2174  */
2175  current_sets = reorder_grouping_sets(current_sets,
2176  (list_length(sets) == 1
2177  ? parse->sortClause
2178  : NIL));
2179 
2180  /*
2181  * Get the initial (and therefore largest) grouping set.
2182  */
2183  gs = linitial(current_sets);
2184 
2185  /*
2186  * Order the groupClause appropriately. If the first grouping set is
2187  * empty, then the groupClause must also be empty; otherwise we have
2188  * to force the groupClause to match that grouping set's order.
2189  *
2190  * (The first grouping set can be empty even though parse->groupClause
2191  * is not empty only if all non-empty grouping sets are unsortable.
2192  * The groupClauses for hashed grouping sets are built later on.)
2193  */
2194  if (gs->set)
2195  rollup->groupClause = preprocess_groupclause(root, gs->set);
2196  else
2197  rollup->groupClause = NIL;
2198 
2199  /*
2200  * Is it hashable? We pretend empty sets are hashable even though we
2201  * actually force them not to be hashed later. But don't bother if
2202  * there's nothing but empty sets (since in that case we can't hash
2203  * anything).
2204  */
2205  if (gs->set &&
2207  {
2208  rollup->hashable = true;
2209  gd->any_hashable = true;
2210  }
2211 
2212  /*
2213  * Now that we've pinned down an order for the groupClause for this
2214  * list of grouping sets, we need to remap the entries in the grouping
2215  * sets from sortgrouprefs to plain indices (0-based) into the
2216  * groupClause for this collection of grouping sets. We keep the
2217  * original form for later use, though.
2218  */
2219  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2220  current_sets,
2221  gd->tleref_to_colnum_map);
2222  rollup->gsets_data = current_sets;
2223 
2224  gd->rollups = lappend(gd->rollups, rollup);
2225  }
2226 
2227  if (gd->unsortable_sets)
2228  {
2229  /*
2230  * We have not yet pinned down a groupclause for this, but we will
2231  * need index-based lists for estimation purposes. Construct
2232  * hash_sets_idx based on the entire original groupclause for now.
2233  */
2235  gd->unsortable_sets,
2236  gd->tleref_to_colnum_map);
2237  gd->any_hashable = true;
2238  }
2239 
2240  return gd;
2241 }
2242 
2243 /*
2244  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2245  * (without annotation) mapped to indexes into the given groupclause.
2246  */
2247 static List *
2249  List *gsets,
2250  int *tleref_to_colnum_map)
2251 {
2252  int ref = 0;
2253  List *result = NIL;
2254  ListCell *lc;
2255 
2256  foreach(lc, groupClause)
2257  {
2258  SortGroupClause *gc = lfirst(lc);
2259 
2260  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2261  }
2262 
2263  foreach(lc, gsets)
2264  {
2265  List *set = NIL;
2266  ListCell *lc2;
2267  GroupingSetData *gs = lfirst(lc);
2268 
2269  foreach(lc2, gs->set)
2270  {
2271  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2272  }
2273 
2274  result = lappend(result, set);
2275  }
2276 
2277  return result;
2278 }
2279 
2280 
2281 
2282 /*
2283  * Detect whether a plan node is a "dummy" plan created when a relation
2284  * is deemed not to need scanning due to constraint exclusion.
2285  *
2286  * Currently, such dummy plans are Result nodes with constant FALSE
2287  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2288  *
2289  * XXX this probably ought to be somewhere else, but not clear where.
2290  */
2291 bool
2293 {
2294  if (IsA(plan, Result))
2295  {
2296  List *rcqual = (List *) ((Result *) plan)->resconstantqual;
2297 
2298  if (list_length(rcqual) == 1)
2299  {
2300  Const *constqual = (Const *) linitial(rcqual);
2301 
2302  if (constqual && IsA(constqual, Const))
2303  {
2304  if (!constqual->constisnull &&
2305  !DatumGetBool(constqual->constvalue))
2306  return true;
2307  }
2308  }
2309  }
2310  return false;
2311 }
2312 
2313 /*
2314  * preprocess_rowmarks - set up PlanRowMarks if needed
2315  */
2316 static void
2318 {
2319  Query *parse = root->parse;
2320  Bitmapset *rels;
2321  List *prowmarks;
2322  ListCell *l;
2323  int i;
2324 
2325  if (parse->rowMarks)
2326  {
2327  /*
2328  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2329  * grouping, since grouping renders a reference to individual tuple
2330  * CTIDs invalid. This is also checked at parse time, but that's
2331  * insufficient because of rule substitution, query pullup, etc.
2332  */
2333  CheckSelectLocking(parse, ((RowMarkClause *)
2334  linitial(parse->rowMarks))->strength);
2335  }
2336  else
2337  {
2338  /*
2339  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2340  * UPDATE/SHARE.
2341  */
2342  if (parse->commandType != CMD_UPDATE &&
2343  parse->commandType != CMD_DELETE)
2344  return;
2345  }
2346 
2347  /*
2348  * We need to have rowmarks for all base relations except the target. We
2349  * make a bitmapset of all base rels and then remove the items we don't
2350  * need or have FOR [KEY] UPDATE/SHARE marks for.
2351  */
2352  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2353  if (parse->resultRelation)
2354  rels = bms_del_member(rels, parse->resultRelation);
2355 
2356  /*
2357  * Convert RowMarkClauses to PlanRowMark representation.
2358  */
2359  prowmarks = NIL;
2360  foreach(l, parse->rowMarks)
2361  {
2362  RowMarkClause *rc = (RowMarkClause *) lfirst(l);
2363  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2364  PlanRowMark *newrc;
2365 
2366  /*
2367  * Currently, it is syntactically impossible to have FOR UPDATE et al
2368  * applied to an update/delete target rel. If that ever becomes
2369  * possible, we should drop the target from the PlanRowMark list.
2370  */
2371  Assert(rc->rti != parse->resultRelation);
2372 
2373  /*
2374  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2375  * can't support true locking. Subqueries that got flattened into the
2376  * main query should be ignored completely. Any that didn't will get
2377  * ROW_MARK_COPY items in the next loop.
2378  */
2379  if (rte->rtekind != RTE_RELATION)
2380  continue;
2381 
2382  rels = bms_del_member(rels, rc->rti);
2383 
2384  newrc = makeNode(PlanRowMark);
2385  newrc->rti = newrc->prti = rc->rti;
2386  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2387  newrc->markType = select_rowmark_type(rte, rc->strength);
2388  newrc->allMarkTypes = (1 << newrc->markType);
2389  newrc->strength = rc->strength;
2390  newrc->waitPolicy = rc->waitPolicy;
2391  newrc->isParent = false;
2392 
2393  prowmarks = lappend(prowmarks, newrc);
2394  }
2395 
2396  /*
2397  * Now, add rowmarks for any non-target, non-locked base relations.
2398  */
2399  i = 0;
2400  foreach(l, parse->rtable)
2401  {
2402  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
2403  PlanRowMark *newrc;
2404 
2405  i++;
2406  if (!bms_is_member(i, rels))
2407  continue;
2408 
2409  newrc = makeNode(PlanRowMark);
2410  newrc->rti = newrc->prti = i;
2411  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2412  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2413  newrc->allMarkTypes = (1 << newrc->markType);
2414  newrc->strength = LCS_NONE;
2415  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2416  newrc->isParent = false;
2417 
2418  prowmarks = lappend(prowmarks, newrc);
2419  }
2420 
2421  root->rowMarks = prowmarks;
2422 }
2423 
2424 /*
2425  * Select RowMarkType to use for a given table
2426  */
2429 {
2430  if (rte->rtekind != RTE_RELATION)
2431  {
2432  /* If it's not a table at all, use ROW_MARK_COPY */
2433  return ROW_MARK_COPY;
2434  }
2435  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2436  {
2437  /* Let the FDW select the rowmark type, if it wants to */
2438  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2439 
2440  if (fdwroutine->GetForeignRowMarkType != NULL)
2441  return fdwroutine->GetForeignRowMarkType(rte, strength);
2442  /* Otherwise, use ROW_MARK_COPY by default */
2443  return ROW_MARK_COPY;
2444  }
2445  else
2446  {
2447  /* Regular table, apply the appropriate lock type */
2448  switch (strength)
2449  {
2450  case LCS_NONE:
2451 
2452  /*
2453  * We don't need a tuple lock, only the ability to re-fetch
2454  * the row.
2455  */
2456  return ROW_MARK_REFERENCE;
2457  break;
2458  case LCS_FORKEYSHARE:
2459  return ROW_MARK_KEYSHARE;
2460  break;
2461  case LCS_FORSHARE:
2462  return ROW_MARK_SHARE;
2463  break;
2464  case LCS_FORNOKEYUPDATE:
2465  return ROW_MARK_NOKEYEXCLUSIVE;
2466  break;
2467  case LCS_FORUPDATE:
2468  return ROW_MARK_EXCLUSIVE;
2469  break;
2470  }
2471  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2472  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2473  }
2474 }
2475 
2476 /*
2477  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2478  *
2479  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2480  * results back in *count_est and *offset_est. These variables are set to
2481  * 0 if the corresponding clause is not present, and -1 if it's present
2482  * but we couldn't estimate the value for it. (The "0" convention is OK
2483  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2484  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2485  * usual practice of never estimating less than one row.) These values will
2486  * be passed to create_limit_path, which see if you change this code.
2487  *
2488  * The return value is the suitably adjusted tuple_fraction to use for
2489  * planning the query. This adjustment is not overridable, since it reflects
2490  * plan actions that grouping_planner() will certainly take, not assumptions
2491  * about context.
2492  */
2493 static double
2494 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2495  int64 *offset_est, int64 *count_est)
2496 {
2497  Query *parse = root->parse;
2498  Node *est;
2499  double limit_fraction;
2500 
2501  /* Should not be called unless LIMIT or OFFSET */
2502  Assert(parse->limitCount || parse->limitOffset);
2503 
2504  /*
2505  * Try to obtain the clause values. We use estimate_expression_value
2506  * primarily because it can sometimes do something useful with Params.
2507  */
2508  if (parse->limitCount)
2509  {
2510  est = estimate_expression_value(root, parse->limitCount);
2511  if (est && IsA(est, Const))
2512  {
2513  if (((Const *) est)->constisnull)
2514  {
2515  /* NULL indicates LIMIT ALL, ie, no limit */
2516  *count_est = 0; /* treat as not present */
2517  }
2518  else
2519  {
2520  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2521  if (*count_est <= 0)
2522  *count_est = 1; /* force to at least 1 */
2523  }
2524  }
2525  else
2526  *count_est = -1; /* can't estimate */
2527  }
2528  else
2529  *count_est = 0; /* not present */
2530 
2531  if (parse->limitOffset)
2532  {
2533  est = estimate_expression_value(root, parse->limitOffset);
2534  if (est && IsA(est, Const))
2535  {
2536  if (((Const *) est)->constisnull)
2537  {
2538  /* Treat NULL as no offset; the executor will too */
2539  *offset_est = 0; /* treat as not present */
2540  }
2541  else
2542  {
2543  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2544  if (*offset_est < 0)
2545  *offset_est = 0; /* treat as not present */
2546  }
2547  }
2548  else
2549  *offset_est = -1; /* can't estimate */
2550  }
2551  else
2552  *offset_est = 0; /* not present */
2553 
2554  if (*count_est != 0)
2555  {
2556  /*
2557  * A LIMIT clause limits the absolute number of tuples returned.
2558  * However, if it's not a constant LIMIT then we have to guess; for
2559  * lack of a better idea, assume 10% of the plan's result is wanted.
2560  */
2561  if (*count_est < 0 || *offset_est < 0)
2562  {
2563  /* LIMIT or OFFSET is an expression ... punt ... */
2564  limit_fraction = 0.10;
2565  }
2566  else
2567  {
2568  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2569  limit_fraction = (double) *count_est + (double) *offset_est;
2570  }
2571 
2572  /*
2573  * If we have absolute limits from both caller and LIMIT, use the
2574  * smaller value; likewise if they are both fractional. If one is
2575  * fractional and the other absolute, we can't easily determine which
2576  * is smaller, but we use the heuristic that the absolute will usually
2577  * be smaller.
2578  */
2579  if (tuple_fraction >= 1.0)
2580  {
2581  if (limit_fraction >= 1.0)
2582  {
2583  /* both absolute */
2584  tuple_fraction = Min(tuple_fraction, limit_fraction);
2585  }
2586  else
2587  {
2588  /* caller absolute, limit fractional; use caller's value */
2589  }
2590  }
2591  else if (tuple_fraction > 0.0)
2592  {
2593  if (limit_fraction >= 1.0)
2594  {
2595  /* caller fractional, limit absolute; use limit */
2596  tuple_fraction = limit_fraction;
2597  }
2598  else
2599  {
2600  /* both fractional */
2601  tuple_fraction = Min(tuple_fraction, limit_fraction);
2602  }
2603  }
2604  else
2605  {
2606  /* no info from caller, just use limit */
2607  tuple_fraction = limit_fraction;
2608  }
2609  }
2610  else if (*offset_est != 0 && tuple_fraction > 0.0)
2611  {
2612  /*
2613  * We have an OFFSET but no LIMIT. This acts entirely differently
2614  * from the LIMIT case: here, we need to increase rather than decrease
2615  * the caller's tuple_fraction, because the OFFSET acts to cause more
2616  * tuples to be fetched instead of fewer. This only matters if we got
2617  * a tuple_fraction > 0, however.
2618  *
2619  * As above, use 10% if OFFSET is present but unestimatable.
2620  */
2621  if (*offset_est < 0)
2622  limit_fraction = 0.10;
2623  else
2624  limit_fraction = (double) *offset_est;
2625 
2626  /*
2627  * If we have absolute counts from both caller and OFFSET, add them
2628  * together; likewise if they are both fractional. If one is
2629  * fractional and the other absolute, we want to take the larger, and
2630  * we heuristically assume that's the fractional one.
2631  */
2632  if (tuple_fraction >= 1.0)
2633  {
2634  if (limit_fraction >= 1.0)
2635  {
2636  /* both absolute, so add them together */
2637  tuple_fraction += limit_fraction;
2638  }
2639  else
2640  {
2641  /* caller absolute, limit fractional; use limit */
2642  tuple_fraction = limit_fraction;
2643  }
2644  }
2645  else
2646  {
2647  if (limit_fraction >= 1.0)
2648  {
2649  /* caller fractional, limit absolute; use caller's value */
2650  }
2651  else
2652  {
2653  /* both fractional, so add them together */
2654  tuple_fraction += limit_fraction;
2655  if (tuple_fraction >= 1.0)
2656  tuple_fraction = 0.0; /* assume fetch all */
2657  }
2658  }
2659  }
2660 
2661  return tuple_fraction;
2662 }
2663 
2664 /*
2665  * limit_needed - do we actually need a Limit plan node?
2666  *
2667  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2668  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2669  * locution for an optimization fence. (Because other places in the planner
2670  * merely check whether parse->limitOffset isn't NULL, it will still work as
2671  * an optimization fence --- we're just suppressing unnecessary run-time
2672  * overhead.)
2673  *
2674  * This might look like it could be merged into preprocess_limit, but there's
2675  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2676  * in preprocess_limit it's good enough to consider estimated values.
2677  */
2678 static bool
2680 {
2681  Node *node;
2682 
2683  node = parse->limitCount;
2684  if (node)
2685  {
2686  if (IsA(node, Const))
2687  {
2688  /* NULL indicates LIMIT ALL, ie, no limit */
2689  if (!((Const *) node)->constisnull)
2690  return true; /* LIMIT with a constant value */
2691  }
2692  else
2693  return true; /* non-constant LIMIT */
2694  }
2695 
2696  node = parse->limitOffset;
2697  if (node)
2698  {
2699  if (IsA(node, Const))
2700  {
2701  /* Treat NULL as no offset; the executor would too */
2702  if (!((Const *) node)->constisnull)
2703  {
2704  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2705 
2706  if (offset != 0)
2707  return true; /* OFFSET with a nonzero value */
2708  }
2709  }
2710  else
2711  return true; /* non-constant OFFSET */
2712  }
2713 
2714  return false; /* don't need a Limit plan node */
2715 }
2716 
2717 
2718 /*
2719  * remove_useless_groupby_columns
2720  * Remove any columns in the GROUP BY clause that are redundant due to
2721  * being functionally dependent on other GROUP BY columns.
2722  *
2723  * Since some other DBMSes do not allow references to ungrouped columns, it's
2724  * not unusual to find all columns listed in GROUP BY even though listing the
2725  * primary-key columns would be sufficient. Deleting such excess columns
2726  * avoids redundant sorting work, so it's worth doing. When we do this, we
2727  * must mark the plan as dependent on the pkey constraint (compare the
2728  * parser's check_ungrouped_columns() and check_functional_grouping()).
2729  *
2730  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2731  * index as the determining columns. But as with check_functional_grouping(),
2732  * there's currently no way to represent dependency on a NOT NULL constraint,
2733  * so we consider only the pkey for now.
2734  */
2735 static void
2737 {
2738  Query *parse = root->parse;
2739  Bitmapset **groupbyattnos;
2740  Bitmapset **surplusvars;
2741  ListCell *lc;
2742  int relid;
2743 
2744  /* No chance to do anything if there are less than two GROUP BY items */
2745  if (list_length(parse->groupClause) < 2)
2746  return;
2747 
2748  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2749  if (parse->groupingSets)
2750  return;
2751 
2752  /*
2753  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2754  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2755  * that are GROUP BY items.
2756  */
2757  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2758  (list_length(parse->rtable) + 1));
2759  foreach(lc, parse->groupClause)
2760  {
2761  SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2762  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2763  Var *var = (Var *) tle->expr;
2764 
2765  /*
2766  * Ignore non-Vars and Vars from other query levels.
2767  *
2768  * XXX in principle, stable expressions containing Vars could also be
2769  * removed, if all the Vars are functionally dependent on other GROUP
2770  * BY items. But it's not clear that such cases occur often enough to
2771  * be worth troubling over.
2772  */
2773  if (!IsA(var, Var) ||
2774  var->varlevelsup > 0)
2775  continue;
2776 
2777  /* OK, remember we have this Var */
2778  relid = var->varno;
2779  Assert(relid <= list_length(parse->rtable));
2780  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2782  }
2783 
2784  /*
2785  * Consider each relation and see if it is possible to remove some of its
2786  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2787  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2788  * of the column attnos of RTE k that are removable GROUP BY items.
2789  */
2790  surplusvars = NULL; /* don't allocate array unless required */
2791  relid = 0;
2792  foreach(lc, parse->rtable)
2793  {
2794  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
2795  Bitmapset *relattnos;
2796  Bitmapset *pkattnos;
2797  Oid constraintOid;
2798 
2799  relid++;
2800 
2801  /* Only plain relations could have primary-key constraints */
2802  if (rte->rtekind != RTE_RELATION)
2803  continue;
2804 
2805  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2806  relattnos = groupbyattnos[relid];
2807  if (bms_membership(relattnos) != BMS_MULTIPLE)
2808  continue;
2809 
2810  /*
2811  * Can't remove any columns for this rel if there is no suitable
2812  * (i.e., nondeferrable) primary key constraint.
2813  */
2814  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2815  if (pkattnos == NULL)
2816  continue;
2817 
2818  /*
2819  * If the primary key is a proper subset of relattnos then we have
2820  * some items in the GROUP BY that can be removed.
2821  */
2822  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2823  {
2824  /*
2825  * To easily remember whether we've found anything to do, we don't
2826  * allocate the surplusvars[] array until we find something.
2827  */
2828  if (surplusvars == NULL)
2829  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2830  (list_length(parse->rtable) + 1));
2831 
2832  /* Remember the attnos of the removable columns */
2833  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2834 
2835  /* Also, mark the resulting plan as dependent on this constraint */
2836  parse->constraintDeps = lappend_oid(parse->constraintDeps,
2837  constraintOid);
2838  }
2839  }
2840 
2841  /*
2842  * If we found any surplus Vars, build a new GROUP BY clause without them.
2843  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2844  * markings, but that's harmless.)
2845  */
2846  if (surplusvars != NULL)
2847  {
2848  List *new_groupby = NIL;
2849 
2850  foreach(lc, parse->groupClause)
2851  {
2852  SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2853  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2854  Var *var = (Var *) tle->expr;
2855 
2856  /*
2857  * New list must include non-Vars, outer Vars, and anything not
2858  * marked as surplus.
2859  */
2860  if (!IsA(var, Var) ||
2861  var->varlevelsup > 0 ||
2863  surplusvars[var->varno]))
2864  new_groupby = lappend(new_groupby, sgc);
2865  }
2866 
2867  parse->groupClause = new_groupby;
2868  }
2869 }
2870 
2871 /*
2872  * preprocess_groupclause - do preparatory work on GROUP BY clause
2873  *
2874  * The idea here is to adjust the ordering of the GROUP BY elements
2875  * (which in itself is semantically insignificant) to match ORDER BY,
2876  * thereby allowing a single sort operation to both implement the ORDER BY
2877  * requirement and set up for a Unique step that implements GROUP BY.
2878  *
2879  * In principle it might be interesting to consider other orderings of the
2880  * GROUP BY elements, which could match the sort ordering of other
2881  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2882  * bother with that, though. Hashed grouping will frequently win anyway.
2883  *
2884  * Note: we need no comparable processing of the distinctClause because
2885  * the parser already enforced that that matches ORDER BY.
2886  *
2887  * For grouping sets, the order of items is instead forced to agree with that
2888  * of the grouping set (and items not in the grouping set are skipped). The
2889  * work of sorting the order of grouping set elements to match the ORDER BY if
2890  * possible is done elsewhere.
      *
      * root: planner state; the visible code uses it only to reach root->parse,
      * whose groupClause and sortClause are read but never assigned.
      * force: if non-NIL, a list of integer references (read with lfirst_int);
      * the result then gets one entry per member, in that order, and the
      * ORDER BY matching further down is skipped.
      *
      * Returns parse->groupClause itself whenever no rearrangement is made,
      * otherwise a newly built list.
      *
      * NOTE(review): this listing drops source line 2893 (between "static List *"
      * and the opening brace), i.e. the function name and parameter list; the
      * parameter names above are taken from their uses in the body.
2891  */
2892 static List *
2894 {
2895  Query *parse = root->parse;
2896  List *new_groupclause = NIL;
2897  bool partial_match;
2898  ListCell *sl;
2899  ListCell *gl;
2900 
2901  /* For grouping sets, we need to force the ordering */
2902  if (force)
2903  {
2904  foreach(sl, force)
2905  {
2906  Index ref = lfirst_int(sl);
      /*
       * NOTE(review): source line 2907 is missing from this listing. It
       * presumably declares "cl" as the GROUP BY clause looked up from
       * "ref" -- confirm against the real source.
       */
2908 
2909  new_groupclause = lappend(new_groupclause, cl);
2910  }
2911 
2912  return new_groupclause;
2913  }
2914 
2915  /* If no ORDER BY, nothing useful to do here */
2916  if (parse->sortClause == NIL)
2917  return parse->groupClause;
2918 
2919  /*
2920  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2921  * items, but only as far as we can make a matching prefix.
2922  *
2923  * This code assumes that the sortClause contains no duplicate items.
2924  */
2925  foreach(sl, parse->sortClause)
2926  {
2927  SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2928 
2929  foreach(gl, parse->groupClause)
2930  {
2931  SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2932 
2933  if (equal(gc, sc))
2934  {
2935  new_groupclause = lappend(new_groupclause, gc);
2936  break;
2937  }
2938  }
      /* gl is NULL only when the inner loop finished without taking the break */
2939  if (gl == NULL)
2940  break; /* no match, so stop scanning */
2941  }
2942 
2943  /* Did we match all of the ORDER BY list, or just some of it? */
      /* (sl is still non-NULL only if the outer loop was left via the break) */
2944  partial_match = (sl != NULL);
2945 
2946  /* If no match at all, no point in reordering GROUP BY */
2947  if (new_groupclause == NIL)
2948  return parse->groupClause;
2949 
2950  /*
2951  * Add any remaining GROUP BY items to the new list, but only if we were
2952  * able to make a complete match. In other words, we only rearrange the
2953  * GROUP BY list if the result is that one list is a prefix of the other
2954  * --- otherwise there's no possibility of a common sort. Also, give up
2955  * if there are any non-sortable GROUP BY items, since then there's no
2956  * hope anyway.
2957  */
2958  foreach(gl, parse->groupClause)
2959  {
2960  SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2961 
      /*
       * A pointer comparison suffices here: the entries appended above are
       * the very same SortGroupClause pointers taken from parse->groupClause.
       */
2962  if (list_member_ptr(new_groupclause, gc))
2963  continue; /* it matched an ORDER BY item */
2964  if (partial_match)
2965  return parse->groupClause; /* give up, no common sort possible */
2966  if (!OidIsValid(gc->sortop))
2967  return parse->groupClause; /* give up, GROUP BY can't be sorted */
2968  new_groupclause = lappend(new_groupclause, gc);
2969  }
2970 
2971  /* Success --- install the rearranged GROUP BY list */
      /* Every GROUP BY item should now appear exactly once in the new list */
2972  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2973  return new_groupclause;
2974 }
2975 
2976 /*
2977  * Extract lists of grouping sets that can be implemented using a single
2978  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2979  *
2980  * Input must be sorted with smallest sets first. Result has each sublist
2981  * sorted with smallest sets first.
2982  *
2983  * We want to produce the absolute minimum possible number of lists here to
2984  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2985  * of finding the minimal partition of a partially-ordered set into chains
2986  * (which is what we need, taking the list of grouping sets as a poset ordered
2987  * by set inclusion) can be mapped to the problem of finding the maximum
2988  * cardinality matching on a bipartite graph, which is solvable in polynomial
2989  * time with a worst case of no worse than O(n^2.5) and usually much
2990  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2991  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2992  * half a second on my modest system even with optimization off and assertions
2993  * on.)
      *
      * groupingSets: list whose members are themselves integer lists (each is
      * walked with lfirst_int); leading NIL members are the empty sets.
      *
      * If every input set is empty, the result is a one-element list holding
      * groupingSets itself.
      *
      * NOTE(review): this listing drops source lines 2996 (the function name and
      * parameter list) and 3009 (presumably the declaration of "state", which
      * is assigned from BipartiteMatch() below); the parameter name is taken
      * from its uses in the body.
2994  */
2995 static List *
2997 {
2998  int num_sets_raw = list_length(groupingSets);
2999  int num_empty = 0;
3000  int num_sets = 0; /* distinct sets */
3001  int num_chains = 0;
3002  List *result = NIL;
3003  List **results;
3004  List **orig_sets;
3005  Bitmapset **set_masks;
3006  int *chains;
3007  short **adjacency;
3008  short *adjacency_buf;
3010  int i;
3011  int j;
3012  int j_size;
3013  ListCell *lc1 = list_head(groupingSets);
3014  ListCell *lc;
3015 
3016  /*
3017  * Start by stripping out empty sets. The algorithm doesn't require this,
3018  * but the planner currently needs all empty sets to be returned in the
3019  * first list, so we strip them here and add them back after.
3020  */
3021  while (lc1 && lfirst(lc1) == NIL)
3022  {
3023  ++num_empty;
3024  lc1 = lnext(lc1);
3025  }
3026 
3027  /* bail out now if it turns out that all we had were empty sets. */
3028  if (!lc1)
3029  return list_make1(groupingSets);
3030 
3031  /*----------
3032  * We don't strictly need to remove duplicate sets here, but if we don't,
3033  * they tend to become scattered through the result, which is a bit
3034  * confusing (and irritating if we ever decide to optimize them out).
3035  * So we remove them here and add them back after.
3036  *
3037  * For each non-duplicate set, we fill in the following:
3038  *
3039  * orig_sets[i] = list of the original set lists
3040  * set_masks[i] = bitmapset for testing inclusion
3041  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3042  *
3043  * chains[i] will be the result group this set is assigned to.
3044  *
3045  * We index all of these from 1 rather than 0 because it is convenient
3046  * to leave 0 free for the NIL node in the graph algorithm.
3047  *----------
3048  */
      /* Arrays are sized for the worst case of no duplicates, plus unused slot 0 */
3049  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3050  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3051  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3052  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3053 
      /*
       * Bookkeeping for the scan below:
       *   i      = next free (1-based) slot for a distinct set
       *   j_size = size of the sets currently being scanned
       *   j      = slot of the first distinct set of size j_size, so slots
       *            j..i-1 hold same-size sets and slots 1..j-1 smaller ones
       */
3054  j_size = 0;
3055  j = 0;
3056  i = 1;
3057 
3058  for_each_cell(lc, lc1)
3059  {
3060  List *candidate = lfirst(lc);
3061  Bitmapset *candidate_set = NULL;
3062  ListCell *lc2;
3063  int dup_of = 0;
3064 
3065  foreach(lc2, candidate)
3066  {
3067  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3068  }
3069 
3070  /* we can only be a dup if we're the same length as a previous set */
3071  if (j_size == list_length(candidate))
3072  {
3073  int k;
3074 
      /* compare only against the already-stored sets of this same size */
3075  for (k = j; k < i; ++k)
3076  {
3077  if (bms_equal(set_masks[k], candidate_set))
3078  {
3079  dup_of = k;
3080  break;
3081  }
3082  }
3083  }
3084  else if (j_size < list_length(candidate))
3085  {
      /* first set of a larger size: start a new same-size range at slot i */
3086  j_size = list_length(candidate);
3087  j = i;
3088  }
3089 
3090  if (dup_of > 0)
3091  {
      /* duplicate: remember the original list alongside its twin */
3092  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3093  bms_free(candidate_set);
3094  }
3095  else
3096  {
3097  int k;
3098  int n_adj = 0;
3099 
3100  orig_sets[i] = list_make1(candidate);
3101  set_masks[i] = candidate_set;
3102 
3103  /* fill in adjacency list; no need to compare equal-size sets */
3104 
      /* slots 1..j-1 are the strictly smaller sets; keep those that are subsets */
3105  for (k = j - 1; k > 0; --k)
3106  {
3107  if (bms_is_subset(set_masks[k], candidate_set))
3108  adjacency_buf[++n_adj] = k;
3109  }
3110 
3111  if (n_adj > 0)
3112  {
      /* element 0 carries the count, giving the [n, v1..vn] layout */
3113  adjacency_buf[0] = n_adj;
3114  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3115  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3116  }
3117  else
3118  adjacency[i] = NULL;
3119 
3120  ++i;
3121  }
3122  }
3123 
3124  num_sets = i - 1;
3125 
3126  /*
3127  * Apply the graph matching algorithm to do the work.
3128  */
3129  state = BipartiteMatch(num_sets, num_sets, adjacency);
3130 
3131  /*
3132  * Now, the state->pair* fields have the info we need to assign sets to
3133  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3134  * pair_vu[v] = u (both will be true, but we check both so that we can do
3135  * it in one pass)
3136  */
3137  chains = palloc0((num_sets + 1) * sizeof(int));
3138 
3139  for (i = 1; i <= num_sets; ++i)
3140  {
3141  int u = state->pair_vu[i];
3142  int v = state->pair_uv[i];
3143 
      /*
       * A partner with a smaller index already has its chain number, so
       * inherit it; otherwise this set opens a new chain.
       */
3144  if (u > 0 && u < i)
3145  chains[i] = chains[u];
3146  else if (v > 0 && v < i)
3147  chains[i] = chains[v];
3148  else
3149  chains[i] = ++num_chains;
3150  }
3151 
3152  /* build result lists. */
3153  results = palloc0((num_chains + 1) * sizeof(List *));
3154 
3155  for (i = 1; i <= num_sets; ++i)
3156  {
3157  int c = chains[i];
3158 
3159  Assert(c > 0);
3160 
3161  results[c] = list_concat(results[c], orig_sets[i]);
3162  }
3163 
3164  /* push any empty sets back on the first list. */
      /*
       * results[1] always exists here: we returned early unless at least one
       * non-empty set was present, and set 1 always opens chain 1.
       */
3165  while (num_empty-- > 0)
3166  results[1] = lcons(NIL, results[1]);
3167 
3168  /* make result list */
3169  for (i = 1; i <= num_chains; ++i)
3170  result = lappend(result, results[i]);
3171 
3172  /*
3173  * Free all the things.
3174  *
3175  * (This is over-fussy for small sets but for large sets we could have
3176  * tied up a nontrivial amount of memory.)
3177  */
3178  BipartiteMatchFree(state);
3179  pfree(results);
3180  pfree(chains);
3181  for (i = 1; i <= num_sets; ++i)
3182  if (adjacency[i])
3183  pfree(adjacency[i]);
3184  pfree(adjacency);
3185  pfree(adjacency_buf);
      /* only the pointer array is released; the lists went into results[] above */
3186  pfree(orig_sets);
3187  for (i = 1; i <= num_sets; ++i)
3188  bms_free(set_masks[i]);
3189  pfree(set_masks);
3190 
3191  return result;
3192 }
3193 
3194 /*
3195  * Reorder the elements of a list of grouping sets such that they have correct
3196  * prefix relationships. Also inserts the GroupingSetData annotations.
3197  *
3198  * The input must be ordered with smallest sets first; the result is returned
3199  * with largest sets first. Note that the result shares no list substructure
3200  * with the input, so it's safe for the caller to modify it later.
3201  *
3202  * If we're passed in a sortclause, we follow its order of columns to the
3203  * extent possible, to minimize the chance that we add unnecessary sorts.
3204  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3205  * gets implemented in one pass.)
3206  */
3207 static List *
3208 reorder_grouping_sets(List *groupingsets, List *sortclause)
3209 {
3210  ListCell *lc;
3211  ListCell *lc2;
3212  List *previous = NIL;
3213  List *result = NIL;
3214 
3215  foreach(lc, groupingsets)
3216  {
3217  List *candidate = lfirst(lc);
3218  List *new_elems = list_difference_int(candidate, previous);
3220 
3221  if (list_length(new_elems) > 0)
3222  {
3223  while (list_length(sortclause) > list_length(previous))
3224  {
3225  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3226  int ref = sc->tleSortGroupRef;
3227 
3228  if (list_member_int(new_elems, ref))
3229  {
3230  previous = lappend_int(previous, ref);
3231  new_elems = list_delete_int(new_elems, ref);
3232  }
3233  else
3234  {
3235  /* diverged from the sortclause; give up on it */
3236  sortclause = NIL;
3237  break;
3238  }
3239  }
3240 
3241  foreach(lc2, new_elems)
3242  {
3243  previous = lappend_int(previous, lfirst_int(lc2));
3244  }
3245  }
3246 
3247  gs->set = list_copy(previous);
3248  result = lcons(gs, result);
3249  list_free(new_elems);
3250  }
3251 
3252  list_free(previous);
3253 
3254  return result;
3255 }
3256 
3257 /*
3258  * Compute query_pathkeys and other pathkeys during plan generation
      *
      * root: planner state; this sets root->group_pathkeys, window_pathkeys,
      * distinct_pathkeys, sort_pathkeys and query_pathkeys.
      * extra: opaque pointer that is cast to standard_qp_extra *; its tlist,
      * activeWindows and groupClause fields are read.
      *
      * NOTE(review): this listing drops several source lines (3261, 3276, 3287,
      * 3295, 3297, 3304): the function name and parameter list, the start of
      * each call whose trailing arguments are still shown below, and the second
      * half of the DISTINCT test. The callee names are therefore not visible
      * here; the parameter names are taken from their uses in the body.
3259  */
3260 static void
3262 {
3263  Query *parse = root->parse;
3264  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3265  List *tlist = qp_extra->tlist;
3266  List *activeWindows = qp_extra->activeWindows;
3267 
3268  /*
3269  * Calculate pathkeys that represent grouping/ordering requirements. The
3270  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3271  * be, in which case we just leave their pathkeys empty.
3272  */
      /* Note: this uses the groupClause handed in via qp_extra, not parse->groupClause */
3273  if (qp_extra->groupClause &&
3274  grouping_is_sortable(qp_extra->groupClause))
3275  root->group_pathkeys =
3277  qp_extra->groupClause,
3278  tlist);
3279  else
3280  root->group_pathkeys = NIL;
3281 
3282  /* We consider only the first (bottom) window in pathkeys logic */
3283  if (activeWindows != NIL)
3284  {
3285  WindowClause *wc = (WindowClause *) linitial(activeWindows);
3286 
3288  wc,
3289  tlist);
3290  }
3291  else
3292  root->window_pathkeys = NIL;
3293 
3294  if (parse->distinctClause &&
3296  root->distinct_pathkeys =
3298  parse->distinctClause,
3299  tlist);
3300  else
3301  root->distinct_pathkeys = NIL;
3302 
      /* sort_pathkeys is assigned unconditionally, from parse->sortClause */
3303  root->sort_pathkeys =
3305  parse->sortClause,
3306  tlist);
3307 
3308  /*
3309  * Figure out whether we want a sorted result from query_planner.
3310  *
3311  * If we have a sortable GROUP BY clause, then we want a result sorted
3312  * properly for grouping. Otherwise, if we have window functions to
3313  * evaluate, we try to sort for the first window. Otherwise, if there's a
3314  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3315  * we try to produce output that's sufficiently well sorted for the
3316  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3317  * by the ORDER BY clause.
3318  *
3319  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3320  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3321  * that might just leave us failing to exploit an available sort order at
3322  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3323  * much easier, since we know that the parser ensured that one is a
3324  * superset of the other.
3325  */
3326  if (root->group_pathkeys)
3327  root->query_pathkeys = root->group_pathkeys;
3328  else if (root->window_pathkeys)
3329  root->query_pathkeys = root->window_pathkeys;
      /* "more rigorous" is judged simply by which pathkey list is longer */
3330  else if (list_length(root->distinct_pathkeys) >
3331  list_length(root->sort_pathkeys))
3332  root->query_pathkeys = root->distinct_pathkeys;
3333  else if (root->sort_pathkeys)
3334  root->query_pathkeys = root->sort_pathkeys;
3335  else
3336  root->query_pathkeys = NIL;
3337 }
3338 
3339 /*
3340  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3341  *
3342  * path_rows: number of output rows from scan/join step
3343  * gd: grouping set data, or NULL if not doing grouping sets
3344  *
3345  * If doing grouping sets, we also annotate the gd data with the estimates
3346  * for each set and each individual rollup list, with a view to later
3347  * determining whether some combination of them could be hashed instead.
      *
      * Specifically, this writes numGroups on every RollupData in gd->rollups
      * and on each GroupingSetData visited, plus gd->dNumHashGroups when
      * gd->hash_sets_idx is non-empty. The return value is the sum over all
      * grouping sets, the single estimate for a plain GROUP BY, the number of
      * grouping sets when they are all empty, or 1 otherwise.
      *
      * NOTE(review): this listing drops source line 3350, the first line of the
      * signature (the function name and, judging by the body, a "root"
      * parameter).
3348  */
3349 static double
3351  double path_rows,
3352  grouping_sets_data *gd)
3353 {
3354  Query *parse = root->parse;
3355  double dNumGroups;
3356 
3357  if (parse->groupClause)
3358  {
3359  List *groupExprs;
3360 
3361  if (parse->groupingSets)
3362  {
3363  /* Add up the estimates for each grouping set */
3364  ListCell *lc;
3365  ListCell *lc2;
3366 
3367  Assert(gd); /* keep Coverity happy */
3368 
3369  dNumGroups = 0;
3370 
3371  foreach(lc, gd->rollups)
3372  {
3373  RollupData *rollup = lfirst(lc);
      /*
       * This inner "lc" shadows the one driving the enclosing foreach.
       * The forboth below iterates with the inner variable; the outer
       * loop's own control expressions lie outside this block and so
       * still refer to the outer one.
       */
3374  ListCell *lc;
3375 
3376  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3377  parse->targetList);
3378 
3379  rollup->numGroups = 0.0;
3380 
3381  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3382  {
3383  List *gset = (List *) lfirst(lc);
3384  GroupingSetData *gs = lfirst(lc2);
      /* one estimate per grouping set; &gset selects the set to estimate for */
3385  double numGroups = estimate_num_groups(root,
3386  groupExprs,
3387  path_rows,
3388  &gset);
3389 
3390  gs->numGroups = numGroups;
3391  rollup->numGroups += numGroups;
3392  }
3393 
3394  dNumGroups += rollup->numGroups;
3395  }
3396 
3397  if (gd->hash_sets_idx)
3398  {
      /* again shadows the "lc" declared at the top of this branch */
3399  ListCell *lc;
3400 
3401  gd->dNumHashGroups = 0;
3402 
      /* here the expressions come from the full GROUP BY list, not a rollup's */
3403  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3404  parse->targetList);
3405 
3406  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3407  {
3408  List *gset = (List *) lfirst(lc);
3409  GroupingSetData *gs = lfirst(lc2);
3410  double numGroups = estimate_num_groups(root,
3411  groupExprs,
3412  path_rows,
3413  &gset);
3414 
3415  gs->numGroups = numGroups;
3416  gd->dNumHashGroups += numGroups;
3417  }
3418 
3419  dNumGroups += gd->dNumHashGroups;
3420  }
3421  }
3422  else
3423  {
3424  /* Plain GROUP BY */
3425  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3426  parse->targetList);
3427 
3428  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3429  NULL);
3430  }
3431  }
3432  else if (parse->groupingSets)
3433  {
3434  /* Empty grouping sets ... one result row for each one */
3435  dNumGroups = list_length(parse->groupingSets);
3436  }
      /* The two remaining cases both yield 1; they differ only in what they describe */
3437  else if (parse->hasAggs || root->hasHavingQual)
3438  {
3439  /* Plain aggregation, one result row */
3440  dNumGroups = 1;
3441  }
3442  else
3443  {
3444  /* Not grouping */
3445  dNumGroups = 1;
3446  }
3447 
3448  return dNumGroups;
3449 }
3450 
3451 /*
3452  * estimate_hashagg_tablesize
3453  * estimate the number of bytes that a hash aggregate hashtable will
3454  * require based on the agg_costs, path width and dNumGroups.
3455  *
3456  * XXX this may be over-estimating the size now that hashagg knows to omit
3457  * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3458  * grouping columns not in the hashed set are counted here even though hashagg
3459  * won't store them. Is this a problem?
      *
      * path: input path; only path->pathtarget->width is used.
      * agg_costs: supplies transitionSpace and numAggs.
      * dNumGroups: estimated number of hash table entries.
      *
      * NOTE(review): this listing drops source lines 3462 (the first line of the
      * signature; "path" and "agg_costs" are taken from their uses in the body)
      * and 3469 (the second addend of the initial hashentrysize expression).
3460  */
3461 static Size
3463  double dNumGroups)
3464 {
3465  Size hashentrysize;
3466 
3467  /* Estimate per-hash-entry space at tuple width... */
3468  hashentrysize = MAXALIGN(path->pathtarget->width) +
3470 
3471  /* plus space for pass-by-ref transition values... */
3472  hashentrysize += agg_costs->transitionSpace;
3473  /* plus the per-hash-entry overhead */
3474  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3475 
3476  /*
3477  * Note that this disregards the effect of fill-factor and growth policy
3478  * of the hash-table. That's probably ok, given that the default
3479  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3480  * "double-in-size" growth policies here.
3481  */
      /*
       * The product is computed in double (dNumGroups is a double) and is
       * converted back to Size by the return.
       * NOTE(review): nothing here bounds the product to the range of Size.
       */
3482  return hashentrysize * dNumGroups;
3483 }
3484 
3485 /*
3486  * create_grouping_paths
3487  *
3488  * Build a new upperrel containing Paths for grouping and/or aggregation.
3489  *
3490  * input_rel: contains the source-data Paths
3491  * target: the pathtarget for the result Paths to compute
3492  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
3493  * gd: grouping sets data, or NULL if not doing grouping sets
3496  *
3497  * Note: all Paths in input_rel are expected to return the target computed
3498  * by make_group_input_target.
3499  *
3500  * We need to consider sorted and hashed aggregation in the same function,
3501  * because otherwise (1) it would be harder to throw an appropriate error
3502  * message if neither way works, and (2) we should not allow hashtable size
3503  * considerations to dissuade us from using hashing if sorting is not possible.
3504  */
3505 static RelOptInfo *
3507  RelOptInfo *input_rel,
3508  PathTarget *target,
3509  const AggClauseCosts *agg_costs,
3510  grouping_sets_data *gd)
3511 {
3512  Query *parse = root->parse;
3513  Path *cheapest_path = input_rel->cheapest_total_path;
3514  RelOptInfo *grouped_rel;
3515  PathTarget *partial_grouping_target = NULL;
3516  AggClauseCosts agg_partial_costs; /* parallel only */
3517  AggClauseCosts agg_final_costs; /* parallel only */
3518  Size hashaggtablesize;
3519  double dNumGroups;
3520  double dNumPartialGroups = 0;
3521  bool can_hash;
3522  bool can_sort;
3523  bool try_parallel_aggregation;
3524 
3525  ListCell *lc;
3526 
3527  /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3528  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3529 
3530  /*
3531  * If the input relation is not parallel-safe, then the grouped relation
3532  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3533  * target list and HAVING quals are parallel-safe.
3534  */
3535  if (input_rel->consider_parallel &&
3536  is_parallel_safe(root, (Node *) target->exprs) &&
3537  is_parallel_safe(root, (Node *) parse->havingQual))
3538  grouped_rel->consider_parallel = true;
3539 
3540  /*
3541  * If the input rel belongs to a single FDW, so does the grouped rel.
3542  */
3543  grouped_rel->serverid = input_rel->serverid;
3544  grouped_rel->userid = input_rel->userid;
3545  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3546  grouped_rel->fdwroutine = input_rel->fdwroutine;
3547 
3548  /*
3549  * Check for degenerate grouping.
3550  */
3551  if ((root->hasHavingQual || parse->groupingSets) &&
3552  !parse->hasAggs && parse->groupClause == NIL)
3553  {
3554  /*
3555  * We have a HAVING qual and/or grouping sets, but no aggregates and
3556  * no GROUP BY (which implies that the grouping sets are all empty).
3557  *
3558  * This is a degenerate case in which we are supposed to emit either
3559  * zero or one row for each grouping set depending on whether HAVING
3560  * succeeds. Furthermore, there cannot be any variables in either
3561  * HAVING or the targetlist, so we actually do not need the FROM table
3562  * at all! We can just throw away the plan-so-far and generate a
3563  * Result node. This is a sufficiently unusual corner case that it's
3564  * not worth contorting the structure of this module to avoid having
3565  * to generate the earlier paths in the first place.
3566  */
3567  int nrows = list_length(parse->groupingSets);
3568  Path *path;
3569 
3570  if (nrows > 1)
3571  {
3572  /*
3573  * Doesn't seem worthwhile writing code to cons up a
3574  * generate_series or a values scan to emit multiple rows. Instead
3575  * just make N clones and append them. (With a volatile HAVING
3576  * clause, this means you might get between 0 and N output rows.
3577  * Offhand I think that's desired.)
3578  */
3579  List *paths = NIL;
3580 
3581  while (--nrows >= 0)
3582  {
3583  path = (Path *)
3584  create_result_path(root, grouped_rel,
3585  target,
3586  (List *) parse->havingQual);
3587  paths = lappend(paths, path);
3588  }
3589  path = (Path *)
3590  create_append_path(grouped_rel,
3591  paths,
3592  NULL,
3593  0,
3594  NIL);
3595  path->pathtarget = target;
3596  }
3597  else
3598  {
3599  /* No grouping sets, or just one, so one output row */
3600  path = (Path *)
3601  create_result_path(root, grouped_rel,
3602  target,
3603  (List *) parse->havingQual);
3604  }
3605 
3606  add_path(grouped_rel, path);
3607 
3608  /* No need to consider any other alternatives. */
3609  set_cheapest(grouped_rel);
3610 
3611  return grouped_rel;
3612  }
3613 
3614  /*
3615  * Estimate number of groups.
3616  */
3617  dNumGroups = get_number_of_groups(root,
3618  cheapest_path->rows,
3619  gd);
3620 
3621  /*
3622  * Determine whether it's possible to perform sort-based implementations
3623  * of grouping. (Note that if groupClause is empty,
3624  * grouping_is_sortable() is trivially true, and all the
3625  * pathkeys_contained_in() tests will succeed too, so that we'll consider
3626  * every surviving input path.)
3627  *
3628  * If we have grouping sets, we might be able to sort some but not all of
3629  * them; in this case, we need can_sort to be true as long as we must
3630  * consider any sorted-input plan.
3631  */
3632  can_sort = (gd && gd->rollups != NIL)
3633  || grouping_is_sortable(parse->groupClause);
3634 
3635  /*
3636  * Determine whether we should consider hash-based implementations of
3637  * grouping.
3638  *
3639  * Hashed aggregation only applies if we're grouping. If we have grouping
3640  * sets, some groups might be hashable but others not; in this case we set
3641  * can_hash true as long as there is nothing globally preventing us from
3642  * hashing (and we should therefore consider plans with hashes).
3643  *
3644  * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3645  * aggregates. (Doing so would imply storing *all* the input values in
3646  * the hash table, and/or running many sorts in parallel, either of which
3647  * seems like a certain loser.) We similarly don't support ordered-set
3648  * aggregates in hashed aggregation, but that case is also included in the
3649  * numOrderedAggs count.
3650  *
3651  * Note: grouping_is_hashable() is much more expensive to check than the
3652  * other gating conditions, so we want to do it last.
3653  */
3654  can_hash = (parse->groupClause != NIL &&
3655  agg_costs->numOrderedAggs == 0 &&
3656  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
3657 
3658  /*
3659  * If grouped_rel->consider_parallel is true, then paths that we generate
3660  * for this grouping relation could be run inside of a worker, but that
3661  * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3662  * execution strategy. Figure that out.
3663  */
3664  if (!grouped_rel->consider_parallel)
3665  {
3666  /* Not even parallel-safe. */
3667  try_parallel_aggregation = false;
3668  }
3669  else if (input_rel->partial_pathlist == NIL)
3670  {
3671  /* Nothing to use as input for partial aggregate. */
3672  try_parallel_aggregation = false;
3673  }
3674  else if (!parse->hasAggs && parse->groupClause == NIL)
3675  {
3676  /*
3677  * We don't know how to do parallel aggregation unless we have either
3678  * some aggregates or a grouping clause.
3679  */
3680  try_parallel_aggregation = false;
3681  }
3682  else if (parse->groupingSets)
3683  {
3684  /* We don't know how to do grouping sets in parallel. */
3685  try_parallel_aggregation = false;
3686  }
3687  else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3688  {
3689  /* Insufficient support for partial mode. */
3690  try_parallel_aggregation = false;
3691  }
3692  else
3693  {
3694  /* Everything looks good. */
3695  try_parallel_aggregation = true;
3696  }
3697 
3698  /*
3699  * Before generating paths for grouped_rel, we first generate any possible
3700  * partial paths; that way, later code can easily consider both parallel
3701  * and non-parallel approaches to grouping. Note that the partial paths
3702  * we generate here are also partially aggregated, so simply pushing a
3703  * Gather node on top is insufficient to create a final path, as would be
3704  * the case for a scan/join rel.
3705  */
3706  if (try_parallel_aggregation)
3707  {
3708  Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3709 
3710  /*
3711  * Build target list for partial aggregate paths. These paths cannot
3712  * just emit the same tlist as regular aggregate paths, because (1) we
3713  * must include Vars and Aggrefs needed in HAVING, which might not
3714  * appear in the result tlist, and (2) the Aggrefs must be set in
3715  * partial mode.
3716  */
3717  partial_grouping_target = make_partial_grouping_target(root, target);
3718 
3719  /* Estimate number of partial groups. */
3720  dNumPartialGroups = get_number_of_groups(root,
3721  cheapest_partial_path->rows,
3722  gd);
3723 
3724  /*
3725  * Collect statistics about aggregates for estimating costs of
3726  * performing aggregation in parallel.
3727  */
3728  MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3729  MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3730  if (parse->hasAggs)
3731  {
3732  /* partial phase */
3733  get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
3735  &agg_partial_costs);
3736 
3737  /* final phase */
3738  get_agg_clause_costs(root, (Node *) target->exprs,
3740  &agg_final_costs);
3741  get_agg_clause_costs(root, parse->havingQual,
3743  &agg_final_costs);
3744  }
3745 
3746  if (can_sort)
3747  {
3748  /* This was checked before setting try_parallel_aggregation */
3749  Assert(parse->hasAggs || parse->groupClause);
3750 
3751  /*
3752  * Use any available suitably-sorted path as input, and also
3753  * consider sorting the cheapest partial path.
3754  */
3755  foreach(lc, input_rel->partial_pathlist)
3756  {
3757  Path *path = (Path *) lfirst(lc);
3758  bool is_sorted;
3759 
3760  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3761  path->pathkeys);
3762  if (path == cheapest_partial_path || is_sorted)
3763  {
3764  /* Sort the cheapest partial path, if it isn't already */
3765  if (!is_sorted)
3766  path = (Path *) create_sort_path(root,
3767  grouped_rel,
3768  path,
3769  root->group_pathkeys,
3770  -1.0);
3771 
3772  if (parse->hasAggs)
3773  add_partial_path(grouped_rel, (Path *)
3774  create_agg_path(root,
3775  grouped_rel,
3776  path,
3777  partial_grouping_target,
3778  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3780  parse->groupClause,
3781  NIL,
3782  &agg_partial_costs,
3783  dNumPartialGroups));
3784  else
3785  add_partial_path(grouped_rel, (Path *)
3786  create_group_path(root,
3787  grouped_rel,
3788  path,
3789  partial_grouping_target,
3790  parse->groupClause,
3791  NIL,
3792  dNumPartialGroups));
3793  }
3794  }
3795  }
3796 
3797  if (can_hash)
3798  {
3799  /* Checked above */
3800  Assert(parse->hasAggs || parse->groupClause);
3801 
3802  hashaggtablesize =
3803  estimate_hashagg_tablesize(cheapest_partial_path,
3804  &agg_partial_costs,
3805  dNumPartialGroups);
3806 
3807  /*
3808  * Tentatively produce a partial HashAgg Path, depending on if it
3809  * looks as if the hash table will fit in work_mem.
3810  */
3811  if (hashaggtablesize < work_mem * 1024L)
3812  {
3813  add_partial_path(grouped_rel, (Path *)
3814  create_agg_path(root,
3815  grouped_rel,
3816  cheapest_partial_path,
3817  partial_grouping_target,
3818  AGG_HASHED,
3820  parse->groupClause,
3821  NIL,
3822  &agg_partial_costs,
3823  dNumPartialGroups));
3824  }
3825  }
3826  }
3827 
3828  /* Build final grouping paths */
3829  if (can_sort)
3830  {
3831  /*
3832  * Use any available suitably-sorted path as input, and also consider
3833  * sorting the cheapest-total path.
3834  */
3835  foreach(lc, input_rel->pathlist)
3836  {
3837  Path *path = (Path *) lfirst(lc);
3838  bool is_sorted;
3839 
3840  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3841  path->pathkeys);
3842  if (path == cheapest_path || is_sorted)
3843  {
3844  /* Sort the cheapest-total path if it isn't already sorted */
3845  if (!is_sorted)
3846  path = (Path *) create_sort_path(root,
3847  grouped_rel,
3848  path,
3849  root->group_pathkeys,
3850  -1.0);
3851 
3852  /* Now decide what to stick atop it */
3853  if (parse->groupingSets)
3854  {
3855  consider_groupingsets_paths(root, grouped_rel,
3856  path, true, can_hash, target,
3857  gd, agg_costs, dNumGroups);
3858  }
3859  else if (parse->hasAggs)
3860  {
3861  /*
3862  * We have aggregation, possibly with plain GROUP BY. Make
3863  * an AggPath.
3864  */
3865  add_path(grouped_rel, (Path *)
3866  create_agg_path(root,
3867  grouped_rel,
3868  path,
3869  target,
3870  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3872  parse->groupClause,
3873  (List *) parse->havingQual,
3874  agg_costs,
3875  dNumGroups));
3876  }
3877  else if (parse->groupClause)
3878  {
3879  /*
3880  * We have GROUP BY without aggregation or grouping sets.
3881  * Make a GroupPath.
3882  */
3883  add_path(grouped_rel, (Path *)
3884  create_group_path(root,
3885  grouped_rel,
3886  path,
3887  target,
3888  parse->groupClause,
3889  (List *) parse->havingQual,
3890  dNumGroups));
3891  }
3892  else
3893  {
3894  /* Other cases should have been handled above */
3895  Assert(false);
3896  }
3897  }
3898  }
3899 
3900  /*
3901  * Now generate a complete GroupAgg Path atop of the cheapest partial
3902  * path. We can do this using either Gather or Gather Merge.
3903  */
3904  if (grouped_rel->partial_pathlist)
3905  {
3906  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
3907  double total_groups = path->rows * path->parallel_workers;
3908 
3909  path = (Path *) create_gather_path(root,
3910  grouped_rel,
3911  path,
3912  partial_grouping_target,
3913  NULL,
3914  &total_groups);
3915 
3916  /*
3917  * Since Gather's output is always unsorted, we'll need to sort,
3918  * unless there's no GROUP BY clause or a degenerate (constant)
3919  * one, in which case there will only be a single group.
3920  */
3921  if (root->group_pathkeys)
3922  path = (Path *) create_sort_path(root,
3923  grouped_rel,
3924  path,
3925  root->group_pathkeys,
3926  -1.0);
3927 
3928  if (parse->hasAggs)
3929  add_path(grouped_rel, (Path *)
3930  create_agg_path(root,
3931  grouped_rel,
3932  path,
3933  target,
3934  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3936  parse->groupClause,
3937  (List *) parse->havingQual,
3938  &agg_final_costs,
3939  dNumGroups));
3940  else
3941  add_path(grouped_rel, (Path *)
3942  create_group_path(root,
3943  grouped_rel,
3944  path,
3945  target,
3946  parse->groupClause,
3947  (List *) parse->havingQual,
3948  dNumGroups));
3949 
3950  /*
3951  * The point of using Gather Merge rather than Gather is that it
3952  * can preserve the ordering of the input path, so there's no
3953  * reason to try it unless (1) it's possible to produce more than
3954  * one output row and (2) we want the output path to be ordered.
3955  */
3956  if (parse->groupClause != NIL && root->group_pathkeys != NIL)
3957  {
3958  foreach(lc, grouped_rel->partial_pathlist)
3959  {
3960  Path *subpath = (Path *) lfirst(lc);
3961  Path *gmpath;
3962  double total_groups;
3963 
3964  /*
3965  * It's useful to consider paths that are already properly
3966  * ordered for Gather Merge, because those don't need a
3967  * sort. It's also useful to consider the cheapest path,
3968  * because sorting it in parallel and then doing Gather
3969  * Merge may be better than doing an unordered Gather
3970  * followed by a sort. But there's no point in
3971  * considering non-cheapest paths that aren't already
3972  * sorted correctly.
3973  */
3974  if (path != subpath &&
3976  subpath->pathkeys))
3977  continue;
3978 
3979  total_groups = subpath->rows * subpath->parallel_workers;
3980 
3981  gmpath = (Path *)
3983  grouped_rel,
3984  subpath,
3985  partial_grouping_target,
3986  root->group_pathkeys,
3987  NULL,
3988  &total_groups);
3989 
3990  if (parse->hasAggs)
3991  add_path(grouped_rel, (Path *)
3992  create_agg_path(root,
3993  grouped_rel,
3994  gmpath,
3995  target,
3996  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3998  parse->groupClause,
3999  (List *) parse->havingQual,
4000  &agg_final_costs,
4001  dNumGroups));
4002  else
4003  add_path(grouped_rel, (Path *)
4004  create_group_path(root,
4005  grouped_rel,
4006  gmpath,
4007  target,
4008  parse->groupClause,
4009  (List *) parse->havingQual,
4010  dNumGroups));
4011  }
4012  }
4013  }
4014  }
4015 
4016  if (can_hash)
4017  {
4018  if (parse->groupingSets)
4019  {
4020  /*
4021  * Try for a hash-only groupingsets path over unsorted input.
4022  */
4023  consider_groupingsets_paths(root, grouped_rel,
4024  cheapest_path, false, true, target,
4025  gd, agg_costs, dNumGroups);
4026  }
4027  else
4028  {
4029  hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
4030  agg_costs,
4031  dNumGroups);
4032 
4033  /*
4034  * Provided that the estimated size of the hashtable does not
4035  * exceed work_mem, we'll generate a HashAgg Path, although if we
4036  * were unable to sort above, then we'd better generate a Path, so
4037  * that we at least have one.
4038  */
4039  if (hashaggtablesize < work_mem * 1024L ||
4040  grouped_rel->pathlist == NIL)
4041  {
4042  /*
4043  * We just need an Agg over the cheapest-total input path,
4044  * since input order won't matter.
4045  */
4046  add_path(grouped_rel, (Path *)
4047  create_agg_path(root, grouped_rel,
4048  cheapest_path,
4049  target,
4050  AGG_HASHED,
4052  parse->groupClause,
4053  (List *) parse->havingQual,
4054  agg_costs,
4055  dNumGroups));
4056  }
4057  }
4058 
4059  /*
4060  * Generate a HashAgg Path atop of the cheapest partial path. Once
4061  * again, we'll only do this if it looks as though the hash table
4062  * won't exceed work_mem.
4063  */
4064  if (grouped_rel->partial_pathlist)
4065  {
4066  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4067 
4068  hashaggtablesize = estimate_hashagg_tablesize(path,
4069  &agg_final_costs,
4070  dNumGroups);
4071 
4072  if (hashaggtablesize < work_mem * 1024L)
4073  {
4074  double total_groups = path->rows * path->parallel_workers;
4075 
4076  path = (Path *) create_gather_path(root,
4077  grouped_rel,
4078  path,
4079  partial_grouping_target,
4080  NULL,
4081  &total_groups);
4082 
4083  add_path(grouped_rel, (Path *)
4084  create_agg_path(root,
4085  grouped_rel,
4086  path,
4087  target,
4088  AGG_HASHED,
4090  parse->groupClause,
4091  (List *) parse->havingQual,
4092  &agg_final_costs,
4093  dNumGroups));
4094  }
4095  }
4096  }
4097 
4098  /* Give a helpful error if we failed to find any implementation */
4099  if (grouped_rel->pathlist == NIL)
4100  ereport(ERROR,
4101  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4102  errmsg("could not implement GROUP BY"),
4103  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4104 
4105  /*
4106  * If there is an FDW that's responsible for all baserels of the query,
4107  * let it consider adding ForeignPaths.
4108  */
4109  if (grouped_rel->fdwroutine &&
4110  grouped_rel->fdwroutine->GetForeignUpperPaths)
4112  input_rel, grouped_rel);
4113 
4114  /* Let extensions possibly add some more paths */
4116  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4117  input_rel, grouped_rel);
4118 
4119  /* Now choose the best path(s) */
4120  set_cheapest(grouped_rel);
4121 
4122  /*
4123  * We've been using the partial pathlist for the grouped relation to hold
4124  * partially aggregated paths, but that's actually a little bit bogus
4125  * because it's unsafe for later planning stages -- like ordered_rel ---
4126  * to get the idea that they can use these partial paths as if they didn't
4127  * need a FinalizeAggregate step. Zap the partial pathlist at this stage
4128  * so we don't get confused.
4129  */
4130  grouped_rel->partial_pathlist = NIL;
4131 
4132  return grouped_rel;
4133 }
4134 
4135 
4136 /*
4137  * For a given input path, consider the possible ways of doing grouping sets on
4138  * it, by combinations of hashing and sorting. This can be called multiple
4139  * times, so it's important that it not scribble on input. No result is
4140  * returned, but any generated paths are added to grouped_rel.
4141  */
4142 static void
4144  RelOptInfo *grouped_rel,
4145  Path *path,
4146  bool is_sorted,
4147  bool can_hash,
4148  PathTarget *target,
4149  grouping_sets_data *gd,
4150  const AggClauseCosts *agg_costs,
4151  double dNumGroups)
4152 {
4153  Query *parse = root->parse;
4154 
4155  /*
4156  * If we're not being offered sorted input, then only consider plans that
4157  * can be done entirely by hashing.
4158  *
4159  * We can hash everything if it looks like it'll fit in work_mem. But if
4160  * the input is actually sorted despite not being advertised as such, we
4161  * prefer to make use of that in order to use less memory.
4162  *
4163  * If none of the grouping sets are sortable, then ignore the work_mem
4164  * limit and generate a path anyway, since otherwise we'll just fail.
4165  */
4166  if (!is_sorted)
4167  {
4168  List *new_rollups = NIL;
4169  RollupData *unhashed_rollup = NULL;
4170  List *sets_data;
4171  List *empty_sets_data = NIL;
4172  List *empty_sets = NIL;
4173  ListCell *lc;
4174  ListCell *l_start = list_head(gd->rollups);
4175  AggStrategy strat = AGG_HASHED;
4176  Size hashsize;
4177  double exclude_groups = 0.0;
4178 
4179  Assert(can_hash);
4180 
4181  if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4182  {
4183  unhashed_rollup = lfirst(l_start);
4184  exclude_groups = unhashed_rollup->numGroups;
4185  l_start = lnext(l_start);
4186  }
4187 
4188  hashsize = estimate_hashagg_tablesize(path,
4189  agg_costs,
4190  dNumGroups - exclude_groups);
4191 
4192  /*
4193  * gd->rollups is empty if we have only unsortable columns to work
4194  * with. Override work_mem in that case; otherwise, we'll rely on the
4195  * sorted-input case to generate usable mixed paths.
4196  */
4197  if (hashsize > work_mem * 1024L && gd->rollups)
4198  return; /* nope, won't fit */
4199 
4200  /*
4201  * We need to burst the existing rollups list into individual grouping
4202  * sets and recompute a groupClause for each set.
4203  */
4204  sets_data = list_copy(gd->unsortable_sets);
4205 
4206  for_each_cell(lc, l_start)
4207  {
4208  RollupData *rollup = lfirst(lc);
4209 
4210  /*
4211  * If we find an unhashable rollup that's not been skipped by the
4212  * "actually sorted" check above, we can't cope; we'd need sorted
4213  * input (with a different sort order) but we can't get that here.
4214  * So bail out; we'll get a valid path from the is_sorted case
4215  * instead.
4216  *
4217  * The mere presence of empty grouping sets doesn't make a rollup
4218  * unhashable (see preprocess_grouping_sets), we handle those
4219  * specially below.
4220  */
4221  if (!rollup->hashable)
4222  return;
4223  else
4224  sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
4225  }
4226  foreach(lc, sets_data)
4227  {
4228  GroupingSetData *gs = lfirst(lc);
4229  List *gset = gs->set;
4230  RollupData *rollup;
4231 
4232  if (gset == NIL)
4233  {
4234  /* Empty grouping sets can't be hashed. */
4235  empty_sets_data = lappend(empty_sets_data, gs);
4236  empty_sets = lappend(empty_sets, NIL);
4237  }
4238  else
4239  {
4240  rollup = makeNode(RollupData);
4241 
4242  rollup->groupClause = preprocess_groupclause(root, gset);
4243  rollup->gsets_data = list_make1(gs);
4244  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4245  rollup->gsets_data,
4246  gd->tleref_to_colnum_map);
4247  rollup->numGroups = gs->numGroups;
4248  rollup->hashable = true;
4249  rollup->is_hashed = true;
4250  new_rollups = lappend(new_rollups, rollup);
4251  }
4252  }
4253 
4254  /*
4255  * If we didn't find anything nonempty to hash, then bail. We'll
4256  * generate a path from the is_sorted case.
4257  */
4258  if (new_rollups == NIL)
4259  return;
4260 
4261  /*
4262  * If there were empty grouping sets they should have been in the
4263  * first rollup.
4264  */
4265  Assert(!unhashed_rollup || !empty_sets);
4266 
4267  if (unhashed_rollup)
4268  {
4269  new_rollups = lappend(new_rollups, unhashed_rollup);
4270  strat = AGG_MIXED;
4271  }
4272  else if (empty_sets)
4273  {
4274  RollupData *rollup = makeNode(RollupData);
4275 
4276  rollup->groupClause = NIL;
4277  rollup->gsets_data = empty_sets_data;
4278  rollup->gsets = empty_sets;
4279  rollup->numGroups = list_length(empty_sets);
4280  rollup->hashable = false;
4281  rollup->is_hashed = false;
4282  new_rollups = lappend(new_rollups, rollup);
4283  strat = AGG_MIXED;
4284  }
4285 
4286  add_path(grouped_rel, (Path *)
4288  grouped_rel,
4289  path,
4290  target,
4291  (List *) parse->havingQual,
4292  strat,
4293  new_rollups,
4294  agg_costs,
4295  dNumGroups));
4296  return;
4297  }
4298 
4299  /*
4300  * If we have sorted input but nothing we can do with it, bail.
4301  */
4302  if (list_length(gd->rollups) == 0)
4303  return;
4304 
4305  /*
4306  * Given sorted input, we try and make two paths: one sorted and one mixed
4307  * sort/hash. (We need to try both because hashagg might be disabled, or
4308  * some columns might not be sortable.)
4309  *
4310  * can_hash is passed in as false if some obstacle elsewhere (such as
4311  * ordered aggs) means that we shouldn't consider hashing at all.
4312  */
4313  if (can_hash && gd->any_hashable)
4314  {
4315  List *rollups = NIL;
4316  List *hash_sets = list_copy(gd->unsortable_sets);
4317  double availspace = (work_mem * 1024.0);
4318  ListCell *lc;
4319 
4320  /*
4321  * Account first for space needed for groups we can't sort at all.
4322  */
4323  availspace -= (double) estimate_hashagg_tablesize(path,
4324  agg_costs,
4325  gd->dNumHashGroups);
4326 
4327  if (availspace > 0 && list_length(gd->rollups) > 1)
4328  {
4329  double scale;
4330  int num_rollups = list_length(gd->rollups);
4331  int k_capacity;
4332  int *k_weights = palloc(num_rollups * sizeof(int));
4333  Bitmapset *hash_items = NULL;
4334  int i;
4335 
4336  /*
4337  * We treat this as a knapsack problem: the knapsack capacity
4338  * represents work_mem, the item weights are the estimated memory
4339  * usage of the hashtables needed to implement a single rollup,
4340  * and we really ought to use the cost saving as the item value;
4341  * however, currently the costs assigned to sort nodes don't
4342  * reflect the comparison costs well, and so we treat all items as
4343  * of equal value (each rollup we hash instead saves us one sort).
4344  *
4345  * To use the discrete knapsack, we need to scale the values to a
4346  * reasonably small bounded range. We choose to allow a 5% error
4347  * margin; we have no more than 4096 rollups in the worst possible
4348  * case, which with a 5% error margin will require a bit over 42MB
4349  * of workspace. (Anyone wanting to plan queries that complex had
4350  * better have the memory for it. In more reasonable cases, with
4351  * no more than a couple of dozen rollups, the memory usage will
4352  * be negligible.)
4353  *
4354  * k_capacity is naturally bounded, but we clamp the values for
4355  * scale and weight (below) to avoid overflows or underflows (or
4356  * uselessly trying to use a scale factor less than 1 byte).
4357  */
4358  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4359  k_capacity = (int) floor(availspace / scale);
4360 
4361  /*
4362  * We leave the first rollup out of consideration since it's the
4363  * one that matches the input sort order. We assign indexes "i"
4364  * to only those entries considered for hashing; the second loop,
4365  * below, must use the same condition.
4366  */
4367  i = 0;
4369  {
4370  RollupData *rollup = lfirst(lc);
4371 
4372  if (rollup->hashable)
4373  {
4374  double sz = estimate_hashagg_tablesize(path,
4375  agg_costs,
4376  rollup->numGroups);
4377 
4378  /*
4379  * If sz is enormous, but work_mem (and hence scale) is
4380  * small, avoid integer overflow here.
4381  */
4382  k_weights[i] = (int) Min(floor(sz / scale),
4383  k_capacity + 1.0);
4384  ++i;
4385  }
4386  }
4387 
4388  /*
4389  * Apply knapsack algorithm; compute the set of items which
4390  * maximizes the value stored (in this case the number of sorts
4391  * saved) while keeping the total size (approximately) within
4392  * capacity.
4393  */
4394  if (i > 0)
4395  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4396 
4397  if (!bms_is_empty(hash_items))
4398  {
4399  rollups = list_make1(linitial(gd->rollups));
4400 
4401  i = 0;
4403  {
4404  RollupData *rollup = lfirst(lc);
4405 
4406  if (rollup->hashable)
4407  {
4408  if (bms_is_member(i, hash_items))
4409  hash_sets = list_concat(hash_sets,
4410  list_copy(rollup->gsets_data));
4411  else
4412  rollups = lappend(rollups, rollup);
4413  ++i;
4414  }
4415  else
4416  rollups = lappend(rollups, rollup);
4417  }
4418  }
4419  }
4420 
4421  if (!rollups && hash_sets)
4422  rollups = list_copy(gd->rollups);
4423 
4424  foreach(lc, hash_sets)
4425  {
4426  GroupingSetData *gs = lfirst(lc);
4427  RollupData *rollup = makeNode(RollupData);
4428 
4429  Assert(gs->set != NIL);
4430 
4431  rollup->groupClause = preprocess_groupclause(root, gs->set);
4432  rollup->gsets_data = list_make1(gs);
4433  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4434  rollup->gsets_data,
4435  gd->tleref_to_colnum_map);
4436  rollup->numGroups = gs->numGroups;
4437  rollup->hashable = true;
4438  rollup->is_hashed = true;
4439  rollups = lcons(rollup, rollups);
4440  }
4441 
4442  if (rollups)
4443  {
4444  add_path(grouped_rel, (Path *)
4446  grouped_rel,
4447  path,
4448  target,
4449  (List *) parse->havingQual,
4450  AGG_MIXED,
4451  rollups,
4452  agg_costs,
4453  dNumGroups));
4454  }
4455  }
4456 
4457  /*
4458  * Now try the simple sorted case.
4459  */
4460  if (!gd->unsortable_sets)
4461  add_path(grouped_rel, (Path *)
4463  grouped_rel,
4464  path,
4465  target,
4466  (List *) parse->havingQual,
4467  AGG_SORTED,
4468  gd->rollups,
4469  agg_costs,
4470  dNumGroups));
4471 }
4472 
4473 /*
4474  * create_window_paths
4475  *
4476  * Build a new upperrel containing Paths for window-function evaluation.
4477  *
4478  * input_rel: contains the source-data Paths
4479  * input_target: result of make_window_input_target
4480  * output_target: what the topmost WindowAggPath should return
4481  * tlist: query's target list (needed to look up pathkeys)
4482  * wflists: result of find_window_functions
4483  * activeWindows: result of select_active_windows
4484  *
4485  * Note: all Paths in input_rel are expected to return input_target.
4486  */
4487 static RelOptInfo *
4489  RelOptInfo *input_rel,
4490  PathTarget *input_target,
4491  PathTarget *output_target,
4492  List *tlist,
4493  WindowFuncLists *wflists,
4494  List *activeWindows)
4495 {
4496  RelOptInfo *window_rel;
4497  ListCell *lc;
4498 
4499  /* For now, do all work in the (WINDOW, NULL) upperrel */
4500  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4501 
4502  /*
4503  * If the input relation is not parallel-safe, then the window relation
4504  * can't be parallel-safe, either. Otherwise, we need to examine the
4505  * target list and active windows for non-parallel-safe constructs.
4506  */
4507  if (input_rel->consider_parallel &&
4508  is_parallel_safe(root, (Node *) output_target->exprs) &&
4509  is_parallel_safe(root, (Node *) activeWindows))
4510  window_rel->consider_parallel = true;
4511 
4512  /*
4513  * If the input rel belongs to a single FDW, so does the window rel.
4514  */
4515  window_rel->serverid = input_rel->serverid;
4516  window_rel->userid = input_rel->userid;
4517  window_rel->useridiscurrent = input_rel->useridiscurrent;
4518  window_rel->fdwroutine = input_rel->fdwroutine;
4519 
4520  /*
4521  * Consider computing window functions starting from the existing
4522  * cheapest-total path (which will likely require a sort) as well as any
4523  * existing paths that satisfy root->window_pathkeys (which won't).
4524  */
4525  foreach(lc, input_rel->pathlist)
4526  {
4527  Path *path = (Path *) lfirst(lc);
4528 
4529  if (path == input_rel->cheapest_total_path ||
4532  window_rel,
4533  path,
4534  input_target,
4535  output_target,
4536  tlist,
4537  wflists,
4538  activeWindows);
4539  }
4540 
4541  /*
4542  * If there is an FDW that's responsible for all baserels of the query,
4543  * let it consider adding ForeignPaths.
4544  */
4545  if (window_rel->fdwroutine &&
4546  window_rel->fdwroutine->GetForeignUpperPaths)
4547  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4548  input_rel, window_rel);
4549 
4550  /* Let extensions possibly add some more paths */
4552  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4553  input_rel, window_rel);
4554 
4555  /* Now choose the best path(s) */
4556  set_cheapest(window_rel);
4557 
4558  return window_rel;
4559 }
4560 
4561 /*
4562  * Stack window-function implementation steps atop the given Path, and
4563  * add the result to window_rel.
4564  *
4565  * window_rel: upperrel to contain result
4566  * path: input Path to use (must return input_target)
4567  * input_target: result of make_window_input_target
4568  * output_target: what the topmost WindowAggPath should return
4569  * tlist: query's target list (needed to look up pathkeys)
4570  * wflists: result of find_window_functions
4571  * activeWindows: result of select_active_windows
4572  */
4573 static void
4575  RelOptInfo *window_rel,
4576  Path *path,
4577  PathTarget *input_target,
4578  PathTarget *output_target,
4579  List *tlist,
4580  WindowFuncLists *wflists,
4581  List *activeWindows)
4582 {
4583  PathTarget *window_target;
4584  ListCell *l;
4585 
4586  /*
4587  * Since each window clause could require a different sort order, we stack
4588  * up a WindowAgg node for each clause, with sort steps between them as
4589  * needed. (We assume that select_active_windows chose a good order for
4590  * executing the clauses in.)
4591  *
4592  * input_target should contain all Vars and Aggs needed for the result.
4593  * (In some cases we wouldn't need to propagate all of these all the way
4594  * to the top, since they might only be needed as inputs to WindowFuncs.
4595  * It's probably not worth trying to optimize that though.) It must also
4596  * contain all window partitioning and sorting expressions, to ensure
4597  * they're computed only once at the bottom of the stack (that's critical
4598  * for volatile functions). As we climb up the stack, we'll add outputs
4599  * for the WindowFuncs computed at each level.
4600  */
4601  window_target = input_target;
4602 
4603  foreach(l, activeWindows)
4604  {
4605  WindowClause *wc = (WindowClause *) lfirst(l);
4606  List *window_pathkeys;
4607 
4608  window_pathkeys = make_pathkeys_for_window(root,
4609  wc,
4610  tlist);
4611 
4612  /* Sort if necessary */
4613  if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4614  {
4615  path = (Path *) create_sort_path(root, window_rel,
4616  path,
4617  window_pathkeys,
4618  -1.0);
4619  }
4620 
4621  if (lnext(l))
4622  {
4623  /*
4624  * Add the current WindowFuncs to the output target for this
4625  * intermediate WindowAggPath. We must copy window_target to
4626  * avoid changing the previous path's target.
4627  *
4628  * Note: a WindowFunc adds nothing to the target's eval costs; but
4629  * we do need to account for the increase in tlist width.
4630  */
4631  ListCell *lc2;
4632 
4633  window_target = copy_pathtarget(window_target);
4634  foreach(lc2, wflists->windowFuncs[wc->winref])
4635  {
4636  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4637 
4638  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4639  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4640  }
4641  }
4642  else
4643  {
4644  /* Install the goal target in the topmost WindowAgg */
4645  window_target = output_target;
4646  }
4647 
4648  path = (Path *)
4649  create_windowagg_path(root, window_rel, path, window_target,
4650  wflists->windowFuncs[wc->winref],
4651  wc,
4652  window_pathkeys);
4653  }
4654 
4655  add_path(window_rel, path);
4656 }
4657 
4658 /*
4659  * create_distinct_paths
4660  *
4661  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4662  *
4663  * input_rel: contains the source-data Paths
4664  *
4665  * Note: input paths should already compute the desired pathtarget, since
4666  * Sort/Unique won't project anything.
4667  */
4668 static RelOptInfo *
4670  RelOptInfo *input_rel)
4671 {
4672  Query *parse = root->parse;
4673  Path *cheapest_input_path = input_rel->cheapest_total_path;
4674  RelOptInfo *distinct_rel;
4675  double numDistinctRows;
4676  bool allow_hash;
4677  Path *path;
4678  ListCell *lc;
4679 
4680  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4681  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4682 
4683  /*
4684  * We don't compute anything at this level, so distinct_rel will be
4685  * parallel-safe if the input rel is parallel-safe. In particular, if
4686  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4687  * output those expressions, and will not be parallel-safe unless those
4688  * expressions are parallel-safe.
4689  */
4690  distinct_rel->consider_parallel = input_rel->consider_parallel;
4691 
4692  /*
4693  * If the input rel belongs to a single FDW, so does the distinct_rel.
4694  */
4695  distinct_rel->serverid = input_rel->serverid;
4696  distinct_rel->userid = input_rel->userid;
4697  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4698  distinct_rel->fdwroutine = input_rel->fdwroutine;
4699 
4700  /* Estimate number of distinct rows there will be */
4701  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4702  root->hasHavingQual)
4703  {
4704  /*
4705  * If there was grouping or aggregation, use the number of input rows
4706  * as the estimated number of DISTINCT rows (ie, assume the input is
4707  * already mostly unique).
4708  */
4709  numDistinctRows = cheapest_input_path->rows;
4710  }
4711  else
4712  {
4713  /*
4714  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4715  */
4716  List *distinctExprs;
4717 
4718  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4719  parse->targetList);
4720  numDistinctRows = estimate_num_groups(root, distinctExprs,
4721  cheapest_input_path->rows,
4722  NULL);
4723  }
4724 
4725  /*
4726  * Consider sort-based implementations of DISTINCT, if possible.
4727  */
4729  {
4730  /*
4731  * First, if we have any adequately-presorted paths, just stick a
4732  * Unique node on those. Then consider doing an explicit sort of the
4733  * cheapest input path and Unique'ing that.
4734  *
4735  * When we have DISTINCT ON, we must sort by the more rigorous of
4736  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4737  * Also, if we do have to do an explicit sort, we might as well use
4738  * the more rigorous ordering to avoid a second sort later. (Note
4739  * that the parser will have ensured that one clause is a prefix of
4740  * the other.)
4741  */
4742  List *needed_pathkeys;
4743 
4744  if (parse->hasDistinctOn &&
4746  list_length(root->sort_pathkeys))
4747  needed_pathkeys = root->sort_pathkeys;
4748  else
4749  needed_pathkeys = root->distinct_pathkeys;
4750 
4751  foreach(lc, input_rel->pathlist)
4752  {
4753  Path *path = (Path *) lfirst(lc);
4754 
4755  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4756  {
4757  add_path(distinct_rel, (Path *)
4758  create_upper_unique_path(root, distinct_rel,
4759  path,
4761  numDistinctRows));
4762  }
4763  }
4764 
4765  /* For explicit-sort case, always use the more rigorous clause */
4766  if (list_length(root->distinct_pathkeys) <
4767  list_length(root->sort_pathkeys))
4768  {
4769  needed_pathkeys = root->sort_pathkeys;
4770  /* Assert checks that parser didn't mess up... */
4772  needed_pathkeys));
4773  }
4774  else
4775  needed_pathkeys = root->distinct_pathkeys;
4776 
4777  path = cheapest_input_path;
4778  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4779  path = (Path *) create_sort_path(root, distinct_rel,
4780  path,
4781  needed_pathkeys,
4782  -1.0);
4783 
4784  add_path(distinct_rel, (Path *)
4785  create_upper_unique_path(root, distinct_rel,
4786  path,
4788  numDistinctRows));
4789  }
4790 
4791  /*
4792  * Consider hash-based implementations of DISTINCT, if possible.
4793  *
4794  * If we were not able to make any other types of path, we *must* hash or
4795  * die trying. If we do have other choices, there are several things that
4796  * should prevent selection of hashing: if the query uses DISTINCT ON
4797  * (because it won't really have the expected behavior if we hash), or if
4798  * enable_hashagg is off, or if it looks like the hashtable will exceed
4799  * work_mem.
4800  *
4801  * Note: grouping_is_hashable() is much more expensive to check than the
4802  * other gating conditions, so we want to do it last.
4803  */
4804  if (distinct_rel->pathlist == NIL)
4805  allow_hash = true; /* we have no alternatives */
4806  else if (parse->hasDistinctOn || !enable_hashagg)
4807  allow_hash = false; /* policy-based decision not to hash */
4808  else
4809  {
4810  Size hashentrysize;
4811 
4812  /* Estimate per-hash-entry space at tuple width... */
4813  hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4815  /* plus the per-hash-entry overhead */
4816  hashentrysize += hash_agg_entry_size(0);
4817 
4818  /* Allow hashing only if hashtable is predicted to fit in work_mem */
4819  allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4820  }
4821 
4822  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4823  {
4824  /* Generate hashed aggregate path --- no sort needed */
4825  add_path(distinct_rel, (Path *)
4826  create_agg_path(root,
4827  distinct_rel,
4828  cheapest_input_path,
4829  cheapest_input_path->pathtarget,
4830  AGG_HASHED,
4832  parse->distinctClause,
4833  NIL,
4834  NULL,
4835  numDistinctRows));
4836  }
4837 
4838  /* Give a helpful error if we failed to find any implementation */
4839  if (distinct_rel->pathlist == NIL)
4840  ereport(ERROR,
4841  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4842  errmsg("could not implement DISTINCT"),
4843  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4844 
4845  /*
4846  * If there is an FDW that's responsible for all baserels of the query,
4847  * let it consider adding ForeignPaths.
4848  */
4849  if (distinct_rel->fdwroutine &&
4850  distinct_rel->fdwroutine->GetForeignUpperPaths)
4851  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4852  input_rel, distinct_rel);
4853 
4854  /* Let extensions possibly add some more paths */
4856  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4857  input_rel, distinct_rel);
4858 
4859  /* Now choose the best path(s) */
4860  set_cheapest(distinct_rel);
4861 
4862  return distinct_rel;
4863 }
4864 
4865 /*
4866  * create_ordered_paths
4867  *
4868  * Build a new upperrel containing Paths for ORDER BY evaluation.
4869  *
4870  * All paths in the result must satisfy the ORDER BY ordering.
4871  * The only new path we need consider is an explicit sort on the
4872  * cheapest-total existing path.
      * (When the ordered rel is parallel-safe and the input rel has partial
      * paths, the body additionally tries sorting the cheapest partial path
      * and putting a Gather Merge on top; see the second half of the function.)
4873  *
4874  * input_rel: contains the source-data Paths
4875  * target: the output tlist the result Paths must emit
4876  * limit_tuples: estimated bound on the number of output tuples,
4877  * or -1 if no LIMIT or couldn't estimate
      *
      * Returns the (UPPERREL_ORDERED, NULL) upper rel with the new Paths added
      * to its pathlist. set_cheapest() is deliberately not run on it.
4878  */
4879 static RelOptInfo *
      /*
       * NOTE(review): listing line 4880 is absent from this extract; it
       * presumably carried the function name and the leading "root" parameter
       * that the body uses -- confirm against the real planner.c.
       */
4881  RelOptInfo *input_rel,
4882  PathTarget *target,
4883  double limit_tuples)
4884 {
4885  Path *cheapest_input_path = input_rel->cheapest_total_path;
4886  RelOptInfo *ordered_rel;
4887  ListCell *lc;
4888 
4889  /* For now, do all work in the (ORDERED, NULL) upperrel */
4890  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4891 
4892  /*
4893  * If the input relation is not parallel-safe, then the ordered relation
4894  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4895  * target list is parallel-safe.
      * (consider_parallel is only ever set to true here; when either test
      * fails it keeps whatever value fetch_upper_rel() gave it.)
4896  */
4897  if (input_rel->consider_parallel &&
4898  is_parallel_safe(root, (Node *) target->exprs))
4899  ordered_rel->consider_parallel = true;
4900 
4901  /*
4902  * If the input rel belongs to a single FDW, so does the ordered_rel.
4903  */
4904  ordered_rel->serverid = input_rel->serverid;
4905  ordered_rel->userid = input_rel->userid;
4906  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4907  ordered_rel->fdwroutine = input_rel->fdwroutine;
4908 
      /*
       * Keep every input path whose pathkeys already satisfy sort_pathkeys,
       * plus the cheapest-total path (explicitly sorted when it is not
       * already suitably ordered). Any other unsorted path is ignored.
       */
4909  foreach(lc, input_rel->pathlist)
4910  {
4911  Path *path = (Path *) lfirst(lc);
4912  bool is_sorted;
4913 
4914  is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4915  path->pathkeys);
4916  if (path == cheapest_input_path || is_sorted)
4917  {
4918  if (!is_sorted)
4919  {
4920  /* An explicit sort here can take advantage of LIMIT */
4921  path = (Path *) create_sort_path(root,
4922  ordered_rel,
4923  path,
4924  root->sort_pathkeys,
4925  limit_tuples);
4926  }
4927 
4928  /* Add projection step if needed */
      /* (pointer comparison: only the very same PathTarget object counts) */
4929  if (path->pathtarget != target)
4930  path = apply_projection_to_path(root, ordered_rel,
4931  path, target);
4932 
4933  add_path(ordered_rel, path);
4934  }
4935  }
4936 
4937  /*
4938  * generate_gather_paths() will have already generated a simple Gather
4939  * path for the best parallel path, if any, and the loop above will have
4940  * considered sorting it. Similarly, generate_gather_paths() will also
4941  * have generated order-preserving Gather Merge plans which can be used
4942  * without sorting if they happen to match the sort_pathkeys, and the loop
4943  * above will have handled those as well. However, there's one more
4944  * possibility: it may make sense to sort the cheapest partial path
4945  * according to the required output order and then use Gather Merge.
4946  */
4947  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
4948  input_rel->partial_pathlist != NIL)
4949  {
4950  Path *cheapest_partial_path;
4951 
      /* The first entry of partial_pathlist is taken as the cheapest one */
4952  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4953 
4954  /*
4955  * If cheapest partial path doesn't need a sort, this is redundant
4956  * with what's already been tried.
4957  */
      /*
       * NOTE(review): listing line 4958 is absent from this extract; going by
       * the comment above and the argument that follows, it is presumably
       * the opening of an "if" that tests the partial path's pathkeys
       * against root->sort_pathkeys -- confirm against the real planner.c.
       */
4959  cheapest_partial_path->pathkeys))
4960  {
4961  Path *path;
4962  double total_groups;
4963 
      /*
       * Sort the partial path. -1.0 (the "no LIMIT / unknown" value
       * described in the header) is passed as the tuple bound rather
       * than limit_tuples.
       */
4964  path = (Path *) create_sort_path(root,
4965  ordered_rel,
4966  cheapest_partial_path,
4967  root->sort_pathkeys,
4968  -1.0);
4969 
      /*
       * Row estimate handed to Gather Merge: the partial path's rows
       * multiplied by its parallel_workers count.
       */
4970  total_groups = cheapest_partial_path->rows *
4971  cheapest_partial_path->parallel_workers;
4972  path = (Path *)
4973  create_gather_merge_path(root, ordered_rel,
4974  path,
4975  target, root->sort_pathkeys, NULL,
4976  &total_groups);
4977 
4978  /* Add projection step if needed */
4979  if (path->pathtarget != target)
4980  path = apply_projection_to_path(root, ordered_rel,
4981  path, target);
4982 
4983  add_path(ordered_rel, path);
4984  }
4985  }
4986 
4987  /*
4988  * If there is an FDW that's responsible for all baserels of the query,
4989  * let it consider adding ForeignPaths.
4990  */
4991  if (ordered_rel->fdwroutine &&
4992  ordered_rel->fdwroutine->GetForeignUpperPaths)
4993  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
4994  input_rel, ordered_rel);
4995 
4996  /* Let extensions possibly add some more paths */
      /*
       * NOTE(review): listing line 4997 is absent from this extract;
       * presumably it is a non-NULL test on create_upper_paths_hook guarding
       * the call below -- confirm against the real planner.c.
       */
4998  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
4999  input_rel, ordered_rel);
5000 
5001  /*
5002  * No need to bother with set_cheapest here; grouping_planner does not
5003  * need us to do it.
      * We do insist that at least one ordered path was produced.
5004  */
5005  Assert(ordered_rel->pathlist != NIL);
5006 
5007  return ordered_rel;
5008 }
5009 
5010 
5011 /*
5012  * make_group_input_target
5013  * Generate appropriate PathTarget for initial input to grouping nodes.
5014  *
5015  * If there is grouping or aggregation, the scan/join subplan cannot emit
5016  * the query's final targetlist; for example, it certainly can't emit any
5017  * aggregate function calls. This routine generates the correct target
5018  * for the scan/join subplan.
5019  *
5020  * The query target list passed from the parser already contains entries
5021  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5022  * for variables used only in HAVING clauses; so we need to add those
5023  * variables to the subplan target list. Also, we flatten all expressions
5024  * except GROUP BY items into their component variables; other expressions
5025  * will be computed by the upper plan nodes rather than by the subplan.
5026  * For example, given a query like
5027  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5028  * we want to pass this targetlist to the subplan:
5029  * a+b,c,d
5030  * where the a+b target will be used by the Sort/Group steps, and the
5031  * other targets will be used for computing the final results.
5032  *
      * 'root' is the planner state; root->parse supplies the groupClause and
      * havingQual consulted below
5033  * 'final_target' is the query's final target list (in PathTarget form)
5034  *
5035  * The result is the PathTarget to be computed by the Paths returned from
5036  * query_planner().
      * It is a newly created PathTarget that has been passed through
      * set_pathtarget_cost_width(); final_target itself is only read.
5037  */
5038 static PathTarget *
      /*
       * NOTE(review): listing line 5039 is absent from this extract; it
       * presumably held the function name and its parameter list ("root" and
       * "final_target" are the names used by the body) -- confirm against the
       * real planner.c.
       */
5040 {
5041  Query *parse = root->parse;
5042  PathTarget *input_target;
5043  List *non_group_cols;
5044  List *non_group_vars;
5045  int i;
5046  ListCell *lc;
5047 
5048  /*
5049  * We must build a target containing all grouping columns, plus any other
5050  * Vars mentioned in the query's targetlist and HAVING qual.
5051  */
5052  input_target = create_empty_pathtarget();
5053  non_group_cols = NIL;
5054 
      /* i tracks the column position so the matching sortgroupref can be read */
5055  i = 0;
5056  foreach(lc, final_target->exprs)
5057  {
5058  Expr *expr = (Expr *) lfirst(lc);
5059  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5060 
5061  if (sgref && parse->groupClause &&
      /*
       * NOTE(review): listing line 5062 is absent from this extract; it
       * presumably completes the condition by checking that sgref is
       * referenced by parse->groupClause -- confirm against the real
       * planner.c.
       */
5063  {
5064  /*
5065  * It's a grouping column, so add it to the input target as-is.
5066  */
5067  add_column_to_pathtarget(input_target, expr, sgref);
5068  }
5069  else
5070  {
5071  /*
5072  * Non-grouping column, so just remember the expression for later
5073  * call to pull_var_clause.
5074  */
5075  non_group_cols = lappend(non_group_cols, expr);
5076  }
5077 
5078  i++;
5079  }
5080 
5081  /*
5082  * If there's a HAVING clause, we'll need the Vars it uses, too.
5083  */
5084  if (parse->havingQual)
5085  non_group_cols = lappend(non_group_cols, parse->havingQual);
5086 
5087  /*
5088  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5089  * add them to the input target if not already present. (A Var used
5090  * directly as a GROUP BY item will be present already.) Note this
5091  * includes Vars used in resjunk items, so we are covering the needs of
5092  * ORDER BY and window specifications. Vars used within Aggrefs and
5093  * WindowFuncs will be pulled out here, too.
      *
      * NOTE(review): listing lines 5096-5098 are absent from this extract;
      * they presumably carry the flag argument(s) that close the
      * pull_var_clause() call below -- confirm against the real planner.c.
5094  */
5095  non_group_vars = pull_var_clause((Node *) non_group_cols,
5099  add_new_columns_to_pathtarget(input_target, non_group_vars);
5100 
5101  /* clean up cruft */
5102  list_free(non_group_vars);
5103  list_free(non_group_cols);
5104 
5105  /* XXX this causes some redundant cost calculation ... */
5106  return set_pathtarget_cost_width(root, input_target);
5107 }
5108 
5109 /*
5110  * make_partial_grouping_target
5111  * Generate appropriate PathTarget for output of partial aggregate
5112  * (or partial grouping, if there are no aggregates) nodes.
5113  *
5114  * A partial aggregation node needs to emit all the same aggregates that
5115  * a regular aggregation node would, plus any aggregates used in HAVING;
5116  * except that the Aggref nodes should be marked as partial aggregates.
5117  *
5118  * In addition, we'd better emit any Vars and PlaceholderVars that are
5119  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5120  * these would be Vars that are grouped by or used in grouping expressions.)
5121  *
5122  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5123  * We get the HAVING clause out of *root.
      *
      * The result is a newly created PathTarget that has been passed through
      * set_pathtarget_cost_width(). Aggref nodes placed in it are flat copies,
      * so the Aggrefs reachable from grouping_target and HAVING are not
      * modified.
5124  */
5125 static PathTarget *
      /*
       * NOTE(review): listing line 5126 is absent from this extract; it
       * presumably held the function name and its parameter list ("root" and
       * "grouping_target" are the names used by the body) -- confirm against
       * the real planner.c.
       */
5127 {
5128  Query *parse = root->parse;
5129  PathTarget *partial_target;
5130  List *non_group_cols;
5131  List *non_group_exprs;
5132  int i;
5133  ListCell *lc;
5134 
5135  partial_target = create_empty_pathtarget();
5136  non_group_cols = NIL;
5137 
      /* i tracks the column position so the matching sortgroupref can be read */
5138  i = 0;
5139  foreach(lc, grouping_target->exprs)
5140  {
5141  Expr *expr = (Expr *) lfirst(lc);
5142  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5143 
5144  if (sgref && parse->groupClause &&
      /*
       * NOTE(review): listing line 5145 is absent from this extract; it
       * presumably completes the condition by checking that sgref is
       * referenced by parse->groupClause -- confirm against the real
       * planner.c.
       */
5146  {
5147  /*
5148  * It's a grouping column, so add it to the partial_target as-is.
5149  * (This allows the upper agg step to repeat the grouping calcs.)
5150  */
5151  add_column_to_pathtarget(partial_target, expr, sgref);
5152  }
5153  else
5154  {
5155  /*
5156  * Non-grouping column, so just remember the expression for later
5157  * call to pull_var_clause.
5158  */
5159  non_group_cols = lappend(non_group_cols, expr);
5160  }
5161 
5162  i++;
5163  }
5164 
5165  /*
5166  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5167  */
5168  if (parse->havingQual)
5169  non_group_cols = lappend(non_group_cols, parse->havingQual);
5170 
5171  /*
5172  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5173  * non-group cols (plus HAVING), and add them to the partial_target if not
5174  * already present. (An expression used directly as a GROUP BY item will
5175  * be present already.) Note this includes Vars used in resjunk items, so
5176  * we are covering the needs of ORDER BY and window specifications.
      *
      * NOTE(review): listing lines 5179-5181 are absent from this extract;
      * they presumably carry the flag argument(s) that close the
      * pull_var_clause() call below -- confirm against the real planner.c.
5177  */
5178  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5182 
5183  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5184 
5185  /*
5186  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5187  * are at the top level of the target list, so we can just scan the list
5188  * rather than recursing through the expression trees.
5189  */
5190  foreach(lc, partial_target->exprs)
5191  {
      /* Cast first, then let IsA() decide whether it really is an Aggref */
5192  Aggref *aggref = (Aggref *) lfirst(lc);
5193 
5194  if (IsA(aggref, Aggref))
5195  {
5196  Aggref *newaggref;
5197 
5198  /*
5199  * We shouldn't need to copy the substructure of the Aggref node,
5200  * but flat-copy the node itself to avoid damaging other trees.
5201  */
5202  newaggref = makeNode(Aggref);
5203  memcpy(newaggref, aggref, sizeof(Aggref));
5204 
5205  /* For now, assume serialization is required */
      /*
       * NOTE(review): listing line 5206 is absent from this extract;
       * presumably it is the call that switches newaggref into a
       * serializing partial-aggregation mode -- confirm against the
       * real planner.c.
       */
5207 
      /* Swap the copy into the target list in place of the original */
5208  lfirst(lc) = newaggref;
5209  }
5210  }
5211 
5212  /* clean up cruft */
5213  list_free(non_group_exprs);
5214  list_free(non_group_cols);
5215 
5216  /* XXX this causes some redundant cost calculation ... */
5217  return set_pathtarget_cost_width(root, partial_target);
5218 }
5219 
5220 /*
5221  * mark_partial_aggref
5222  * Adjust an Aggref to make it represent a partial-aggregation step.
5223  *
5224  * The Aggref node is modified in-place; caller must do any copying required.
      *
      * 'agg' is the Aggref to adjust; on entry its aggsplit must still be
      * AGGSPLIT_SIMPLE (asserted below)
      * 'aggsplit' is the partial-aggregation mode to store into agg->aggsplit
      *
      * agg->aggtype is rewritten only when DO_AGGSPLIT_SKIPFINAL(aggsplit) is
      * true; otherwise the result type is left as it was.
5225  */
5226 void
      /*
       * NOTE(review): listing line 5227 is absent from this extract; it
       * presumably held the function name and its parameter list ("agg" and
       * "aggsplit" are the names used by the body) -- confirm against the
       * real planner.c.
       */
5228 {
5229  /* aggtranstype should be computed by this point */
      /*
       * NOTE(review): listing line 5230 is absent from this extract;
       * presumably it is the Assert that backs the comment above -- confirm
       * against the real planner.c.
       */
5231  /* ... but aggsplit should still be as the parser left it */
5232  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5233 
5234  /* Mark the Aggref with the intended partial-aggregation mode */
5235  agg->aggsplit = aggsplit;
5236 
5237  /*
5238  * Adjust result type if needed. Normally, a partial aggregate returns
5239  * the aggregate's transition type; but if that's INTERNAL and we're
5240  * serializing, it returns BYTEA instead.
5241  */
5242  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5243  {
5244  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5245  agg->aggtype = BYTEAOID;
5246  else
5247  agg->aggtype = agg->aggtranstype;
5248  }
5249 }
5250 
5251 /*
5252  * postprocess_setop_tlist
5253  * Fix up targetlist returned by plan_set_operations().
5254  *
5255  * We need to transpose sort key info from the orig_tlist into new_tlist.
5256  * NOTE: this would not be good enough if we supported resjunk sort keys
5257  * for results of set operations --- then, we'd need to project a whole
5258  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5259  * find any resjunk columns in orig_tlist.
      *
      * 'new_tlist' is the list of TargetEntry nodes to fix up; its non-resjunk
      * entries have their ressortgroupref overwritten in place
      * 'orig_tlist' is the list of TargetEntry nodes supplying the
      * ressortgroupref values; it is only read
      *
      * The two lists are walked in step: each non-resjunk entry of new_tlist
      * is paired with the next entry of orig_tlist. An ERROR is raised if an
      * orig_tlist entry is resjunk, or if orig_tlist still has entries left
      * once new_tlist is exhausted.
      *
      * Returns new_tlist (the same list that was passed in).
5260  */
5261 static List *
5262 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5263 {
5264  ListCell *l;
      /* cursor into orig_tlist, advanced only for non-resjunk new entries */
5265  ListCell *orig_tlist_item = list_head(orig_tlist);
5266 
5267  foreach(l, new_tlist)
5268  {
5269  TargetEntry *new_tle = (TargetEntry *) lfirst(l);
5270  TargetEntry *orig_tle;
5271 
5272  /* ignore resjunk columns in setop result */
5273  if (new_tle->resjunk)
5274  continue;
5275 
5276  Assert(orig_tlist_item != NULL);
5277  orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
5278  orig_tlist_item = lnext(orig_tlist_item);
5279  if (orig_tle->resjunk) /* should not happen */
5280  elog(ERROR, "resjunk output columns are not implemented");
      /* paired entries are expected to sit at the same output position */
5281  Assert(new_tle->resno == orig_tle->resno);
5282  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5283  }
      /* leftover orig_tlist entries are reported with the same message */
5284  if (orig_tlist_item != NULL)
5285  elog(ERROR, "resjunk output columns are not implemented");
5286  return new_tlist;
5287 }
5288 
5289 /*
5290  * select_active_windows
5291  * Create a list of the "active" window clauses (ie, those referenced
5292  * by non-deleted WindowFuncs) in the order they are to be executed.
      *
      * The window clauses are taken from root->parse->windowClause; a clause
      * counts as active when wflists->windowFuncs[winref] is a non-empty list.
      *
      * Returns a newly built List of the active WindowClause nodes, arranged
      * so that clauses with equal partitionClause and orderClause are
      * adjacent. The WindowClause nodes themselves are not copied.
5293  */
5294 static List *
      /*
       * NOTE(review): listing line 5295 is absent from this extract; it
       * presumably held the function name and its parameter list ("root" and
       * "wflists" are the names used by the body) -- confirm against the
       * real planner.c.
       */
5296 {
5297  List *result;
5298  List *actives;
5299  ListCell *lc;
5300 
5301  /* First, make a list of the active windows */
5302  actives = NIL;
5303  foreach(lc, root->parse->windowClause)
5304  {
5305  WindowClause *wc = (WindowClause *) lfirst(lc);
5306 
5307  /* It's only active if wflists shows some related WindowFuncs */
5308  Assert(wc->winref <= wflists->maxWinRef);
5309  if (wflists->windowFuncs[wc->winref] != NIL)
5310  actives = lappend(actives, wc);
5311  }
5312 
5313  /*
5314  * Now, ensure that windows with identical partitioning/ordering clauses
5315  * are adjacent in the list. This is required by the SQL standard, which
5316  * says that only one sort is to be used for such windows, even if they
5317  * are otherwise distinct (eg, different names or framing clauses).
5318  *
5319  * There is room to be much smarter here, for example detecting whether
5320  * one window's sort keys are a prefix of another's (so that sorting for
5321  * the latter would do for the former), or putting windows first that
5322  * match a sort order available for the underlying query. For the moment
5323  * we are content with meeting the spec.
      *
      * Each pass of the outer loop takes the first remaining clause and then
      * pulls every later clause that matches it out of actives, so actives
      * shrinks until it is empty.
5324  */
5325  result = NIL;
5326  while (actives != NIL)
5327  {
5328  WindowClause *wc = (WindowClause *) linitial(actives);
5329  ListCell *prev;
5330  ListCell *next;
5331 
5332  /* Move wc from actives to result */
5333  actives = list_delete_first(actives);
5334  result = lappend(result, wc);
5335 
5336  /* Now move any matching windows from actives to result */
      /*
       * prev trails lc because list_delete_cell() is given the preceding
       * cell; next is fetched before lc can be deleted so the scan can
       * continue afterwards.
       */
5337  prev = NULL;
5338  for (lc = list_head(actives); lc; lc = next)
5339  {
5340  WindowClause *wc2 = (WindowClause *) lfirst(lc);
5341 
5342  next = lnext(lc);
5343  /* framing options are NOT to be compared here! */
5344  if (equal(wc->partitionClause, wc2->partitionClause) &&
5345  equal(wc->orderClause, wc2->orderClause))
5346  {
5347  actives = list_delete_cell(actives, lc, prev);
5348  result = lappend(result, wc2);
5349  }
5350  else
5351  prev = lc;
5352  }
5353  }
5354 
5355  return result;
5356 }
5357 
5358 /*
5359  * make_window_input_target
5360  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5361  *
5362  * When the query has window functions, this function computes the desired
5363  * target to be computed by the node just below the first WindowAgg.
5364  * This tlist must contain all values needed to evaluate the window functions,
5365  * compute the final target list, and perform any required final sort step.
5366  * If multiple WindowAggs are needed, each intermediate one adds its window
5367  * function results onto this base tlist; only the topmost WindowAgg computes
5368  * the actual desired target list.
5369  *
5370  * This function is much like make_group_input_target, though not quite enough
5371  * like it to share code. As in that function, we flatten most expressions
5372  * into their component variables. But we do not want to flatten window
5373  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5374  * evaluations of them, which would be bad (possibly even resulting in
5375  * inconsistent answers, if they contain volatile functions).
5376  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5377  * make_group_input_target, because we may no longer have access to the
5378  * individual Vars in them.
5379  *
5380  * Another key difference from make_group_input_target is that we don't
5381  * flatten Aggref expressions, since those are to be computed below the
5382  * window functions and just referenced like Vars above that.
5383  *
      * 'root' is the planner state; root->parse must have hasWindowFuncs set
      * (asserted below) and supplies the groupClause
5384  * 'final_target' is the query's final target list (in PathTarget form)
5385  * 'activeWindows' is the list of active windows previously identified by
5386  * select_active_windows.
5387  *
5388  * The result is the PathTarget to be computed by the plan node immediately
5389  * below the first WindowAgg node.
      * It is a newly created PathTarget that has been passed through
      * set_pathtarget_cost_width(); final_target itself is only read.
5390  */
5391 static PathTarget *
      /*
       * NOTE(review): listing line 5392 is absent from this extract; it
       * presumably held the function name and the leading "root" parameter
       * that the body uses -- confirm against the real planner.c.
       */
5393  PathTarget *final_target,
5394  List *activeWindows)
5395 {
5396  Query *parse = root->parse;
5397  PathTarget *input_target;
5398  Bitmapset *sgrefs;
5399  List *flattenable_cols;
5400  List *flattenable_vars;
5401  int i;
5402  ListCell *lc;
5403 
5404  Assert(parse->hasWindowFuncs);
5405 
5406  /*
5407  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5408  * into a bitmapset for convenient reference below.
5409  */
5410  sgrefs = NULL;
5411  foreach(lc, activeWindows)
5412  {
5413  WindowClause *wc = (WindowClause *) lfirst(lc);
5414  ListCell *lc2;
5415 
5416  foreach(lc2, wc->partitionClause)
5417  {
5418  SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5419 
5420  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5421  }
5422  foreach(lc2, wc->orderClause)
5423  {
5424  SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5425 
5426  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5427  }
5428  }
5429 
5430  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5431  foreach(lc, parse->groupClause)
5432  {
5433  SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
5434 
5435  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5436  }
5437 
5438  /*
5439  * Construct a target containing all the non-flattenable targetlist items,
5440  * and save aside the others for a moment.
5441  */
5442  input_target = create_empty_pathtarget();
5443  flattenable_cols = NIL;
5444 
      /* i tracks the column position so the matching sortgroupref can be read */
5445  i = 0;
5446  foreach(lc, final_target->exprs)
5447  {
5448  Expr *expr = (Expr *) lfirst(lc);
5449  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5450 
5451  /*
5452  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5453  * that such items can't contain window functions, so it's okay to
5454  * compute them below the WindowAgg nodes.)
5455  */
5456  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5457  {
5458  /*
5459  * Don't want to deconstruct this value, so add it to the input
5460  * target as-is.
5461  */
5462  add_column_to_pathtarget(input_target, expr, sgref);
5463  }
5464  else
5465  {
5466  /*
5467  * Column is to be flattened, so just remember the expression for
5468  * later call to pull_var_clause.
5469  */
5470  flattenable_cols = lappend(flattenable_cols, expr);
5471  }
5472 
5473  i++;
5474  }
5475 
5476  /*
5477  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5478  * add them to the input target if not already present. (Some might be
5479  * there already because they're used directly as window/group clauses.)
5480  *
5481  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5482  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5483  * at higher levels. On the other hand, we should recurse into
5484  * WindowFuncs to make sure their input expressions are available.
      *
      * NOTE(review): listing lines 5487-5489 are absent from this extract;
      * they presumably carry the flag argument(s) that close the
      * pull_var_clause() call below -- confirm against the real planner.c.
5485  */
5486  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5490  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5491 
5492  /* clean up cruft */
5493  list_free(flattenable_vars);
5494  list_free(flattenable_cols);
5495 
5496  /* XXX this causes some redundant cost calculation ... */
5497  return set_pathtarget_cost_width(root, input_target);
5498 }
5499 
5500 /*
5501  * make_pathkeys_for_window
5502  * Create a pathkeys list describing the required input ordering
5503  * for the given WindowClause.
5504  *
5505  * The required ordering is first the PARTITION keys, then the ORDER keys.
5506  * In the future we might try to implement windowing using hashing, in which
5507  * case the ordering could be relaxed, but for now we always sort.
5508  *
      * 'wc' is the WindowClause whose partitionClause and orderClause are used
      * 'tlist' is the target list handed on to make_pathkeys_for_sortclauses()
      *
      * Returns the pathkeys list built by make_pathkeys_for_sortclauses() for
      * the concatenation partitionClause + orderClause. ERRORs out with
      * ERRCODE_FEATURE_NOT_SUPPORTED if sorting is not possible.
      *
5509  * Caution: if you change this, see createplan.c's get_column_info_for_window!
5510  */
5511 static List *
      /*
       * NOTE(review): listing line 5512 is absent from this extract; it
       * presumably held the function name and the leading parameters ("root"
       * and "wc" are the names used by the body) -- confirm against the real
       * planner.c.
       */
5513  List *tlist)
5514 {
5515  List *window_pathkeys;
5516  List *window_sortclauses;
5517 
5518  /* Throw error if can't sort */
      /*
       * NOTE(review): listing lines 5519 and 5524 are absent from this
       * extract. As shown, both ereport() calls look unconditional;
       * presumably each missing line is an "if" testing whether the
       * corresponding clause list is sortable -- confirm against the real
       * planner.c.
       */
5520  ereport(ERROR,
5521  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5522  errmsg("could not implement window PARTITION BY"),
5523  errdetail("Window partitioning columns must be of sortable datatypes.")));
5525  ereport(ERROR,
5526  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5527  errmsg("could not implement window ORDER BY"),
5528  errdetail("Window ordering columns must be of sortable datatypes.")));
5529 
5530  /* Okay, make the combined pathkeys */
      /*
       * Both clause lists are copied before being concatenated, so the
       * temporary combined list can be freed below while wc's own lists are
       * left alone.
       */
5531  window_sortclauses = list_concat(list_copy(wc->partitionClause),
5532  list_copy(wc->orderClause));
5533  window_pathkeys = make_pathkeys_for_sortclauses(root,
5534  window_sortclauses,
5535  tlist);
5536  list_free(window_sortclauses);
5537  return window_pathkeys;
5538 }
5539 
5540 /*
5541  * make_sort_input_target
5542  * Generate appropriate PathTarget for initial input to Sort step.
5543  *
5544  * If the query has ORDER BY, this function chooses the target to be computed
5545  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5546  * project) steps. This might or might not be identical to the query's final
5547  * output target.
5548  *
5549  * The main argument for keeping the sort-input tlist the same as the final
5550  * is that we avoid a separate projection node (which will be needed if
5551  * they're different, because Sort can't project). However, there are also
5552  * advantages to postponing tlist evaluation till after the Sort: it ensures
5553  * a consistent order of evaluation for any volatile functions in the tlist,
5554  * and if there's also a LIMIT, we can stop the query without ever computing
5555  * tlist functions for later rows, which is beneficial for both volatile and
5556  * expensive functions.
5557  *
5558  * Our current policy is to postpone volatile expressions till after the sort
5559  * unconditionally (assuming that that's possible, ie they are in plain tlist
5560  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5561  * postpone set-returning expressions, because running them beforehand would
5562  * bloat the sort dataset, and because it might cause unexpected output order
5563  * if the sort isn't stable. However there's a constraint on that: all SRFs
5564  * in the tlist should be evaluated at the same plan step, so that they can
5565  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5566  * mustn't postpone any SRFs. (Note that in principle that policy should
5567  * probably get applied to the group/window input targetlists too, but we
5568  * have not done that historically.) Lastly, expensive expressions are
5569  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5570  * partial evaluation of the query is possible (if neither is true, we expect
5571  * to have to evaluate the expressions for every row anyway), or if there are
5572  * any volatile or set-returning expressions (since once we've put in a
5573  * projection at all, it won't cost any more to postpone more stuff).
5574  *
5575  * Another issue that could potentially be considered here is that
5576  * evaluating tlist expressions could result in data that's either wider
5577  * or narrower than the input Vars, thus changing the volume of data that
5578  * has to go through the Sort. However, we usually have only a very bad
5579  * idea of the output width of any expression more complex than a Var,
5580  * so for now it seems too risky to try to optimize on that basis.
5581  *
5582  * Note that if we do produce a modified sort-input target, and then the
5583  * query ends up not using an explicit Sort, no particular harm is done:
5584  * we'll initially use the modified target for the preceding path nodes,
5585  * but then change them to the final target with apply_projection_to_path.
5586  * Moreover, in such a case the guarantees about evaluation order of
5587  * volatile functions still hold, since the rows are sorted already.
5588  *
5589  * This function has some things in common with make_group_input_target and
5590  * make_window_input_target, though the detailed rules for what to do are
5591  * different. We never flatten/postpone any grouping or ordering columns;
5592  * those are needed before the sort. If we do flatten a particular
5593  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5594  * computed earlier.
5595  *
      * 'root' is the planner state; root->parse must have a sortClause
      * (asserted below), and root->tuple_fraction is consulted
5596  * 'final_target' is the query's final target list (in PathTarget form)
5597  * 'have_postponed_srfs' is an output argument, see below
5598  *
5599  * The result is the PathTarget to be computed by the plan node immediately
5600  * below the Sort step (and the Distinct step, if any). This will be
5601  * exactly final_target if we decide a projection step wouldn't be helpful.
      * (In that case the very same final_target pointer is returned; otherwise
      * the result is a newly created PathTarget.)
5602  *
5603  * In addition, *have_postponed_srfs is set to TRUE if we choose to postpone
5604  * any set-returning functions to after the Sort.
      * It is set to false in every other case, including the early return of
      * final_target.
5605  */
5606 static PathTarget *
      /*
       * NOTE(review): listing line 5607 is absent from this extract; it
       * presumably held the function name and the leading "root" parameter
       * that the body uses -- confirm against the real planner.c.
       */
5608  PathTarget *final_target,
5609  bool *have_postponed_srfs)
5610 {
5611  Query *parse = root->parse;
5612  PathTarget *input_target;
5613  int ncols;
5614  bool *col_is_srf;
5615  bool *postpone_col;
5616  bool have_srf;
5617  bool have_volatile;
5618  bool have_expensive;
5619  bool have_srf_sortcols;
5620  bool postpone_srfs;
5621  List *postponable_cols;
5622  List *postponable_vars;
5623  int i;
5624  ListCell *lc;
5625 
5626  /* Shouldn't get here unless query has ORDER BY */
5627  Assert(parse->sortClause);
5628 
5629  *have_postponed_srfs = false; /* default result */
5630 
5631  /* Inspect tlist and collect per-column information */
      /*
       * One flag per target column in each array; the code below only ever
       * sets entries to true, so it relies on palloc0 handing back
       * all-false arrays. Neither array is explicitly freed in this function.
       */
5632  ncols = list_length(final_target->exprs);
5633  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5634  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5635  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5636 
5637  i = 0;
5638  foreach(lc, final_target->exprs)
5639  {
5640  Expr *expr = (Expr *) lfirst(lc);
5641 
5642  /*
5643  * If the column has a sortgroupref, assume it has to be evaluated
5644  * before sorting. Generally such columns would be ORDER BY, GROUP
5645  * BY, etc targets. One exception is columns that were removed from
5646  * GROUP BY by remove_useless_groupby_columns() ... but those would
5647  * only be Vars anyway. There don't seem to be any cases where it
5648  * would be worth the trouble to double-check.
5649  */
5650  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5651  {
5652  /*
5653  * Check for SRF or volatile functions. Check the SRF case first
5654  * because we must know whether we have any postponed SRFs.
5655  */
5656  if (parse->hasTargetSRFs &&
5657  expression_returns_set((Node *) expr))
5658  {
5659  /* We'll decide below whether these are postponable */
5660  col_is_srf[i] = true;
5661  have_srf = true;
5662  }
5663  else if (contain_volatile_functions((Node *) expr))
5664  {
5665  /* Unconditionally postpone */
5666  postpone_col[i] = true;
5667  have_volatile = true;
5668  }
5669  else
5670  {
5671  /*
5672  * Else check the cost. XXX it's annoying to have to do this
5673  * when set_pathtarget_cost_width() just did it. Refactor to
5674  * allow sharing the work?
5675  */
5676  QualCost cost;
5677 
5678  cost_qual_eval_node(&cost, (Node *) expr, root);
5679 
5680  /*
5681  * We arbitrarily define "expensive" as "more than 10X
5682  * cpu_operator_cost". Note this will take in any PL function
5683  * with default cost.
      * (The column is marked here regardless; whether the mark
      * takes effect is decided by the projection test below.)
5684  */
5685  if (cost.per_tuple > 10 * cpu_operator_cost)
5686  {
5687  postpone_col[i] = true;
5688  have_expensive = true;
5689  }
5690  }
5691  }
5692  else
5693  {
5694  /* For sortgroupref cols, just check if any contain SRFs */
      /* (once one has been found, the remaining checks are skipped) */
5695  if (!have_srf_sortcols &&
5696  parse->hasTargetSRFs &&
5697  expression_returns_set((Node *) expr))
5698  have_srf_sortcols = true;
5699  }
5700 
5701  i++;
5702  }
5703 
5704  /*
5705  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5706  */
5707  postpone_srfs = (have_srf && !have_srf_sortcols);
5708 
5709  /*
5710  * If we don't need a post-sort projection, just return final_target.
      * A projection is wanted when SRFs are being postponed, when any
      * volatile column was found, or when an expensive column was found and
      * the query has a limitCount or a positive tuple_fraction.
5711  */
5712  if (!(postpone_srfs || have_volatile ||
5713  (have_expensive &&
5714  (parse->limitCount || root->tuple_fraction > 0))))
5715  return final_target;
5716 
5717  /*
5718  * Report whether the post-sort projection will contain set-returning
5719  * functions. This is important because it affects whether the Sort can
5720  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5721  * to return.
5722  */
5723  *have_postponed_srfs = postpone_srfs;
5724 
5725  /*
5726  * Construct the sort-input target, taking all non-postponable columns and
5727  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5728  * the postponable ones.
5729  */
5730  input_target = create_empty_pathtarget();
5731  postponable_cols = NIL;
5732 
5733  i = 0;
5734  foreach(lc, final_target->exprs)
5735  {
5736  Expr *expr = (Expr *) lfirst(lc);
5737 
      /* SRF columns are postponed only if postpone_srfs was decided above */
5738  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5739  postponable_cols = lappend(postponable_cols, expr);
5740  else
5741  add_column_to_pathtarget(input_target, expr,
5742  get_pathtarget_sortgroupref(final_target, i));
5743 
5744  i++;
5745  }
5746 
5747  /*
5748  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5749  * postponable columns, and add them to the sort-input target if not
5750  * already present. (Some might be there already.) We mustn't
5751  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5752  * would be unable to recompute them.
      *
      * NOTE(review): listing lines 5755-5757 are absent from this extract;
      * they presumably carry the flag argument(s) that close the
      * pull_var_clause() call below -- confirm against the real planner.c.
5753  */
5754  postponable_vars = pull_var_clause((Node *) postponable_cols,
5758  add_new_columns_to_pathtarget(input_target, postponable_vars);
5759 
5760  /* clean up cruft */
5761  list_free(postponable_vars);
5762  list_free(postponable_cols);
5763 
5764  /* XXX this represents even more redundant cost calculation ... */
5765  return set_pathtarget_cost_width(root, input_target);
5766 }
5767 
5768 /*
5769  * get_cheapest_fractional_path
5770  * Find the cheapest path for retrieving a specified fraction of all
5771  * the tuples expected to be returned by the given relation.
5772  *
5773  * We interpret tuple_fraction the same way as grouping_planner.
5774  *
5775  * We assume set_cheapest() has been run on the given rel.
5776  */
5777 Path *
5778 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5779 {
5780  Path *best_path = rel->cheapest_total_path;
5781  ListCell *l;
5782 
5783  /* If all tuples will be retrieved, just return the cheapest-total path */
5784  if (tuple_fraction <= 0.0)
5785  return best_path;
5786 
5787  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5788  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5789  tuple_fraction /= best_path->rows;
5790 
5791  foreach(l, rel->pathlist)
5792  {
5793  Path *path = (Path *) lfirst(l);
5794 
5795  if (path == rel->cheapest_total_path ||
5796  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5797  continue;
5798 
5799  best_path = path;
5800  }
5801 
5802  return best_path;
5803 }
5804 
5805 /*
5806  * adjust_paths_for_srfs
5807  * Fix up the Paths of the given upperrel to handle tSRFs properly.
5808  *
5809  * The executor can only handle set-returning functions that appear at the
5810  * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5811  * that are not at top level, we need to split up the evaluation into multiple
5812  * plan levels in which each level satisfies this constraint. This function
5813  * modifies each Path of an upperrel that (might) compute any SRFs in its
5814  * output tlist to insert appropriate projection steps.
5815  *
5816  * The given targets and targets_contain_srfs lists are from
5817  * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5818  * target in targets.
5819  */
5820 static void
5822  List *targets, List *targets_contain_srfs)
5823 {
5824  ListCell *lc;
5825 
5826  Assert(list_length(targets) == list_length(targets_contain_srfs));
5827  Assert(!linitial_int(targets_contain_srfs));
5828 
5829  /* If no SRFs appear at this plan level, nothing to do */
5830  if (list_length(targets) == 1)
5831  return;
5832 
5833  /*
5834  * Stack SRF-evaluation nodes atop each path for the rel.
5835  *
5836  * In principle we should re-run set_cheapest() here to identify the
5837  * cheapest path, but it seems unlikely that adding the same tlist eval
5838  * costs to all the paths would change that, so we don't bother. Instead,
5839  * just assume that the cheapest-startup and cheapest-total paths remain
5840  * so. (There should be no parameterized paths anymore, so we needn't
5841  * worry about updating cheapest_parameterized_paths.)
5842  */