PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
planner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/htup_details.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/xact.h"
26 #include "catalog/pg_proc.h"
27 #include "catalog/pg_type.h"
28 #include "executor/executor.h"
29 #include "executor/nodeAgg.h"
30 #include "foreign/fdwapi.h"
31 #include "miscadmin.h"
32 #include "lib/bipartite_match.h"
33 #include "lib/knapsack.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #ifdef OPTIMIZER_DEBUG
37 #include "nodes/print.h"
38 #endif
39 #include "optimizer/clauses.h"
40 #include "optimizer/cost.h"
41 #include "optimizer/pathnode.h"
42 #include "optimizer/paths.h"
43 #include "optimizer/plancat.h"
44 #include "optimizer/planmain.h"
45 #include "optimizer/planner.h"
46 #include "optimizer/prep.h"
47 #include "optimizer/subselect.h"
48 #include "optimizer/tlist.h"
49 #include "optimizer/var.h"
50 #include "parser/analyze.h"
51 #include "parser/parsetree.h"
52 #include "parser/parse_agg.h"
53 #include "rewrite/rewriteManip.h"
54 #include "storage/dsm_impl.h"
55 #include "utils/rel.h"
56 #include "utils/selfuncs.h"
57 #include "utils/lsyscache.h"
58 #include "utils/syscache.h"
59 
60 
61 /* GUC parameters */
64 
65 /* Hook for plugins to get control in planner() */
67 
68 /* Hook for plugins to get control when grouping_planner() plans upper rels */
70 
71 
72 /* Expression kind codes for preprocess_expression */
/*
 * These values are passed as the "kind" argument of preprocess_expression().
 * Within this file's visible code, EXPRKIND_QUAL selects qual-specific
 * handling (canonicalize_qual and conversion to implicit-AND form), while
 * EXPRKIND_RTFUNC, EXPRKIND_VALUES, EXPRKIND_TABLESAMPLE and
 * EXPRKIND_TABLEFUNC suppress join-alias flattening.  The remaining codes
 * receive the default processing.
 */
73 #define EXPRKIND_QUAL 0
74 #define EXPRKIND_TARGET 1
75 #define EXPRKIND_RTFUNC 2
76 #define EXPRKIND_RTFUNC_LATERAL 3
77 #define EXPRKIND_VALUES 4
78 #define EXPRKIND_VALUES_LATERAL 5
79 #define EXPRKIND_LIMIT 6
80 #define EXPRKIND_APPINFO 7
81 #define EXPRKIND_PHV 8
82 #define EXPRKIND_TABLESAMPLE 9
83 #define EXPRKIND_ARBITER_ELEM 10
84 #define EXPRKIND_TABLEFUNC 11
85 #define EXPRKIND_TABLEFUNC_LATERAL 12
86 
87 /* Passthrough data for standard_qp_callback */
88 typedef struct
89 {
90  List *tlist; /* preprocessed query targetlist */
91  List *activeWindows; /* active windows, if any */
92  List *groupClause; /* overrides parse->groupClause */
94 
95 /*
96  * Data specific to grouping sets
97  */
98 
99 typedef struct
100 {
110 
111 /* Local functions */
112 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
113 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
114 static void inheritance_planner(PlannerInfo *root);
115 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
116  double tuple_fraction);
118 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
119  int *tleref_to_colnum_map);
120 static void preprocess_rowmarks(PlannerInfo *root);
121 static double preprocess_limit(PlannerInfo *root,
122  double tuple_fraction,
123  int64 *offset_est, int64 *count_est);
124 static bool limit_needed(Query *parse);
126 static List *preprocess_groupclause(PlannerInfo *root, List *force);
127 static List *extract_rollup_sets(List *groupingSets);
128 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
129 static void standard_qp_callback(PlannerInfo *root, void *extra);
130 static double get_number_of_groups(PlannerInfo *root,
131  double path_rows,
132  grouping_sets_data *gd);
134  const AggClauseCosts *agg_costs,
135  double dNumGroups);
137  RelOptInfo *input_rel,
138  PathTarget *target,
139  const AggClauseCosts *agg_costs,
140  grouping_sets_data *gd);
141 static void consider_groupingsets_paths(PlannerInfo *root,
142  RelOptInfo *grouped_rel,
143  Path *path,
144  bool is_sorted,
145  bool can_hash,
146  PathTarget *target,
147  grouping_sets_data *gd,
148  const AggClauseCosts *agg_costs,
149  double dNumGroups);
151  RelOptInfo *input_rel,
152  PathTarget *input_target,
153  PathTarget *output_target,
154  List *tlist,
155  WindowFuncLists *wflists,
156  List *activeWindows);
157 static void create_one_window_path(PlannerInfo *root,
158  RelOptInfo *window_rel,
159  Path *path,
160  PathTarget *input_target,
161  PathTarget *output_target,
162  List *tlist,
163  WindowFuncLists *wflists,
164  List *activeWindows);
166  RelOptInfo *input_rel);
168  RelOptInfo *input_rel,
169  PathTarget *target,
170  double limit_tuples);
172  PathTarget *final_target);
174  PathTarget *grouping_target);
175 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
176 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
178  PathTarget *final_target,
179  List *activeWindows);
181  List *tlist);
183  PathTarget *final_target,
184  bool *have_postponed_srfs);
185 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
186  List *targets, List *targets_contain_srfs);
187 
188 
189 /*****************************************************************************
190  *
191  * Query optimizer entry point
192  *
193  * To support loadable plugins that monitor or modify planner behavior,
194  * we provide a hook variable that lets a plugin get control before and
195  * after the standard planning process. The plugin would normally call
196  * standard_planner().
197  *
198  * Note to plugin authors: standard_planner() scribbles on its Query input,
199  * so you'd better copy that data structure if you want to plan more than once.
200  *
201  *****************************************************************************/
/*
 * planner
 *   Public planning entry point.  If a plugin has installed planner_hook,
 *   the hook is called with the same arguments and its result is returned;
 *   otherwise the work is delegated to standard_planner().
 *
 * parse, cursorOptions and boundParams are passed through unchanged to
 * whichever function does the planning.
 */
202 PlannedStmt *
203 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
204 {
 /*
  * Listing line 205 was dropped by the extraction, leaving "result"
  * undeclared.  Its type is dictated by this function's return type.
  */
205  PlannedStmt *result;
206 
207  if (planner_hook)
208  result = (*planner_hook) (parse, cursorOptions, boundParams);
209  else
210  result = standard_planner(parse, cursorOptions, boundParams);
211  return result;
212 }
213 
/*
 * standard_planner
 *   The planner implementation that planner() calls when no planner_hook
 *   is installed.
 *
 * Visible steps, in order: build and initialize the PlannerGlobal state;
 * decide whether parallel mode is acceptable; derive tuple_fraction from
 * the cursor options; run subquery_planner() on the query; pick a Path from
 * the UPPERREL_FINAL rel with get_cheapest_fractional_path() and convert it
 * with create_plan(); add a Material node for scrollable cursors that cannot
 * scan backwards; finalize Params and plan references for the main plan and
 * every subplan; and package everything into a PlannedStmt.
 *
 * NOTE(review): this listing skips several numbered lines inside the
 * function (217, 273-274, 277, 279, 283, 288, 302, 356, 366, 373, 375, 410,
 * 436).  The statements that belong there are not visible, so some
 * expressions below are truncated; the notes in the body mark the places
 * where that matters most.
 */
214 PlannedStmt *
215 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
216 {
 /* NOTE(review): "result" is used below but its declaration is not visible here. */
218  PlannerGlobal *glob;
219  double tuple_fraction;
220  PlannerInfo *root;
221  RelOptInfo *final_rel;
222  Path *best_path;
223  Plan *top_plan;
224  ListCell *lp,
225  *lr;
226 
227  /*
228  * Set up global state for this planner invocation. This data is needed
229  * across all levels of sub-Query that might exist in the given command,
230  * so we keep it in a separate struct that's linked to by each per-Query
231  * PlannerInfo.
232  */
233  glob = makeNode(PlannerGlobal);
234 
235  glob->boundParams = boundParams;
236  glob->subplans = NIL;
237  glob->subroots = NIL;
238  glob->rewindPlanIDs = NULL;
239  glob->finalrtable = NIL;
240  glob->finalrowmarks = NIL;
241  glob->resultRelations = NIL;
242  glob->nonleafResultRelations = NIL;
243  glob->relationOids = NIL;
244  glob->invalItems = NIL;
245  glob->nParamExec = 0;
246  glob->lastPHId = 0;
247  glob->lastRowMarkId = 0;
248  glob->lastPlanNodeId = 0;
249  glob->transientPlan = false;
250  glob->dependsOnRole = false;
251 
252  /*
253  * Assess whether it's feasible to use parallel mode for this query. We
254  * can't do this in a standalone backend, or if the command will try to
255  * modify any data, or if this is a cursor operation, or if GUCs are set
256  * to values that don't permit parallelism, or if parallel-unsafe
257  * functions are present in the query tree.
258  *
259  * For now, we don't try to use parallel mode if we're running inside a
260  * parallel worker. We might eventually be able to relax this
261  * restriction, but for now it seems best not to have parallel workers
262  * trying to create their own parallel workers.
263  *
264  * We can't use parallelism in serializable mode because the predicate
265  * locking code is not parallel-aware. It's not catastrophic if someone
266  * tries to run a parallel plan in serializable mode; it just won't get
267  * any workers and will run serially. But it seems like a good heuristic
268  * to assume that the same serialization level will be in effect at plan
269  * time and execution time, so don't generate a parallel plan if we're in
270  * serializable mode.
271  */
 /*
  * NOTE(review): several conjuncts of this condition, including its final
  * one, are missing from the listing; only the tests shown here are
  * visible.
  */
272  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
275  parse->commandType == CMD_SELECT &&
276  !parse->hasModifyingCTE &&
278  !IsParallelWorker() &&
280  {
281  /* all the cheap tests pass, so scan the query tree */
282  glob->maxParallelHazard = max_parallel_hazard(parse);
 /* NOTE(review): no assignment to glob->parallelModeOK is visible in this branch. */
284  }
285  else
286  {
287  /* skip the query tree scan, just assume it's unsafe */
289  glob->parallelModeOK = false;
290  }
291 
292  /*
293  * glob->parallelModeNeeded should tell us whether it's necessary to
294  * impose the parallel mode restrictions, but we don't actually want to
295  * impose them unless we choose a parallel plan, so it is normally set
296  * only if a parallel plan is chosen (see create_gather_plan). That way,
297  * people who mislabel their functions but don't use parallelism anyway
298  * aren't harmed. But when force_parallel_mode is set, we enable the
299  * restrictions whenever possible for testing purposes.
300  */
301  glob->parallelModeNeeded = glob->parallelModeOK &&
 /* NOTE(review): the right-hand operand of this && is missing from the listing. */
303 
304  /* Determine what fraction of the plan is likely to be scanned */
305  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
306  {
307  /*
308  * We have no real idea how many tuples the user will ultimately FETCH
309  * from a cursor, but it is often the case that he doesn't want 'em
310  * all, or would prefer a fast-start plan anyway so that he can
311  * process some of the tuples sooner. Use a GUC parameter to decide
312  * what fraction to optimize for.
313  */
314  tuple_fraction = cursor_tuple_fraction;
315 
316  /*
317  * We document cursor_tuple_fraction as simply being a fraction, which
318  * means the edge cases 0 and 1 have to be treated specially here. We
319  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
320  */
321  if (tuple_fraction >= 1.0)
322  tuple_fraction = 0.0;
323  else if (tuple_fraction <= 0.0)
324  tuple_fraction = 1e-10;
325  }
326  else
327  {
328  /* Default assumption is we need all the tuples */
329  tuple_fraction = 0.0;
330  }
331 
332  /* primary planning entry point (may recurse for subqueries) */
333  root = subquery_planner(glob, parse, NULL,
334  false, tuple_fraction);
335 
336  /* Select best Path and turn it into a Plan */
337  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
338  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
339 
340  top_plan = create_plan(root, best_path);
341 
342  /*
343  * If creating a plan for a scrollable cursor, make sure it can run
344  * backwards on demand. Add a Material node at the top at need.
345  */
346  if (cursorOptions & CURSOR_OPT_SCROLL)
347  {
348  if (!ExecSupportsBackwardScan(top_plan))
349  top_plan = materialize_finished_plan(top_plan);
350  }
351 
352  /*
353  * Optionally add a Gather node for testing purposes, provided this is
354  * actually a safe thing to do.
355  */
 /*
  * NOTE(review): the condition guarding this block is missing from the
  * listing; as shown, the block would run unconditionally.
  */
357  {
358  Gather *gather = makeNode(Gather);
359 
360  gather->plan.targetlist = top_plan->targetlist;
361  gather->plan.qual = NIL;
362  gather->plan.lefttree = top_plan;
363  gather->plan.righttree = NULL;
364  gather->num_workers = 1;
365  gather->single_copy = true;
367 
368  /*
369  * Ideally we'd use cost_gather here, but setting up dummy path data
370  * to satisfy it doesn't seem much cleaner than knowing what it does.
371  */
 /* NOTE(review): the added cost terms of the next two assignments are missing from the listing. */
372  gather->plan.startup_cost = top_plan->startup_cost +
374  gather->plan.total_cost = top_plan->total_cost +
376  gather->plan.plan_rows = top_plan->plan_rows;
377  gather->plan.plan_width = top_plan->plan_width;
378  gather->plan.parallel_aware = false;
379  gather->plan.parallel_safe = false;
380 
381  /* use parallel mode for parallel plans. */
382  root->glob->parallelModeNeeded = true;
383 
384  top_plan = &gather->plan;
385  }
386 
387  /*
388  * If any Params were generated, run through the plan tree and compute
389  * each plan node's extParam/allParam sets. Ideally we'd merge this into
390  * set_plan_references' tree traversal, but for now it has to be separate
391  * because we need to visit subplans before not after main plan.
392  */
393  if (glob->nParamExec > 0)
394  {
395  Assert(list_length(glob->subplans) == list_length(glob->subroots));
396  forboth(lp, glob->subplans, lr, glob->subroots)
397  {
398  Plan *subplan = (Plan *) lfirst(lp);
399  PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
400 
401  SS_finalize_plan(subroot, subplan);
402  }
403  SS_finalize_plan(root, top_plan);
404  }
405 
406  /* final cleanup of the plan */
407  Assert(glob->finalrtable == NIL);
408  Assert(glob->finalrowmarks == NIL);
409  Assert(glob->resultRelations == NIL);
411  top_plan = set_plan_references(root, top_plan);
412  /* ... and the subplans (both regular subplans and initplans) */
413  Assert(list_length(glob->subplans) == list_length(glob->subroots));
414  forboth(lp, glob->subplans, lr, glob->subroots)
415  {
416  Plan *subplan = (Plan *) lfirst(lp);
417  PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
418 
 /* the list cell is overwritten in place with the finished subplan */
419  lfirst(lp) = set_plan_references(subroot, subplan);
420  }
421 
422  /* build the PlannedStmt result */
423  result = makeNode(PlannedStmt);
424 
425  result->commandType = parse->commandType;
426  result->queryId = parse->queryId;
427  result->hasReturning = (parse->returningList != NIL);
428  result->hasModifyingCTE = parse->hasModifyingCTE;
429  result->canSetTag = parse->canSetTag;
430  result->transientPlan = glob->transientPlan;
431  result->dependsOnRole = glob->dependsOnRole;
432  result->parallelModeNeeded = glob->parallelModeNeeded;
433  result->planTree = top_plan;
434  result->rtable = glob->finalrtable;
435  result->resultRelations = glob->resultRelations;
437  result->subplans = glob->subplans;
438  result->rewindPlanIDs = glob->rewindPlanIDs;
439  result->rowMarks = glob->finalrowmarks;
440  result->relationOids = glob->relationOids;
441  result->invalItems = glob->invalItems;
442  result->nParamExec = glob->nParamExec;
443  /* utilityStmt should be null, but we might as well copy it */
444  result->utilityStmt = parse->utilityStmt;
445  result->stmt_location = parse->stmt_location;
446  result->stmt_len = parse->stmt_len;
447 
448  return result;
449 }
450 
451 
452 /*--------------------
453  * subquery_planner
454  * Invokes the planner on a subquery. We recurse to here for each
455  * sub-SELECT found in the query tree.
456  *
457  * glob is the global state for the current planner run.
458  * parse is the querytree produced by the parser & rewriter.
459  * parent_root is the immediate parent Query's info (NULL at the top level).
460  * hasRecursion is true if this is a recursive WITH query.
461  * tuple_fraction is the fraction of tuples we expect will be retrieved.
462  * tuple_fraction is interpreted as explained for grouping_planner, below.
463  *
464  * Basically, this routine does the stuff that should only be done once
465  * per Query object. It then calls grouping_planner. At one time,
466  * grouping_planner could be invoked recursively on the same Query object;
467  * that's not currently true, but we keep the separation between the two
468  * routines anyway, in case we need it again someday.
469  *
470  * subquery_planner will be called recursively to handle sub-Query nodes
471  * found within the query's expressions and rangetable.
472  *
473  * Returns the PlannerInfo struct ("root") that contains all data generated
474  * while planning the subquery. In particular, the Path(s) attached to
475  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
476  * cheapest way(s) to implement the query. The top level will select the
477  * best Path and pass it through createplan.c to produce a finished Plan.
478  *--------------------
479  */
480 PlannerInfo *
/*
 * NOTE(review): the line carrying the function name and its leading
 * parameters (listing line 481) is missing here.  The body uses "glob" and
 * "parse", which must be declared on that line.
 */
482  PlannerInfo *parent_root,
483  bool hasRecursion, double tuple_fraction)
484 {
 /*
  * NOTE(review): the listing's numbering skips many lines inside this
  * function (e.g. 500, 543, 558, 600, 620, 624, 640, 652-653, 655, 659,
  * 661, 666, 668, 670, 674, 676, 678, 685-686, 699, 701, 719, 726, 733,
  * 814, 838).  Several comments below therefore describe statements that
  * are not visible, and several expressions are truncated.  The most
  * misleading spots are flagged individually.
  */
485  PlannerInfo *root;
486  List *newWithCheckOptions;
487  List *newHaving;
488  bool hasOuterJoins;
489  RelOptInfo *final_rel;
490  ListCell *l;
491 
492  /* Create a PlannerInfo data structure for this subquery */
493  root = makeNode(PlannerInfo);
494  root->parse = parse;
495  root->glob = glob;
496  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
497  root->parent_root = parent_root;
498  root->plan_params = NIL;
499  root->outer_params = NULL;
501  root->init_plans = NIL;
502  root->cte_plan_ids = NIL;
503  root->multiexpr_params = NIL;
504  root->eq_classes = NIL;
505  root->append_rel_list = NIL;
506  root->pcinfo_list = NIL;
507  root->rowMarks = NIL;
508  memset(root->upper_rels, 0, sizeof(root->upper_rels));
509  memset(root->upper_targets, 0, sizeof(root->upper_targets));
510  root->processed_tlist = NIL;
511  root->grouping_map = NULL;
512  root->minmax_aggs = NIL;
513  root->qual_security_level = 0;
514  root->hasInheritedTarget = false;
515  root->hasRecursion = hasRecursion;
516  if (hasRecursion)
517  root->wt_param_id = SS_assign_special_param(root);
518  else
519  root->wt_param_id = -1;
520  root->non_recursive_path = NULL;
521 
522  /*
523  * If there is a WITH list, process each WITH query and build an initplan
524  * SubPlan structure for it.
525  */
526  if (parse->cteList)
527  SS_process_ctes(root);
528 
529  /*
530  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
531  * to transform them into joins. Note that this step does not descend
532  * into subqueries; if we pull up any subqueries below, their SubLinks are
533  * processed just before pulling them up.
534  */
535  if (parse->hasSubLinks)
536  pull_up_sublinks(root);
537 
538  /*
539  * Scan the rangetable for set-returning functions, and inline them if
540  * possible (producing subqueries that might get pulled up next).
541  * Recursion issues here are handled in the same way as for SubLinks.
542  */
 /* NOTE(review): the call described by the comment above is not present in this listing. */
544 
545  /*
546  * Check to see if any subqueries in the jointree can be merged into this
547  * query.
548  */
549  pull_up_subqueries(root);
550 
551  /*
552  * If this is a simple UNION ALL query, flatten it into an appendrel. We
553  * do this now because it requires applying pull_up_subqueries to the leaf
554  * queries of the UNION ALL, which weren't touched above because they
555  * weren't referenced by the jointree (they will be after we do this).
556  */
557  if (parse->setOperations)
 /*
  * NOTE(review): the body of the "if" above is missing from the listing.
  * As shown, the "if" would instead govern the hasJoinRTEs assignment
  * below, which does not match the surrounding comments.
  */
559 
560  /*
561  * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
562  * avoid the expense of doing flatten_join_alias_vars(). Also check for
563  * outer joins --- if none, we can skip reduce_outer_joins(). And check
564  * for LATERAL RTEs, too. This must be done after we have done
565  * pull_up_subqueries(), of course.
566  */
567  root->hasJoinRTEs = false;
568  root->hasLateralRTEs = false;
569  hasOuterJoins = false;
570  foreach(l, parse->rtable)
571  {
572  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
573 
574  if (rte->rtekind == RTE_JOIN)
575  {
576  root->hasJoinRTEs = true;
577  if (IS_OUTER_JOIN(rte->jointype))
578  hasOuterJoins = true;
579  }
580  if (rte->lateral)
581  root->hasLateralRTEs = true;
582  }
583 
584  /*
585  * Preprocess RowMark information. We need to do this after subquery
586  * pullup (so that all non-inherited RTEs are present) and before
587  * inheritance expansion (so that the info is available for
588  * expand_inherited_tables to examine and modify).
589  */
590  preprocess_rowmarks(root);
591 
592  /*
593  * Expand any rangetable entries that are inheritance sets into "append
594  * relations". This can add entries to the rangetable, but they must be
595  * plain base relations not joins, so it's OK (and marginally more
596  * efficient) to do it after checking for join RTEs. We must do it after
597  * pulling up subqueries, else we'd fail to handle inherited tables in
598  * subqueries.
599  */
 /* NOTE(review): the call described by the comment above is not present in this listing. */
601 
602  /*
603  * Set hasHavingQual to remember if HAVING clause is present. Needed
604  * because preprocess_expression will reduce a constant-true condition to
605  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
606  */
607  root->hasHavingQual = (parse->havingQual != NULL);
608 
609  /* Clear this flag; might get set in distribute_qual_to_rels */
610  root->hasPseudoConstantQuals = false;
611 
612  /*
613  * Do expression preprocessing on targetlist and quals, as well as other
614  * random expressions in the querytree. Note that we do not need to
615  * handle sort/group expressions explicitly, because they are actually
616  * part of the targetlist.
617  */
 /*
  * NOTE(review): from here to the RTE loop, many preprocess_expression()
  * calls are missing their final "kind" argument line, and some are missing
  * the call line itself.
  */
618  parse->targetList = (List *)
619  preprocess_expression(root, (Node *) parse->targetList,
621 
622  /* Constant-folding might have removed all set-returning functions */
623  if (parse->hasTargetSRFs)
 /* NOTE(review): the body of the "if" above is missing from the listing. */
625 
626  newWithCheckOptions = NIL;
627  foreach(l, parse->withCheckOptions)
628  {
629  WithCheckOption *wco = (WithCheckOption *) lfirst(l);
630 
631  wco->qual = preprocess_expression(root, wco->qual,
632  EXPRKIND_QUAL);
 /* options whose qual preprocessed to NULL are dropped from the list */
633  if (wco->qual != NULL)
634  newWithCheckOptions = lappend(newWithCheckOptions, wco);
635  }
636  parse->withCheckOptions = newWithCheckOptions;
637 
638  parse->returningList = (List *)
639  preprocess_expression(root, (Node *) parse->returningList,
641 
642  preprocess_qual_conditions(root, (Node *) parse->jointree);
643 
644  parse->havingQual = preprocess_expression(root, parse->havingQual,
645  EXPRKIND_QUAL);
646 
647  foreach(l, parse->windowClause)
648  {
649  WindowClause *wc = (WindowClause *) lfirst(l);
650 
651  /* partitionClause/orderClause are sort/group expressions */
654  wc->endOffset = preprocess_expression(root, wc->endOffset,
656  }
657 
658  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
660  parse->limitCount = preprocess_expression(root, parse->limitCount,
662 
663  if (parse->onConflict)
664  {
665  parse->onConflict->arbiterElems = (List *)
667  (Node *) parse->onConflict->arbiterElems,
669  parse->onConflict->arbiterWhere =
671  parse->onConflict->arbiterWhere,
672  EXPRKIND_QUAL);
673  parse->onConflict->onConflictSet = (List *)
675  (Node *) parse->onConflict->onConflictSet,
677  parse->onConflict->onConflictWhere =
679  parse->onConflict->onConflictWhere,
680  EXPRKIND_QUAL);
681  /* exclRelTlist contains only Vars, so no preprocessing needed */
682  }
683 
684  root->append_rel_list = (List *)
 /* NOTE(review): the right-hand side of the assignment above is missing from the listing. */
687 
688  /* Also need to preprocess expressions within RTEs */
689  foreach(l, parse->rtable)
690  {
691  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
692  int kind;
693  ListCell *lcsq;
694 
695  if (rte->rtekind == RTE_RELATION)
696  {
697  if (rte->tablesample)
698  rte->tablesample = (TableSampleClause *)
700  (Node *) rte->tablesample,
702  }
703  else if (rte->rtekind == RTE_SUBQUERY)
704  {
705  /*
706  * We don't want to do all preprocessing yet on the subquery's
707  * expressions, since that will happen when we plan it. But if it
708  * contains any join aliases of our level, those have to get
709  * expanded now, because planning of the subquery won't do it.
710  * That's only possible if the subquery is LATERAL.
711  */
712  if (rte->lateral && root->hasJoinRTEs)
713  rte->subquery = (Query *)
714  flatten_join_alias_vars(root, (Node *) rte->subquery);
715  }
716  else if (rte->rtekind == RTE_FUNCTION)
717  {
718  /* Preprocess the function expression(s) fully */
 /*
  * NOTE(review): in this and the next two branches, the statement that
  * assigns "kind" is missing from the listing, so as shown "kind" is read
  * without a visible assignment.
  */
720  rte->functions = (List *)
721  preprocess_expression(root, (Node *) rte->functions, kind);
722  }
723  else if (rte->rtekind == RTE_TABLEFUNC)
724  {
725  /* Preprocess the function expression(s) fully */
727  rte->tablefunc = (TableFunc *)
728  preprocess_expression(root, (Node *) rte->tablefunc, kind);
729  }
730  else if (rte->rtekind == RTE_VALUES)
731  {
732  /* Preprocess the values lists fully */
734  rte->values_lists = (List *)
735  preprocess_expression(root, (Node *) rte->values_lists, kind);
736  }
737 
738  /*
739  * Process each element of the securityQuals list as if it were a
740  * separate qual expression (as indeed it is). We need to do it this
741  * way to get proper canonicalization of AND/OR structure. Note that
742  * this converts each element into an implicit-AND sublist.
743  */
744  foreach(lcsq, rte->securityQuals)
745  {
746  lfirst(lcsq) = preprocess_expression(root,
747  (Node *) lfirst(lcsq),
748  EXPRKIND_QUAL);
749  }
750  }
751 
752  /*
753  * In some cases we may want to transfer a HAVING clause into WHERE. We
754  * cannot do so if the HAVING clause contains aggregates (obviously) or
755  * volatile functions (since a HAVING clause is supposed to be executed
756  * only once per group). We also can't do this if there are any nonempty
757  * grouping sets; moving such a clause into WHERE would potentially change
758  * the results, if any referenced column isn't present in all the grouping
759  * sets. (If there are only empty grouping sets, then the HAVING clause
760  * must be degenerate as discussed below.)
761  *
762  * Also, it may be that the clause is so expensive to execute that we're
763  * better off doing it only once per group, despite the loss of
764  * selectivity. This is hard to estimate short of doing the entire
765  * planning process twice, so we use a heuristic: clauses containing
766  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
767  * clause into WHERE, in hopes of eliminating tuples before aggregation
768  * instead of after.
769  *
770  * If the query has explicit grouping then we can simply move such a
771  * clause into WHERE; any group that fails the clause will not be in the
772  * output because none of its tuples will reach the grouping or
773  * aggregation stage. Otherwise we must have a degenerate (variable-free)
774  * HAVING clause, which we put in WHERE so that query_planner() can use it
775  * in a gating Result node, but also keep in HAVING to ensure that we
776  * don't emit a bogus aggregated row. (This could be done better, but it
777  * seems not worth optimizing.)
778  *
779  * Note that both havingQual and parse->jointree->quals are in
780  * implicitly-ANDed-list form at this point, even though they are declared
781  * as Node *.
782  */
783  newHaving = NIL;
784  foreach(l, (List *) parse->havingQual)
785  {
786  Node *havingclause = (Node *) lfirst(l);
787 
788  if ((parse->groupClause && parse->groupingSets) ||
789  contain_agg_clause(havingclause) ||
790  contain_volatile_functions(havingclause) ||
791  contain_subplans(havingclause))
792  {
793  /* keep it in HAVING */
794  newHaving = lappend(newHaving, havingclause);
795  }
796  else if (parse->groupClause && !parse->groupingSets)
797  {
798  /* move it to WHERE */
799  parse->jointree->quals = (Node *)
800  lappend((List *) parse->jointree->quals, havingclause);
801  }
802  else
803  {
804  /* put a copy in WHERE, keep it in HAVING */
805  parse->jointree->quals = (Node *)
806  lappend((List *) parse->jointree->quals,
807  copyObject(havingclause));
808  newHaving = lappend(newHaving, havingclause);
809  }
810  }
811  parse->havingQual = (Node *) newHaving;
812 
813  /* Remove any redundant GROUP BY columns */
 /* NOTE(review): the call described by the comment above is not present in this listing. */
815 
816  /*
817  * If we have any outer joins, try to reduce them to plain inner joins.
818  * This step is most easily done after we've done expression
819  * preprocessing.
820  */
821  if (hasOuterJoins)
822  reduce_outer_joins(root);
823 
824  /*
825  * Do the main planning. If we have an inherited target relation, that
826  * needs special processing, else go straight to grouping_planner.
827  */
828  if (parse->resultRelation &&
829  rt_fetch(parse->resultRelation, parse->rtable)->inh)
830  inheritance_planner(root);
831  else
832  grouping_planner(root, false, tuple_fraction);
833 
834  /*
835  * Capture the set of outer-level param IDs we have access to, for use in
836  * extParam/allParam calculations later.
837  */
 /* NOTE(review): the call described by the comment above is not present in this listing. */
839 
840  /*
841  * If any initPlans were created in this query level, adjust the surviving
842  * Paths' costs and parallel-safety flags to account for them. The
843  * initPlans won't actually get attached to the plan tree till
844  * create_plan() runs, but we must include their effects now.
845  */
846  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
847  SS_charge_for_initplans(root, final_rel);
848 
849  /*
850  * Make sure we've identified the cheapest Path for the final rel. (By
851  * doing this here not in grouping_planner, we include initPlan costs in
852  * the decision, though it's unlikely that will change anything.)
853  */
854  set_cheapest(final_rel);
855 
856  return root;
857 }
858 
859 /*
860  * preprocess_expression
861  * Do subquery_planner's preprocessing work for an expression,
862  * which can be a targetlist, a WHERE clause (including JOIN/ON
863  * conditions), a HAVING clause, or a few other things.
 *
 * root is the planner state for the query level that owns the expression;
 * its hasJoinRTEs, parse->hasSubLinks and query_level fields decide which
 * steps run.
 * expr is the expression tree to process; NULL is returned unchanged.
 * kind is one of the EXPRKIND_* codes.  EXPRKIND_QUAL additionally gets
 * canonicalize_qual() and conversion to implicit-AND form; the RTFUNC,
 * VALUES, TABLESAMPLE and TABLEFUNC kinds skip join-alias flattening.
 *
 * Returns the processed expression.  For EXPRKIND_QUAL the result is in
 * implicit-AND format.  The order of the steps below is significant; see
 * the comments on the individual steps.
864  */
865 static Node *
866 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
867 {
868  /*
869  * Fall out quickly if expression is empty. This occurs often enough to
870  * be worth checking. Note that null->null is the correct conversion for
871  * implicit-AND result format, too.
872  */
873  if (expr == NULL)
874  return NULL;
875 
876  /*
877  * If the query has any join RTEs, replace join alias variables with
878  * base-relation variables. We must do this before sublink processing,
879  * else sublinks expanded out from join aliases would not get processed.
880  * We can skip it in non-lateral RTE functions, VALUES lists,
881  * TABLESAMPLE clauses, and EXPRKIND_TABLEFUNC expressions, however,
882  * since they can't contain any Vars of the current query level.
883  */
884  if (root->hasJoinRTEs &&
885  !(kind == EXPRKIND_RTFUNC ||
886  kind == EXPRKIND_VALUES ||
887  kind == EXPRKIND_TABLESAMPLE ||
888  kind == EXPRKIND_TABLEFUNC))
889  expr = flatten_join_alias_vars(root, expr);
890 
891  /*
892  * Simplify constant expressions.
893  *
894  * Note: an essential effect of this is to convert named-argument function
895  * calls to positional notation and insert the current actual values of
896  * any default arguments for functions. To ensure that happens, we *must*
897  * process all expressions here. Previous PG versions sometimes skipped
898  * const-simplification if it didn't seem worth the trouble, but we can't
899  * do that anymore.
900  *
901  * Note: this also flattens nested AND and OR expressions into N-argument
902  * form. All processing of a qual expression after this point must be
903  * careful to maintain AND/OR flatness --- that is, do not generate a tree
904  * with AND directly under AND, nor OR directly under OR.
905  */
906  expr = eval_const_expressions(root, expr);
907 
908  /*
909  * If it's a qual or havingQual, canonicalize it.
910  */
911  if (kind == EXPRKIND_QUAL)
912  {
913  expr = (Node *) canonicalize_qual((Expr *) expr);
914 
915 #ifdef OPTIMIZER_DEBUG
916  printf("After canonicalize_qual()\n");
917  pprint(expr);
918 #endif
919  }
920 
921  /* Expand SubLinks to SubPlans */
922  if (root->parse->hasSubLinks)
923  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
924 
925  /*
926  * XXX do not insert anything here unless you have grokked the comments in
927  * SS_replace_correlation_vars ...
928  */
929 
930  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
 /* query_level is 1 at the top level, so this only runs for subqueries */
931  if (root->query_level > 1)
932  expr = SS_replace_correlation_vars(root, expr);
933 
934  /*
935  * If it's a qual or havingQual, convert it to implicit-AND format. (We
936  * don't want to do this before eval_const_expressions, since the latter
937  * would be unable to simplify a top-level AND correctly. Also,
938  * SS_process_sublinks expects explicit-AND format.)
939  */
940  if (kind == EXPRKIND_QUAL)
941  expr = (Node *) make_ands_implicit((Expr *) expr);
942 
943  return expr;
944 }
945 
946 /*
947  * preprocess_qual_conditions
948  * Recursively scan the query's jointree and do subquery_planner's
949  * preprocessing work on each qual condition found therein.
 *
 * Dispatches on the node type of jtnode:
 *   - NULL: returns immediately, nothing to process.
 *   - RangeTblRef: a leaf with no qual attached; nothing to do.
 *   - FromExpr / JoinExpr: handled in their own branches below.
 *   - anything else: reported via elog(ERROR) with the node tag.
 *
 * Returns nothing.
950  */
951 static void
/* NOTE(review): listing line 952 (function name and parameter list) is
 * missing from this copy; the body refers to a parameter named jtnode. */
953 {
954  if (jtnode == NULL)
955  return;
956  if (IsA(jtnode, RangeTblRef))
957  {
958  /* nothing to do here */
959  }
960  else if (IsA(jtnode, FromExpr))
961  {
962  FromExpr *f = (FromExpr *) jtnode;
963  ListCell *l;
964 
965  foreach(l, f->fromlist)
 /* NOTE(review): listing lines 966 and 968 are missing here, so the loop
  * body and the statement that followed the loop are not visible. */
967 
969  }
970  else if (IsA(jtnode, JoinExpr))
971  {
972  JoinExpr *j = (JoinExpr *) jtnode;
973 
 /* NOTE(review): listing lines 974, 975 and 977 are missing here, so the
  * statements that process this JoinExpr are not visible. */
976 
978  }
979  else
980  elog(ERROR, "unrecognized node type: %d",
981  (int) nodeTag(jtnode));
982 }
983 
984 /*
985  * preprocess_phv_expression
986  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
987  *
988  * If a LATERAL subquery references an output of another subquery, and that
989  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
990  * join, then we'll push the PlaceHolderVar expression down into the subquery
991  * and later pull it back up during find_lateral_references, which runs after
992  * subquery_planner has preprocessed all the expressions that were in the
993  * current query level to start with. So we need to preprocess it then.
 *
 * This is a thin wrapper: it hands expr to preprocess_expression with kind
 * EXPRKIND_PHV and returns the result cast back to Expr *.
994  */
995 Expr *
/* NOTE(review): listing line 996 (function name and parameter list) is
 * missing from this copy; the body uses parameters named root and expr. */
997 {
998  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
999 }
1000 
1001 /*
1002  * inheritance_planner
1003  * Generate Paths in the case where the result relation is an
1004  * inheritance set.
1005  *
1006  * We have to handle this case differently from cases where a source relation
1007  * is an inheritance set. Source inheritance is expanded at the bottom of the
1008  * plan tree (see allpaths.c), but target inheritance has to be expanded at
1009  * the top. The reason is that for UPDATE, each target relation needs a
1010  * different targetlist matching its own column set. Fortunately,
1011  * the UPDATE/DELETE target can never be the nullable side of an outer join,
1012  * so it's OK to generate the plan this way.
1013  *
 * Outline of the visible body: for every AppendRelInfo whose parent is the
 * query's result relation, a working copy of the PlannerInfo is made and
 * planned with grouping_planner(); the cheapest-total path of each child
 * that is not a dummy path is collected, and finally one ModifyTable path
 * covering all collected children is added to the final rel. If no child
 * survives, the final rel is simply marked dummy.
 *
1014  * Returns nothing; the useful output is in the Paths we attach to
1015  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1016  *
1017  * Note that we have not done set_cheapest() on the final rel; it's convenient
1018  * to leave this to the caller.
1019  */
1020 static void
/* NOTE(review): listing line 1021 (function name and parameter list) is
 * missing from this copy; the body uses a parameter named root. */
1022 {
1023  Query *parse = root->parse;
1024  int parentRTindex = parse->resultRelation;
1025  Bitmapset *subqueryRTindexes;
1026  Bitmapset *modifiableARIindexes;
1027  int nominalRelation = -1;
1028  List *final_rtable = NIL;
1029  int save_rel_array_size = 0;
1030  RelOptInfo **save_rel_array = NULL;
1031  List *subpaths = NIL;
1032  List *subroots = NIL;
1033  List *resultRelations = NIL;
1034  List *withCheckOptionLists = NIL;
1035  List *returningLists = NIL;
1036  List *rowMarks;
1037  RelOptInfo *final_rel;
1038  ListCell *lc;
1039  Index rti;
1040  RangeTblEntry *parent_rte;
1041  List *partitioned_rels = NIL;
1042 
1043  Assert(parse->commandType != CMD_INSERT);
1044 
1045  /*
1046  * We generate a modified instance of the original Query for each target
1047  * relation, plan that, and put all the plans into a list that will be
1048  * controlled by a single ModifyTable node. All the instances share the
1049  * same rangetable, but each instance must have its own set of subquery
1050  * RTEs within the finished rangetable because (1) they are likely to get
1051  * scribbled on during planning, and (2) it's not inconceivable that
1052  * subqueries could get planned differently in different cases. We need
1053  * not create duplicate copies of other RTE kinds, in particular not the
1054  * target relations, because they don't have either of those issues. Not
1055  * having to duplicate the target relations is important because doing so
1056  * (1) would result in a rangetable of length O(N^2) for N targets, with
1057  * at least O(N^3) work expended here; and (2) would greatly complicate
1058  * management of the rowMarks list.
1059  *
1060  * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1061  */
1062  subqueryRTindexes = NULL;
1063  rti = 1;
1064  foreach(lc, parse->rtable)
1065  {
1066  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1067 
1068  if (rte->rtekind == RTE_SUBQUERY)
1069  subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1070  rti++;
1071  }
1072 
1073  /*
1074  * Next, we want to identify which AppendRelInfo items contain references
1075  * to any of the aforesaid subquery RTEs. These items will need to be
1076  * copied and modified to adjust their subquery references; whereas the
1077  * other ones need not be touched. It's worth being tense over this
1078  * because we can usually avoid processing most of the AppendRelInfo
1079  * items, thereby saving O(N^2) space and time when the target is a large
1080  * inheritance tree. We can identify AppendRelInfo items by their
1081  * child_relid, since that should be unique within the list.
1082  */
1083  modifiableARIindexes = NULL;
1084  if (subqueryRTindexes != NULL)
1085  {
1086  foreach(lc, root->append_rel_list)
1087  {
1088  AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1089 
1090  if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1091  bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
 /* NOTE(review): listing line 1092 is missing here; it held the start of
  * the third test whose final argument is subqueryRTindexes. */
1093  subqueryRTindexes))
1094  modifiableARIindexes = bms_add_member(modifiableARIindexes,
1095  appinfo->child_relid);
1096  }
1097  }
1098 
1099  /*
1100  * If the parent RTE is a partitioned table, we should use that as the
1101  * nominal relation, because the RTEs added for partitioned tables
1102  * (including the root parent) as child members of the inheritance set
1103  * do not appear anywhere else in the plan. The situation is exactly
1104  * the opposite in the case of non-partitioned inheritance parent as
1105  * described below.
1106  */
1107  parent_rte = rt_fetch(parentRTindex, root->parse->rtable);
1108  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1109  nominalRelation = parentRTindex;
1110 
1111  /*
1112  * And now we can get on with generating a plan for each child table.
1113  */
1114  foreach(lc, root->append_rel_list)
1115  {
1116  AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1117  PlannerInfo *subroot;
1118  RangeTblEntry *child_rte;
1119  RelOptInfo *sub_final_rel;
1120  Path *subpath;
1121 
1122  /* append_rel_list contains all append rels; ignore others */
1123  if (appinfo->parent_relid != parentRTindex)
1124  continue;
1125 
1126  /*
1127  * We need a working copy of the PlannerInfo so that we can control
1128  * propagation of information back to the main copy.
1129  */
1130  subroot = makeNode(PlannerInfo);
1131  memcpy(subroot, root, sizeof(PlannerInfo));
1132 
1133  /*
1134  * Generate modified query with this rel as target. We first apply
1135  * adjust_appendrel_attrs, which copies the Query and changes
1136  * references to the parent RTE to refer to the current child RTE,
1137  * then fool around with subquery RTEs.
1138  */
1139  subroot->parse = (Query *)
 /* NOTE(review): listing line 1140 is missing here; it held the call
  * (presumably adjust_appendrel_attrs, per the comment above) whose
  * remaining arguments follow. */
1141  (Node *) parse,
1142  appinfo);
1143 
1144  /*
1145  * If there are securityQuals attached to the parent, move them to the
1146  * child rel (they've already been transformed properly for that).
1147  */
1148  parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable);
1149  child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1150  child_rte->securityQuals = parent_rte->securityQuals;
1151  parent_rte->securityQuals = NIL;
1152 
1153  /*
1154  * The rowMarks list might contain references to subquery RTEs, so
1155  * make a copy that we can apply ChangeVarNodes to. (Fortunately, the
1156  * executor doesn't need to see the modified copies --- we can just
1157  * pass it the original rowMarks list.)
1158  */
1159  subroot->rowMarks = copyObject(root->rowMarks);
1160 
1161  /*
1162  * The append_rel_list likewise might contain references to subquery
1163  * RTEs (if any subqueries were flattenable UNION ALLs). So prepare
1164  * to apply ChangeVarNodes to that, too. As explained above, we only
1165  * want to copy items that actually contain such references; the rest
1166  * can just get linked into the subroot's append_rel_list.
1167  *
1168  * If we know there are no such references, we can just use the outer
1169  * append_rel_list unmodified.
1170  */
1171  if (modifiableARIindexes != NULL)
1172  {
1173  ListCell *lc2;
1174 
1175  subroot->append_rel_list = NIL;
1176  foreach(lc2, root->append_rel_list)
1177  {
1178  AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1179 
1180  if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1181  appinfo2 = copyObject(appinfo2);
1182 
1183  subroot->append_rel_list = lappend(subroot->append_rel_list,
1184  appinfo2);
1185  }
1186  }
1187 
1188  /*
1189  * Add placeholders to the child Query's rangetable list to fill the
1190  * RT indexes already reserved for subqueries in previous children.
1191  * These won't be referenced, so there's no need to make them very
1192  * valid-looking.
1193  */
1194  while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1195  subroot->parse->rtable = lappend(subroot->parse->rtable,
 /* NOTE(review): listing line 1196 is missing here; it held the second
  * argument of lappend (the placeholder entry) and the end of the call. */
1197 
1198  /*
1199  * If this isn't the first child Query, generate duplicates of all
1200  * subquery RTEs, and adjust Var numbering to reference the
1201  * duplicates. To simplify the loop logic, we scan the original rtable
1202  * not the copy just made by adjust_appendrel_attrs; that should be OK
1203  * since subquery RTEs couldn't contain any references to the target
1204  * rel.
1205  */
1206  if (final_rtable != NIL && subqueryRTindexes != NULL)
1207  {
1208  ListCell *lr;
1209 
1210  rti = 1;
1211  foreach(lr, parse->rtable)
1212  {
1213  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);
1214 
1215  if (bms_is_member(rti, subqueryRTindexes))
1216  {
1217  Index newrti;
1218 
1219  /*
1220  * The RTE can't contain any references to its own RT
1221  * index, except in its securityQuals, so we can save a
1222  * few cycles by applying ChangeVarNodes to the rest of
1223  * the rangetable before we append the RTE to it.
1224  */
1225  newrti = list_length(subroot->parse->rtable) + 1;
1226  ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1227  ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1228  /* Skip processing unchanging parts of append_rel_list */
1229  if (modifiableARIindexes != NULL)
1230  {
1231  ListCell *lc2;
1232 
1233  foreach(lc2, subroot->append_rel_list)
1234  {
1235  AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1236 
1237  if (bms_is_member(appinfo2->child_relid,
1238  modifiableARIindexes))
1239  ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1240  }
1241  }
1242  rte = copyObject(rte);
1243  ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1244  subroot->parse->rtable = lappend(subroot->parse->rtable,
1245  rte);
1246  }
1247  rti++;
1248  }
1249  }
1250 
1251  /* There shouldn't be any OJ info to translate, as yet */
1252  Assert(subroot->join_info_list == NIL);
1253  /* and we haven't created PlaceHolderInfos, either */
1254  Assert(subroot->placeholder_list == NIL);
1255  /* hack to mark target relation as an inheritance partition */
1256  subroot->hasInheritedTarget = true;
1257 
1258  /* Generate Path(s) for accessing this result relation */
1259  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1260 
1261  /*
1262  * Set the nominal target relation of the ModifyTable node if not
1263  * already done. We use the inheritance parent RTE as the nominal
1264  * target relation if it's a partitioned table (see just above this
1265  * loop). In the non-partitioned parent case, we'll use the first
1266  * child relation (even if it's excluded) as the nominal target
1267  * relation. Because of the way expand_inherited_rtentry works, the
1268  * latter should be the RTE representing the parent table in its role
1269  * as a simple member of the inheritance set.
1270  *
1271  * It would be logically cleaner to *always* use the inheritance
1272  * parent RTE as the nominal relation; but that RTE is not otherwise
1273  * referenced in the plan in the non-partitioned inheritance case.
1274  * Instead the duplicate child RTE created by expand_inherited_rtentry
1275  * is used elsewhere in the plan, so using the original parent RTE
1276  * would give rise to confusing use of multiple aliases in EXPLAIN
1277  * output for what the user will think is the "same" table. OTOH,
1278  * it's not a problem in the partitioned inheritance case, because
1279  * the duplicate child RTE added for the parent does not appear
1280  * anywhere else in the plan tree.
1281  */
1282  if (nominalRelation < 0)
1283  nominalRelation = appinfo->child_relid;
1284 
1285  /*
1286  * Select cheapest path in case there's more than one. We always run
1287  * modification queries to conclusion, so we care only for the
1288  * cheapest-total path.
1289  */
1290  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1291  set_cheapest(sub_final_rel);
1292  subpath = sub_final_rel->cheapest_total_path;
1293 
1294  /*
1295  * If this child rel was excluded by constraint exclusion, exclude it
1296  * from the result plan.
1297  */
1298  if (IS_DUMMY_PATH(subpath))
1299  continue;
1300 
1301  /*
1302  * If this is the first non-excluded child, its post-planning rtable
1303  * becomes the initial contents of final_rtable; otherwise, append
1304  * just its modified subquery RTEs to final_rtable.
1305  */
1306  if (final_rtable == NIL)
1307  final_rtable = subroot->parse->rtable;
1308  else
1309  final_rtable = list_concat(final_rtable,
1310  list_copy_tail(subroot->parse->rtable,
1311  list_length(final_rtable)));
1312 
1313  /*
1314  * We need to collect all the RelOptInfos from all child plans into
1315  * the main PlannerInfo, since setrefs.c will need them. We use the
1316  * last child's simple_rel_array (previous ones are too short), so we
1317  * have to propagate forward the RelOptInfos that were already built
1318  * in previous children.
1319  */
1320  Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1321  for (rti = 1; rti < save_rel_array_size; rti++)
1322  {
1323  RelOptInfo *brel = save_rel_array[rti];
1324 
1325  if (brel)
1326  subroot->simple_rel_array[rti] = brel;
1327  }
1328  save_rel_array_size = subroot->simple_rel_array_size;
1329  save_rel_array = subroot->simple_rel_array;
1330 
1331  /* Make sure any initplans from this rel get into the outer list */
1332  root->init_plans = subroot->init_plans;
1333 
1334  /* Build list of sub-paths */
1335  subpaths = lappend(subpaths, subpath);
1336 
1337  /* Build list of modified subroots, too */
1338  subroots = lappend(subroots, subroot);
1339 
1340  /* Build list of target-relation RT indexes */
1341  resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1342 
1343  /* Build lists of per-relation WCO and RETURNING targetlists */
1344  if (parse->withCheckOptions)
1345  withCheckOptionLists = lappend(withCheckOptionLists,
1346  subroot->parse->withCheckOptions);
1347  if (parse->returningList)
1348  returningLists = lappend(returningLists,
1349  subroot->parse->returningList);
1350 
1351  Assert(!parse->onConflict);
1352  }
1353 
 /*
  * Note that parent_rte is reassigned inside the loop above whenever a
  * child of the target is processed, so at this point it may refer to the
  * parent's entry in the most recently built subroot's rangetable rather
  * than the one fetched from root before the loop. Only relkind is read
  * from it here.
  */
1354  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1355  {
1356  partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
1357  /* The root partitioned table is included as a child rel */
1358  Assert(list_length(partitioned_rels) >= 1);
1359  }
1360 
1361  /* Result path must go into outer query's FINAL upperrel */
1362  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1363 
1364  /*
1365  * We don't currently worry about setting final_rel's consider_parallel
1366  * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1367  * to get control here.
1368  */
1369 
1370  /*
1371  * If we managed to exclude every child rel, return a dummy plan; it
1372  * doesn't even need a ModifyTable node.
1373  */
1374  if (subpaths == NIL)
1375  {
1376  set_dummy_rel_pathlist(final_rel);
1377  return;
1378  }
1379 
1380  /*
1381  * Put back the final adjusted rtable into the master copy of the Query.
1382  * (We mustn't do this if we found no non-excluded children.)
1383  */
1384  parse->rtable = final_rtable;
1385  root->simple_rel_array_size = save_rel_array_size;
1386  root->simple_rel_array = save_rel_array;
1387  /* Must reconstruct master's simple_rte_array, too */
1388  root->simple_rte_array = (RangeTblEntry **)
1389  palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1390  rti = 1;
1391  foreach(lc, final_rtable)
1392  {
1393  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1394 
1395  root->simple_rte_array[rti++] = rte;
1396  }
1397 
1398  /*
1399  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1400  * have dealt with fetching non-locked marked rows, else we need to have
1401  * ModifyTable do that.
1402  */
1403  if (parse->rowMarks)
1404  rowMarks = NIL;
1405  else
1406  rowMarks = root->rowMarks;
1407 
1408  /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1409  add_path(final_rel, (Path *)
1410  create_modifytable_path(root, final_rel,
1411  parse->commandType,
1412  parse->canSetTag,
1413  nominalRelation,
1414  partitioned_rels,
1415  resultRelations,
1416  subpaths,
1417  subroots,
1418  withCheckOptionLists,
1419  returningLists,
1420  rowMarks,
1421  NULL,
1422  SS_assign_special_param(root)));
1423 }
1424 
1425 /*--------------------
1426  * grouping_planner
1427  * Perform planning steps related to grouping, aggregation, etc.
1428  *
1429  * This function adds all required top-level processing to the scan/join
1430  * Path(s) produced by query_planner.
1431  *
1432  * If inheritance_update is true, we're being called from inheritance_planner
1433  * and should not include a ModifyTable step in the resulting Path(s).
1434  * (inheritance_planner will create a single ModifyTable node covering all the
1435  * target tables.)
1436  *
1437  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1438  * tuple_fraction is interpreted as follows:
1439  * 0: expect all tuples to be retrieved (normal case)
1440  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1441  * from the plan to be retrieved
1442  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1443  * expected to be retrieved (ie, a LIMIT specification)
1444  *
1445  * Returns nothing; the useful output is in the Paths we attach to the
1446  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1447  * root->processed_tlist contains the final processed targetlist.
1448  *
1449  * Note that we have not done set_cheapest() on the final rel; it's convenient
1450  * to leave this to the caller.
1451  *--------------------
1452  */
1453 static void
1454 grouping_planner(PlannerInfo *root, bool inheritance_update,
1455  double tuple_fraction)
1456 {
1457  Query *parse = root->parse;
1458  List *tlist = parse->targetList;
1459  int64 offset_est = 0;
1460  int64 count_est = 0;
1461  double limit_tuples = -1.0;
1462  bool have_postponed_srfs = false;
1463  PathTarget *final_target;
1464  List *final_targets;
1465  List *final_targets_contain_srfs;
1466  RelOptInfo *current_rel;
1467  RelOptInfo *final_rel;
1468  ListCell *lc;
1469 
1470  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1471  if (parse->limitCount || parse->limitOffset)
1472  {
1473  tuple_fraction = preprocess_limit(root, tuple_fraction,
1474  &offset_est, &count_est);
1475 
1476  /*
1477  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1478  * estimate the effects of using a bounded sort.
1479  */
1480  if (count_est > 0 && offset_est >= 0)
1481  limit_tuples = (double) count_est + (double) offset_est;
1482  }
1483 
1484  /* Make tuple_fraction accessible to lower-level routines */
1485  root->tuple_fraction = tuple_fraction;
1486 
1487  if (parse->setOperations)
1488  {
1489  /*
1490  * If there's a top-level ORDER BY, assume we have to fetch all the
1491  * tuples. This might be too simplistic given all the hackery below
1492  * to possibly avoid the sort; but the odds of accurate estimates here
1493  * are pretty low anyway. XXX try to get rid of this in favor of
1494  * letting plan_set_operations generate both fast-start and
1495  * cheapest-total paths.
1496  */
1497  if (parse->sortClause)
1498  root->tuple_fraction = 0.0;
1499 
1500  /*
1501  * Construct Paths for set operations. The results will not need any
1502  * work except perhaps a top-level sort and/or LIMIT. Note that any
1503  * special work for recursive unions is the responsibility of
1504  * plan_set_operations.
1505  */
1506  current_rel = plan_set_operations(root);
1507 
1508  /*
1509  * We should not need to call preprocess_targetlist, since we must be
1510  * in a SELECT query node. Instead, use the targetlist returned by
1511  * plan_set_operations (since this tells whether it returned any
1512  * resjunk columns!), and transfer any sort key information from the
1513  * original tlist.
1514  */
1515  Assert(parse->commandType == CMD_SELECT);
1516 
1517  tlist = root->processed_tlist; /* from plan_set_operations */
1518 
1519  /* for safety, copy processed_tlist instead of modifying in-place */
1520  tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1521 
1522  /* Save aside the final decorated tlist */
1523  root->processed_tlist = tlist;
1524 
1525  /* Also extract the PathTarget form of the setop result tlist */
1526  final_target = current_rel->cheapest_total_path->pathtarget;
1527 
1528  /* The setop result tlist couldn't contain any SRFs */
1529  Assert(!parse->hasTargetSRFs);
1530  final_targets = final_targets_contain_srfs = NIL;
1531 
1532  /*
1533  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1534  * checked already, but let's make sure).
1535  */
1536  if (parse->rowMarks)
1537  ereport(ERROR,
1538  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1539  /*------
1540  translator: %s is a SQL row locking clause such as FOR UPDATE */
1541  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1543  linitial(parse->rowMarks))->strength))));
1544 
1545  /*
1546  * Calculate pathkeys that represent result ordering requirements
1547  */
1548  Assert(parse->distinctClause == NIL);
1550  parse->sortClause,
1551  tlist);
1552  }
1553  else
1554  {
1555  /* No set operations, do regular planning */
1556  PathTarget *sort_input_target;
1557  List *sort_input_targets;
1558  List *sort_input_targets_contain_srfs;
1559  PathTarget *grouping_target;
1560  List *grouping_targets;
1561  List *grouping_targets_contain_srfs;
1562  PathTarget *scanjoin_target;
1563  List *scanjoin_targets;
1564  List *scanjoin_targets_contain_srfs;
1565  bool have_grouping;
1566  AggClauseCosts agg_costs;
1567  WindowFuncLists *wflists = NULL;
1568  List *activeWindows = NIL;
1569  grouping_sets_data *gset_data = NULL;
1570  standard_qp_extra qp_extra;
1571 
1572  /* A recursive query should always have setOperations */
1573  Assert(!root->hasRecursion);
1574 
1575  /* Preprocess grouping sets and GROUP BY clause, if any */
1576  if (parse->groupingSets)
1577  {
1578  gset_data = preprocess_grouping_sets(root);
1579  }
1580  else
1581  {
1582  /* Preprocess regular GROUP BY clause, if any */
1583  if (parse->groupClause)
1584  parse->groupClause = preprocess_groupclause(root, NIL);
1585  }
1586 
1587  /* Preprocess targetlist */
1588  tlist = preprocess_targetlist(root, tlist);
1589 
1590  if (parse->onConflict)
1591  parse->onConflict->onConflictSet =
1593  parse->resultRelation,
1594  parse->rtable);
1595 
1596  /*
1597  * We are now done hacking up the query's targetlist. Most of the
1598  * remaining planning work will be done with the PathTarget
1599  * representation of tlists, but save aside the full representation so
1600  * that we can transfer its decoration (resnames etc) to the topmost
1601  * tlist of the finished Plan.
1602  */
1603  root->processed_tlist = tlist;
1604 
1605  /*
1606  * Collect statistics about aggregates for estimating costs, and mark
1607  * all the aggregates with resolved aggtranstypes. We must do this
1608  * before slicing and dicing the tlist into various pathtargets, else
1609  * some copies of the Aggref nodes might escape being marked with the
1610  * correct transtypes.
1611  *
1612  * Note: currently, we do not detect duplicate aggregates here. This
1613  * may result in somewhat-overestimated cost, which is fine for our
1614  * purposes since all Paths will get charged the same. But at some
1615  * point we might wish to do that detection in the planner, rather
1616  * than during executor startup.
1617  */
1618  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1619  if (parse->hasAggs)
1620  {
1621  get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1622  &agg_costs);
1624  &agg_costs);
1625  }
1626 
1627  /*
1628  * Locate any window functions in the tlist. (We don't need to look
1629  * anywhere else, since expressions used in ORDER BY will be in there
1630  * too.) Note that they could all have been eliminated by constant
1631  * folding, in which case we don't need to do any more work.
1632  */
1633  if (parse->hasWindowFuncs)
1634  {
1635  wflists = find_window_functions((Node *) tlist,
1636  list_length(parse->windowClause));
1637  if (wflists->numWindowFuncs > 0)
1638  activeWindows = select_active_windows(root, wflists);
1639  else
1640  parse->hasWindowFuncs = false;
1641  }
1642 
1643  /*
1644  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1645  * adding logic between here and the query_planner() call. Anything
1646  * that is needed in MIN/MAX-optimizable cases will have to be
1647  * duplicated in planagg.c.
1648  */
1649  if (parse->hasAggs)
1650  preprocess_minmax_aggregates(root, tlist);
1651 
1652  /*
1653  * Figure out whether there's a hard limit on the number of rows that
1654  * query_planner's result subplan needs to return. Even if we know a
1655  * hard limit overall, it doesn't apply if the query has any
1656  * grouping/aggregation operations, or SRFs in the tlist.
1657  */
1658  if (parse->groupClause ||
1659  parse->groupingSets ||
1660  parse->distinctClause ||
1661  parse->hasAggs ||
1662  parse->hasWindowFuncs ||
1663  parse->hasTargetSRFs ||
1664  root->hasHavingQual)
1665  root->limit_tuples = -1.0;
1666  else
1667  root->limit_tuples = limit_tuples;
1668 
1669  /* Set up data needed by standard_qp_callback */
1670  qp_extra.tlist = tlist;
1671  qp_extra.activeWindows = activeWindows;
1672  qp_extra.groupClause = (gset_data
1673  ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
1674  : parse->groupClause);
1675 
1676  /*
1677  * Generate the best unsorted and presorted paths for the scan/join
1678  * portion of this Query, ie the processing represented by the
1679  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1680  * We also generate (in standard_qp_callback) pathkey representations
1681  * of the query's sort clause, distinct clause, etc.
1682  */
1683  current_rel = query_planner(root, tlist,
1684  standard_qp_callback, &qp_extra);
1685 
1686  /*
1687  * Convert the query's result tlist into PathTarget format.
1688  *
1689  * Note: it's desirable to not do this till after query_planner(),
1690  * because the target width estimates can use per-Var width numbers
1691  * that were obtained within query_planner().
1692  */
1693  final_target = create_pathtarget(root, tlist);
1694 
1695  /*
1696  * If ORDER BY was given, consider whether we should use a post-sort
1697  * projection, and compute the adjusted target for preceding steps if
1698  * so.
1699  */
1700  if (parse->sortClause)
1701  sort_input_target = make_sort_input_target(root,
1702  final_target,
1703  &have_postponed_srfs);
1704  else
1705  sort_input_target = final_target;
1706 
1707  /*
1708  * If we have window functions to deal with, the output from any
1709  * grouping step needs to be what the window functions want;
1710  * otherwise, it should be sort_input_target.
1711  */
1712  if (activeWindows)
1713  grouping_target = make_window_input_target(root,
1714  final_target,
1715  activeWindows);
1716  else
1717  grouping_target = sort_input_target;
1718 
1719  /*
1720  * If we have grouping or aggregation to do, the topmost scan/join
1721  * plan node must emit what the grouping step wants; otherwise, it
1722  * should emit grouping_target.
1723  */
1724  have_grouping = (parse->groupClause || parse->groupingSets ||
1725  parse->hasAggs || root->hasHavingQual);
1726  if (have_grouping)
1727  scanjoin_target = make_group_input_target(root, final_target);
1728  else
1729  scanjoin_target = grouping_target;
1730 
1731  /*
1732  * If there are any SRFs in the targetlist, we must separate each of
1733  * these PathTargets into SRF-computing and SRF-free targets. Replace
1734  * each of the named targets with a SRF-free version, and remember the
1735  * list of additional projection steps we need to add afterwards.
1736  */
1737  if (parse->hasTargetSRFs)
1738  {
1739  /* final_target doesn't recompute any SRFs in sort_input_target */
1740  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1741  &final_targets,
1742  &final_targets_contain_srfs);
1743  final_target = (PathTarget *) linitial(final_targets);
1744  Assert(!linitial_int(final_targets_contain_srfs));
1745  /* likewise for sort_input_target vs. grouping_target */
1746  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1747  &sort_input_targets,
1748  &sort_input_targets_contain_srfs);
1749  sort_input_target = (PathTarget *) linitial(sort_input_targets);
1750  Assert(!linitial_int(sort_input_targets_contain_srfs));
1751  /* likewise for grouping_target vs. scanjoin_target */
1752  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1753  &grouping_targets,
1754  &grouping_targets_contain_srfs);
1755  grouping_target = (PathTarget *) linitial(grouping_targets);
1756  Assert(!linitial_int(grouping_targets_contain_srfs));
1757  /* scanjoin_target will not have any SRFs precomputed for it */
1758  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1759  &scanjoin_targets,
1760  &scanjoin_targets_contain_srfs);
1761  scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
1762  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1763  }
1764  else
1765  {
1766  /* initialize lists, just to keep compiler quiet */
1767  final_targets = final_targets_contain_srfs = NIL;
1768  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1769  grouping_targets = grouping_targets_contain_srfs = NIL;
1770  scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
1771  }
1772 
1773  /*
1774  * Forcibly apply SRF-free scan/join target to all the Paths for the
1775  * scan/join rel.
1776  *
1777  * In principle we should re-run set_cheapest() here to identify the
1778  * cheapest path, but it seems unlikely that adding the same tlist
1779  * eval costs to all the paths would change that, so we don't bother.
1780  * Instead, just assume that the cheapest-startup and cheapest-total
1781  * paths remain so. (There should be no parameterized paths anymore,
1782  * so we needn't worry about updating cheapest_parameterized_paths.)
1783  */
1784  foreach(lc, current_rel->pathlist)
1785  {
1786  Path *subpath = (Path *) lfirst(lc);
1787  Path *path;
1788 
1789  Assert(subpath->param_info == NULL);
1790  path = apply_projection_to_path(root, current_rel,
1791  subpath, scanjoin_target);
1792  /* If we had to add a Result, path is different from subpath */
1793  if (path != subpath)
1794  {
1795  lfirst(lc) = path;
1796  if (subpath == current_rel->cheapest_startup_path)
1797  current_rel->cheapest_startup_path = path;
1798  if (subpath == current_rel->cheapest_total_path)
1799  current_rel->cheapest_total_path = path;
1800  }
1801  }
1802 
1803  /*
1804  * Upper planning steps which make use of the top scan/join rel's
1805  * partial pathlist will expect partial paths for that rel to produce
1806  * the same output as complete paths ... and we just changed the
1807  * output for the complete paths, so we'll need to do the same thing
1808  * for partial paths. But only parallel-safe expressions can be
1809  * computed by partial paths.
1810  */
1811  if (current_rel->partial_pathlist &&
1812  is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1813  {
1814  /* Apply the scan/join target to each partial path */
1815  foreach(lc, current_rel->partial_pathlist)
1816  {
1817  Path *subpath = (Path *) lfirst(lc);
1818  Path *newpath;
1819 
1820  /* Shouldn't have any parameterized paths anymore */
1821  Assert(subpath->param_info == NULL);
1822 
1823  /*
1824  * Don't use apply_projection_to_path() here, because there
1825  * could be other pointers to these paths, and therefore we
1826  * mustn't modify them in place.
1827  */
1828  newpath = (Path *) create_projection_path(root,
1829  current_rel,
1830  subpath,
1831  scanjoin_target);
1832  lfirst(lc) = newpath;
1833  }
1834  }
1835  else
1836  {
1837  /*
1838  * In the unfortunate event that scanjoin_target is not
1839  * parallel-safe, we can't apply it to the partial paths; in that
1840  * case, we'll need to forget about the partial paths, which
1841  * aren't valid input for upper planning steps.
1842  */
1843  current_rel->partial_pathlist = NIL;
1844  }
1845 
1846  /* Now fix things up if scan/join target contains SRFs */
1847  if (parse->hasTargetSRFs)
1848  adjust_paths_for_srfs(root, current_rel,
1849  scanjoin_targets,
1850  scanjoin_targets_contain_srfs);
1851 
1852  /*
1853  * Save the various upper-rel PathTargets we just computed into
1854  * root->upper_targets[]. The core code doesn't use this, but it
1855  * provides a convenient place for extensions to get at the info. For
1856  * consistency, we save all the intermediate targets, even though some
1857  * of the corresponding upperrels might not be needed for this query.
1858  */
1859  root->upper_targets[UPPERREL_FINAL] = final_target;
1860  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1861  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1862 
1863  /*
1864  * If we have grouping and/or aggregation, consider ways to implement
1865  * that. We build a new upperrel representing the output of this
1866  * phase.
1867  */
1868  if (have_grouping)
1869  {
1870  current_rel = create_grouping_paths(root,
1871  current_rel,
1872  grouping_target,
1873  &agg_costs,
1874  gset_data);
1875  /* Fix things up if grouping_target contains SRFs */
1876  if (parse->hasTargetSRFs)
1877  adjust_paths_for_srfs(root, current_rel,
1878  grouping_targets,
1879  grouping_targets_contain_srfs);
1880  }
1881 
1882  /*
1883  * If we have window functions, consider ways to implement those. We
1884  * build a new upperrel representing the output of this phase.
1885  */
1886  if (activeWindows)
1887  {
1888  current_rel = create_window_paths(root,
1889  current_rel,
1890  grouping_target,
1891  sort_input_target,
1892  tlist,
1893  wflists,
1894  activeWindows);
1895  /* Fix things up if sort_input_target contains SRFs */
1896  if (parse->hasTargetSRFs)
1897  adjust_paths_for_srfs(root, current_rel,
1898  sort_input_targets,
1899  sort_input_targets_contain_srfs);
1900  }
1901 
1902  /*
1903  * If there is a DISTINCT clause, consider ways to implement that. We
1904  * build a new upperrel representing the output of this phase.
1905  */
1906  if (parse->distinctClause)
1907  {
1908  current_rel = create_distinct_paths(root,
1909  current_rel);
1910  }
1911  } /* end of if (setOperations) */
1912 
1913  /*
1914  * If ORDER BY was given, consider ways to implement that, and generate a
1915  * new upperrel containing only paths that emit the correct ordering and
1916  * project the correct final_target. We can apply the original
1917  * limit_tuples limit in sort costing here, but only if there are no
1918  * postponed SRFs.
1919  */
1920  if (parse->sortClause)
1921  {
1922  current_rel = create_ordered_paths(root,
1923  current_rel,
1924  final_target,
1925  have_postponed_srfs ? -1.0 :
1926  limit_tuples);
1927  /* Fix things up if final_target contains SRFs */
1928  if (parse->hasTargetSRFs)
1929  adjust_paths_for_srfs(root, current_rel,
1930  final_targets,
1931  final_targets_contain_srfs);
1932  }
1933 
1934  /*
1935  * Now we are prepared to build the final-output upperrel.
1936  */
1937  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1938 
1939  /*
1940  * If the input rel is marked consider_parallel and there's nothing that's
1941  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1942  * consider_parallel as well. Note that if the query has rowMarks or is
1943  * not a SELECT, consider_parallel will be false for every relation in the
1944  * query.
1945  */
1946  if (current_rel->consider_parallel &&
1947  is_parallel_safe(root, parse->limitOffset) &&
1948  is_parallel_safe(root, parse->limitCount))
1949  final_rel->consider_parallel = true;
1950 
1951  /*
1952  * If the current_rel belongs to a single FDW, so does the final_rel.
1953  */
1954  final_rel->serverid = current_rel->serverid;
1955  final_rel->userid = current_rel->userid;
1956  final_rel->useridiscurrent = current_rel->useridiscurrent;
1957  final_rel->fdwroutine = current_rel->fdwroutine;
1958 
1959  /*
1960  * Generate paths for the final_rel. Insert all surviving paths, with
1961  * LockRows, Limit, and/or ModifyTable steps added if needed.
1962  */
1963  foreach(lc, current_rel->pathlist)
1964  {
1965  Path *path = (Path *) lfirst(lc);
1966 
1967  /*
1968  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1969  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1970  * here. If there are only non-locking rowmarks, they should be
1971  * handled by the ModifyTable node instead. However, root->rowMarks
1972  * is what goes into the LockRows node.)
1973  */
1974  if (parse->rowMarks)
1975  {
1976  path = (Path *) create_lockrows_path(root, final_rel, path,
1977  root->rowMarks,
1978  SS_assign_special_param(root));
1979  }
1980 
1981  /*
1982  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1983  */
1984  if (limit_needed(parse))
1985  {
1986  path = (Path *) create_limit_path(root, final_rel, path,
1987  parse->limitOffset,
1988  parse->limitCount,
1989  offset_est, count_est);
1990  }
1991 
1992  /*
1993  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
1994  * inheritance_planner, add the ModifyTable node.
1995  */
1996  if (parse->commandType != CMD_SELECT && !inheritance_update)
1997  {
1998  List *withCheckOptionLists;
1999  List *returningLists;
2000  List *rowMarks;
2001 
2002  /*
2003  * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2004  * needed.
2005  */
2006  if (parse->withCheckOptions)
2007  withCheckOptionLists = list_make1(parse->withCheckOptions);
2008  else
2009  withCheckOptionLists = NIL;
2010 
2011  if (parse->returningList)
2012  returningLists = list_make1(parse->returningList);
2013  else
2014  returningLists = NIL;
2015 
2016  /*
2017  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2018  * will have dealt with fetching non-locked marked rows, else we
2019  * need to have ModifyTable do that.
2020  */
2021  if (parse->rowMarks)
2022  rowMarks = NIL;
2023  else
2024  rowMarks = root->rowMarks;
2025 
2026  path = (Path *)
2027  create_modifytable_path(root, final_rel,
2028  parse->commandType,
2029  parse->canSetTag,
2030  parse->resultRelation,
2031  NIL,
2033  list_make1(path),
2034  list_make1(root),
2035  withCheckOptionLists,
2036  returningLists,
2037  rowMarks,
2038  parse->onConflict,
2039  SS_assign_special_param(root));
2040  }
2041 
2042  /* And shove it into final_rel */
2043  add_path(final_rel, path);
2044  }
2045 
2046  /*
2047  * If there is an FDW that's responsible for all baserels of the query,
2048  * let it consider adding ForeignPaths.
2049  */
2050  if (final_rel->fdwroutine &&
2051  final_rel->fdwroutine->GetForeignUpperPaths)
2053  current_rel, final_rel);
2054 
2055  /* Let extensions possibly add some more paths */
2057  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2058  current_rel, final_rel);
2059 
2060  /* Note: currently, we leave it to callers to do set_cheapest() */
2061 }
2062 
2063 /*
2064  * Do preprocessing for groupingSets clause and related data. This handles the
2065  * preliminary steps of expanding the grouping sets, organizing them into lists
2066  * of rollups, and preparing annotations which will later be filled in with
2067  * size estimates.
 *
 * Effects visible in this body:
 *  - parse->groupingSets is overwritten with the result of
 *    expand_grouping_sets().
 *  - gd->unhashable_refs / gd->unsortable_refs are consulted to separate
 *    unsortable grouping sets (collected in gd->unsortable_sets) from the
 *    sortable ones, which are turned into RollupData nodes appended to
 *    gd->rollups.
 *  - gd->tleref_to_colnum_map is allocated with maxref + 1 entries, where
 *    maxref is the largest tleSortGroupRef found in parse->groupClause.
 *
 * Returns gd.  Raises ERROR (ERRCODE_FEATURE_NOT_SUPPORTED) when a grouping
 * set that overlaps gd->unsortable_refs also overlaps gd->unhashable_refs.
 *
 * NOTE(review): this copy of the listing has lost several lines (listing
 * lines 2070, 2077, 2099, 2102, 2125, 2203 and 2231).  The first is the
 * line carrying the function name and parameter list; the body uses a
 * parameter called "root".  The others are flagged where they occur below.
 * Restore them from the upstream file before relying on this text.
2068  */
2069 static grouping_sets_data *
2071 {
2072  Query *parse = root->parse;
2073  List *sets;
2074  int maxref = 0;
2075  ListCell *lc;
2076  ListCell *lc_set;
      /*
       * NOTE(review): listing line 2077 is missing here.  "gd" is used below
       * without a visible declaration, so that line presumably declares and
       * allocates it -- confirm against upstream.
       */
2078 
2079  parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2080 
2081  gd->any_hashable = false;
2082  gd->unhashable_refs = NULL;
2083  gd->unsortable_refs = NULL;
2084  gd->unsortable_sets = NIL;
2085 
2086  if (parse->groupClause)
2087  {
      /* This declaration shadows the function-level "lc" inside this block */
2088  ListCell *lc;
2089 
2090  foreach(lc, parse->groupClause)
2091  {
2092  SortGroupClause *gc = lfirst(lc);
2093  Index ref = gc->tleSortGroupRef;
2094 
      /* Track the largest sortgroupref; it sizes the remapping array below */
2095  if (ref > maxref)
2096  maxref = ref;
2097 
2098  if (!gc->hashable)
      /*
       * NOTE(review): the statement guarded by the test above (listing line
       * 2099) is missing; presumably it records "ref" in gd->unhashable_refs
       * -- confirm against upstream.
       */
2100 
2101  if (!OidIsValid(gc->sortop))
      /*
       * NOTE(review): the statement guarded by the test above (listing line
       * 2102) is missing; presumably it records "ref" in gd->unsortable_refs
       * -- confirm against upstream.
       */
2103  }
2104  }
2105 
2106  /* Allocate workspace array for remapping */
      /* maxref + 1 entries, so it can be indexed directly by any ref seen above */
2107  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2108 
2109  /*
2110  * If we have any unsortable sets, we must extract them before trying to
2111  * prepare rollups. Unsortable sets don't go through
2112  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2113  * here.
2114  */
2115  if (!bms_is_empty(gd->unsortable_refs))
2116  {
2117  List *sortable_sets = NIL;
2118 
2119  foreach(lc, parse->groupingSets)
2120  {
2121  List *gset = lfirst(lc);
2122 
2123  if (bms_overlap_list(gd->unsortable_refs, gset))
2124  {
      /*
       * NOTE(review): listing line 2125 is missing here.  "gs" is used just
       * below as a GroupingSetData-like node (it has a "set" field), so that
       * line presumably declares and creates it -- confirm against upstream.
       */
2126 
2127  gs->set = gset;
2128  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2129 
2130  /*
2131  * We must enforce here that an unsortable set is hashable;
2132  * later code assumes this. Parse analysis only checks that
2133  * every individual column is either hashable or sortable.
2134  *
2135  * Note that passing this test doesn't guarantee we can
2136  * generate a plan; there might be other showstoppers.
2137  */
2138  if (bms_overlap_list(gd->unhashable_refs, gset))
2139  ereport(ERROR,
2140  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2141  errmsg("could not implement GROUP BY"),
2142  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2143  }
2144  else
2145  sortable_sets = lappend(sortable_sets, gset);
2146  }
2147 
      /* Only the sortable sets are organized into rollups */
2148  if (sortable_sets)
2149  sets = extract_rollup_sets(sortable_sets);
2150  else
2151  sets = NIL;
2152  }
2153  else
2154  sets = extract_rollup_sets(parse->groupingSets);
2155 
      /* Build one RollupData per list of sets returned by extract_rollup_sets */
2156  foreach(lc_set, sets)
2157  {
2158  List *current_sets = (List *) lfirst(lc_set);
2159  RollupData *rollup = makeNode(RollupData);
2160  GroupingSetData *gs;
2161 
2162  /*
2163  * Reorder the current list of grouping sets into correct prefix
2164  * order. If only one aggregation pass is needed, try to make the
2165  * list match the ORDER BY clause; if more than one pass is needed, we
2166  * don't bother with that.
2167  *
2168  * Note that this reorders the sets from smallest-member-first to
2169  * largest-member-first, and applies the GroupingSetData annotations,
2170  * though the data will be filled in later.
2171  */
2172  current_sets = reorder_grouping_sets(current_sets,
2173  (list_length(sets) == 1
2174  ? parse->sortClause
2175  : NIL));
2176 
2177  /*
2178  * Get the initial (and therefore largest) grouping set.
2179  */
2180  gs = linitial(current_sets);
2181 
2182  /*
2183  * Order the groupClause appropriately. If the first grouping set is
2184  * empty, then the groupClause must also be empty; otherwise we have
2185  * to force the groupClause to match that grouping set's order.
2186  *
2187  * (The first grouping set can be empty even though parse->groupClause
2188  * is not empty only if all non-empty grouping sets are unsortable.
2189  * The groupClauses for hashed grouping sets are built later on.)
2190  */
2191  if (gs->set)
2192  rollup->groupClause = preprocess_groupclause(root, gs->set);
2193  else
2194  rollup->groupClause = NIL;
2195 
2196  /*
2197  * Is it hashable? We pretend empty sets are hashable even though we
2198  * actually force them not to be hashed later. But don't bother if
2199  * there's nothing but empty sets (since in that case we can't hash
2200  * anything).
2201  */
2202  if (gs->set &&
      /*
       * NOTE(review): the second half of this condition (listing line 2203)
       * is missing; presumably it tests gs->set against gd->unhashable_refs
       * -- confirm against upstream.
       */
2204  {
2205  rollup->hashable = true;
2206  gd->any_hashable = true;
2207  }
2208 
2209  /*
2210  * Now that we've pinned down an order for the groupClause for this
2211  * list of grouping sets, we need to remap the entries in the grouping
2212  * sets from sortgrouprefs to plain indices (0-based) into the
2213  * groupClause for this collection of grouping sets. We keep the
2214  * original form for later use, though.
2215  */
2216  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2217  current_sets,
2218  gd->tleref_to_colnum_map);
2219  rollup->gsets_data = current_sets;
2220 
2221  gd->rollups = lappend(gd->rollups, rollup);
2222  }
2223 
2224  if (gd->unsortable_sets)
2225  {
2226  /*
2227  * We have not yet pinned down a groupclause for this, but we will
2228  * need index-based lists for estimation purposes. Construct
2229  * hash_sets_idx based on the entire original groupclause for now.
2230  */
      /*
       * NOTE(review): the start of this statement (listing line 2231) is
       * missing; only its last two arguments survive below.  Per the comment
       * above it presumably assigns hash_sets_idx from a remapping call --
       * confirm against upstream.
       */
2232  gd->unsortable_sets,
2233  gd->tleref_to_colnum_map);
2234  gd->any_hashable = true;
2235  }
2236 
2237  return gd;
2238 }
2239 
2240 /*
2241  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2242  * (without annotation) mapped to indexes into the given groupclause.
 *
 * gsets is a list of GroupingSetData; each one's "set" is read as an integer
 * list of sortgrouprefs.  tleref_to_colnum_map is caller-supplied scratch
 * space indexed by sortgroupref: the entries for every ref appearing in
 * groupClause are overwritten with that clause's 0-based position, and all
 * other entries are left untouched.
 *
 * The result is a new list with one integer list per input grouping set, in
 * the same order as gsets.
 *
 * NOTE(review): the line carrying the function name and its first parameter
 * (listing line 2245) is missing from this copy; the body refers to that
 * parameter as "groupClause".  Restore it from the upstream file.
2243  */
2244 static List *
2246  List *gsets,
2247  int *tleref_to_colnum_map)
2248 {
2249  int ref = 0;
2250  List *result = NIL;
2251  ListCell *lc;
2252 
      /* Pass 1: record each clause's 0-based position under its sortgroupref */
2253  foreach(lc, groupClause)
2254  {
2255  SortGroupClause *gc = lfirst(lc);
2256 
2257  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2258  }
2259 
      /* Pass 2: translate every grouping set through the map just built */
2260  foreach(lc, gsets)
2261  {
2262  List *set = NIL;
2263  ListCell *lc2;
2264  GroupingSetData *gs = lfirst(lc);
2265 
2266  foreach(lc2, gs->set)
2267  {
2268  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2269  }
2270 
      /* An empty grouping set yields NIL, which is still appended */
2271  result = lappend(result, set);
2272  }
2273 
2274  return result;
2275 }
2276 
2277 
2278 
2279 /*
2280  * Detect whether a plan node is a "dummy" plan created when a relation
2281  * is deemed not to need scanning due to constraint exclusion.
2282  *
2283  * Currently, such dummy plans are Result nodes with constant FALSE
2284  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2285  *
2286  * XXX this probably ought to be somewhere else, but not clear where.
 *
 * Returns true only when "plan" is a Result node whose resconstantqual is a
 * one-element list holding a non-null Const whose boolean value is false;
 * every other input yields false.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 2289) is missing from this copy; the body uses a parameter
 * called "plan".  Restore it from the upstream file.
2287  */
2288 bool
2290 {
2291  if (IsA(plan, Result))
2292  {
2293  List *rcqual = (List *) ((Result *) plan)->resconstantqual;
2294 
      /* Exactly one qual is required; an empty or longer list is not "dummy" */
2295  if (list_length(rcqual) == 1)
2296  {
2297  Const *constqual = (Const *) linitial(rcqual);
2298 
      /* The cast above is unchecked, so verify the node type before use */
2299  if (constqual && IsA(constqual, Const))
2300  {
2301  if (!constqual->constisnull &&
2302  !DatumGetBool(constqual->constvalue))
2303  return true;
2304  }
2305  }
2306  }
2307  return false;
2308 }
2309 
2310 /*
2311  * preprocess_rowmarks - set up PlanRowMarks if needed
 *
 * Builds a list of PlanRowMark nodes and stores it in root->rowMarks.  One
 * node is made for each entry of parse->rowMarks that refers to an
 * RTE_RELATION range table entry, followed by one (with strength LCS_NONE)
 * for each remaining relid returned by get_relids_in_jointree(), excluding
 * parse->resultRelation.
 *
 * Returns early, without assigning root->rowMarks, when the query has no
 * rowMarks and is neither an UPDATE nor a DELETE.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 2314) is missing from this copy; the body uses a parameter
 * called "root".  Restore it from the upstream file.
2312  */
2313 static void
2315 {
2316  Query *parse = root->parse;
2317  Bitmapset *rels;
2318  List *prowmarks;
2319  ListCell *l;
2320  int i;
2321 
2322  if (parse->rowMarks)
2323  {
2324  /*
2325  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2326  * grouping, since grouping renders a reference to individual tuple
2327  * CTIDs invalid. This is also checked at parse time, but that's
2328  * insufficient because of rule substitution, query pullup, etc.
2329  */
      /* Only the first RowMarkClause's strength is passed to the check */
2330  CheckSelectLocking(parse, ((RowMarkClause *)
2331  linitial(parse->rowMarks))->strength);
2332  }
2333  else
2334  {
2335  /*
2336  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2337  * UPDATE/SHARE.
2338  */
2339  if (parse->commandType != CMD_UPDATE &&
2340  parse->commandType != CMD_DELETE)
2341  return;
2342  }
2343 
2344  /*
2345  * We need to have rowmarks for all base relations except the target. We
2346  * make a bitmapset of all base rels and then remove the items we don't
2347  * need or have FOR [KEY] UPDATE/SHARE marks for.
2348  */
2349  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2350  if (parse->resultRelation)
2351  rels = bms_del_member(rels, parse->resultRelation);
2352 
2353  /*
2354  * Convert RowMarkClauses to PlanRowMark representation.
2355  */
2356  prowmarks = NIL;
2357  foreach(l, parse->rowMarks)
2358  {
2359  RowMarkClause *rc = (RowMarkClause *) lfirst(l);
2360  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2361  PlanRowMark *newrc;
2362 
2363  /*
2364  * Currently, it is syntactically impossible to have FOR UPDATE et al
2365  * applied to an update/delete target rel. If that ever becomes
2366  * possible, we should drop the target from the PlanRowMark list.
2367  */
2368  Assert(rc->rti != parse->resultRelation);
2369 
2370  /*
2371  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2372  * can't support true locking. Subqueries that got flattened into the
2373  * main query should be ignored completely. Any that didn't will get
2374  * ROW_MARK_COPY items in the next loop.
2375  */
2376  if (rte->rtekind != RTE_RELATION)
2377  continue;
2378 
      /* This rel is handled here, so the second loop must not add it again */
2379  rels = bms_del_member(rels, rc->rti);
2380 
2381  newrc = makeNode(PlanRowMark);
2382  newrc->rti = newrc->prti = rc->rti;
2383  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2384  newrc->markType = select_rowmark_type(rte, rc->strength);
      /* Bitmask with only this mark type's bit set */
2385  newrc->allMarkTypes = (1 << newrc->markType);
2386  newrc->strength = rc->strength;
2387  newrc->waitPolicy = rc->waitPolicy;
2388  newrc->isParent = false;
2389 
2390  prowmarks = lappend(prowmarks, newrc);
2391  }
2392 
2393  /*
2394  * Now, add rowmarks for any non-target, non-locked base relations.
2395  */
      /* i is incremented before use, so it is the 1-based range table index */
2396  i = 0;
2397  foreach(l, parse->rtable)
2398  {
2399  RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
2400  PlanRowMark *newrc;
2401 
2402  i++;
2403  if (!bms_is_member(i, rels))
2404  continue;
2405 
2406  newrc = makeNode(PlanRowMark);
2407  newrc->rti = newrc->prti = i;
2408  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2409  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2410  newrc->allMarkTypes = (1 << newrc->markType);
2411  newrc->strength = LCS_NONE;
2412  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2413  newrc->isParent = false;
2414 
2415  prowmarks = lappend(prowmarks, newrc);
2416  }
2417 
2418  root->rowMarks = prowmarks;
2419 }
2420 
2421 /*
2422  * Select RowMarkType to use for a given table
 *
 * Decision order, as coded below:
 *  - an RTE that is not RTE_RELATION gets ROW_MARK_COPY;
 *  - a foreign table gets whatever the FDW's GetForeignRowMarkType callback
 *    returns, or ROW_MARK_COPY when the FDW does not provide that callback;
 *  - any other relation is mapped from "strength" by the switch.
 *
 * Raises ERROR for a strength value that matches none of the switch cases.
 *
 * NOTE(review): the return-type and name/parameter lines (listing lines
 * 2424-2425) are missing from this copy.  The body uses parameters called
 * "rte" and "strength"; the callers visible in preprocess_rowmarks pass a
 * RangeTblEntry pointer and a lock strength such as LCS_NONE.  Restore the
 * lines from the upstream file.
2423  */
2426 {
2427  if (rte->rtekind != RTE_RELATION)
2428  {
2429  /* If it's not a table at all, use ROW_MARK_COPY */
2430  return ROW_MARK_COPY;
2431  }
2432  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2433  {
2434  /* Let the FDW select the rowmark type, if it wants to */
2435  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2436 
2437  if (fdwroutine->GetForeignRowMarkType != NULL)
2438  return fdwroutine->GetForeignRowMarkType(rte, strength);
2439  /* Otherwise, use ROW_MARK_COPY by default */
2440  return ROW_MARK_COPY;
2441  }
2442  else
2443  {
2444  /* Regular table, apply the appropriate lock type */
      /* Every case returns, so the "break" statements below are never reached */
2445  switch (strength)
2446  {
2447  case LCS_NONE:
2448 
2449  /*
2450  * We don't need a tuple lock, only the ability to re-fetch
2451  * the row.
2452  */
2453  return ROW_MARK_REFERENCE;
2454  break;
2455  case LCS_FORKEYSHARE:
2456  return ROW_MARK_KEYSHARE;
2457  break;
2458  case LCS_FORSHARE:
2459  return ROW_MARK_SHARE;
2460  break;
2461  case LCS_FORNOKEYUPDATE:
2462  return ROW_MARK_NOKEYEXCLUSIVE;
2463  break;
2464  case LCS_FORUPDATE:
2465  return ROW_MARK_EXCLUSIVE;
2466  break;
2467  }
      /* Reached only when strength matched no case above */
2468  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2469  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2470  }
2471 }
2472 
2473 /*
2474  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2475  *
2476  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2477  * results back in *count_est and *offset_est. These variables are set to
2478  * 0 if the corresponding clause is not present, and -1 if it's present
2479  * but we couldn't estimate the value for it. (The "0" convention is OK
2480  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2481  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2482  * usual practice of never estimating less than one row.) These values will
2483  * be passed to create_limit_path, which see if you change this code.
2484  *
2485  * The return value is the suitably adjusted tuple_fraction to use for
2486  * planning the query. This adjustment is not overridable, since it reflects
2487  * plan actions that grouping_planner() will certainly take, not assumptions
2488  * about context.
2489  */
2490 static double
2491 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2492  int64 *offset_est, int64 *count_est)
2493 {
2494  Query *parse = root->parse;
2495  Node *est;
2496  double limit_fraction;
2497 
2498  /* Should not be called unless LIMIT or OFFSET */
2499  Assert(parse->limitCount || parse->limitOffset);
2500 
2501  /*
2502  * Try to obtain the clause values. We use estimate_expression_value
2503  * primarily because it can sometimes do something useful with Params.
2504  */
2505  if (parse->limitCount)
2506  {
2507  est = estimate_expression_value(root, parse->limitCount);
2508  if (est && IsA(est, Const))
2509  {
2510  if (((Const *) est)->constisnull)
2511  {
2512  /* NULL indicates LIMIT ALL, ie, no limit */
2513  *count_est = 0; /* treat as not present */
2514  }
2515  else
2516  {
2517  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2518  if (*count_est <= 0)
2519  *count_est = 1; /* force to at least 1 */
2520  }
2521  }
2522  else
2523  *count_est = -1; /* can't estimate */
2524  }
2525  else
2526  *count_est = 0; /* not present */
2527 
2528  if (parse->limitOffset)
2529  {
2530  est = estimate_expression_value(root, parse->limitOffset);
2531  if (est && IsA(est, Const))
2532  {
2533  if (((Const *) est)->constisnull)
2534  {
2535  /* Treat NULL as no offset; the executor will too */
2536  *offset_est = 0; /* treat as not present */
2537  }
2538  else
2539  {
2540  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2541  if (*offset_est < 0)
2542  *offset_est = 0; /* treat as not present */
2543  }
2544  }
2545  else
2546  *offset_est = -1; /* can't estimate */
2547  }
2548  else
2549  *offset_est = 0; /* not present */
2550 
2551  if (*count_est != 0)
2552  {
2553  /*
2554  * A LIMIT clause limits the absolute number of tuples returned.
2555  * However, if it's not a constant LIMIT then we have to guess; for
2556  * lack of a better idea, assume 10% of the plan's result is wanted.
2557  */
2558  if (*count_est < 0 || *offset_est < 0)
2559  {
2560  /* LIMIT or OFFSET is an expression ... punt ... */
2561  limit_fraction = 0.10;
2562  }
2563  else
2564  {
2565  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2566  limit_fraction = (double) *count_est + (double) *offset_est;
2567  }
2568 
2569  /*
2570  * If we have absolute limits from both caller and LIMIT, use the
2571  * smaller value; likewise if they are both fractional. If one is
2572  * fractional and the other absolute, we can't easily determine which
2573  * is smaller, but we use the heuristic that the absolute will usually
2574  * be smaller.
2575  */
2576  if (tuple_fraction >= 1.0)
2577  {
2578  if (limit_fraction >= 1.0)
2579  {
2580  /* both absolute */
2581  tuple_fraction = Min(tuple_fraction, limit_fraction);
2582  }
2583  else
2584  {
2585  /* caller absolute, limit fractional; use caller's value */
2586  }
2587  }
2588  else if (tuple_fraction > 0.0)
2589  {
2590  if (limit_fraction >= 1.0)
2591  {
2592  /* caller fractional, limit absolute; use limit */
2593  tuple_fraction = limit_fraction;
2594  }
2595  else
2596  {
2597  /* both fractional */
2598  tuple_fraction = Min(tuple_fraction, limit_fraction);
2599  }
2600  }
2601  else
2602  {
2603  /* no info from caller, just use limit */
2604  tuple_fraction = limit_fraction;
2605  }
2606  }
2607  else if (*offset_est != 0 && tuple_fraction > 0.0)
2608  {
2609  /*
2610  * We have an OFFSET but no LIMIT. This acts entirely differently
2611  * from the LIMIT case: here, we need to increase rather than decrease
2612  * the caller's tuple_fraction, because the OFFSET acts to cause more
2613  * tuples to be fetched instead of fewer. This only matters if we got
2614  * a tuple_fraction > 0, however.
2615  *
2616  * As above, use 10% if OFFSET is present but unestimatable.
2617  */
2618  if (*offset_est < 0)
2619  limit_fraction = 0.10;
2620  else
2621  limit_fraction = (double) *offset_est;
2622 
2623  /*
2624  * If we have absolute counts from both caller and OFFSET, add them
2625  * together; likewise if they are both fractional. If one is
2626  * fractional and the other absolute, we want to take the larger, and
2627  * we heuristically assume that's the fractional one.
2628  */
2629  if (tuple_fraction >= 1.0)
2630  {
2631  if (limit_fraction >= 1.0)
2632  {
2633  /* both absolute, so add them together */
2634  tuple_fraction += limit_fraction;
2635  }
2636  else
2637  {
2638  /* caller absolute, limit fractional; use limit */
2639  tuple_fraction = limit_fraction;
2640  }
2641  }
2642  else
2643  {
2644  if (limit_fraction >= 1.0)
2645  {
2646  /* caller fractional, limit absolute; use caller's value */
2647  }
2648  else
2649  {
2650  /* both fractional, so add them together */
2651  tuple_fraction += limit_fraction;
2652  if (tuple_fraction >= 1.0)
2653  tuple_fraction = 0.0; /* assume fetch all */
2654  }
2655  }
2656  }
2657 
2658  return tuple_fraction;
2659 }
2660 
2661 /*
2662  * limit_needed - do we actually need a Limit plan node?
2663  *
2664  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2665  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2666  * locution for an optimization fence. (Because other places in the planner
2667  * merely check whether parse->limitOffset isn't NULL, it will still work as
2668  * an optimization fence --- we're just suppressing unnecessary run-time
2669  * overhead.)
2670  *
2671  * This might look like it could be merged into preprocess_limit, but there's
2672  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2673  * in preprocess_limit it's good enough to consider estimated values.
 *
 * Returns true when parse->limitCount is a non-constant expression or a
 * non-null constant, or when parse->limitOffset is a non-constant
 * expression or a non-null constant other than zero.  Returns false
 * otherwise, including when both clauses are absent.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 2676) is missing from this copy; the body uses a parameter
 * called "parse".  Restore it from the upstream file.
2674  */
2675 static bool
2677 {
2678  Node *node;
2679 
2680  node = parse->limitCount;
2681  if (node)
2682  {
2683  if (IsA(node, Const))
2684  {
2685  /* NULL indicates LIMIT ALL, ie, no limit */
2686  if (!((Const *) node)->constisnull)
2687  return true; /* LIMIT with a constant value */
2688  }
2689  else
2690  return true; /* non-constant LIMIT */
2691  }
2692 
      /* LIMIT is absent or a constant NULL; the answer now depends on OFFSET */
2693  node = parse->limitOffset;
2694  if (node)
2695  {
2696  if (IsA(node, Const))
2697  {
2698  /* Treat NULL as no offset; the executor would too */
2699  if (!((Const *) node)->constisnull)
2700  {
2701  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2702 
      /* Any nonzero constant, negative ones included, needs a Limit node */
2703  if (offset != 0)
2704  return true; /* OFFSET with a nonzero value */
2705  }
2706  }
2707  else
2708  return true; /* non-constant OFFSET */
2709  }
2710 
2711  return false; /* don't need a Limit plan node */
2712 }
2713 
2714 
2715 /*
2716  * remove_useless_groupby_columns
2717  * Remove any columns in the GROUP BY clause that are redundant due to
2718  * being functionally dependent on other GROUP BY columns.
2719  *
2720  * Since some other DBMSes do not allow references to ungrouped columns, it's
2721  * not unusual to find all columns listed in GROUP BY even though listing the
2722  * primary-key columns would be sufficient. Deleting such excess columns
2723  * avoids redundant sorting work, so it's worth doing. When we do this, we
2724  * must mark the plan as dependent on the pkey constraint (compare the
2725  * parser's check_ungrouped_columns() and check_functional_grouping()).
2726  *
2727  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2728  * index as the determining columns. But as with check_functional_grouping(),
2729  * there's currently no way to represent dependency on a NOT NULL constraint,
2730  * so we consider only the pkey for now.
2731  */
2732 static void
2734 {
2735  Query *parse = root->parse;
2736  Bitmapset **groupbyattnos;
2737  Bitmapset **surplusvars;
2738  ListCell *lc;
2739  int relid;
2740 
2741  /* No chance to do anything if there are less than two GROUP BY items */
2742  if (list_length(parse->groupClause) < 2)
2743  return;
2744 
2745  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2746  if (parse->groupingSets)
2747  return;
2748 
2749  /*
2750  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2751  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2752  * that are GROUP BY items.
2753  */
2754  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2755  (list_length(parse->rtable) + 1));
2756  foreach(lc, parse->groupClause)
2757  {
2758  SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2759  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2760  Var *var = (Var *) tle->expr;
2761 
2762  /*
2763  * Ignore non-Vars and Vars from other query levels.
2764  *
2765  * XXX in principle, stable expressions containing Vars could also be
2766  * removed, if all the Vars are functionally dependent on other GROUP
2767  * BY items. But it's not clear that such cases occur often enough to
2768  * be worth troubling over.
2769  */
2770  if (!IsA(var, Var) ||
2771  var->varlevelsup > 0)
2772  continue;
2773 
2774  /* OK, remember we have this Var */
2775  relid = var->varno;
2776  Assert(relid <= list_length(parse->rtable));
2777  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2779  }
2780 
2781  /*
2782  * Consider each relation and see if it is possible to remove some of its
2783  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2784  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2785  * of the column attnos of RTE k that are removable GROUP BY items.
2786  */
2787  surplusvars = NULL; /* don't allocate array unless required */
2788  relid = 0;
2789  foreach(lc, parse->rtable)
2790  {
2791  RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
2792  Bitmapset *relattnos;
2793  Bitmapset *pkattnos;
2794  Oid constraintOid;
2795 
2796  relid++;
2797 
2798  /* Only plain relations could have primary-key constraints */
2799  if (rte->rtekind != RTE_RELATION)
2800  continue;
2801 
2802  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2803  relattnos = groupbyattnos[relid];
2804  if (bms_membership(relattnos) != BMS_MULTIPLE)
2805  continue;
2806 
2807  /*
2808  * Can't remove any columns for this rel if there is no suitable
2809  * (i.e., nondeferrable) primary key constraint.
2810  */
2811  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2812  if (pkattnos == NULL)
2813  continue;
2814 
2815  /*
2816  * If the primary key is a proper subset of relattnos then we have
2817  * some items in the GROUP BY that can be removed.
2818  */
2819  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2820  {
2821  /*
2822  * To easily remember whether we've found anything to do, we don't
2823  * allocate the surplusvars[] array until we find something.
2824  */
2825  if (surplusvars == NULL)
2826  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2827  (list_length(parse->rtable) + 1));
2828 
2829  /* Remember the attnos of the removable columns */
2830  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2831 
2832  /* Also, mark the resulting plan as dependent on this constraint */
2833  parse->constraintDeps = lappend_oid(parse->constraintDeps,
2834  constraintOid);
2835  }
2836  }
2837 
2838  /*
2839  * If we found any surplus Vars, build a new GROUP BY clause without them.
2840  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2841  * markings, but that's harmless.)
2842  */
2843  if (surplusvars != NULL)
2844  {
2845  List *new_groupby = NIL;
2846 
2847  foreach(lc, parse->groupClause)
2848  {
2849  SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2850  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2851  Var *var = (Var *) tle->expr;
2852 
2853  /*
2854  * New list must include non-Vars, outer Vars, and anything not
2855  * marked as surplus.
2856  */
2857  if (!IsA(var, Var) ||
2858  var->varlevelsup > 0 ||
2860  surplusvars[var->varno]))
2861  new_groupby = lappend(new_groupby, sgc);
2862  }
2863 
2864  parse->groupClause = new_groupby;
2865  }
2866 }
2867 
2868 /*
2869  * preprocess_groupclause - do preparatory work on GROUP BY clause
2870  *
2871  * The idea here is to adjust the ordering of the GROUP BY elements
2872  * (which in itself is semantically insignificant) to match ORDER BY,
2873  * thereby allowing a single sort operation to both implement the ORDER BY
2874  * requirement and set up for a Unique step that implements GROUP BY.
2875  *
2876  * In principle it might be interesting to consider other orderings of the
2877  * GROUP BY elements, which could match the sort ordering of other
2878  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2879  * bother with that, though. Hashed grouping will frequently win anyway.
2880  *
2881  * Note: we need no comparable processing of the distinctClause because
2882  * the parser already enforced that that matches ORDER BY.
2883  *
2884  * For grouping sets, the order of items is instead forced to agree with that
2885  * of the grouping set (and items not in the grouping set are skipped). The
2886  * work of sorting the order of grouping set elements to match the ORDER BY if
2887  * possible is done elsewhere.
2888  */
2889 static List *
2891 {
2892  Query *parse = root->parse;
2893  List *new_groupclause = NIL;
2894  bool partial_match;
2895  ListCell *sl;
2896  ListCell *gl;
2897 
2898  /* For grouping sets, we need to force the ordering */
2899  if (force)
2900  {
2901  foreach(sl, force)
2902  {
2903  Index ref = lfirst_int(sl);
2905 
2906  new_groupclause = lappend(new_groupclause, cl);
2907  }
2908 
2909  return new_groupclause;
2910  }
2911 
2912  /* If no ORDER BY, nothing useful to do here */
2913  if (parse->sortClause == NIL)
2914  return parse->groupClause;
2915 
2916  /*
2917  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2918  * items, but only as far as we can make a matching prefix.
2919  *
2920  * This code assumes that the sortClause contains no duplicate items.
2921  */
2922  foreach(sl, parse->sortClause)
2923  {
2924  SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2925 
2926  foreach(gl, parse->groupClause)
2927  {
2928  SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2929 
2930  if (equal(gc, sc))
2931  {
2932  new_groupclause = lappend(new_groupclause, gc);
2933  break;
2934  }
2935  }
2936  if (gl == NULL)
2937  break; /* no match, so stop scanning */
2938  }
2939 
2940  /* Did we match all of the ORDER BY list, or just some of it? */
2941  partial_match = (sl != NULL);
2942 
2943  /* If no match at all, no point in reordering GROUP BY */
2944  if (new_groupclause == NIL)
2945  return parse->groupClause;
2946 
2947  /*
2948  * Add any remaining GROUP BY items to the new list, but only if we were
2949  * able to make a complete match. In other words, we only rearrange the
2950  * GROUP BY list if the result is that one list is a prefix of the other
2951  * --- otherwise there's no possibility of a common sort. Also, give up
2952  * if there are any non-sortable GROUP BY items, since then there's no
2953  * hope anyway.
2954  */
2955  foreach(gl, parse->groupClause)
2956  {
2957  SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2958 
2959  if (list_member_ptr(new_groupclause, gc))
2960  continue; /* it matched an ORDER BY item */
2961  if (partial_match)
2962  return parse->groupClause; /* give up, no common sort possible */
2963  if (!OidIsValid(gc->sortop))
2964  return parse->groupClause; /* give up, GROUP BY can't be sorted */
2965  new_groupclause = lappend(new_groupclause, gc);
2966  }
2967 
2968  /* Success --- install the rearranged GROUP BY list */
2969  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2970  return new_groupclause;
2971 }
2972 
2973 /*
2974  * Extract lists of grouping sets that can be implemented using a single
2975  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2976  *
2977  * Input must be sorted with smallest sets first. Result has each sublist
2978  * sorted with smallest sets first.
2979  *
2980  * We want to produce the absolute minimum possible number of lists here to
2981  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2982  * of finding the minimal partition of a partially-ordered set into chains
2983  * (which is what we need, taking the list of grouping sets as a poset ordered
2984  * by set inclusion) can be mapped to the problem of finding the maximum
2985  * cardinality matching on a bipartite graph, which is solvable in polynomial
2986  * time with a worst case of no worse than O(n^2.5) and usually much
2987  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2988  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2989  * half a second on my modest system even with optimization off and assertions
2990  * on.)
2991  */
2992 static List *
2994 {
2995  int num_sets_raw = list_length(groupingSets);
2996  int num_empty = 0;
2997  int num_sets = 0; /* distinct sets */
2998  int num_chains = 0;
2999  List *result = NIL;
3000  List **results;
3001  List **orig_sets;
3002  Bitmapset **set_masks;
3003  int *chains;
3004  short **adjacency;
3005  short *adjacency_buf;
3007  int i;
3008  int j;
3009  int j_size;
3010  ListCell *lc1 = list_head(groupingSets);
3011  ListCell *lc;
3012 
3013  /*
3014  * Start by stripping out empty sets. The algorithm doesn't require this,
3015  * but the planner currently needs all empty sets to be returned in the
3016  * first list, so we strip them here and add them back after.
3017  */
3018  while (lc1 && lfirst(lc1) == NIL)
3019  {
3020  ++num_empty;
3021  lc1 = lnext(lc1);
3022  }
3023 
3024  /* bail out now if it turns out that all we had were empty sets. */
3025  if (!lc1)
3026  return list_make1(groupingSets);
3027 
3028  /*----------
3029  * We don't strictly need to remove duplicate sets here, but if we don't,
3030  * they tend to become scattered through the result, which is a bit
3031  * confusing (and irritating if we ever decide to optimize them out).
3032  * So we remove them here and add them back after.
3033  *
3034  * For each non-duplicate set, we fill in the following:
3035  *
3036  * orig_sets[i] = list of the original set lists
3037  * set_masks[i] = bitmapset for testing inclusion
3038  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3039  *
3040  * chains[i] will be the result group this set is assigned to.
3041  *
3042  * We index all of these from 1 rather than 0 because it is convenient
3043  * to leave 0 free for the NIL node in the graph algorithm.
3044  *----------
3045  */
3046  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3047  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3048  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3049  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3050 
3051  j_size = 0;
3052  j = 0;
3053  i = 1;
3054 
3055  for_each_cell(lc, lc1)
3056  {
3057  List *candidate = lfirst(lc);
3058  Bitmapset *candidate_set = NULL;
3059  ListCell *lc2;
3060  int dup_of = 0;
3061 
3062  foreach(lc2, candidate)
3063  {
3064  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3065  }
3066 
3067  /* we can only be a dup if we're the same length as a previous set */
3068  if (j_size == list_length(candidate))
3069  {
3070  int k;
3071 
3072  for (k = j; k < i; ++k)
3073  {
3074  if (bms_equal(set_masks[k], candidate_set))
3075  {
3076  dup_of = k;
3077  break;
3078  }
3079  }
3080  }
3081  else if (j_size < list_length(candidate))
3082  {
3083  j_size = list_length(candidate);
3084  j = i;
3085  }
3086 
3087  if (dup_of > 0)
3088  {
3089  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3090  bms_free(candidate_set);
3091  }
3092  else
3093  {
3094  int k;
3095  int n_adj = 0;
3096 
3097  orig_sets[i] = list_make1(candidate);
3098  set_masks[i] = candidate_set;
3099 
3100  /* fill in adjacency list; no need to compare equal-size sets */
3101 
3102  for (k = j - 1; k > 0; --k)
3103  {
3104  if (bms_is_subset(set_masks[k], candidate_set))
3105  adjacency_buf[++n_adj] = k;
3106  }
3107 
3108  if (n_adj > 0)
3109  {
3110  adjacency_buf[0] = n_adj;
3111  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3112  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3113  }
3114  else
3115  adjacency[i] = NULL;
3116 
3117  ++i;
3118  }
3119  }
3120 
3121  num_sets = i - 1;
3122 
3123  /*
3124  * Apply the graph matching algorithm to do the work.
3125  */
3126  state = BipartiteMatch(num_sets, num_sets, adjacency);
3127 
3128  /*
3129  * Now, the state->pair* fields have the info we need to assign sets to
3130  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3131  * pair_vu[v] = u (both will be true, but we check both so that we can do
3132  * it in one pass)
3133  */
3134  chains = palloc0((num_sets + 1) * sizeof(int));
3135 
3136  for (i = 1; i <= num_sets; ++i)
3137  {
3138  int u = state->pair_vu[i];
3139  int v = state->pair_uv[i];
3140 
3141  if (u > 0 && u < i)
3142  chains[i] = chains[u];
3143  else if (v > 0 && v < i)
3144  chains[i] = chains[v];
3145  else
3146  chains[i] = ++num_chains;
3147  }
3148 
3149  /* build result lists. */
3150  results = palloc0((num_chains + 1) * sizeof(List *));
3151 
3152  for (i = 1; i <= num_sets; ++i)
3153  {
3154  int c = chains[i];
3155 
3156  Assert(c > 0);
3157 
3158  results[c] = list_concat(results[c], orig_sets[i]);
3159  }
3160 
3161  /* push any empty sets back on the first list. */
3162  while (num_empty-- > 0)
3163  results[1] = lcons(NIL, results[1]);
3164 
3165  /* make result list */
3166  for (i = 1; i <= num_chains; ++i)
3167  result = lappend(result, results[i]);
3168 
3169  /*
3170  * Free all the things.
3171  *
3172  * (This is over-fussy for small sets but for large sets we could have
3173  * tied up a nontrivial amount of memory.)
3174  */
3175  BipartiteMatchFree(state);
3176  pfree(results);
3177  pfree(chains);
3178  for (i = 1; i <= num_sets; ++i)
3179  if (adjacency[i])
3180  pfree(adjacency[i]);
3181  pfree(adjacency);
3182  pfree(adjacency_buf);
3183  pfree(orig_sets);
3184  for (i = 1; i <= num_sets; ++i)
3185  bms_free(set_masks[i]);
3186  pfree(set_masks);
3187 
3188  return result;
3189 }
3190 
3191 /*
3192  * Reorder the elements of a list of grouping sets such that they have correct
3193  * prefix relationships. Also inserts the GroupingSetData annotations.
3194  *
3195  * The input must be ordered with smallest sets first; the result is returned
3196  * with largest sets first. Note that the result shares no list substructure
3197  * with the input, so it's safe for the caller to modify it later.
3198  *
3199  * If we're passed in a sortclause, we follow its order of columns to the
3200  * extent possible, to minimize the chance that we add unnecessary sorts.
3201  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3202  * gets implemented in one pass.)
3203  */
3204 static List *
3205 reorder_grouping_sets(List *groupingsets, List *sortclause)
3206 {
3207  ListCell *lc;
3208  ListCell *lc2;
3209  List *previous = NIL;
3210  List *result = NIL;
3211 
3212  foreach(lc, groupingsets)
3213  {
3214  List *candidate = lfirst(lc);
3215  List *new_elems = list_difference_int(candidate, previous);
3217 
3218  if (list_length(new_elems) > 0)
3219  {
3220  while (list_length(sortclause) > list_length(previous))
3221  {
3222  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3223  int ref = sc->tleSortGroupRef;
3224 
3225  if (list_member_int(new_elems, ref))
3226  {
3227  previous = lappend_int(previous, ref);
3228  new_elems = list_delete_int(new_elems, ref);
3229  }
3230  else
3231  {
3232  /* diverged from the sortclause; give up on it */
3233  sortclause = NIL;
3234  break;
3235  }
3236  }
3237 
3238  foreach(lc2, new_elems)
3239  {
3240  previous = lappend_int(previous, lfirst_int(lc2));
3241  }
3242  }
3243 
3244  gs->set = list_copy(previous);
3245  result = lcons(gs, result);
3246  list_free(new_elems);
3247  }
3248 
3249  list_free(previous);
3250 
3251  return result;
3252 }
3253 
3254 /*
3255  * Compute query_pathkeys and other pathkeys during plan generation
3256  */
3257 static void
3259 {
3260  Query *parse = root->parse;
3261  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3262  List *tlist = qp_extra->tlist;
3263  List *activeWindows = qp_extra->activeWindows;
3264 
3265  /*
3266  * Calculate pathkeys that represent grouping/ordering requirements. The
3267  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3268  * be, in which case we just leave their pathkeys empty.
3269  */
3270  if (qp_extra->groupClause &&
3271  grouping_is_sortable(qp_extra->groupClause))
3272  root->group_pathkeys =
3274  qp_extra->groupClause,
3275  tlist);
3276  else
3277  root->group_pathkeys = NIL;
3278 
3279  /* We consider only the first (bottom) window in pathkeys logic */
3280  if (activeWindows != NIL)
3281  {
3282  WindowClause *wc = (WindowClause *) linitial(activeWindows);
3283 
3285  wc,
3286  tlist);
3287  }
3288  else
3289  root->window_pathkeys = NIL;
3290 
3291  if (parse->distinctClause &&
3293  root->distinct_pathkeys =
3295  parse->distinctClause,
3296  tlist);
3297  else
3298  root->distinct_pathkeys = NIL;
3299 
3300  root->sort_pathkeys =
3302  parse->sortClause,
3303  tlist);
3304 
3305  /*
3306  * Figure out whether we want a sorted result from query_planner.
3307  *
3308  * If we have a sortable GROUP BY clause, then we want a result sorted
3309  * properly for grouping. Otherwise, if we have window functions to
3310  * evaluate, we try to sort for the first window. Otherwise, if there's a
3311  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3312  * we try to produce output that's sufficiently well sorted for the
3313  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3314  * by the ORDER BY clause.
3315  *
3316  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3317  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3318  * that might just leave us failing to exploit an available sort order at
3319  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3320  * much easier, since we know that the parser ensured that one is a
3321  * superset of the other.
3322  */
3323  if (root->group_pathkeys)
3324  root->query_pathkeys = root->group_pathkeys;
3325  else if (root->window_pathkeys)
3326  root->query_pathkeys = root->window_pathkeys;
3327  else if (list_length(root->distinct_pathkeys) >
3328  list_length(root->sort_pathkeys))
3329  root->query_pathkeys = root->distinct_pathkeys;
3330  else if (root->sort_pathkeys)
3331  root->query_pathkeys = root->sort_pathkeys;
3332  else
3333  root->query_pathkeys = NIL;
3334 }
3335 
3336 /*
3337  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3338  *
3339  * path_rows: number of output rows from scan/join step
3340  * gsets: grouping set data, or NULL if not doing grouping sets
3341  *
3342  * If doing grouping sets, we also annotate the gsets data with the estimates
3343  * for each set and each individual rollup list, with a view to later
3344  * determining whether some combination of them could be hashed instead.
3345  */
3346 static double
3348  double path_rows,
3349  grouping_sets_data *gd)
3350 {
3351  Query *parse = root->parse;
3352  double dNumGroups;
3353 
3354  if (parse->groupClause)
3355  {
3356  List *groupExprs;
3357 
3358  if (parse->groupingSets)
3359  {
3360  /* Add up the estimates for each grouping set */
3361  ListCell *lc;
3362  ListCell *lc2;
3363 
3364  Assert(gd); /* keep Coverity happy */
3365 
3366  dNumGroups = 0;
3367 
3368  foreach(lc, gd->rollups)
3369  {
3370  RollupData *rollup = lfirst(lc);
3371  ListCell *lc;
3372 
3373  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3374  parse->targetList);
3375 
3376  rollup->numGroups = 0.0;
3377 
3378  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3379  {
3380  List *gset = (List *) lfirst(lc);
3381  GroupingSetData *gs = lfirst(lc2);
3382  double numGroups = estimate_num_groups(root,
3383  groupExprs,
3384  path_rows,
3385  &gset);
3386 
3387  gs->numGroups = numGroups;
3388  rollup->numGroups += numGroups;
3389  }
3390 
3391  dNumGroups += rollup->numGroups;
3392  }
3393 
3394  if (gd->hash_sets_idx)
3395  {
3396  ListCell *lc;
3397 
3398  gd->dNumHashGroups = 0;
3399 
3400  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3401  parse->targetList);
3402 
3403  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3404  {
3405  List *gset = (List *) lfirst(lc);
3406  GroupingSetData *gs = lfirst(lc2);
3407  double numGroups = estimate_num_groups(root,
3408  groupExprs,
3409  path_rows,
3410  &gset);
3411 
3412  gs->numGroups = numGroups;
3413  gd->dNumHashGroups += numGroups;
3414  }
3415 
3416  dNumGroups += gd->dNumHashGroups;
3417  }
3418  }
3419  else
3420  {
3421  /* Plain GROUP BY */
3422  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3423  parse->targetList);
3424 
3425  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3426  NULL);
3427  }
3428  }
3429  else if (parse->groupingSets)
3430  {
3431  /* Empty grouping sets ... one result row for each one */
3432  dNumGroups = list_length(parse->groupingSets);
3433  }
3434  else if (parse->hasAggs || root->hasHavingQual)
3435  {
3436  /* Plain aggregation, one result row */
3437  dNumGroups = 1;
3438  }
3439  else
3440  {
3441  /* Not grouping */
3442  dNumGroups = 1;
3443  }
3444 
3445  return dNumGroups;
3446 }
3447 
3448 /*
3449  * estimate_hashagg_tablesize
3450  * estimate the number of bytes that a hash aggregate hashtable will
3451  * require based on the agg_costs, path width and dNumGroups.
3452  *
3453  * XXX this may be over-estimating the size now that hashagg knows to omit
3454  * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3455  * grouping columns not in the hashed set are counted here even though hashagg
3456  * won't store them. Is this a problem?
3457  */
3458 static Size
3460  double dNumGroups)
3461 {
3462  Size hashentrysize;
3463 
3464  /* Estimate per-hash-entry space at tuple width... */
3465  hashentrysize = MAXALIGN(path->pathtarget->width) +
3467 
3468  /* plus space for pass-by-ref transition values... */
3469  hashentrysize += agg_costs->transitionSpace;
3470  /* plus the per-hash-entry overhead */
3471  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3472 
3473  /*
3474  * Note that this disregards the effect of fill-factor and growth policy
3475  * of the hash-table. That's probably ok, given default the default
3476  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3477  * "double-in-size" growth policies here.
3478  */
3479  return hashentrysize * dNumGroups;
3480 }
3481 
3482 /*
3483  * create_grouping_paths
3484  *
3485  * Build a new upperrel containing Paths for grouping and/or aggregation.
3486  *
3487  * input_rel: contains the source-data Paths
3488  * target: the pathtarget for the result Paths to compute
3489  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
 * gd: grouping sets data, or NULL if not doing grouping sets
3493  *
3494  * Note: all Paths in input_rel are expected to return the target computed
3495  * by make_group_input_target.
3496  *
3497  * We need to consider sorted and hashed aggregation in the same function,
3498  * because otherwise (1) it would be harder to throw an appropriate error
3499  * message if neither way works, and (2) we should not allow hashtable size
3500  * considerations to dissuade us from using hashing if sorting is not possible.
3501  */
3502 static RelOptInfo *
3504  RelOptInfo *input_rel,
3505  PathTarget *target,
3506  const AggClauseCosts *agg_costs,
3507  grouping_sets_data *gd)
3508 {
3509  Query *parse = root->parse;
3510  Path *cheapest_path = input_rel->cheapest_total_path;
3511  RelOptInfo *grouped_rel;
3512  PathTarget *partial_grouping_target = NULL;
3513  AggClauseCosts agg_partial_costs; /* parallel only */
3514  AggClauseCosts agg_final_costs; /* parallel only */
3515  Size hashaggtablesize;
3516  double dNumGroups;
3517  double dNumPartialGroups = 0;
3518  bool can_hash;
3519  bool can_sort;
3520  bool try_parallel_aggregation;
3521 
3522  ListCell *lc;
3523 
3524  /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3525  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3526 
3527  /*
3528  * If the input relation is not parallel-safe, then the grouped relation
3529  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3530  * target list and HAVING quals are parallel-safe.
3531  */
3532  if (input_rel->consider_parallel &&
3533  is_parallel_safe(root, (Node *) target->exprs) &&
3534  is_parallel_safe(root, (Node *) parse->havingQual))
3535  grouped_rel->consider_parallel = true;
3536 
3537  /*
3538  * If the input rel belongs to a single FDW, so does the grouped rel.
3539  */
3540  grouped_rel->serverid = input_rel->serverid;
3541  grouped_rel->userid = input_rel->userid;
3542  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3543  grouped_rel->fdwroutine = input_rel->fdwroutine;
3544 
3545  /*
3546  * Check for degenerate grouping.
3547  */
3548  if ((root->hasHavingQual || parse->groupingSets) &&
3549  !parse->hasAggs && parse->groupClause == NIL)
3550  {
3551  /*
3552  * We have a HAVING qual and/or grouping sets, but no aggregates and
3553  * no GROUP BY (which implies that the grouping sets are all empty).
3554  *
3555  * This is a degenerate case in which we are supposed to emit either
3556  * zero or one row for each grouping set depending on whether HAVING
3557  * succeeds. Furthermore, there cannot be any variables in either
3558  * HAVING or the targetlist, so we actually do not need the FROM table
3559  * at all! We can just throw away the plan-so-far and generate a
3560  * Result node. This is a sufficiently unusual corner case that it's
3561  * not worth contorting the structure of this module to avoid having
3562  * to generate the earlier paths in the first place.
3563  */
3564  int nrows = list_length(parse->groupingSets);
3565  Path *path;
3566 
3567  if (nrows > 1)
3568  {
3569  /*
3570  * Doesn't seem worthwhile writing code to cons up a
3571  * generate_series or a values scan to emit multiple rows. Instead
3572  * just make N clones and append them. (With a volatile HAVING
3573  * clause, this means you might get between 0 and N output rows.
3574  * Offhand I think that's desired.)
3575  */
3576  List *paths = NIL;
3577 
3578  while (--nrows >= 0)
3579  {
3580  path = (Path *)
3581  create_result_path(root, grouped_rel,
3582  target,
3583  (List *) parse->havingQual);
3584  paths = lappend(paths, path);
3585  }
3586  path = (Path *)
3587  create_append_path(grouped_rel,
3588  paths,
3589  NULL,
3590  0,
3591  NIL);
3592  path->pathtarget = target;
3593  }
3594  else
3595  {
3596  /* No grouping sets, or just one, so one output row */
3597  path = (Path *)
3598  create_result_path(root, grouped_rel,
3599  target,
3600  (List *) parse->havingQual);
3601  }
3602 
3603  add_path(grouped_rel, path);
3604 
3605  /* No need to consider any other alternatives. */
3606  set_cheapest(grouped_rel);
3607 
3608  return grouped_rel;
3609  }
3610 
3611  /*
3612  * Estimate number of groups.
3613  */
3614  dNumGroups = get_number_of_groups(root,
3615  cheapest_path->rows,
3616  gd);
3617 
3618  /*
3619  * Determine whether it's possible to perform sort-based implementations
3620  * of grouping. (Note that if groupClause is empty,
3621  * grouping_is_sortable() is trivially true, and all the
3622  * pathkeys_contained_in() tests will succeed too, so that we'll consider
3623  * every surviving input path.)
3624  *
3625  * If we have grouping sets, we might be able to sort some but not all of
3626  * them; in this case, we need can_sort to be true as long as we must
3627  * consider any sorted-input plan.
3628  */
3629  can_sort = (gd && gd->rollups != NIL)
3630  || grouping_is_sortable(parse->groupClause);
3631 
3632  /*
3633  * Determine whether we should consider hash-based implementations of
3634  * grouping.
3635  *
3636  * Hashed aggregation only applies if we're grouping. If we have grouping
3637  * sets, some groups might be hashable but others not; in this case we set
3638  * can_hash true as long as there is nothing globally preventing us from
3639  * hashing (and we should therefore consider plans with hashes).
3640  *
3641  * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3642  * aggregates. (Doing so would imply storing *all* the input values in
3643  * the hash table, and/or running many sorts in parallel, either of which
3644  * seems like a certain loser.) We similarly don't support ordered-set
3645  * aggregates in hashed aggregation, but that case is also included in the
3646  * numOrderedAggs count.
3647  *
3648  * Note: grouping_is_hashable() is much more expensive to check than the
3649  * other gating conditions, so we want to do it last.
3650  */
3651  can_hash = (parse->groupClause != NIL &&
3652  agg_costs->numOrderedAggs == 0 &&
3653  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
3654 
3655  /*
3656  * If grouped_rel->consider_parallel is true, then paths that we generate
3657  * for this grouping relation could be run inside of a worker, but that
3658  * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3659  * execution strategy. Figure that out.
3660  */
3661  if (!grouped_rel->consider_parallel)
3662  {
3663  /* Not even parallel-safe. */
3664  try_parallel_aggregation = false;
3665  }
3666  else if (input_rel->partial_pathlist == NIL)
3667  {
3668  /* Nothing to use as input for partial aggregate. */
3669  try_parallel_aggregation = false;
3670  }
3671  else if (!parse->hasAggs && parse->groupClause == NIL)
3672  {
3673  /*
3674  * We don't know how to do parallel aggregation unless we have either
3675  * some aggregates or a grouping clause.
3676  */
3677  try_parallel_aggregation = false;
3678  }
3679  else if (parse->groupingSets)
3680  {
3681  /* We don't know how to do grouping sets in parallel. */
3682  try_parallel_aggregation = false;
3683  }
3684  else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3685  {
3686  /* Insufficient support for partial mode. */
3687  try_parallel_aggregation = false;
3688  }
3689  else
3690  {
3691  /* Everything looks good. */
3692  try_parallel_aggregation = true;
3693  }
3694 
3695  /*
3696  * Before generating paths for grouped_rel, we first generate any possible
3697  * partial paths; that way, later code can easily consider both parallel
3698  * and non-parallel approaches to grouping. Note that the partial paths
3699  * we generate here are also partially aggregated, so simply pushing a
3700  * Gather node on top is insufficient to create a final path, as would be
3701  * the case for a scan/join rel.
3702  */
3703  if (try_parallel_aggregation)
3704  {
3705  Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3706 
3707  /*
3708  * Build target list for partial aggregate paths. These paths cannot
3709  * just emit the same tlist as regular aggregate paths, because (1) we
3710  * must include Vars and Aggrefs needed in HAVING, which might not
3711  * appear in the result tlist, and (2) the Aggrefs must be set in
3712  * partial mode.
3713  */
3714  partial_grouping_target = make_partial_grouping_target(root, target);
3715 
3716  /* Estimate number of partial groups. */
3717  dNumPartialGroups = get_number_of_groups(root,
3718  cheapest_partial_path->rows,
3719  gd);
3720 
3721  /*
3722  * Collect statistics about aggregates for estimating costs of
3723  * performing aggregation in parallel.
3724  */
3725  MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3726  MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3727  if (parse->hasAggs)
3728  {
3729  /* partial phase */
3730  get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
3732  &agg_partial_costs);
3733 
3734  /* final phase */
3735  get_agg_clause_costs(root, (Node *) target->exprs,
3737  &agg_final_costs);
3738  get_agg_clause_costs(root, parse->havingQual,
3740  &agg_final_costs);
3741  }
3742 
3743  if (can_sort)
3744  {
3745  /* This was checked before setting try_parallel_aggregation */
3746  Assert(parse->hasAggs || parse->groupClause);
3747 
3748  /*
3749  * Use any available suitably-sorted path as input, and also
3750  * consider sorting the cheapest partial path.
3751  */
3752  foreach(lc, input_rel->partial_pathlist)
3753  {
3754  Path *path = (Path *) lfirst(lc);
3755  bool is_sorted;
3756 
3757  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3758  path->pathkeys);
3759  if (path == cheapest_partial_path || is_sorted)
3760  {
3761  /* Sort the cheapest partial path, if it isn't already */
3762  if (!is_sorted)
3763  path = (Path *) create_sort_path(root,
3764  grouped_rel,
3765  path,
3766  root->group_pathkeys,
3767  -1.0);
3768 
3769  if (parse->hasAggs)
3770  add_partial_path(grouped_rel, (Path *)
3771  create_agg_path(root,
3772  grouped_rel,
3773  path,
3774  partial_grouping_target,
3775  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3777  parse->groupClause,
3778  NIL,
3779  &agg_partial_costs,
3780  dNumPartialGroups));
3781  else
3782  add_partial_path(grouped_rel, (Path *)
3783  create_group_path(root,
3784  grouped_rel,
3785  path,
3786  partial_grouping_target,
3787  parse->groupClause,
3788  NIL,
3789  dNumPartialGroups));
3790  }
3791  }
3792  }
3793 
3794  if (can_hash)
3795  {
3796  /* Checked above */
3797  Assert(parse->hasAggs || parse->groupClause);
3798 
3799  hashaggtablesize =
3800  estimate_hashagg_tablesize(cheapest_partial_path,
3801  &agg_partial_costs,
3802  dNumPartialGroups);
3803 
3804  /*
3805  * Tentatively produce a partial HashAgg Path, depending on if it
3806  * looks as if the hash table will fit in work_mem.
3807  */
3808  if (hashaggtablesize < work_mem * 1024L)
3809  {
3810  add_partial_path(grouped_rel, (Path *)
3811  create_agg_path(root,
3812  grouped_rel,
3813  cheapest_partial_path,
3814  partial_grouping_target,
3815  AGG_HASHED,
3817  parse->groupClause,
3818  NIL,
3819  &agg_partial_costs,
3820  dNumPartialGroups));
3821  }
3822  }
3823  }
3824 
3825  /* Build final grouping paths */
3826  if (can_sort)
3827  {
3828  /*
3829  * Use any available suitably-sorted path as input, and also consider
3830  * sorting the cheapest-total path.
3831  */
3832  foreach(lc, input_rel->pathlist)
3833  {
3834  Path *path = (Path *) lfirst(lc);
3835  bool is_sorted;
3836 
3837  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3838  path->pathkeys);
3839  if (path == cheapest_path || is_sorted)
3840  {
3841  /* Sort the cheapest-total path if it isn't already sorted */
3842  if (!is_sorted)
3843  path = (Path *) create_sort_path(root,
3844  grouped_rel,
3845  path,
3846  root->group_pathkeys,
3847  -1.0);
3848 
3849  /* Now decide what to stick atop it */
3850  if (parse->groupingSets)
3851  {
3852  consider_groupingsets_paths(root, grouped_rel,
3853  path, true, can_hash, target,
3854  gd, agg_costs, dNumGroups);
3855  }
3856  else if (parse->hasAggs)
3857  {
3858  /*
3859  * We have aggregation, possibly with plain GROUP BY. Make
3860  * an AggPath.
3861  */
3862  add_path(grouped_rel, (Path *)
3863  create_agg_path(root,
3864  grouped_rel,
3865  path,
3866  target,
3867  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3869  parse->groupClause,
3870  (List *) parse->havingQual,
3871  agg_costs,
3872  dNumGroups));
3873  }
3874  else if (parse->groupClause)
3875  {
3876  /*
3877  * We have GROUP BY without aggregation or grouping sets.
3878  * Make a GroupPath.
3879  */
3880  add_path(grouped_rel, (Path *)
3881  create_group_path(root,
3882  grouped_rel,
3883  path,
3884  target,
3885  parse->groupClause,
3886  (List *) parse->havingQual,
3887  dNumGroups));
3888  }
3889  else
3890  {
3891  /* Other cases should have been handled above */
3892  Assert(false);
3893  }
3894  }
3895  }
3896 
3897  /*
3898  * Now generate a complete GroupAgg Path atop of the cheapest partial
3899  * path. We can do this using either Gather or Gather Merge.
3900  */
3901  if (grouped_rel->partial_pathlist)
3902  {
3903  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
3904  double total_groups = path->rows * path->parallel_workers;
3905 
3906  path = (Path *) create_gather_path(root,
3907  grouped_rel,
3908  path,
3909  partial_grouping_target,
3910  NULL,
3911  &total_groups);
3912 
3913  /*
3914  * Since Gather's output is always unsorted, we'll need to sort,
3915  * unless there's no GROUP BY clause or a degenerate (constant)
3916  * one, in which case there will only be a single group.
3917  */
3918  if (root->group_pathkeys)
3919  path = (Path *) create_sort_path(root,
3920  grouped_rel,
3921  path,
3922  root->group_pathkeys,
3923  -1.0);
3924 
3925  if (parse->hasAggs)
3926  add_path(grouped_rel, (Path *)
3927  create_agg_path(root,
3928  grouped_rel,
3929  path,
3930  target,
3931  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3933  parse->groupClause,
3934  (List *) parse->havingQual,
3935  &agg_final_costs,
3936  dNumGroups));
3937  else
3938  add_path(grouped_rel, (Path *)
3939  create_group_path(root,
3940  grouped_rel,
3941  path,
3942  target,
3943  parse->groupClause,
3944  (List *) parse->havingQual,
3945  dNumGroups));
3946 
3947  /*
3948  * The point of using Gather Merge rather than Gather is that it
3949  * can preserve the ordering of the input path, so there's no
3950  * reason to try it unless (1) it's possible to produce more than
3951  * one output row and (2) we want the output path to be ordered.
3952  */
3953  if (parse->groupClause != NIL && root->group_pathkeys != NIL)
3954  {
3955  foreach(lc, grouped_rel->partial_pathlist)
3956  {
3957  Path *subpath = (Path *) lfirst(lc);
3958  Path *gmpath;
3959  double total_groups;
3960 
3961  /*
3962  * It's useful to consider paths that are already properly
3963  * ordered for Gather Merge, because those don't need a
3964  * sort. It's also useful to consider the cheapest path,
3965  * because sorting it in parallel and then doing Gather
3966  * Merge may be better than doing an unordered Gather
3967  * followed by a sort. But there's no point in
3968  * considering non-cheapest paths that aren't already
3969  * sorted correctly.
3970  */
3971  if (path != subpath &&
3973  subpath->pathkeys))
3974  continue;
3975 
3976  total_groups = subpath->rows * subpath->parallel_workers;
3977 
3978  gmpath = (Path *)
3980  grouped_rel,
3981  subpath,
3982  partial_grouping_target,
3983  root->group_pathkeys,
3984  NULL,
3985  &total_groups);
3986 
3987  if (parse->hasAggs)
3988  add_path(grouped_rel, (Path *)
3989  create_agg_path(root,
3990  grouped_rel,
3991  gmpath,
3992  target,
3993  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3995  parse->groupClause,
3996  (List *) parse->havingQual,
3997  &agg_final_costs,
3998  dNumGroups));
3999  else
4000  add_path(grouped_rel, (Path *)
4001  create_group_path(root,
4002  grouped_rel,
4003  gmpath,
4004  target,
4005  parse->groupClause,
4006  (List *) parse->havingQual,
4007  dNumGroups));
4008  }
4009  }
4010  }
4011  }
4012 
4013  if (can_hash)
4014  {
4015  if (parse->groupingSets)
4016  {
4017  /*
4018  * Try for a hash-only groupingsets path over unsorted input.
4019  */
4020  consider_groupingsets_paths(root, grouped_rel,
4021  cheapest_path, false, true, target,
4022  gd, agg_costs, dNumGroups);
4023  }
4024  else
4025  {
4026  hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
4027  agg_costs,
4028  dNumGroups);
4029 
4030  /*
4031  * Provided that the estimated size of the hashtable does not
4032  * exceed work_mem, we'll generate a HashAgg Path, although if we
4033  * were unable to sort above, then we'd better generate a Path, so
4034  * that we at least have one.
4035  */
4036  if (hashaggtablesize < work_mem * 1024L ||
4037  grouped_rel->pathlist == NIL)
4038  {
4039  /*
4040  * We just need an Agg over the cheapest-total input path,
4041  * since input order won't matter.
4042  */
4043  add_path(grouped_rel, (Path *)
4044  create_agg_path(root, grouped_rel,
4045  cheapest_path,
4046  target,
4047  AGG_HASHED,
4049  parse->groupClause,
4050  (List *) parse->havingQual,
4051  agg_costs,
4052  dNumGroups));
4053  }
4054  }
4055 
4056  /*
4057  * Generate a HashAgg Path atop of the cheapest partial path. Once
4058  * again, we'll only do this if it looks as though the hash table
4059  * won't exceed work_mem.
4060  */
4061  if (grouped_rel->partial_pathlist)
4062  {
4063  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4064 
4065  hashaggtablesize = estimate_hashagg_tablesize(path,
4066  &agg_final_costs,
4067  dNumGroups);
4068 
4069  if (hashaggtablesize < work_mem * 1024L)
4070  {
4071  double total_groups = path->rows * path->parallel_workers;
4072 
4073  path = (Path *) create_gather_path(root,
4074  grouped_rel,
4075  path,
4076  partial_grouping_target,
4077  NULL,
4078  &total_groups);
4079 
4080  add_path(grouped_rel, (Path *)
4081  create_agg_path(root,
4082  grouped_rel,
4083  path,
4084  target,
4085  AGG_HASHED,
4087  parse->groupClause,
4088  (List *) parse->havingQual,
4089  &agg_final_costs,
4090  dNumGroups));
4091  }
4092  }
4093  }
4094 
4095  /* Give a helpful error if we failed to find any implementation */
4096  if (grouped_rel->pathlist == NIL)
4097  ereport(ERROR,
4098  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4099  errmsg("could not implement GROUP BY"),
4100  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4101 
4102  /*
4103  * If there is an FDW that's responsible for all baserels of the query,
4104  * let it consider adding ForeignPaths.
4105  */
4106  if (grouped_rel->fdwroutine &&
4107  grouped_rel->fdwroutine->GetForeignUpperPaths)
4109  input_rel, grouped_rel);
4110 
4111  /* Let extensions possibly add some more paths */
4113  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4114  input_rel, grouped_rel);
4115 
4116  /* Now choose the best path(s) */
4117  set_cheapest(grouped_rel);
4118 
4119  /*
4120  * We've been using the partial pathlist for the grouped relation to hold
4121  * partially aggregated paths, but that's actually a little bit bogus
4122  * because it's unsafe for later planning stages -- like ordered_rel ---
4123  * to get the idea that they can use these partial paths as if they didn't
4124  * need a FinalizeAggregate step. Zap the partial pathlist at this stage
4125  * so we don't get confused.
4126  */
4127  grouped_rel->partial_pathlist = NIL;
4128 
4129  return grouped_rel;
4130 }
4131 
4132 
4133 /*
4134  * For a given input path, consider the possible ways of doing grouping sets on
4135  * it, by combinations of hashing and sorting. This can be called multiple
4136  * times, so it's important that it not scribble on input. No result is
4137  * returned, but any generated paths are added to grouped_rel.
4138  */
4139 static void
4141  RelOptInfo *grouped_rel,
4142  Path *path,
4143  bool is_sorted,
4144  bool can_hash,
4145  PathTarget *target,
4146  grouping_sets_data *gd,
4147  const AggClauseCosts *agg_costs,
4148  double dNumGroups)
4149 {
4150  Query *parse = root->parse;
4151 
4152  /*
4153  * If we're not being offered sorted input, then only consider plans that
4154  * can be done entirely by hashing.
4155  *
4156  * We can hash everything if it looks like it'll fit in work_mem. But if
4157  * the input is actually sorted despite not being advertised as such, we
4158  * prefer to make use of that in order to use less memory.
4159  *
4160  * If none of the grouping sets are sortable, then ignore the work_mem
4161  * limit and generate a path anyway, since otherwise we'll just fail.
4162  */
4163  if (!is_sorted)
4164  {
4165  List *new_rollups = NIL;
4166  RollupData *unhashed_rollup = NULL;
4167  List *sets_data;
4168  List *empty_sets_data = NIL;
4169  List *empty_sets = NIL;
4170  ListCell *lc;
4171  ListCell *l_start = list_head(gd->rollups);
4172  AggStrategy strat = AGG_HASHED;
4173  Size hashsize;
4174  double exclude_groups = 0.0;
4175 
4176  Assert(can_hash);
4177 
4178  if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4179  {
4180  unhashed_rollup = lfirst(l_start);
4181  exclude_groups = unhashed_rollup->numGroups;
4182  l_start = lnext(l_start);
4183  }
4184 
4185  hashsize = estimate_hashagg_tablesize(path,
4186  agg_costs,
4187  dNumGroups - exclude_groups);
4188 
4189  /*
4190  * gd->rollups is empty if we have only unsortable columns to work
4191  * with. Override work_mem in that case; otherwise, we'll rely on the
4192  * sorted-input case to generate usable mixed paths.
4193  */
4194  if (hashsize > work_mem * 1024L && gd->rollups)
4195  return; /* nope, won't fit */
4196 
4197  /*
4198  * We need to burst the existing rollups list into individual grouping
4199  * sets and recompute a groupClause for each set.
4200  */
4201  sets_data = list_copy(gd->unsortable_sets);
4202 
4203  for_each_cell(lc, l_start)
4204  {
4205  RollupData *rollup = lfirst(lc);
4206 
4207  /*
4208  * If we find an unhashable rollup that's not been skipped by the
4209  * "actually sorted" check above, we can't cope; we'd need sorted
4210  * input (with a different sort order) but we can't get that here.
4211  * So bail out; we'll get a valid path from the is_sorted case
4212  * instead.
4213  *
4214  * The mere presence of empty grouping sets doesn't make a rollup
4215  * unhashable (see preprocess_grouping_sets), we handle those
4216  * specially below.
4217  */
4218  if (!rollup->hashable)
4219  return;
4220  else
4221  sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
4222  }
4223  foreach(lc, sets_data)
4224  {
4225  GroupingSetData *gs = lfirst(lc);
4226  List *gset = gs->set;
4227  RollupData *rollup;
4228 
4229  if (gset == NIL)
4230  {
4231  /* Empty grouping sets can't be hashed. */
4232  empty_sets_data = lappend(empty_sets_data, gs);
4233  empty_sets = lappend(empty_sets, NIL);
4234  }
4235  else
4236  {
4237  rollup = makeNode(RollupData);
4238 
4239  rollup->groupClause = preprocess_groupclause(root, gset);
4240  rollup->gsets_data = list_make1(gs);
4241  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4242  rollup->gsets_data,
4243  gd->tleref_to_colnum_map);
4244  rollup->numGroups = gs->numGroups;
4245  rollup->hashable = true;
4246  rollup->is_hashed = true;
4247  new_rollups = lappend(new_rollups, rollup);
4248  }
4249  }
4250 
4251  /*
4252  * If we didn't find anything nonempty to hash, then bail. We'll
4253  * generate a path from the is_sorted case.
4254  */
4255  if (new_rollups == NIL)
4256  return;
4257 
4258  /*
4259  * If there were empty grouping sets they should have been in the
4260  * first rollup.
4261  */
4262  Assert(!unhashed_rollup || !empty_sets);
4263 
4264  if (unhashed_rollup)
4265  {
4266  new_rollups = lappend(new_rollups, unhashed_rollup);
4267  strat = AGG_MIXED;
4268  }
4269  else if (empty_sets)
4270  {
4271  RollupData *rollup = makeNode(RollupData);
4272 
4273  rollup->groupClause = NIL;
4274  rollup->gsets_data = empty_sets_data;
4275  rollup->gsets = empty_sets;
4276  rollup->numGroups = list_length(empty_sets);
4277  rollup->hashable = false;
4278  rollup->is_hashed = false;
4279  new_rollups = lappend(new_rollups, rollup);
4280  strat = AGG_MIXED;
4281  }
4282 
4283  add_path(grouped_rel, (Path *)
4285  grouped_rel,
4286  path,
4287  target,
4288  (List *) parse->havingQual,
4289  strat,
4290  new_rollups,
4291  agg_costs,
4292  dNumGroups));
4293  return;
4294  }
4295 
4296  /*
4297  * If we have sorted input but nothing we can do with it, bail.
4298  */
4299  if (list_length(gd->rollups) == 0)
4300  return;
4301 
4302  /*
4303  * Given sorted input, we try and make two paths: one sorted and one mixed
4304  * sort/hash. (We need to try both because hashagg might be disabled, or
4305  * some columns might not be sortable.)
4306  *
4307  * can_hash is passed in as false if some obstacle elsewhere (such as
4308  * ordered aggs) means that we shouldn't consider hashing at all.
4309  */
4310  if (can_hash && gd->any_hashable)
4311  {
4312  List *rollups = NIL;
4313  List *hash_sets = list_copy(gd->unsortable_sets);
4314  double availspace = (work_mem * 1024.0);
4315  ListCell *lc;
4316 
4317  /*
4318  * Account first for space needed for groups we can't sort at all.
4319  */
4320  availspace -= (double) estimate_hashagg_tablesize(path,
4321  agg_costs,
4322  gd->dNumHashGroups);
4323 
4324  if (availspace > 0 && list_length(gd->rollups) > 1)
4325  {
4326  double scale;
4327  int num_rollups = list_length(gd->rollups);
4328  int k_capacity;
4329  int *k_weights = palloc(num_rollups * sizeof(int));
4330  Bitmapset *hash_items = NULL;
4331  int i;
4332 
4333  /*
4334  * We treat this as a knapsack problem: the knapsack capacity
4335  * represents work_mem, the item weights are the estimated memory
4336  * usage of the hashtables needed to implement a single rollup, and
4337  * we really ought to use the cost saving as the item value;
4338  * however, currently the costs assigned to sort nodes don't
4339  * reflect the comparison costs well, and so we treat all items as
4340  * of equal value (each rollup we hash instead saves us one sort).
4341  *
4342  * To use the discrete knapsack, we need to scale the values to a
4343  * reasonably small bounded range. We choose to allow a 5% error
4344  * margin; we have no more than 4096 rollups in the worst possible
4345  * case, which with a 5% error margin will require a bit over 42MB
4346  * of workspace. (Anyone wanting to plan queries that complex had
4347  * better have the memory for it. In more reasonable cases, with
4348  * no more than a couple of dozen rollups, the memory usage will
4349  * be negligible.)
4350  *
4351  * k_capacity is naturally bounded, but we clamp the values for
4352  * scale and weight (below) to avoid overflows or underflows (or
4353  * uselessly trying to use a scale factor less than 1 byte).
4354  */
4355  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4356  k_capacity = (int) floor(availspace / scale);
4357 
4358  /*
4359  * We leave the first rollup out of consideration since it's the
4360  * one that matches the input sort order. We assign indexes "i"
4361  * to only those entries considered for hashing; the second loop,
4362  * below, must use the same condition.
4363  */
4364  i = 0;
4366  {
4367  RollupData *rollup = lfirst(lc);
4368 
4369  if (rollup->hashable)
4370  {
4371  double sz = estimate_hashagg_tablesize(path,
4372  agg_costs,
4373  rollup->numGroups);
4374 
4375  /*
4376  * If sz is enormous, but work_mem (and hence scale) is
4377  * small, avoid integer overflow here.
4378  */
4379  k_weights[i] = (int) Min(floor(sz / scale),
4380  k_capacity + 1.0);
4381  ++i;
4382  }
4383  }
4384 
4385  /*
4386  * Apply knapsack algorithm; compute the set of items which
4387  * maximizes the value stored (in this case the number of sorts
4388  * saved) while keeping the total size (approximately) within
4389  * capacity.
4390  */
4391  if (i > 0)
4392  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4393 
4394  if (!bms_is_empty(hash_items))
4395  {
4396  rollups = list_make1(linitial(gd->rollups));
4397 
4398  i = 0;
4400  {
4401  RollupData *rollup = lfirst(lc);
4402 
4403  if (rollup->hashable)
4404  {
4405  if (bms_is_member(i, hash_items))
4406  hash_sets = list_concat(hash_sets,
4407  list_copy(rollup->gsets_data));
4408  else
4409  rollups = lappend(rollups, rollup);
4410  ++i;
4411  }
4412  else
4413  rollups = lappend(rollups, rollup);
4414  }
4415  }
4416  }
4417 
4418  if (!rollups && hash_sets)
4419  rollups = list_copy(gd->rollups);
4420 
4421  foreach(lc, hash_sets)
4422  {
4423  GroupingSetData *gs = lfirst(lc);
4424  RollupData *rollup = makeNode(RollupData);
4425 
4426  Assert(gs->set != NIL);
4427 
4428  rollup->groupClause = preprocess_groupclause(root, gs->set);
4429  rollup->gsets_data = list_make1(gs);
4430  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4431  rollup->gsets_data,
4432  gd->tleref_to_colnum_map);
4433  rollup->numGroups = gs->numGroups;
4434  rollup->hashable = true;
4435  rollup->is_hashed = true;
4436  rollups = lcons(rollup, rollups);
4437  }
4438 
4439  if (rollups)
4440  {
4441  add_path(grouped_rel, (Path *)
4443  grouped_rel,
4444  path,
4445  target,
4446  (List *) parse->havingQual,
4447  AGG_MIXED,
4448  rollups,
4449  agg_costs,
4450  dNumGroups));
4451  }
4452  }
4453 
4454  /*
4455  * Now try the simple sorted case.
4456  */
4457  if (!gd->unsortable_sets)
4458  add_path(grouped_rel, (Path *)
4460  grouped_rel,
4461  path,
4462  target,
4463  (List *) parse->havingQual,
4464  AGG_SORTED,
4465  gd->rollups,
4466  agg_costs,
4467  dNumGroups));
4468 }
4469 
4470 /*
4471  * create_window_paths
4472  *
4473  * Build a new upperrel containing Paths for window-function evaluation.
4474  *
4475  * input_rel: contains the source-data Paths
4476  * input_target: result of make_window_input_target
4477  * output_target: what the topmost WindowAggPath should return
4478  * tlist: query's target list (needed to look up pathkeys)
4479  * wflists: result of find_window_functions
4480  * activeWindows: result of select_active_windows
4481  *
4482  * Note: all Paths in input_rel are expected to return input_target.
4483  */
4484 static RelOptInfo *
4486  RelOptInfo *input_rel,
4487  PathTarget *input_target,
4488  PathTarget *output_target,
4489  List *tlist,
4490  WindowFuncLists *wflists,
4491  List *activeWindows)
4492 {
4493  RelOptInfo *window_rel;
4494  ListCell *lc;
4495 
4496  /* For now, do all work in the (WINDOW, NULL) upperrel */
4497  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4498 
4499  /*
4500  * If the input relation is not parallel-safe, then the window relation
4501  * can't be parallel-safe, either. Otherwise, we need to examine the
4502  * target list and active windows for non-parallel-safe constructs.
4503  */
4504  if (input_rel->consider_parallel &&
4505  is_parallel_safe(root, (Node *) output_target->exprs) &&
4506  is_parallel_safe(root, (Node *) activeWindows))
4507  window_rel->consider_parallel = true;
4508 
4509  /*
4510  * If the input rel belongs to a single FDW, so does the window rel.
4511  */
4512  window_rel->serverid = input_rel->serverid;
4513  window_rel->userid = input_rel->userid;
4514  window_rel->useridiscurrent = input_rel->useridiscurrent;
4515  window_rel->fdwroutine = input_rel->fdwroutine;
4516 
4517  /*
4518  * Consider computing window functions starting from the existing
4519  * cheapest-total path (which will likely require a sort) as well as any
4520  * existing paths that satisfy root->window_pathkeys (which won't).
4521  */
4522  foreach(lc, input_rel->pathlist)
4523  {
4524  Path *path = (Path *) lfirst(lc);
4525 
4526  if (path == input_rel->cheapest_total_path ||
4529  window_rel,
4530  path,
4531  input_target,
4532  output_target,
4533  tlist,
4534  wflists,
4535  activeWindows);
4536  }
4537 
4538  /*
4539  * If there is an FDW that's responsible for all baserels of the query,
4540  * let it consider adding ForeignPaths.
4541  */
4542  if (window_rel->fdwroutine &&
4543  window_rel->fdwroutine->GetForeignUpperPaths)
4544  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4545  input_rel, window_rel);
4546 
4547  /* Let extensions possibly add some more paths */
4549  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4550  input_rel, window_rel);
4551 
4552  /* Now choose the best path(s) */
4553  set_cheapest(window_rel);
4554 
4555  return window_rel;
4556 }
4557 
4558 /*
4559  * Stack window-function implementation steps atop the given Path, and
4560  * add the result to window_rel.
4561  *
4562  * window_rel: upperrel to contain result
4563  * path: input Path to use (must return input_target)
4564  * input_target: result of make_window_input_target
4565  * output_target: what the topmost WindowAggPath should return
4566  * tlist: query's target list (needed to look up pathkeys)
4567  * wflists: result of find_window_functions
4568  * activeWindows: result of select_active_windows
4569  */
4570 static void
4572  RelOptInfo *window_rel,
4573  Path *path,
4574  PathTarget *input_target,
4575  PathTarget *output_target,
4576  List *tlist,
4577  WindowFuncLists *wflists,
4578  List *activeWindows)
4579 {
4580  PathTarget *window_target;
4581  ListCell *l;
4582 
4583  /*
4584  * Since each window clause could require a different sort order, we stack
4585  * up a WindowAgg node for each clause, with sort steps between them as
4586  * needed. (We assume that select_active_windows chose a good order for
4587  * executing the clauses in.)
4588  *
4589  * input_target should contain all Vars and Aggs needed for the result.
4590  * (In some cases we wouldn't need to propagate all of these all the way
4591  * to the top, since they might only be needed as inputs to WindowFuncs.
4592  * It's probably not worth trying to optimize that though.) It must also
4593  * contain all window partitioning and sorting expressions, to ensure
4594  * they're computed only once at the bottom of the stack (that's critical
4595  * for volatile functions). As we climb up the stack, we'll add outputs
4596  * for the WindowFuncs computed at each level.
4597  */
4598  window_target = input_target;
4599 
4600  foreach(l, activeWindows)
4601  {
4602  WindowClause *wc = (WindowClause *) lfirst(l);
4603  List *window_pathkeys;
4604 
4605  window_pathkeys = make_pathkeys_for_window(root,
4606  wc,
4607  tlist);
4608 
4609  /* Sort if necessary */
4610  if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4611  {
4612  path = (Path *) create_sort_path(root, window_rel,
4613  path,
4614  window_pathkeys,
4615  -1.0);
4616  }
4617 
4618  if (lnext(l))
4619  {
4620  /*
4621  * Add the current WindowFuncs to the output target for this
4622  * intermediate WindowAggPath. We must copy window_target to
4623  * avoid changing the previous path's target.
4624  *
4625  * Note: a WindowFunc adds nothing to the target's eval costs; but
4626  * we do need to account for the increase in tlist width.
4627  */
4628  ListCell *lc2;
4629 
4630  window_target = copy_pathtarget(window_target);
4631  foreach(lc2, wflists->windowFuncs[wc->winref])
4632  {
4633  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4634 
4635  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4636  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4637  }
4638  }
4639  else
4640  {
4641  /* Install the goal target in the topmost WindowAgg */
4642  window_target = output_target;
4643  }
4644 
4645  path = (Path *)
4646  create_windowagg_path(root, window_rel, path, window_target,
4647  wflists->windowFuncs[wc->winref],
4648  wc,
4649  window_pathkeys);
4650  }
4651 
4652  add_path(window_rel, path);
4653 }
4654 
4655 /*
4656  * create_distinct_paths
4657  *
4658  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4659  *
4660  * input_rel: contains the source-data Paths
4661  *
4662  * Note: input paths should already compute the desired pathtarget, since
4663  * Sort/Unique won't project anything.
4664  */
4665 static RelOptInfo *
4667  RelOptInfo *input_rel)
4668 {
4669  Query *parse = root->parse;
4670  Path *cheapest_input_path = input_rel->cheapest_total_path;
4671  RelOptInfo *distinct_rel;
4672  double numDistinctRows;
4673  bool allow_hash;
4674  Path *path;
4675  ListCell *lc;
4676 
4677  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4678  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4679 
4680  /*
4681  * We don't compute anything at this level, so distinct_rel will be
4682  * parallel-safe if the input rel is parallel-safe. In particular, if
4683  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4684  * output those expressions, and will not be parallel-safe unless those
4685  * expressions are parallel-safe.
4686  */
4687  distinct_rel->consider_parallel = input_rel->consider_parallel;
4688 
4689  /*
4690  * If the input rel belongs to a single FDW, so does the distinct_rel.
4691  */
4692  distinct_rel->serverid = input_rel->serverid;
4693  distinct_rel->userid = input_rel->userid;
4694  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4695  distinct_rel->fdwroutine = input_rel->fdwroutine;
4696 
4697  /* Estimate number of distinct rows there will be */
4698  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4699  root->hasHavingQual)
4700  {
4701  /*
4702  * If there was grouping or aggregation, use the number of input rows
4703  * as the estimated number of DISTINCT rows (ie, assume the input is
4704  * already mostly unique).
4705  */
4706  numDistinctRows = cheapest_input_path->rows;
4707  }
4708  else
4709  {
4710  /*
4711  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4712  */
4713  List *distinctExprs;
4714 
4715  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4716  parse->targetList);
4717  numDistinctRows = estimate_num_groups(root, distinctExprs,
4718  cheapest_input_path->rows,
4719  NULL);
4720  }
4721 
4722  /*
4723  * Consider sort-based implementations of DISTINCT, if possible.
4724  */
4726  {
4727  /*
4728  * First, if we have any adequately-presorted paths, just stick a
4729  * Unique node on those. Then consider doing an explicit sort of the
4730  * cheapest input path and Unique'ing that.
4731  *
4732  * When we have DISTINCT ON, we must sort by the more rigorous of
4733  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4734  * Also, if we do have to do an explicit sort, we might as well use
4735  * the more rigorous ordering to avoid a second sort later. (Note
4736  * that the parser will have ensured that one clause is a prefix of
4737  * the other.)
4738  */
4739  List *needed_pathkeys;
4740 
4741  if (parse->hasDistinctOn &&
4743  list_length(root->sort_pathkeys))
4744  needed_pathkeys = root->sort_pathkeys;
4745  else
4746  needed_pathkeys = root->distinct_pathkeys;
4747 
4748  foreach(lc, input_rel->pathlist)
4749  {
4750  Path *path = (Path *) lfirst(lc);
4751 
4752  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4753  {
4754  add_path(distinct_rel, (Path *)
4755  create_upper_unique_path(root, distinct_rel,
4756  path,
4758  numDistinctRows));
4759  }
4760  }
4761 
4762  /* For explicit-sort case, always use the more rigorous clause */
4763  if (list_length(root->distinct_pathkeys) <
4764  list_length(root->sort_pathkeys))
4765  {
4766  needed_pathkeys = root->sort_pathkeys;
4767  /* Assert checks that parser didn't mess up... */
4769  needed_pathkeys));
4770  }
4771  else
4772  needed_pathkeys = root->distinct_pathkeys;
4773 
4774  path = cheapest_input_path;
4775  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4776  path = (Path *) create_sort_path(root, distinct_rel,
4777  path,
4778  needed_pathkeys,
4779  -1.0);
4780 
4781  add_path(distinct_rel, (Path *)
4782  create_upper_unique_path(root, distinct_rel,
4783  path,
4785  numDistinctRows));
4786  }
4787 
4788  /*
4789  * Consider hash-based implementations of DISTINCT, if possible.
4790  *
4791  * If we were not able to make any other types of path, we *must* hash or
4792  * die trying. If we do have other choices, there are several things that
4793  * should prevent selection of hashing: if the query uses DISTINCT ON
4794  * (because it won't really have the expected behavior if we hash), or if
4795  * enable_hashagg is off, or if it looks like the hashtable will exceed
4796  * work_mem.
4797  *
4798  * Note: grouping_is_hashable() is much more expensive to check than the
4799  * other gating conditions, so we want to do it last.
4800  */
4801  if (distinct_rel->pathlist == NIL)
4802  allow_hash = true; /* we have no alternatives */
4803  else if (parse->hasDistinctOn || !enable_hashagg)
4804  allow_hash = false; /* policy-based decision not to hash */
4805  else
4806  {
4807  Size hashentrysize;
4808 
4809  /* Estimate per-hash-entry space at tuple width... */
4810  hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4812  /* plus the per-hash-entry overhead */
4813  hashentrysize += hash_agg_entry_size(0);
4814 
4815  /* Allow hashing only if hashtable is predicted to fit in work_mem */
4816  allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4817  }
4818 
4819  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4820  {
4821  /* Generate hashed aggregate path --- no sort needed */
4822  add_path(distinct_rel, (Path *)
4823  create_agg_path(root,
4824  distinct_rel,
4825  cheapest_input_path,
4826  cheapest_input_path->pathtarget,
4827  AGG_HASHED,
4829  parse->distinctClause,
4830  NIL,
4831  NULL,
4832  numDistinctRows));
4833  }
4834 
4835  /* Give a helpful error if we failed to find any implementation */
4836  if (distinct_rel->pathlist == NIL)
4837  ereport(ERROR,
4838  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4839  errmsg("could not implement DISTINCT"),
4840  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4841 
4842  /*
4843  * If there is an FDW that's responsible for all baserels of the query,
4844  * let it consider adding ForeignPaths.
4845  */
4846  if (distinct_rel->fdwroutine &&
4847  distinct_rel->fdwroutine->GetForeignUpperPaths)
4848  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4849  input_rel, distinct_rel);
4850 
4851  /* Let extensions possibly add some more paths */
4853  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4854  input_rel, distinct_rel);
4855 
4856  /* Now choose the best path(s) */
4857  set_cheapest(distinct_rel);
4858 
4859  return distinct_rel;
4860 }
4861 
4862 /*
4863  * create_ordered_paths
4864  *
4865  * Build a new upperrel containing Paths for ORDER BY evaluation.
4866  *
4867  * All paths in the result must satisfy the ORDER BY ordering.
4868  * The only new path we need consider is an explicit sort on the
4869  * cheapest-total existing path.
4870  *
4871  * input_rel: contains the source-data Paths
4872  * target: the output tlist the result Paths must emit
4873  * limit_tuples: estimated bound on the number of output tuples,
4874  * or -1 if no LIMIT or couldn't estimate
4875  */
4876 static RelOptInfo *
4878  RelOptInfo *input_rel,
4879  PathTarget *target,
4880  double limit_tuples)
4881 {
4882  Path *cheapest_input_path = input_rel->cheapest_total_path;
4883  RelOptInfo *ordered_rel;
4884  ListCell *lc;
4885 
4886  /* For now, do all work in the (ORDERED, NULL) upperrel */
4887  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4888 
4889  /*
4890  * If the input relation is not parallel-safe, then the ordered relation
4891  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4892  * target list is parallel-safe.
4893  */
4894  if (input_rel->consider_parallel &&
4895  is_parallel_safe(root, (Node *) target->exprs))
4896  ordered_rel->consider_parallel = true;
4897 
4898  /*
4899  * If the input rel belongs to a single FDW, so does the ordered_rel.
4900  */
4901  ordered_rel->serverid = input_rel->serverid;
4902  ordered_rel->userid = input_rel->userid;
4903  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4904  ordered_rel->fdwroutine = input_rel->fdwroutine;
4905 
4906  foreach(lc, input_rel->pathlist)
4907  {
4908  Path *path = (Path *) lfirst(lc);
4909  bool is_sorted;
4910 
4911  is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4912  path->pathkeys);
4913  if (path == cheapest_input_path || is_sorted)
4914  {
4915  if (!is_sorted)
4916  {
4917  /* An explicit sort here can take advantage of LIMIT */
4918  path = (Path *) create_sort_path(root,
4919  ordered_rel,
4920  path,
4921  root->sort_pathkeys,
4922  limit_tuples);
4923  }
4924 
4925  /* Add projection step if needed */
4926  if (path->pathtarget != target)
4927  path = apply_projection_to_path(root, ordered_rel,
4928  path, target);
4929 
4930  add_path(ordered_rel, path);
4931  }
4932  }
4933 
4934  /*
4935  * generate_gather_paths() will have already generated a simple Gather
4936  * path for the best parallel path, if any, and the loop above will have
4937  * considered sorting it. Similarly, generate_gather_paths() will also
4938  * have generated order-preserving Gather Merge plans which can be used
4939  * without sorting if they happen to match the sort_pathkeys, and the loop
4940  * above will have handled those as well. However, there's one more
4941  * possibility: it may make sense to sort the cheapest partial path
4942  * according to the required output order and then use Gather Merge.
4943  */
4944  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
4945  input_rel->partial_pathlist != NIL)
4946  {
4947  Path *cheapest_partial_path;
4948 
4949  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4950 
4951  /*
4952  * If cheapest partial path doesn't need a sort, this is redundant
4953  * with what's already been tried.
4954  */
4956  cheapest_partial_path->pathkeys))
4957  {
4958  Path *path;
4959  double total_groups;
4960 
4961  path = (Path *) create_sort_path(root,
4962  ordered_rel,
4963  cheapest_partial_path,
4964  root->sort_pathkeys,
4965  -1.0);
4966 
4967  total_groups = cheapest_partial_path->rows *
4968  cheapest_partial_path->parallel_workers;
4969  path = (Path *)
4970  create_gather_merge_path(root, ordered_rel,
4971  path,
4972  target, root->sort_pathkeys, NULL,
4973  &total_groups);
4974 
4975  /* Add projection step if needed */
4976  if (path->pathtarget != target)
4977  path = apply_projection_to_path(root, ordered_rel,
4978  path, target);
4979 
4980  add_path(ordered_rel, path);
4981  }
4982  }
4983 
4984  /*
4985  * If there is an FDW that's responsible for all baserels of the query,
4986  * let it consider adding ForeignPaths.
4987  */
4988  if (ordered_rel->fdwroutine &&
4989  ordered_rel->fdwroutine->GetForeignUpperPaths)
4990  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
4991  input_rel, ordered_rel);
4992 
4993  /* Let extensions possibly add some more paths */
4995  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
4996  input_rel, ordered_rel);
4997 
4998  /*
4999  * No need to bother with set_cheapest here; grouping_planner does not
5000  * need us to do it.
5001  */
5002  Assert(ordered_rel->pathlist != NIL);
5003 
5004  return ordered_rel;
5005 }
5006 
5007 
5008 /*
5009  * make_group_input_target
5010  * Generate appropriate PathTarget for initial input to grouping nodes.
5011  *
5012  * If there is grouping or aggregation, the scan/join subplan cannot emit
5013  * the query's final targetlist; for example, it certainly can't emit any
5014  * aggregate function calls. This routine generates the correct target
5015  * for the scan/join subplan.
5016  *
5017  * The query target list passed from the parser already contains entries
5018  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5019  * for variables used only in HAVING clauses; so we need to add those
5020  * variables to the subplan target list. Also, we flatten all expressions
5021  * except GROUP BY items into their component variables; other expressions
5022  * will be computed by the upper plan nodes rather than by the subplan.
5023  * For example, given a query like
5024  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5025  * we want to pass this targetlist to the subplan:
5026  * a+b,c,d
5027  * where the a+b target will be used by the Sort/Group steps, and the
5028  * other targets will be used for computing the final results.
5029  *
5030  * 'final_target' is the query's final target list (in PathTarget form)
5031  *
5032  * The result is the PathTarget to be computed by the Paths returned from
5033  * query_planner().
5034  */
5035 static PathTarget *
5037 {
5038  Query *parse = root->parse;
5039  PathTarget *input_target;
5040  List *non_group_cols;
5041  List *non_group_vars;
5042  int i;
5043  ListCell *lc;
5044 
5045  /*
5046  * We must build a target containing all grouping columns, plus any other
5047  * Vars mentioned in the query's targetlist and HAVING qual.
5048  */
5049  input_target = create_empty_pathtarget();
5050  non_group_cols = NIL;
5051 
5052  i = 0;
5053  foreach(lc, final_target->exprs)
5054  {
5055  Expr *expr = (Expr *) lfirst(lc);
5056  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5057 
5058  if (sgref && parse->groupClause &&
5060  {
5061  /*
5062  * It's a grouping column, so add it to the input target as-is.
5063  */
5064  add_column_to_pathtarget(input_target, expr, sgref);
5065  }
5066  else
5067  {
5068  /*
5069  * Non-grouping column, so just remember the expression for later
5070  * call to pull_var_clause.
5071  */
5072  non_group_cols = lappend(non_group_cols, expr);
5073  }
5074 
5075  i++;
5076  }
5077 
5078  /*
5079  * If there's a HAVING clause, we'll need the Vars it uses, too.
5080  */
5081  if (parse->havingQual)
5082  non_group_cols = lappend(non_group_cols, parse->havingQual);
5083 
5084  /*
5085  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5086  * add them to the input target if not already present. (A Var used
5087  * directly as a GROUP BY item will be present already.) Note this
5088  * includes Vars used in resjunk items, so we are covering the needs of
5089  * ORDER BY and window specifications. Vars used within Aggrefs and
5090  * WindowFuncs will be pulled out here, too.
5091  */
5092  non_group_vars = pull_var_clause((Node *) non_group_cols,
5096  add_new_columns_to_pathtarget(input_target, non_group_vars);
5097 
5098  /* clean up cruft */
5099  list_free(non_group_vars);
5100  list_free(non_group_cols);
5101 
5102  /* XXX this causes some redundant cost calculation ... */
5103  return set_pathtarget_cost_width(root, input_target);
5104 }
5105 
5106 /*
5107  * make_partial_grouping_target
5108  * Generate appropriate PathTarget for output of partial aggregate
5109  * (or partial grouping, if there are no aggregates) nodes.
5110  *
5111  * A partial aggregation node needs to emit all the same aggregates that
5112  * a regular aggregation node would, plus any aggregates used in HAVING;
5113  * except that the Aggref nodes should be marked as partial aggregates.
5114  *
5115  * In addition, we'd better emit any Vars and PlaceholderVars that are
5116  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5117  * these would be Vars that are grouped by or used in grouping expressions.)
5118  *
5119  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5120  * We get the HAVING clause out of *root.
5121  */
5122 static PathTarget *
5124 {
5125  Query *parse = root->parse;
5126  PathTarget *partial_target;
5127  List *non_group_cols;
5128  List *non_group_exprs;
5129  int i;
5130  ListCell *lc;
5131 
5132  partial_target = create_empty_pathtarget();
5133  non_group_cols = NIL;
5134 
5135  i = 0;
5136  foreach(lc, grouping_target->exprs)
5137  {
5138  Expr *expr = (Expr *) lfirst(lc);
5139  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5140 
5141  if (sgref && parse->groupClause &&
5143  {
5144  /*
5145  * It's a grouping column, so add it to the partial_target as-is.
5146  * (This allows the upper agg step to repeat the grouping calcs.)
5147  */
5148  add_column_to_pathtarget(partial_target, expr, sgref);
5149  }
5150  else
5151  {
5152  /*
5153  * Non-grouping column, so just remember the expression for later
5154  * call to pull_var_clause.
5155  */
5156  non_group_cols = lappend(non_group_cols, expr);
5157  }
5158 
5159  i++;
5160  }
5161 
5162  /*
5163  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5164  */
5165  if (parse->havingQual)
5166  non_group_cols = lappend(non_group_cols, parse->havingQual);
5167 
5168  /*
5169  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5170  * non-group cols (plus HAVING), and add them to the partial_target if not
5171  * already present. (An expression used directly as a GROUP BY item will
5172  * be present already.) Note this includes Vars used in resjunk items, so
5173  * we are covering the needs of ORDER BY and window specifications.
5174  */
5175  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5179 
5180  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5181 
5182  /*
5183  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5184  * are at the top level of the target list, so we can just scan the list
5185  * rather than recursing through the expression trees.
5186  */
5187  foreach(lc, partial_target->exprs)
5188  {
5189  Aggref *aggref = (Aggref *) lfirst(lc);
5190 
5191  if (IsA(aggref, Aggref))
5192  {
5193  Aggref *newaggref;
5194 
5195  /*
5196  * We shouldn't need to copy the substructure of the Aggref node,
5197  * but flat-copy the node itself to avoid damaging other trees.
5198  */
5199  newaggref = makeNode(Aggref);
5200  memcpy(newaggref, aggref, sizeof(Aggref));
5201 
5202  /* For now, assume serialization is required */
5204 
5205  lfirst(lc) = newaggref;
5206  }
5207  }
5208 
5209  /* clean up cruft */
5210  list_free(non_group_exprs);
5211  list_free(non_group_cols);
5212 
5213  /* XXX this causes some redundant cost calculation ... */
5214  return set_pathtarget_cost_width(root, partial_target);
5215 }
5216 
5217 /*
5218  * mark_partial_aggref
5219  * Adjust an Aggref to make it represent a partial-aggregation step.
5220  *
5221  * The Aggref node is modified in-place; caller must do any copying required.
5222  */
5223 void
5225 {
5226  /* aggtranstype should be computed by this point */
5228  /* ... but aggsplit should still be as the parser left it */
5229  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5230 
5231  /* Mark the Aggref with the intended partial-aggregation mode */
5232  agg->aggsplit = aggsplit;
5233 
5234  /*
5235  * Adjust result type if needed. Normally, a partial aggregate returns
5236  * the aggregate's transition type; but if that's INTERNAL and we're
5237  * serializing, it returns BYTEA instead.
5238  */
5239  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5240  {
5241  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5242  agg->aggtype = BYTEAOID;
5243  else
5244  agg->aggtype = agg->aggtranstype;
5245  }
5246 }
5247 
5248 /*
5249  * postprocess_setop_tlist
5250  * Fix up targetlist returned by plan_set_operations().
5251  *
5252  * We need to transpose sort key info from the orig_tlist into new_tlist.
5253  * NOTE: this would not be good enough if we supported resjunk sort keys
5254  * for results of set operations --- then, we'd need to project a whole
5255  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5256  * find any resjunk columns in orig_tlist.
5257  */
5258 static List *
5259 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5260 {
5261  ListCell *l;
5262  ListCell *orig_tlist_item = list_head(orig_tlist);
5263 
5264  foreach(l, new_tlist)
5265  {
5266  TargetEntry *new_tle = (TargetEntry *) lfirst(l);
5267  TargetEntry *orig_tle;
5268 
5269  /* ignore resjunk columns in setop result */
5270  if (new_tle->resjunk)
5271  continue;
5272 
5273  Assert(orig_tlist_item != NULL);
5274  orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
5275  orig_tlist_item = lnext(orig_tlist_item);
5276  if (orig_tle->resjunk) /* should not happen */
5277  elog(ERROR, "resjunk output columns are not implemented");
5278  Assert(new_tle->resno == orig_tle->resno);
5279  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5280  }
5281  if (orig_tlist_item != NULL)
5282  elog(ERROR, "resjunk output columns are not implemented");
5283  return new_tlist;
5284 }
5285 
5286 /*
5287  * select_active_windows
5288  * Create a list of the "active" window clauses (ie, those referenced
5289  * by non-deleted WindowFuncs) in the order they are to be executed.
5290  */
5291 static List *
5293 {
5294  List *result;
5295  List *actives;
5296  ListCell *lc;
5297 
5298  /* First, make a list of the active windows */
5299  actives = NIL;
5300  foreach(lc, root->parse->windowClause)
5301  {
5302  WindowClause *wc = (WindowClause *) lfirst(lc);
5303 
5304  /* It's only active if wflists shows some related WindowFuncs */
5305  Assert(wc->winref <= wflists->maxWinRef);
5306  if (wflists->windowFuncs[wc->winref] != NIL)
5307  actives = lappend(actives, wc);
5308  }
5309 
5310  /*
5311  * Now, ensure that windows with identical partitioning/ordering clauses
5312  * are adjacent in the list. This is required by the SQL standard, which
5313  * says that only one sort is to be used for such windows, even if they
5314  * are otherwise distinct (eg, different names or framing clauses).
5315  *
5316  * There is room to be much smarter here, for example detecting whether
5317  * one window's sort keys are a prefix of another's (so that sorting for
5318  * the latter would do for the former), or putting windows first that
5319  * match a sort order available for the underlying query. For the moment
5320  * we are content with meeting the spec.
5321  */
5322  result = NIL;
5323  while (actives != NIL)
5324  {
5325  WindowClause *wc = (WindowClause *) linitial(actives);
5326  ListCell *prev;
5327  ListCell *next;
5328 
5329  /* Move wc from actives to result */
5330  actives = list_delete_first(actives);
5331  result = lappend(result, wc);
5332 
5333  /* Now move any matching windows from actives to result */
5334  prev = NULL;
5335  for (lc = list_head(actives); lc; lc = next)
5336  {
5337  WindowClause *wc2 = (WindowClause *) lfirst(lc);
5338 
5339  next = lnext(lc);
5340  /* framing options are NOT to be compared here! */
5341  if (equal(wc->partitionClause, wc2->partitionClause) &&
5342  equal(wc->orderClause, wc2->orderClause))
5343  {
5344  actives = list_delete_cell(actives, lc, prev);
5345  result = lappend(result, wc2);
5346  }
5347  else
5348  prev = lc;
5349  }
5350  }
5351 
5352  return result;
5353 }
5354 
5355 /*
5356  * make_window_input_target
5357  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5358  *
5359  * When the query has window functions, this function computes the desired
5360  * target to be computed by the node just below the first WindowAgg.
5361  * This tlist must contain all values needed to evaluate the window functions,
5362  * compute the final target list, and perform any required final sort step.
5363  * If multiple WindowAggs are needed, each intermediate one adds its window
5364  * function results onto this base tlist; only the topmost WindowAgg computes
5365  * the actual desired target list.
5366  *
5367  * This function is much like make_group_input_target, though not quite enough
5368  * like it to share code. As in that function, we flatten most expressions
5369  * into their component variables. But we do not want to flatten window
5370  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5371  * evaluations of them, which would be bad (possibly even resulting in
5372  * inconsistent answers, if they contain volatile functions).
5373  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5374  * make_group_input_target, because we may no longer have access to the
5375  * individual Vars in them.
5376  *
5377  * Another key difference from make_group_input_target is that we don't
5378  * flatten Aggref expressions, since those are to be computed below the
5379  * window functions and just referenced like Vars above that.
5380  *
5381  * 'final_target' is the query's final target list (in PathTarget form)
5382  * 'activeWindows' is the list of active windows previously identified by
5383  * select_active_windows.
5384  *
5385  * The result is the PathTarget to be computed by the plan node immediately
5386  * below the first WindowAgg node.
5387  */
5388 static PathTarget *
5390  PathTarget *final_target,
5391  List *activeWindows)
5392 {
5393  Query *parse = root->parse;
5394  PathTarget *input_target;
5395  Bitmapset *sgrefs;
5396  List *flattenable_cols;
5397  List *flattenable_vars;
5398  int i;
5399  ListCell *lc;
5400 
5401  Assert(parse->hasWindowFuncs);
5402 
5403  /*
5404  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5405  * into a bitmapset for convenient reference below.
5406  */
5407  sgrefs = NULL;
5408  foreach(lc, activeWindows)
5409  {
5410  WindowClause *wc = (WindowClause *) lfirst(lc);
5411  ListCell *lc2;
5412 
5413  foreach(lc2, wc->partitionClause)
5414  {
5415  SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5416 
5417  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5418  }
5419  foreach(lc2, wc->orderClause)
5420  {
5421  SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5422 
5423  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5424  }
5425  }
5426 
5427  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5428  foreach(lc, parse->groupClause)
5429  {
5430  SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
5431 
5432  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5433  }
5434 
5435  /*
5436  * Construct a target containing all the non-flattenable targetlist items,
5437  * and save aside the others for a moment.
5438  */
5439  input_target = create_empty_pathtarget();
5440  flattenable_cols = NIL;
5441 
5442  i = 0;
5443  foreach(lc, final_target->exprs)
5444  {
5445  Expr *expr = (Expr *) lfirst(lc);
5446  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5447 
5448  /*
5449  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5450  * that such items can't contain window functions, so it's okay to
5451  * compute them below the WindowAgg nodes.)
5452  */
5453  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5454  {
5455  /*
5456  * Don't want to deconstruct this value, so add it to the input
5457  * target as-is.
5458  */
5459  add_column_to_pathtarget(input_target, expr, sgref);
5460  }
5461  else
5462  {
5463  /*
5464  * Column is to be flattened, so just remember the expression for
5465  * later call to pull_var_clause.
5466  */
5467  flattenable_cols = lappend(flattenable_cols, expr);
5468  }
5469 
5470  i++;
5471  }
5472 
5473  /*
5474  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5475  * add them to the input target if not already present. (Some might be
5476  * there already because they're used directly as window/group clauses.)
5477  *
5478  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5479  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5480  * at higher levels. On the other hand, we should recurse into
5481  * WindowFuncs to make sure their input expressions are available.
5482  */
5483  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5487  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5488 
5489  /* clean up cruft */
5490  list_free(flattenable_vars);
5491  list_free(flattenable_cols);
5492 
5493  /* XXX this causes some redundant cost calculation ... */
5494  return set_pathtarget_cost_width(root, input_target);
5495 }
5496 
5497 /*
5498  * make_pathkeys_for_window
5499  * Create a pathkeys list describing the required input ordering
5500  * for the given WindowClause.
5501  *
5502  * The required ordering is first the PARTITION keys, then the ORDER keys.
5503  * In the future we might try to implement windowing using hashing, in which
5504  * case the ordering could be relaxed, but for now we always sort.
5505  *
5506  * Caution: if you change this, see createplan.c's get_column_info_for_window!
5507  */
5508 static List *
5510  List *tlist)
5511 {
5512  List *window_pathkeys;
5513  List *window_sortclauses;
5514 
5515  /* Throw error if can't sort */
5517  ereport(ERROR,
5518  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5519  errmsg("could not implement window PARTITION BY"),
5520  errdetail("Window partitioning columns must be of sortable datatypes.")));
5522  ereport(ERROR,
5523  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5524  errmsg("could not implement window ORDER BY"),
5525  errdetail("Window ordering columns must be of sortable datatypes.")));
5526 
5527  /* Okay, make the combined pathkeys */
5528  window_sortclauses = list_concat(list_copy(wc->partitionClause),
5529  list_copy(wc->orderClause));
5530  window_pathkeys = make_pathkeys_for_sortclauses(root,
5531  window_sortclauses,
5532  tlist);
5533  list_free(window_sortclauses);
5534  return window_pathkeys;
5535 }
5536 
5537 /*
5538  * make_sort_input_target
5539  * Generate appropriate PathTarget for initial input to Sort step.
5540  *
5541  * If the query has ORDER BY, this function chooses the target to be computed
5542  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5543  * project) steps. This might or might not be identical to the query's final
5544  * output target.
5545  *
5546  * The main argument for keeping the sort-input tlist the same as the final
5547  * is that we avoid a separate projection node (which will be needed if
5548  * they're different, because Sort can't project). However, there are also
5549  * advantages to postponing tlist evaluation till after the Sort: it ensures
5550  * a consistent order of evaluation for any volatile functions in the tlist,
5551  * and if there's also a LIMIT, we can stop the query without ever computing
5552  * tlist functions for later rows, which is beneficial for both volatile and
5553  * expensive functions.
5554  *
5555  * Our current policy is to postpone volatile expressions till after the sort
5556  * unconditionally (assuming that that's possible, ie they are in plain tlist
5557  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5558  * postpone set-returning expressions, because running them beforehand would
5559  * bloat the sort dataset, and because it might cause unexpected output order
5560  * if the sort isn't stable. However there's a constraint on that: all SRFs
5561  * in the tlist should be evaluated at the same plan step, so that they can
5562  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5563  * mustn't postpone any SRFs. (Note that in principle that policy should
5564  * probably get applied to the group/window input targetlists too, but we
5565  * have not done that historically.) Lastly, expensive expressions are
5566  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5567  * partial evaluation of the query is possible (if neither is true, we expect
5568  * to have to evaluate the expressions for every row anyway), or if there are
5569  * any volatile or set-returning expressions (since once we've put in a
5570  * projection at all, it won't cost any more to postpone more stuff).
5571  *
5572  * Another issue that could potentially be considered here is that
5573  * evaluating tlist expressions could result in data that's either wider
5574  * or narrower than the input Vars, thus changing the volume of data that
5575  * has to go through the Sort. However, we usually have only a very bad
5576  * idea of the output width of any expression more complex than a Var,
5577  * so for now it seems too risky to try to optimize on that basis.
5578  *
5579  * Note that if we do produce a modified sort-input target, and then the
5580  * query ends up not using an explicit Sort, no particular harm is done:
5581  * we'll initially use the modified target for the preceding path nodes,
5582  * but then change them to the final target with apply_projection_to_path.
5583  * Moreover, in such a case the guarantees about evaluation order of
5584  * volatile functions still hold, since the rows are sorted already.
5585  *
5586  * This function has some things in common with make_group_input_target and
5587  * make_window_input_target, though the detailed rules for what to do are
5588  * different. We never flatten/postpone any grouping or ordering columns;
5589  * those are needed before the sort. If we do flatten a particular
5590  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5591  * computed earlier.
5592  *
5593  * 'final_target' is the query's final target list (in PathTarget form)
5594  * 'have_postponed_srfs' is an output argument, see below
5595  *
5596  * The result is the PathTarget to be computed by the plan node immediately
5597  * below the Sort step (and the Distinct step, if any). This will be
5598  * exactly final_target if we decide a projection step wouldn't be helpful.
5599  *
5600  * In addition, *have_postponed_srfs is set to TRUE if we choose to postpone
5601  * any set-returning functions to after the Sort.
5602  */
5603 static PathTarget *
5605  PathTarget *final_target,
5606  bool *have_postponed_srfs)
5607 {
5608  Query *parse = root->parse;
5609  PathTarget *input_target;
5610  int ncols;
5611  bool *col_is_srf;
5612  bool *postpone_col;
5613  bool have_srf;
5614  bool have_volatile;
5615  bool have_expensive;
5616  bool have_srf_sortcols;
5617  bool postpone_srfs;
5618  List *postponable_cols;
5619  List *postponable_vars;
5620  int i;
5621  ListCell *lc;
5622 
5623  /* Shouldn't get here unless query has ORDER BY */
5624  Assert(parse->sortClause);
5625 
5626  *have_postponed_srfs = false; /* default result */
5627 
5628  /* Inspect tlist and collect per-column information */
5629  ncols = list_length(final_target->exprs);
5630  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5631  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5632  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5633 
5634  i = 0;
5635  foreach(lc, final_target->exprs)
5636  {
5637  Expr *expr = (Expr *) lfirst(lc);
5638 
5639  /*
5640  * If the column has a sortgroupref, assume it has to be evaluated
5641  * before sorting. Generally such columns would be ORDER BY, GROUP
5642  * BY, etc targets. One exception is columns that were removed from
5643  * GROUP BY by remove_useless_groupby_columns() ... but those would
5644  * only be Vars anyway. There don't seem to be any cases where it
5645  * would be worth the trouble to double-check.
5646  */
5647  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5648  {
5649  /*
5650  * Check for SRF or volatile functions. Check the SRF case first
5651  * because we must know whether we have any postponed SRFs.
5652  */
5653  if (parse->hasTargetSRFs &&
5654  expression_returns_set((Node *) expr))
5655  {
5656  /* We'll decide below whether these are postponable */
5657  col_is_srf[i] = true;
5658  have_srf = true;
5659  }
5660  else if (contain_volatile_functions((Node *) expr))
5661  {
5662  /* Unconditionally postpone */
5663  postpone_col[i] = true;
5664  have_volatile = true;
5665  }
5666  else
5667  {
5668  /*
5669  * Else check the cost. XXX it's annoying to have to do this
5670  * when set_pathtarget_cost_width() just did it. Refactor to
5671  * allow sharing the work?
5672  */
5673  QualCost cost;
5674 
5675  cost_qual_eval_node(&cost, (Node *) expr, root);
5676 
5677  /*
5678  * We arbitrarily define "expensive" as "more than 10X
5679  * cpu_operator_cost". Note this will take in any PL function
5680  * with default cost.
5681  */
5682  if (cost.per_tuple > 10 * cpu_operator_cost)
5683  {
5684  postpone_col[i] = true;
5685  have_expensive = true;
5686  }
5687  }
5688  }
5689  else
5690  {
5691  /* For sortgroupref cols, just check if any contain SRFs */
5692  if (!have_srf_sortcols &&
5693  parse->hasTargetSRFs &&
5694  expression_returns_set((Node *) expr))
5695  have_srf_sortcols = true;
5696  }
5697 
5698  i++;
5699  }
5700 
5701  /*
5702  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5703  */
5704  postpone_srfs = (have_srf && !have_srf_sortcols);
5705 
5706  /*
5707  * If we don't need a post-sort projection, just return final_target.
5708  */
5709  if (!(postpone_srfs || have_volatile ||
5710  (have_expensive &&
5711  (parse->limitCount || root->tuple_fraction > 0))))
5712  return final_target;
5713 
5714  /*
5715  * Report whether the post-sort projection will contain set-returning
5716  * functions. This is important because it affects whether the Sort can
5717  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5718  * to return.
5719  */
5720  *have_postponed_srfs = postpone_srfs;
5721 
5722  /*
5723  * Construct the sort-input target, taking all non-postponable columns and
5724  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5725  * the postponable ones.
5726  */
5727  input_target = create_empty_pathtarget();
5728  postponable_cols = NIL;
5729 
5730  i = 0;
5731  foreach(lc, final_target->exprs)
5732  {
5733  Expr *expr = (Expr *) lfirst(lc);
5734 
5735  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5736  postponable_cols = lappend(postponable_cols, expr);
5737  else
5738  add_column_to_pathtarget(input_target, expr,
5739  get_pathtarget_sortgroupref(final_target, i));
5740 
5741  i++;
5742  }
5743 
5744  /*
5745  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5746  * postponable columns, and add them to the sort-input target if not
5747  * already present. (Some might be there already.) We mustn't
5748  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5749  * would be unable to recompute them.
5750  */
5751  postponable_vars = pull_var_clause((Node *) postponable_cols,
5755  add_new_columns_to_pathtarget(input_target, postponable_vars);
5756 
5757  /* clean up cruft */
5758  list_free(postponable_vars);
5759  list_free(postponable_cols);
5760 
5761  /* XXX this represents even more redundant cost calculation ... */
5762  return set_pathtarget_cost_width(root, input_target);
5763 }
5764 
5765 /*
5766  * get_cheapest_fractional_path
5767  * Find the cheapest path for retrieving a specified fraction of all
5768  * the tuples expected to be returned by the given relation.
5769  *
5770  * We interpret tuple_fraction the same way as grouping_planner.
5771  *
5772  * We assume set_cheapest() has been run on the given rel.
5773  */
5774 Path *
5775 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5776 {
5777  Path *best_path = rel->cheapest_total_path;
5778  ListCell *l;
5779 
5780  /* If all tuples will be retrieved, just return the cheapest-total path */
5781  if (tuple_fraction <= 0.0)
5782  return best_path;
5783 
5784  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5785  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5786  tuple_fraction /= best_path->rows;
5787 
5788  foreach(l, rel->pathlist)
5789  {
5790  Path *path = (Path *) lfirst(l);
5791 
5792  if (path == rel->cheapest_total_path ||
5793  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5794  continue;
5795 
5796  best_path = path;
5797  }
5798 
5799  return best_path;
5800 }
5801 
5802 /*
5803  * adjust_paths_for_srfs
5804  * Fix up the Paths of the given upperrel to handle tSRFs properly.
5805  *
5806  * The executor can only handle set-returning functions that appear at the
5807  * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5808  * that are not at top level, we need to split up the evaluation into multiple
5809  * plan levels in which each level satisfies this constraint. This function
5810  * modifies each Path of an upperrel that (might) compute any SRFs in its
5811  * output tlist to insert appropriate projection steps.
5812  *
5813  * The given targets and targets_contain_srfs lists are from
5814  * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5815  * target in targets.
5816  */
5817 static void
5819  List *targets, List *targets_contain_srfs)
5820 {
5821  ListCell *lc;
5822 
5823  Assert(list_length(targets) == list_length(targets_contain_srfs));
5824  Assert(!linitial_int(targets_contain_srfs));
5825 
5826  /* If no SRFs appear at this plan level, nothing to do */
5827  if (list_length(targets) == 1)
5828  return;
5829 
5830  /*
5831  * Stack SRF-evaluation nodes atop each path for the rel.
5832  *
5833  * In principle we should re-run set_cheapest() here to identify the
5834  * cheapest path, but it seems unlikely that adding the same tlist eval
5835  * costs to all the paths would change that, so we don't bother. Instead,
5836  * just assume that the cheapest-startup and cheapest-total paths remain
5837  * so. (There should be no parameterized paths anymore, so we needn't
5838  * worry about updating cheapest_parameterized_paths.)
5839  */
5840  foreach(lc, rel->pathlist)
5841  {
5842  Path *subpath = (Path *) lfirst(lc);
5843  Path *newpath = subpath;
5844  ListCell *lc1,
5845