PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
planner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/htup_details.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/xact.h"
26 #include "catalog/pg_proc.h"
27 #include "catalog/pg_type.h"
28 #include "executor/executor.h"
29 #include "executor/nodeAgg.h"
30 #include "foreign/fdwapi.h"
31 #include "miscadmin.h"
32 #include "lib/bipartite_match.h"
33 #include "lib/knapsack.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #ifdef OPTIMIZER_DEBUG
37 #include "nodes/print.h"
38 #endif
39 #include "optimizer/clauses.h"
40 #include "optimizer/cost.h"
41 #include "optimizer/pathnode.h"
42 #include "optimizer/paths.h"
43 #include "optimizer/plancat.h"
44 #include "optimizer/planmain.h"
45 #include "optimizer/planner.h"
46 #include "optimizer/prep.h"
47 #include "optimizer/subselect.h"
48 #include "optimizer/tlist.h"
49 #include "optimizer/var.h"
50 #include "parser/analyze.h"
51 #include "parser/parsetree.h"
52 #include "parser/parse_agg.h"
53 #include "rewrite/rewriteManip.h"
54 #include "storage/dsm_impl.h"
55 #include "utils/rel.h"
56 #include "utils/selfuncs.h"
57 #include "utils/lsyscache.h"
58 #include "utils/syscache.h"
59 
60 
61 /* GUC parameters */
64 
65 /* Hook for plugins to get control in planner() */
67 
68 /* Hook for plugins to get control when grouping_planner() plans upper rels */
70 
71 
72 /* Expression kind codes for preprocess_expression */
73 #define EXPRKIND_QUAL 0
74 #define EXPRKIND_TARGET 1
75 #define EXPRKIND_RTFUNC 2
76 #define EXPRKIND_RTFUNC_LATERAL 3
77 #define EXPRKIND_VALUES 4
78 #define EXPRKIND_VALUES_LATERAL 5
79 #define EXPRKIND_LIMIT 6
80 #define EXPRKIND_APPINFO 7
81 #define EXPRKIND_PHV 8
82 #define EXPRKIND_TABLESAMPLE 9
83 #define EXPRKIND_ARBITER_ELEM 10
84 #define EXPRKIND_TABLEFUNC 11
85 #define EXPRKIND_TABLEFUNC_LATERAL 12
86 
87 /* Passthrough data for standard_qp_callback */
88 typedef struct
89 {
90  List *tlist; /* preprocessed query targetlist */
91  List *activeWindows; /* active windows, if any */
92  List *groupClause; /* overrides parse->groupClause */
94 
95 /*
96  * Data specific to grouping sets
97  */
98 
99 typedef struct
100 {
110 
111 /* Local functions */
112 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
113 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
114 static void inheritance_planner(PlannerInfo *root);
115 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
116  double tuple_fraction);
118 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
119  int *tleref_to_colnum_map);
120 static void preprocess_rowmarks(PlannerInfo *root);
121 static double preprocess_limit(PlannerInfo *root,
122  double tuple_fraction,
123  int64 *offset_est, int64 *count_est);
124 static bool limit_needed(Query *parse);
126 static List *preprocess_groupclause(PlannerInfo *root, List *force);
127 static List *extract_rollup_sets(List *groupingSets);
128 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
129 static void standard_qp_callback(PlannerInfo *root, void *extra);
130 static double get_number_of_groups(PlannerInfo *root,
131  double path_rows,
132  grouping_sets_data *gd);
134  const AggClauseCosts *agg_costs,
135  double dNumGroups);
137  RelOptInfo *input_rel,
138  PathTarget *target,
139  const AggClauseCosts *agg_costs,
140  grouping_sets_data *gd);
141 static void consider_groupingsets_paths(PlannerInfo *root,
142  RelOptInfo *grouped_rel,
143  Path *path,
144  bool is_sorted,
145  bool can_hash,
146  PathTarget *target,
147  grouping_sets_data *gd,
148  const AggClauseCosts *agg_costs,
149  double dNumGroups);
151  RelOptInfo *input_rel,
152  PathTarget *input_target,
153  PathTarget *output_target,
154  List *tlist,
155  WindowFuncLists *wflists,
156  List *activeWindows);
157 static void create_one_window_path(PlannerInfo *root,
158  RelOptInfo *window_rel,
159  Path *path,
160  PathTarget *input_target,
161  PathTarget *output_target,
162  List *tlist,
163  WindowFuncLists *wflists,
164  List *activeWindows);
166  RelOptInfo *input_rel);
168  RelOptInfo *input_rel,
169  PathTarget *target,
170  double limit_tuples);
172  PathTarget *final_target);
174  PathTarget *grouping_target);
175 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
176 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
178  PathTarget *final_target,
179  List *activeWindows);
181  List *tlist);
183  PathTarget *final_target,
184  bool *have_postponed_srfs);
185 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
186  List *targets, List *targets_contain_srfs);
187 
188 
189 /*****************************************************************************
190  *
191  * Query optimizer entry point
192  *
193  * To support loadable plugins that monitor or modify planner behavior,
194  * we provide a hook variable that lets a plugin get control before and
195  * after the standard planning process. The plugin would normally call
196  * standard_planner().
197  *
198  * Note to plugin authors: standard_planner() scribbles on its Query input,
199  * so you'd better copy that data structure if you want to plan more than once.
200  *
201  *****************************************************************************/
202 PlannedStmt *
203 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
204 {
205  PlannedStmt *result;
206 
207  if (planner_hook)
208  result = (*planner_hook) (parse, cursorOptions, boundParams);
209  else
210  result = standard_planner(parse, cursorOptions, boundParams);
211  return result;
212 }
213 
214 PlannedStmt *
215 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
216 {
217  PlannedStmt *result;
218  PlannerGlobal *glob;
219  double tuple_fraction;
220  PlannerInfo *root;
221  RelOptInfo *final_rel;
222  Path *best_path;
223  Plan *top_plan;
224  ListCell *lp,
225  *lr;
226 
227  /*
228  * Set up global state for this planner invocation. This data is needed
229  * across all levels of sub-Query that might exist in the given command,
230  * so we keep it in a separate struct that's linked to by each per-Query
231  * PlannerInfo.
232  */
233  glob = makeNode(PlannerGlobal);
234 
235  glob->boundParams = boundParams;
236  glob->subplans = NIL;
237  glob->subroots = NIL;
238  glob->rewindPlanIDs = NULL;
239  glob->finalrtable = NIL;
240  glob->finalrowmarks = NIL;
241  glob->resultRelations = NIL;
242  glob->nonleafResultRelations = NIL;
243  glob->rootResultRelations = NIL;
244  glob->relationOids = NIL;
245  glob->invalItems = NIL;
246  glob->nParamExec = 0;
247  glob->lastPHId = 0;
248  glob->lastRowMarkId = 0;
249  glob->lastPlanNodeId = 0;
250  glob->transientPlan = false;
251  glob->dependsOnRole = false;
252 
253  /*
254  * Assess whether it's feasible to use parallel mode for this query. We
255  * can't do this in a standalone backend, or if the command will try to
256  * modify any data, or if this is a cursor operation, or if GUCs are set
257  * to values that don't permit parallelism, or if parallel-unsafe
258  * functions are present in the query tree.
259  *
260  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
261  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
262  * the command is writing into a completely new table which workers won't
263  * be able to see. If the workers could see the table, the fact that
264  * group locking would cause them to ignore the leader's heavyweight
265  * relation extension lock and GIN page locks would make this unsafe.
266  * We'll have to fix that somehow if we want to allow parallel inserts in
267  * general; updates and deletes have additional problems especially around
268  * combo CIDs.)
269  *
270  * For now, we don't try to use parallel mode if we're running inside a
271  * parallel worker. We might eventually be able to relax this
272  * restriction, but for now it seems best not to have parallel workers
273  * trying to create their own parallel workers.
274  *
275  * We can't use parallelism in serializable mode because the predicate
276  * locking code is not parallel-aware. It's not catastrophic if someone
277  * tries to run a parallel plan in serializable mode; it just won't get
278  * any workers and will run serially. But it seems like a good heuristic
279  * to assume that the same serialization level will be in effect at plan
280  * time and execution time, so don't generate a parallel plan if we're in
281  * serializable mode.
282  */
283  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
286  parse->commandType == CMD_SELECT &&
287  !parse->hasModifyingCTE &&
289  !IsParallelWorker() &&
291  {
292  /* all the cheap tests pass, so scan the query tree */
293  glob->maxParallelHazard = max_parallel_hazard(parse);
295  }
296  else
297  {
298  /* skip the query tree scan, just assume it's unsafe */
300  glob->parallelModeOK = false;
301  }
302 
303  /*
304  * glob->parallelModeNeeded is normally set to false here and changed to
305  * true during plan creation if a Gather or Gather Merge plan is actually
306  * created (cf. create_gather_plan, create_gather_merge_plan).
307  *
308  * However, if force_parallel_mode = on or force_parallel_mode = regress,
309  * then we impose parallel mode whenever it's safe to do so, even if the
310  * final plan doesn't use parallelism. It's not safe to do so if the
311  * query contains anything parallel-unsafe; parallelModeOK will be false
312  * in that case. Note that parallelModeOK can't change after this point.
313  * Otherwise, everything in the query is either parallel-safe or
314  * parallel-restricted, and in either case it should be OK to impose
315  * parallel-mode restrictions. If that ends up breaking something, then
316  * either some function the user included in the query is incorrectly
317  * labelled as parallel-safe or parallel-restricted when in reality it's
318  * parallel-unsafe, or else the query planner itself has a bug.
319  */
320  glob->parallelModeNeeded = glob->parallelModeOK &&
322 
323  /* Determine what fraction of the plan is likely to be scanned */
324  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
325  {
326  /*
327  * We have no real idea how many tuples the user will ultimately FETCH
328  * from a cursor, but it is often the case that he doesn't want 'em
329  * all, or would prefer a fast-start plan anyway so that he can
330  * process some of the tuples sooner. Use a GUC parameter to decide
331  * what fraction to optimize for.
332  */
333  tuple_fraction = cursor_tuple_fraction;
334 
335  /*
336  * We document cursor_tuple_fraction as simply being a fraction, which
337  * means the edge cases 0 and 1 have to be treated specially here. We
338  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
339  */
340  if (tuple_fraction >= 1.0)
341  tuple_fraction = 0.0;
342  else if (tuple_fraction <= 0.0)
343  tuple_fraction = 1e-10;
344  }
345  else
346  {
347  /* Default assumption is we need all the tuples */
348  tuple_fraction = 0.0;
349  }
350 
351  /* primary planning entry point (may recurse for subqueries) */
352  root = subquery_planner(glob, parse, NULL,
353  false, tuple_fraction);
354 
355  /* Select best Path and turn it into a Plan */
356  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
357  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
358 
359  top_plan = create_plan(root, best_path);
360 
361  /*
362  * If creating a plan for a scrollable cursor, make sure it can run
363  * backwards on demand. Add a Material node at the top at need.
364  */
365  if (cursorOptions & CURSOR_OPT_SCROLL)
366  {
367  if (!ExecSupportsBackwardScan(top_plan))
368  top_plan = materialize_finished_plan(top_plan);
369  }
370 
371  /*
372  * Optionally add a Gather node for testing purposes, provided this is
373  * actually a safe thing to do.
374  */
376  {
377  Gather *gather = makeNode(Gather);
378 
379  gather->plan.targetlist = top_plan->targetlist;
380  gather->plan.qual = NIL;
381  gather->plan.lefttree = top_plan;
382  gather->plan.righttree = NULL;
383  gather->num_workers = 1;
384  gather->single_copy = true;
386 
387  /*
388  * Since this Gather has no parallel-aware descendants to signal to,
389  * we don't need a rescan Param.
390  */
391  gather->rescan_param = -1;
392 
393  /*
394  * Ideally we'd use cost_gather here, but setting up dummy path data
395  * to satisfy it doesn't seem much cleaner than knowing what it does.
396  */
397  gather->plan.startup_cost = top_plan->startup_cost +
399  gather->plan.total_cost = top_plan->total_cost +
401  gather->plan.plan_rows = top_plan->plan_rows;
402  gather->plan.plan_width = top_plan->plan_width;
403  gather->plan.parallel_aware = false;
404  gather->plan.parallel_safe = false;
405 
406  /* use parallel mode for parallel plans. */
407  root->glob->parallelModeNeeded = true;
408 
409  top_plan = &gather->plan;
410  }
411 
412  /*
413  * If any Params were generated, run through the plan tree and compute
414  * each plan node's extParam/allParam sets. Ideally we'd merge this into
415  * set_plan_references' tree traversal, but for now it has to be separate
416  * because we need to visit subplans before not after main plan.
417  */
418  if (glob->nParamExec > 0)
419  {
420  Assert(list_length(glob->subplans) == list_length(glob->subroots));
421  forboth(lp, glob->subplans, lr, glob->subroots)
422  {
423  Plan *subplan = (Plan *) lfirst(lp);
424  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
425 
426  SS_finalize_plan(subroot, subplan);
427  }
428  SS_finalize_plan(root, top_plan);
429  }
430 
431  /* final cleanup of the plan */
432  Assert(glob->finalrtable == NIL);
433  Assert(glob->finalrowmarks == NIL);
434  Assert(glob->resultRelations == NIL);
436  Assert(glob->rootResultRelations == NIL);
437  top_plan = set_plan_references(root, top_plan);
438  /* ... and the subplans (both regular subplans and initplans) */
439  Assert(list_length(glob->subplans) == list_length(glob->subroots));
440  forboth(lp, glob->subplans, lr, glob->subroots)
441  {
442  Plan *subplan = (Plan *) lfirst(lp);
443  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
444 
445  lfirst(lp) = set_plan_references(subroot, subplan);
446  }
447 
448  /* build the PlannedStmt result */
449  result = makeNode(PlannedStmt);
450 
451  result->commandType = parse->commandType;
452  result->queryId = parse->queryId;
453  result->hasReturning = (parse->returningList != NIL);
454  result->hasModifyingCTE = parse->hasModifyingCTE;
455  result->canSetTag = parse->canSetTag;
456  result->transientPlan = glob->transientPlan;
457  result->dependsOnRole = glob->dependsOnRole;
458  result->parallelModeNeeded = glob->parallelModeNeeded;
459  result->planTree = top_plan;
460  result->rtable = glob->finalrtable;
461  result->resultRelations = glob->resultRelations;
464  result->subplans = glob->subplans;
465  result->rewindPlanIDs = glob->rewindPlanIDs;
466  result->rowMarks = glob->finalrowmarks;
467  result->relationOids = glob->relationOids;
468  result->invalItems = glob->invalItems;
469  result->nParamExec = glob->nParamExec;
470  /* utilityStmt should be null, but we might as well copy it */
471  result->utilityStmt = parse->utilityStmt;
472  result->stmt_location = parse->stmt_location;
473  result->stmt_len = parse->stmt_len;
474 
475  return result;
476 }
477 
478 
479 /*--------------------
480  * subquery_planner
481  * Invokes the planner on a subquery. We recurse to here for each
482  * sub-SELECT found in the query tree.
483  *
484  * glob is the global state for the current planner run.
485  * parse is the querytree produced by the parser & rewriter.
486  * parent_root is the immediate parent Query's info (NULL at the top level).
487  * hasRecursion is true if this is a recursive WITH query.
488  * tuple_fraction is the fraction of tuples we expect will be retrieved.
489  * tuple_fraction is interpreted as explained for grouping_planner, below.
490  *
491  * Basically, this routine does the stuff that should only be done once
492  * per Query object. It then calls grouping_planner. At one time,
493  * grouping_planner could be invoked recursively on the same Query object;
494  * that's not currently true, but we keep the separation between the two
495  * routines anyway, in case we need it again someday.
496  *
497  * subquery_planner will be called recursively to handle sub-Query nodes
498  * found within the query's expressions and rangetable.
499  *
500  * Returns the PlannerInfo struct ("root") that contains all data generated
501  * while planning the subquery. In particular, the Path(s) attached to
502  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
503  * cheapest way(s) to implement the query. The top level will select the
504  * best Path and pass it through createplan.c to produce a finished Plan.
505  *--------------------
506  */
507 PlannerInfo *
509  PlannerInfo *parent_root,
510  bool hasRecursion, double tuple_fraction)
511 {
512  PlannerInfo *root;
513  List *newWithCheckOptions;
514  List *newHaving;
515  bool hasOuterJoins;
516  RelOptInfo *final_rel;
517  ListCell *l;
518 
519  /* Create a PlannerInfo data structure for this subquery */
520  root = makeNode(PlannerInfo);
521  root->parse = parse;
522  root->glob = glob;
523  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
524  root->parent_root = parent_root;
525  root->plan_params = NIL;
526  root->outer_params = NULL;
528  root->init_plans = NIL;
529  root->cte_plan_ids = NIL;
530  root->multiexpr_params = NIL;
531  root->eq_classes = NIL;
532  root->append_rel_list = NIL;
533  root->pcinfo_list = NIL;
534  root->rowMarks = NIL;
535  memset(root->upper_rels, 0, sizeof(root->upper_rels));
536  memset(root->upper_targets, 0, sizeof(root->upper_targets));
537  root->processed_tlist = NIL;
538  root->grouping_map = NULL;
539  root->minmax_aggs = NIL;
540  root->qual_security_level = 0;
541  root->hasInheritedTarget = false;
542  root->hasRecursion = hasRecursion;
543  if (hasRecursion)
544  root->wt_param_id = SS_assign_special_param(root);
545  else
546  root->wt_param_id = -1;
547  root->non_recursive_path = NULL;
548 
549  /*
550  * If there is a WITH list, process each WITH query and build an initplan
551  * SubPlan structure for it.
552  */
553  if (parse->cteList)
554  SS_process_ctes(root);
555 
556  /*
557  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
558  * to transform them into joins. Note that this step does not descend
559  * into subqueries; if we pull up any subqueries below, their SubLinks are
560  * processed just before pulling them up.
561  */
562  if (parse->hasSubLinks)
563  pull_up_sublinks(root);
564 
565  /*
566  * Scan the rangetable for set-returning functions, and inline them if
567  * possible (producing subqueries that might get pulled up next).
568  * Recursion issues here are handled in the same way as for SubLinks.
569  */
571 
572  /*
573  * Check to see if any subqueries in the jointree can be merged into this
574  * query.
575  */
576  pull_up_subqueries(root);
577 
578  /*
579  * If this is a simple UNION ALL query, flatten it into an appendrel. We
580  * do this now because it requires applying pull_up_subqueries to the leaf
581  * queries of the UNION ALL, which weren't touched above because they
582  * weren't referenced by the jointree (they will be after we do this).
583  */
584  if (parse->setOperations)
586 
587  /*
588  * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
589  * avoid the expense of doing flatten_join_alias_vars(). Also check for
590  * outer joins --- if none, we can skip reduce_outer_joins(). And check
591  * for LATERAL RTEs, too. This must be done after we have done
592  * pull_up_subqueries(), of course.
593  */
594  root->hasJoinRTEs = false;
595  root->hasLateralRTEs = false;
596  hasOuterJoins = false;
597  foreach(l, parse->rtable)
598  {
600 
601  if (rte->rtekind == RTE_JOIN)
602  {
603  root->hasJoinRTEs = true;
604  if (IS_OUTER_JOIN(rte->jointype))
605  hasOuterJoins = true;
606  }
607  if (rte->lateral)
608  root->hasLateralRTEs = true;
609  }
610 
611  /*
612  * Preprocess RowMark information. We need to do this after subquery
613  * pullup (so that all non-inherited RTEs are present) and before
614  * inheritance expansion (so that the info is available for
615  * expand_inherited_tables to examine and modify).
616  */
617  preprocess_rowmarks(root);
618 
619  /*
620  * Expand any rangetable entries that are inheritance sets into "append
621  * relations". This can add entries to the rangetable, but they must be
622  * plain base relations not joins, so it's OK (and marginally more
623  * efficient) to do it after checking for join RTEs. We must do it after
624  * pulling up subqueries, else we'd fail to handle inherited tables in
625  * subqueries.
626  */
628 
629  /*
630  * Set hasHavingQual to remember if HAVING clause is present. Needed
631  * because preprocess_expression will reduce a constant-true condition to
632  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
633  */
634  root->hasHavingQual = (parse->havingQual != NULL);
635 
636  /* Clear this flag; might get set in distribute_qual_to_rels */
637  root->hasPseudoConstantQuals = false;
638 
639  /*
640  * Do expression preprocessing on targetlist and quals, as well as other
641  * random expressions in the querytree. Note that we do not need to
642  * handle sort/group expressions explicitly, because they are actually
643  * part of the targetlist.
644  */
645  parse->targetList = (List *)
646  preprocess_expression(root, (Node *) parse->targetList,
648 
649  /* Constant-folding might have removed all set-returning functions */
650  if (parse->hasTargetSRFs)
652 
653  newWithCheckOptions = NIL;
654  foreach(l, parse->withCheckOptions)
655  {
657 
658  wco->qual = preprocess_expression(root, wco->qual,
659  EXPRKIND_QUAL);
660  if (wco->qual != NULL)
661  newWithCheckOptions = lappend(newWithCheckOptions, wco);
662  }
663  parse->withCheckOptions = newWithCheckOptions;
664 
665  parse->returningList = (List *)
666  preprocess_expression(root, (Node *) parse->returningList,
668 
669  preprocess_qual_conditions(root, (Node *) parse->jointree);
670 
671  parse->havingQual = preprocess_expression(root, parse->havingQual,
672  EXPRKIND_QUAL);
673 
674  foreach(l, parse->windowClause)
675  {
677 
678  /* partitionClause/orderClause are sort/group expressions */
681  wc->endOffset = preprocess_expression(root, wc->endOffset,
683  }
684 
685  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
687  parse->limitCount = preprocess_expression(root, parse->limitCount,
689 
690  if (parse->onConflict)
691  {
692  parse->onConflict->arbiterElems = (List *)
694  (Node *) parse->onConflict->arbiterElems,
696  parse->onConflict->arbiterWhere =
698  parse->onConflict->arbiterWhere,
699  EXPRKIND_QUAL);
700  parse->onConflict->onConflictSet = (List *)
702  (Node *) parse->onConflict->onConflictSet,
704  parse->onConflict->onConflictWhere =
706  parse->onConflict->onConflictWhere,
707  EXPRKIND_QUAL);
708  /* exclRelTlist contains only Vars, so no preprocessing needed */
709  }
710 
711  root->append_rel_list = (List *)
714 
715  /* Also need to preprocess expressions within RTEs */
716  foreach(l, parse->rtable)
717  {
719  int kind;
720  ListCell *lcsq;
721 
722  if (rte->rtekind == RTE_RELATION)
723  {
724  if (rte->tablesample)
725  rte->tablesample = (TableSampleClause *)
727  (Node *) rte->tablesample,
729  }
730  else if (rte->rtekind == RTE_SUBQUERY)
731  {
732  /*
733  * We don't want to do all preprocessing yet on the subquery's
734  * expressions, since that will happen when we plan it. But if it
735  * contains any join aliases of our level, those have to get
736  * expanded now, because planning of the subquery won't do it.
737  * That's only possible if the subquery is LATERAL.
738  */
739  if (rte->lateral && root->hasJoinRTEs)
740  rte->subquery = (Query *)
741  flatten_join_alias_vars(root, (Node *) rte->subquery);
742  }
743  else if (rte->rtekind == RTE_FUNCTION)
744  {
745  /* Preprocess the function expression(s) fully */
747  rte->functions = (List *)
748  preprocess_expression(root, (Node *) rte->functions, kind);
749  }
750  else if (rte->rtekind == RTE_TABLEFUNC)
751  {
752  /* Preprocess the function expression(s) fully */
754  rte->tablefunc = (TableFunc *)
755  preprocess_expression(root, (Node *) rte->tablefunc, kind);
756  }
757  else if (rte->rtekind == RTE_VALUES)
758  {
759  /* Preprocess the values lists fully */
761  rte->values_lists = (List *)
762  preprocess_expression(root, (Node *) rte->values_lists, kind);
763  }
764 
765  /*
766  * Process each element of the securityQuals list as if it were a
767  * separate qual expression (as indeed it is). We need to do it this
768  * way to get proper canonicalization of AND/OR structure. Note that
769  * this converts each element into an implicit-AND sublist.
770  */
771  foreach(lcsq, rte->securityQuals)
772  {
773  lfirst(lcsq) = preprocess_expression(root,
774  (Node *) lfirst(lcsq),
775  EXPRKIND_QUAL);
776  }
777  }
778 
779  /*
780  * In some cases we may want to transfer a HAVING clause into WHERE. We
781  * cannot do so if the HAVING clause contains aggregates (obviously) or
782  * volatile functions (since a HAVING clause is supposed to be executed
783  * only once per group). We also can't do this if there are any nonempty
784  * grouping sets; moving such a clause into WHERE would potentially change
785  * the results, if any referenced column isn't present in all the grouping
786  * sets. (If there are only empty grouping sets, then the HAVING clause
787  * must be degenerate as discussed below.)
788  *
789  * Also, it may be that the clause is so expensive to execute that we're
790  * better off doing it only once per group, despite the loss of
791  * selectivity. This is hard to estimate short of doing the entire
792  * planning process twice, so we use a heuristic: clauses containing
793  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
794  * clause into WHERE, in hopes of eliminating tuples before aggregation
795  * instead of after.
796  *
797  * If the query has explicit grouping then we can simply move such a
798  * clause into WHERE; any group that fails the clause will not be in the
799  * output because none of its tuples will reach the grouping or
800  * aggregation stage. Otherwise we must have a degenerate (variable-free)
801  * HAVING clause, which we put in WHERE so that query_planner() can use it
802  * in a gating Result node, but also keep in HAVING to ensure that we
803  * don't emit a bogus aggregated row. (This could be done better, but it
804  * seems not worth optimizing.)
805  *
806  * Note that both havingQual and parse->jointree->quals are in
807  * implicitly-ANDed-list form at this point, even though they are declared
808  * as Node *.
809  */
810  newHaving = NIL;
811  foreach(l, (List *) parse->havingQual)
812  {
813  Node *havingclause = (Node *) lfirst(l);
814 
815  if ((parse->groupClause && parse->groupingSets) ||
816  contain_agg_clause(havingclause) ||
817  contain_volatile_functions(havingclause) ||
818  contain_subplans(havingclause))
819  {
820  /* keep it in HAVING */
821  newHaving = lappend(newHaving, havingclause);
822  }
823  else if (parse->groupClause && !parse->groupingSets)
824  {
825  /* move it to WHERE */
826  parse->jointree->quals = (Node *)
827  lappend((List *) parse->jointree->quals, havingclause);
828  }
829  else
830  {
831  /* put a copy in WHERE, keep it in HAVING */
832  parse->jointree->quals = (Node *)
833  lappend((List *) parse->jointree->quals,
834  copyObject(havingclause));
835  newHaving = lappend(newHaving, havingclause);
836  }
837  }
838  parse->havingQual = (Node *) newHaving;
839 
840  /* Remove any redundant GROUP BY columns */
842 
843  /*
844  * If we have any outer joins, try to reduce them to plain inner joins.
845  * This step is most easily done after we've done expression
846  * preprocessing.
847  */
848  if (hasOuterJoins)
849  reduce_outer_joins(root);
850 
851  /*
852  * Do the main planning. If we have an inherited target relation, that
853  * needs special processing, else go straight to grouping_planner.
854  */
855  if (parse->resultRelation &&
856  rt_fetch(parse->resultRelation, parse->rtable)->inh)
857  inheritance_planner(root);
858  else
859  grouping_planner(root, false, tuple_fraction);
860 
861  /*
862  * Capture the set of outer-level param IDs we have access to, for use in
863  * extParam/allParam calculations later.
864  */
866 
867  /*
868  * If any initPlans were created in this query level, adjust the surviving
869  * Paths' costs and parallel-safety flags to account for them. The
870  * initPlans won't actually get attached to the plan tree till
871  * create_plan() runs, but we must include their effects now.
872  */
873  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
874  SS_charge_for_initplans(root, final_rel);
875 
876  /*
877  * Make sure we've identified the cheapest Path for the final rel. (By
878  * doing this here not in grouping_planner, we include initPlan costs in
879  * the decision, though it's unlikely that will change anything.)
880  */
881  set_cheapest(final_rel);
882 
883  return root;
884 }
885 
886 /*
887  * preprocess_expression
888  * Do subquery_planner's preprocessing work for an expression,
889  * which can be a targetlist, a WHERE clause (including JOIN/ON
890  * conditions), a HAVING clause, or a few other things.
891  */
892 static Node *
893 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
894 {
895  /*
896  * Fall out quickly if expression is empty. This occurs often enough to
897  * be worth checking. Note that null->null is the correct conversion for
898  * implicit-AND result format, too.
899  */
900  if (expr == NULL)
901  return NULL;
902 
903  /*
904  * If the query has any join RTEs, replace join alias variables with
905  * base-relation variables. We must do this before sublink processing,
906  * else sublinks expanded out from join aliases would not get processed.
907  * We can skip it in non-lateral RTE functions, VALUES lists, and
908  * TABLESAMPLE clauses, however, since they can't contain any Vars of the
909  * current query level.
910  */
911  if (root->hasJoinRTEs &&
912  !(kind == EXPRKIND_RTFUNC ||
913  kind == EXPRKIND_VALUES ||
914  kind == EXPRKIND_TABLESAMPLE ||
915  kind == EXPRKIND_TABLEFUNC))
916  expr = flatten_join_alias_vars(root, expr);
917 
918  /*
919  * Simplify constant expressions.
920  *
921  * Note: an essential effect of this is to convert named-argument function
922  * calls to positional notation and insert the current actual values of
923  * any default arguments for functions. To ensure that happens, we *must*
924  * process all expressions here. Previous PG versions sometimes skipped
925  * const-simplification if it didn't seem worth the trouble, but we can't
926  * do that anymore.
927  *
928  * Note: this also flattens nested AND and OR expressions into N-argument
929  * form. All processing of a qual expression after this point must be
930  * careful to maintain AND/OR flatness --- that is, do not generate a tree
931  * with AND directly under AND, nor OR directly under OR.
932  */
933  expr = eval_const_expressions(root, expr);
934 
935  /*
936  * If it's a qual or havingQual, canonicalize it.
937  */
938  if (kind == EXPRKIND_QUAL)
939  {
940  expr = (Node *) canonicalize_qual((Expr *) expr);
941 
942 #ifdef OPTIMIZER_DEBUG
943  printf("After canonicalize_qual()\n");
944  pprint(expr);
945 #endif
946  }
947 
948  /* Expand SubLinks to SubPlans */
949  if (root->parse->hasSubLinks)
950  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
951 
952  /*
953  * XXX do not insert anything here unless you have grokked the comments in
954  * SS_replace_correlation_vars ...
955  */
956 
957  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
958  if (root->query_level > 1)
959  expr = SS_replace_correlation_vars(root, expr);
960 
961  /*
962  * If it's a qual or havingQual, convert it to implicit-AND format. (We
963  * don't want to do this before eval_const_expressions, since the latter
964  * would be unable to simplify a top-level AND correctly. Also,
965  * SS_process_sublinks expects explicit-AND format.)
966  */
967  if (kind == EXPRKIND_QUAL)
968  expr = (Node *) make_ands_implicit((Expr *) expr);
969 
970  return expr;
971 }
972 
973 /*
974  * preprocess_qual_conditions
975  * Recursively scan the query's jointree and do subquery_planner's
976  * preprocessing work on each qual condition found therein.
977  */
978 static void
980 {
981  if (jtnode == NULL)
982  return;
983  if (IsA(jtnode, RangeTblRef))
984  {
985  /* nothing to do here */
986  }
987  else if (IsA(jtnode, FromExpr))
988  {
989  FromExpr *f = (FromExpr *) jtnode;
990  ListCell *l;
991 
992  foreach(l, f->fromlist)
994 
996  }
997  else if (IsA(jtnode, JoinExpr))
998  {
999  JoinExpr *j = (JoinExpr *) jtnode;
1000 
1001  preprocess_qual_conditions(root, j->larg);
1002  preprocess_qual_conditions(root, j->rarg);
1003 
1005  }
1006  else
1007  elog(ERROR, "unrecognized node type: %d",
1008  (int) nodeTag(jtnode));
1009 }
1010 
1011 /*
1012  * preprocess_phv_expression
1013  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1014  *
1015  * If a LATERAL subquery references an output of another subquery, and that
1016  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1017  * join, then we'll push the PlaceHolderVar expression down into the subquery
1018  * and later pull it back up during find_lateral_references, which runs after
1019  * subquery_planner has preprocessed all the expressions that were in the
1020  * current query level to start with. So we need to preprocess it then.
1021  */
1022 Expr *
1024 {
1025  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1026 }
1027 
1028 /*
1029  * inheritance_planner
1030  * Generate Paths in the case where the result relation is an
1031  * inheritance set.
1032  *
1033  * We have to handle this case differently from cases where a source relation
1034  * is an inheritance set. Source inheritance is expanded at the bottom of the
1035  * plan tree (see allpaths.c), but target inheritance has to be expanded at
1036  * the top. The reason is that for UPDATE, each target relation needs a
1037  * different targetlist matching its own column set. Fortunately,
1038  * the UPDATE/DELETE target can never be the nullable side of an outer join,
1039  * so it's OK to generate the plan this way.
1040  *
1041  * Returns nothing; the useful output is in the Paths we attach to
1042  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1043  *
1044  * Note that we have not done set_cheapest() on the final rel; it's convenient
1045  * to leave this to the caller.
1046  */
1047 static void
1049 {
1050  Query *parse = root->parse;
1051  int top_parentRTindex = parse->resultRelation;
1052  Bitmapset *subqueryRTindexes;
1053  Bitmapset *modifiableARIindexes;
1054  int nominalRelation = -1;
1055  List *final_rtable = NIL;
1056  int save_rel_array_size = 0;
1057  RelOptInfo **save_rel_array = NULL;
1058  List *subpaths = NIL;
1059  List *subroots = NIL;
1060  List *resultRelations = NIL;
1061  List *withCheckOptionLists = NIL;
1062  List *returningLists = NIL;
1063  List *rowMarks;
1064  RelOptInfo *final_rel;
1065  ListCell *lc;
1066  Index rti;
1067  RangeTblEntry *parent_rte;
1068  List *partitioned_rels = NIL;
1069  PlannerInfo *parent_root;
1070  Query *parent_parse;
1071  Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
1072  PlannerInfo **parent_roots = NULL;
1073 
1074  Assert(parse->commandType != CMD_INSERT);
1075 
1076  /*
1077  * We generate a modified instance of the original Query for each target
1078  * relation, plan that, and put all the plans into a list that will be
1079  * controlled by a single ModifyTable node. All the instances share the
1080  * same rangetable, but each instance must have its own set of subquery
1081  * RTEs within the finished rangetable because (1) they are likely to get
1082  * scribbled on during planning, and (2) it's not inconceivable that
1083  * subqueries could get planned differently in different cases. We need
1084  * not create duplicate copies of other RTE kinds, in particular not the
1085  * target relations, because they don't have either of those issues. Not
1086  * having to duplicate the target relations is important because doing so
1087  * (1) would result in a rangetable of length O(N^2) for N targets, with
1088  * at least O(N^3) work expended here; and (2) would greatly complicate
1089  * management of the rowMarks list.
1090  *
1091  * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1092  */
1093  subqueryRTindexes = NULL;
1094  rti = 1;
1095  foreach(lc, parse->rtable)
1096  {
1098 
1099  if (rte->rtekind == RTE_SUBQUERY)
1100  subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1101  rti++;
1102  }
1103 
1104  /*
1105  * Next, we want to identify which AppendRelInfo items contain references
1106  * to any of the aforesaid subquery RTEs. These items will need to be
1107  * copied and modified to adjust their subquery references; whereas the
1108  * other ones need not be touched. It's worth being tense over this
1109  * because we can usually avoid processing most of the AppendRelInfo
1110  * items, thereby saving O(N^2) space and time when the target is a large
1111  * inheritance tree. We can identify AppendRelInfo items by their
1112  * child_relid, since that should be unique within the list.
1113  */
1114  modifiableARIindexes = NULL;
1115  if (subqueryRTindexes != NULL)
1116  {
1117  foreach(lc, root->append_rel_list)
1118  {
1119  AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
1120 
1121  if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1122  bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
1124  subqueryRTindexes))
1125  modifiableARIindexes = bms_add_member(modifiableARIindexes,
1126  appinfo->child_relid);
1127  }
1128  }
1129 
1130  /*
1131  * If the parent RTE is a partitioned table, we should use that as the
1132  * nominal relation, because the RTEs added for partitioned tables
1133  * (including the root parent) as child members of the inheritance set do
1134  * not appear anywhere else in the plan. The situation is exactly the
1135  * opposite in the case of non-partitioned inheritance parent as described
1136  * below. For the same reason, collect the list of descendant partitioned
1137  * tables to be saved in ModifyTable node, so that executor can lock those
1138  * as well.
1139  */
1140  parent_rte = rt_fetch(top_parentRTindex, root->parse->rtable);
1141  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1142  {
1143  nominalRelation = top_parentRTindex;
1144  partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex);
1145  /* The root partitioned table is included as a child rel */
1146  Assert(list_length(partitioned_rels) >= 1);
1147  }
1148 
1149  /*
1150  * The PlannerInfo for each child is obtained by translating the relevant
1151  * members of the PlannerInfo for its immediate parent, which we find
1152  * using the parent_relid in its AppendRelInfo. We save the PlannerInfo
1153  * for each parent in an array indexed by relid for fast retrieval. Since
1154  * the maximum number of parents is limited by the number of RTEs in the
1155  * query, we use that number to allocate the array. An extra entry is
1156  * needed since relids start from 1.
1157  */
1158  parent_roots = (PlannerInfo **) palloc0((list_length(parse->rtable) + 1) *
1159  sizeof(PlannerInfo *));
1160  parent_roots[top_parentRTindex] = root;
1161 
1162  /*
1163  * And now we can get on with generating a plan for each child table.
1164  */
1165  foreach(lc, root->append_rel_list)
1166  {
1167  AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
1168  PlannerInfo *subroot;
1169  RangeTblEntry *child_rte;
1170  RelOptInfo *sub_final_rel;
1171  Path *subpath;
1172 
1173  /* append_rel_list contains all append rels; ignore others */
1174  if (!bms_is_member(appinfo->parent_relid, parent_relids))
1175  continue;
1176 
1177  /*
1178  * expand_inherited_rtentry() always processes a parent before any of
1179  * that parent's children, so the parent_root for this relation should
1180  * already be available.
1181  */
1182  parent_root = parent_roots[appinfo->parent_relid];
1183  Assert(parent_root != NULL);
1184  parent_parse = parent_root->parse;
1185 
1186  /*
1187  * We need a working copy of the PlannerInfo so that we can control
1188  * propagation of information back to the main copy.
1189  */
1190  subroot = makeNode(PlannerInfo);
1191  memcpy(subroot, parent_root, sizeof(PlannerInfo));
1192 
1193  /*
1194  * Generate modified query with this rel as target. We first apply
1195  * adjust_appendrel_attrs, which copies the Query and changes
1196  * references to the parent RTE to refer to the current child RTE,
1197  * then fool around with subquery RTEs.
1198  */
1199  subroot->parse = (Query *)
1200  adjust_appendrel_attrs(parent_root,
1201  (Node *) parent_parse,
1202  1, &appinfo);
1203 
1204  /*
1205  * If there are securityQuals attached to the parent, move them to the
1206  * child rel (they've already been transformed properly for that).
1207  */
1208  parent_rte = rt_fetch(appinfo->parent_relid, subroot->parse->rtable);
1209  child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1210  child_rte->securityQuals = parent_rte->securityQuals;
1211  parent_rte->securityQuals = NIL;
1212 
1213  /*
1214  * The rowMarks list might contain references to subquery RTEs, so
1215  * make a copy that we can apply ChangeVarNodes to. (Fortunately, the
1216  * executor doesn't need to see the modified copies --- we can just
1217  * pass it the original rowMarks list.)
1218  */
1219  subroot->rowMarks = copyObject(parent_root->rowMarks);
1220 
1221  /*
1222  * The append_rel_list likewise might contain references to subquery
1223  * RTEs (if any subqueries were flattenable UNION ALLs). So prepare
1224  * to apply ChangeVarNodes to that, too. As explained above, we only
1225  * want to copy items that actually contain such references; the rest
1226  * can just get linked into the subroot's append_rel_list.
1227  *
1228  * If we know there are no such references, we can just use the outer
1229  * append_rel_list unmodified.
1230  */
1231  if (modifiableARIindexes != NULL)
1232  {
1233  ListCell *lc2;
1234 
1235  subroot->append_rel_list = NIL;
1236  foreach(lc2, parent_root->append_rel_list)
1237  {
1238  AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
1239 
1240  if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1241  appinfo2 = copyObject(appinfo2);
1242 
1243  subroot->append_rel_list = lappend(subroot->append_rel_list,
1244  appinfo2);
1245  }
1246  }
1247 
1248  /*
1249  * Add placeholders to the child Query's rangetable list to fill the
1250  * RT indexes already reserved for subqueries in previous children.
1251  * These won't be referenced, so there's no need to make them very
1252  * valid-looking.
1253  */
1254  while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1255  subroot->parse->rtable = lappend(subroot->parse->rtable,
1257 
1258  /*
1259  * If this isn't the first child Query, generate duplicates of all
1260  * subquery RTEs, and adjust Var numbering to reference the
1261  * duplicates. To simplify the loop logic, we scan the original rtable
1262  * not the copy just made by adjust_appendrel_attrs; that should be OK
1263  * since subquery RTEs couldn't contain any references to the target
1264  * rel.
1265  */
1266  if (final_rtable != NIL && subqueryRTindexes != NULL)
1267  {
1268  ListCell *lr;
1269 
1270  rti = 1;
1271  foreach(lr, parent_parse->rtable)
1272  {
1274 
1275  if (bms_is_member(rti, subqueryRTindexes))
1276  {
1277  Index newrti;
1278 
1279  /*
1280  * The RTE can't contain any references to its own RT
1281  * index, except in its securityQuals, so we can save a
1282  * few cycles by applying ChangeVarNodes to the rest of
1283  * the rangetable before we append the RTE to it.
1284  */
1285  newrti = list_length(subroot->parse->rtable) + 1;
1286  ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1287  ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1288  /* Skip processing unchanging parts of append_rel_list */
1289  if (modifiableARIindexes != NULL)
1290  {
1291  ListCell *lc2;
1292 
1293  foreach(lc2, subroot->append_rel_list)
1294  {
1295  AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
1296 
1297  if (bms_is_member(appinfo2->child_relid,
1298  modifiableARIindexes))
1299  ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1300  }
1301  }
1302  rte = copyObject(rte);
1303  ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1304  subroot->parse->rtable = lappend(subroot->parse->rtable,
1305  rte);
1306  }
1307  rti++;
1308  }
1309  }
1310 
1311  /* There shouldn't be any OJ info to translate, as yet */
1312  Assert(subroot->join_info_list == NIL);
1313  /* and we haven't created PlaceHolderInfos, either */
1314  Assert(subroot->placeholder_list == NIL);
1315  /* hack to mark target relation as an inheritance partition */
1316  subroot->hasInheritedTarget = true;
1317 
1318  /*
1319  * If the child is further partitioned, remember it as a parent. Since
1320  * a partitioned table does not have any data, we don't need to create
1321  * a plan for it. We do, however, need to remember the PlannerInfo for
1322  * use when processing its children.
1323  */
1324  if (child_rte->inh)
1325  {
1326  Assert(child_rte->relkind == RELKIND_PARTITIONED_TABLE);
1327  parent_relids =
1328  bms_add_member(parent_relids, appinfo->child_relid);
1329  parent_roots[appinfo->child_relid] = subroot;
1330 
1331  continue;
1332  }
1333 
1334  /* Generate Path(s) for accessing this result relation */
1335  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1336 
1337  /*
1338  * Set the nomimal target relation of the ModifyTable node if not
1339  * already done. We use the inheritance parent RTE as the nominal
1340  * target relation if it's a partitioned table (see just above this
1341  * loop). In the non-partitioned parent case, we'll use the first
1342  * child relation (even if it's excluded) as the nominal target
1343  * relation. Because of the way expand_inherited_rtentry works, the
1344  * latter should be the RTE representing the parent table in its role
1345  * as a simple member of the inheritance set.
1346  *
1347  * It would be logically cleaner to *always* use the inheritance
1348  * parent RTE as the nominal relation; but that RTE is not otherwise
1349  * referenced in the plan in the non-partitioned inheritance case.
1350  * Instead the duplicate child RTE created by expand_inherited_rtentry
1351  * is used elsewhere in the plan, so using the original parent RTE
1352  * would give rise to confusing use of multiple aliases in EXPLAIN
1353  * output for what the user will think is the "same" table. OTOH,
1354  * it's not a problem in the partitioned inheritance case, because the
1355  * duplicate child RTE added for the parent does not appear anywhere
1356  * else in the plan tree.
1357  */
1358  if (nominalRelation < 0)
1359  nominalRelation = appinfo->child_relid;
1360 
1361  /*
1362  * Select cheapest path in case there's more than one. We always run
1363  * modification queries to conclusion, so we care only for the
1364  * cheapest-total path.
1365  */
1366  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1367  set_cheapest(sub_final_rel);
1368  subpath = sub_final_rel->cheapest_total_path;
1369 
1370  /*
1371  * If this child rel was excluded by constraint exclusion, exclude it
1372  * from the result plan.
1373  */
1374  if (IS_DUMMY_PATH(subpath))
1375  continue;
1376 
1377  /*
1378  * If this is the first non-excluded child, its post-planning rtable
1379  * becomes the initial contents of final_rtable; otherwise, append
1380  * just its modified subquery RTEs to final_rtable.
1381  */
1382  if (final_rtable == NIL)
1383  final_rtable = subroot->parse->rtable;
1384  else
1385  final_rtable = list_concat(final_rtable,
1386  list_copy_tail(subroot->parse->rtable,
1387  list_length(final_rtable)));
1388 
1389  /*
1390  * We need to collect all the RelOptInfos from all child plans into
1391  * the main PlannerInfo, since setrefs.c will need them. We use the
1392  * last child's simple_rel_array (previous ones are too short), so we
1393  * have to propagate forward the RelOptInfos that were already built
1394  * in previous children.
1395  */
1396  Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1397  for (rti = 1; rti < save_rel_array_size; rti++)
1398  {
1399  RelOptInfo *brel = save_rel_array[rti];
1400 
1401  if (brel)
1402  subroot->simple_rel_array[rti] = brel;
1403  }
1404  save_rel_array_size = subroot->simple_rel_array_size;
1405  save_rel_array = subroot->simple_rel_array;
1406 
1407  /* Make sure any initplans from this rel get into the outer list */
1408  root->init_plans = subroot->init_plans;
1409 
1410  /* Build list of sub-paths */
1411  subpaths = lappend(subpaths, subpath);
1412 
1413  /* Build list of modified subroots, too */
1414  subroots = lappend(subroots, subroot);
1415 
1416  /* Build list of target-relation RT indexes */
1417  resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1418 
1419  /* Build lists of per-relation WCO and RETURNING targetlists */
1420  if (parse->withCheckOptions)
1421  withCheckOptionLists = lappend(withCheckOptionLists,
1422  subroot->parse->withCheckOptions);
1423  if (parse->returningList)
1424  returningLists = lappend(returningLists,
1425  subroot->parse->returningList);
1426 
1427  Assert(!parse->onConflict);
1428  }
1429 
1430  /* Result path must go into outer query's FINAL upperrel */
1431  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1432 
1433  /*
1434  * We don't currently worry about setting final_rel's consider_parallel
1435  * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1436  * to get control here.
1437  */
1438 
1439  /*
1440  * If we managed to exclude every child rel, return a dummy plan; it
1441  * doesn't even need a ModifyTable node.
1442  */
1443  if (subpaths == NIL)
1444  {
1445  set_dummy_rel_pathlist(final_rel);
1446  return;
1447  }
1448 
1449  /*
1450  * Put back the final adjusted rtable into the master copy of the Query.
1451  * (We mustn't do this if we found no non-excluded children.)
1452  */
1453  parse->rtable = final_rtable;
1454  root->simple_rel_array_size = save_rel_array_size;
1455  root->simple_rel_array = save_rel_array;
1456  /* Must reconstruct master's simple_rte_array, too */
1457  root->simple_rte_array = (RangeTblEntry **)
1458  palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1459  rti = 1;
1460  foreach(lc, final_rtable)
1461  {
1463 
1464  root->simple_rte_array[rti++] = rte;
1465  }
1466 
1467  /*
1468  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1469  * have dealt with fetching non-locked marked rows, else we need to have
1470  * ModifyTable do that.
1471  */
1472  if (parse->rowMarks)
1473  rowMarks = NIL;
1474  else
1475  rowMarks = root->rowMarks;
1476 
1477  /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1478  add_path(final_rel, (Path *)
1479  create_modifytable_path(root, final_rel,
1480  parse->commandType,
1481  parse->canSetTag,
1482  nominalRelation,
1483  partitioned_rels,
1484  resultRelations,
1485  subpaths,
1486  subroots,
1487  withCheckOptionLists,
1488  returningLists,
1489  rowMarks,
1490  NULL,
1491  SS_assign_special_param(root)));
1492 }
1493 
1494 /*--------------------
1495  * grouping_planner
1496  * Perform planning steps related to grouping, aggregation, etc.
1497  *
1498  * This function adds all required top-level processing to the scan/join
1499  * Path(s) produced by query_planner.
1500  *
1501  * If inheritance_update is true, we're being called from inheritance_planner
1502  * and should not include a ModifyTable step in the resulting Path(s).
1503  * (inheritance_planner will create a single ModifyTable node covering all the
1504  * target tables.)
1505  *
1506  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1507  * tuple_fraction is interpreted as follows:
1508  * 0: expect all tuples to be retrieved (normal case)
1509  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1510  * from the plan to be retrieved
1511  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1512  * expected to be retrieved (ie, a LIMIT specification)
1513  *
1514  * Returns nothing; the useful output is in the Paths we attach to the
1515  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1516  * root->processed_tlist contains the final processed targetlist.
1517  *
1518  * Note that we have not done set_cheapest() on the final rel; it's convenient
1519  * to leave this to the caller.
1520  *--------------------
1521  */
1522 static void
1523 grouping_planner(PlannerInfo *root, bool inheritance_update,
1524  double tuple_fraction)
1525 {
1526  Query *parse = root->parse;
1527  List *tlist = parse->targetList;
1528  int64 offset_est = 0;
1529  int64 count_est = 0;
1530  double limit_tuples = -1.0;
1531  bool have_postponed_srfs = false;
1532  PathTarget *final_target;
1533  List *final_targets;
1534  List *final_targets_contain_srfs;
1535  RelOptInfo *current_rel;
1536  RelOptInfo *final_rel;
1537  ListCell *lc;
1538 
1539  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1540  if (parse->limitCount || parse->limitOffset)
1541  {
1542  tuple_fraction = preprocess_limit(root, tuple_fraction,
1543  &offset_est, &count_est);
1544 
1545  /*
1546  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1547  * estimate the effects of using a bounded sort.
1548  */
1549  if (count_est > 0 && offset_est >= 0)
1550  limit_tuples = (double) count_est + (double) offset_est;
1551  }
1552 
1553  /* Make tuple_fraction accessible to lower-level routines */
1554  root->tuple_fraction = tuple_fraction;
1555 
1556  if (parse->setOperations)
1557  {
1558  /*
1559  * If there's a top-level ORDER BY, assume we have to fetch all the
1560  * tuples. This might be too simplistic given all the hackery below
1561  * to possibly avoid the sort; but the odds of accurate estimates here
1562  * are pretty low anyway. XXX try to get rid of this in favor of
1563  * letting plan_set_operations generate both fast-start and
1564  * cheapest-total paths.
1565  */
1566  if (parse->sortClause)
1567  root->tuple_fraction = 0.0;
1568 
1569  /*
1570  * Construct Paths for set operations. The results will not need any
1571  * work except perhaps a top-level sort and/or LIMIT. Note that any
1572  * special work for recursive unions is the responsibility of
1573  * plan_set_operations.
1574  */
1575  current_rel = plan_set_operations(root);
1576 
1577  /*
1578  * We should not need to call preprocess_targetlist, since we must be
1579  * in a SELECT query node. Instead, use the targetlist returned by
1580  * plan_set_operations (since this tells whether it returned any
1581  * resjunk columns!), and transfer any sort key information from the
1582  * original tlist.
1583  */
1584  Assert(parse->commandType == CMD_SELECT);
1585 
1586  tlist = root->processed_tlist; /* from plan_set_operations */
1587 
1588  /* for safety, copy processed_tlist instead of modifying in-place */
1589  tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1590 
1591  /* Save aside the final decorated tlist */
1592  root->processed_tlist = tlist;
1593 
1594  /* Also extract the PathTarget form of the setop result tlist */
1595  final_target = current_rel->cheapest_total_path->pathtarget;
1596 
1597  /* The setop result tlist couldn't contain any SRFs */
1598  Assert(!parse->hasTargetSRFs);
1599  final_targets = final_targets_contain_srfs = NIL;
1600 
1601  /*
1602  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1603  * checked already, but let's make sure).
1604  */
1605  if (parse->rowMarks)
1606  ereport(ERROR,
1607  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1608  /*------
1609  translator: %s is a SQL row locking clause such as FOR UPDATE */
1610  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1612  parse->rowMarks)->strength))));
1613 
1614  /*
1615  * Calculate pathkeys that represent result ordering requirements
1616  */
1617  Assert(parse->distinctClause == NIL);
1619  parse->sortClause,
1620  tlist);
1621  }
1622  else
1623  {
1624  /* No set operations, do regular planning */
1625  PathTarget *sort_input_target;
1626  List *sort_input_targets;
1627  List *sort_input_targets_contain_srfs;
1628  PathTarget *grouping_target;
1629  List *grouping_targets;
1630  List *grouping_targets_contain_srfs;
1631  PathTarget *scanjoin_target;
1632  List *scanjoin_targets;
1633  List *scanjoin_targets_contain_srfs;
1634  bool have_grouping;
1635  AggClauseCosts agg_costs;
1636  WindowFuncLists *wflists = NULL;
1637  List *activeWindows = NIL;
1638  grouping_sets_data *gset_data = NULL;
1639  standard_qp_extra qp_extra;
1640 
1641  /* A recursive query should always have setOperations */
1642  Assert(!root->hasRecursion);
1643 
1644  /* Preprocess grouping sets and GROUP BY clause, if any */
1645  if (parse->groupingSets)
1646  {
1647  gset_data = preprocess_grouping_sets(root);
1648  }
1649  else
1650  {
1651  /* Preprocess regular GROUP BY clause, if any */
1652  if (parse->groupClause)
1653  parse->groupClause = preprocess_groupclause(root, NIL);
1654  }
1655 
1656  /* Preprocess targetlist */
1657  tlist = preprocess_targetlist(root, tlist);
1658 
1659  if (parse->onConflict)
1660  parse->onConflict->onConflictSet =
1662  parse->resultRelation,
1663  parse->rtable);
1664 
1665  /*
1666  * We are now done hacking up the query's targetlist. Most of the
1667  * remaining planning work will be done with the PathTarget
1668  * representation of tlists, but save aside the full representation so
1669  * that we can transfer its decoration (resnames etc) to the topmost
1670  * tlist of the finished Plan.
1671  */
1672  root->processed_tlist = tlist;
1673 
1674  /*
1675  * Collect statistics about aggregates for estimating costs, and mark
1676  * all the aggregates with resolved aggtranstypes. We must do this
1677  * before slicing and dicing the tlist into various pathtargets, else
1678  * some copies of the Aggref nodes might escape being marked with the
1679  * correct transtypes.
1680  *
1681  * Note: currently, we do not detect duplicate aggregates here. This
1682  * may result in somewhat-overestimated cost, which is fine for our
1683  * purposes since all Paths will get charged the same. But at some
1684  * point we might wish to do that detection in the planner, rather
1685  * than during executor startup.
1686  */
1687  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1688  if (parse->hasAggs)
1689  {
1690  get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1691  &agg_costs);
1693  &agg_costs);
1694  }
1695 
1696  /*
1697  * Locate any window functions in the tlist. (We don't need to look
1698  * anywhere else, since expressions used in ORDER BY will be in there
1699  * too.) Note that they could all have been eliminated by constant
1700  * folding, in which case we don't need to do any more work.
1701  */
1702  if (parse->hasWindowFuncs)
1703  {
1704  wflists = find_window_functions((Node *) tlist,
1705  list_length(parse->windowClause));
1706  if (wflists->numWindowFuncs > 0)
1707  activeWindows = select_active_windows(root, wflists);
1708  else
1709  parse->hasWindowFuncs = false;
1710  }
1711 
1712  /*
1713  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1714  * adding logic between here and the query_planner() call. Anything
1715  * that is needed in MIN/MAX-optimizable cases will have to be
1716  * duplicated in planagg.c.
1717  */
1718  if (parse->hasAggs)
1719  preprocess_minmax_aggregates(root, tlist);
1720 
1721  /*
1722  * Figure out whether there's a hard limit on the number of rows that
1723  * query_planner's result subplan needs to return. Even if we know a
1724  * hard limit overall, it doesn't apply if the query has any
1725  * grouping/aggregation operations, or SRFs in the tlist.
1726  */
1727  if (parse->groupClause ||
1728  parse->groupingSets ||
1729  parse->distinctClause ||
1730  parse->hasAggs ||
1731  parse->hasWindowFuncs ||
1732  parse->hasTargetSRFs ||
1733  root->hasHavingQual)
1734  root->limit_tuples = -1.0;
1735  else
1736  root->limit_tuples = limit_tuples;
1737 
1738  /* Set up data needed by standard_qp_callback */
1739  qp_extra.tlist = tlist;
1740  qp_extra.activeWindows = activeWindows;
1741  qp_extra.groupClause = (gset_data
1742  ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
1743  : parse->groupClause);
1744 
1745  /*
1746  * Generate the best unsorted and presorted paths for the scan/join
1747  * portion of this Query, ie the processing represented by the
1748  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1749  * We also generate (in standard_qp_callback) pathkey representations
1750  * of the query's sort clause, distinct clause, etc.
1751  */
1752  current_rel = query_planner(root, tlist,
1753  standard_qp_callback, &qp_extra);
1754 
1755  /*
1756  * Convert the query's result tlist into PathTarget format.
1757  *
1758  * Note: it's desirable to not do this till after query_planner(),
1759  * because the target width estimates can use per-Var width numbers
1760  * that were obtained within query_planner().
1761  */
1762  final_target = create_pathtarget(root, tlist);
1763 
1764  /*
1765  * If ORDER BY was given, consider whether we should use a post-sort
1766  * projection, and compute the adjusted target for preceding steps if
1767  * so.
1768  */
1769  if (parse->sortClause)
1770  sort_input_target = make_sort_input_target(root,
1771  final_target,
1772  &have_postponed_srfs);
1773  else
1774  sort_input_target = final_target;
1775 
1776  /*
1777  * If we have window functions to deal with, the output from any
1778  * grouping step needs to be what the window functions want;
1779  * otherwise, it should be sort_input_target.
1780  */
1781  if (activeWindows)
1782  grouping_target = make_window_input_target(root,
1783  final_target,
1784  activeWindows);
1785  else
1786  grouping_target = sort_input_target;
1787 
1788  /*
1789  * If we have grouping or aggregation to do, the topmost scan/join
1790  * plan node must emit what the grouping step wants; otherwise, it
1791  * should emit grouping_target.
1792  */
1793  have_grouping = (parse->groupClause || parse->groupingSets ||
1794  parse->hasAggs || root->hasHavingQual);
1795  if (have_grouping)
1796  scanjoin_target = make_group_input_target(root, final_target);
1797  else
1798  scanjoin_target = grouping_target;
1799 
1800  /*
1801  * If there are any SRFs in the targetlist, we must separate each of
1802  * these PathTargets into SRF-computing and SRF-free targets. Replace
1803  * each of the named targets with a SRF-free version, and remember the
1804  * list of additional projection steps we need to add afterwards.
1805  */
1806  if (parse->hasTargetSRFs)
1807  {
1808  /* final_target doesn't recompute any SRFs in sort_input_target */
1809  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1810  &final_targets,
1811  &final_targets_contain_srfs);
1812  final_target = linitial_node(PathTarget, final_targets);
1813  Assert(!linitial_int(final_targets_contain_srfs));
1814  /* likewise for sort_input_target vs. grouping_target */
1815  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1816  &sort_input_targets,
1817  &sort_input_targets_contain_srfs);
1818  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1819  Assert(!linitial_int(sort_input_targets_contain_srfs));
1820  /* likewise for grouping_target vs. scanjoin_target */
1821  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1822  &grouping_targets,
1823  &grouping_targets_contain_srfs);
1824  grouping_target = linitial_node(PathTarget, grouping_targets);
1825  Assert(!linitial_int(grouping_targets_contain_srfs));
1826  /* scanjoin_target will not have any SRFs precomputed for it */
1827  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1828  &scanjoin_targets,
1829  &scanjoin_targets_contain_srfs);
1830  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1831  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1832  }
1833  else
1834  {
1835  /* initialize lists, just to keep compiler quiet */
1836  final_targets = final_targets_contain_srfs = NIL;
1837  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1838  grouping_targets = grouping_targets_contain_srfs = NIL;
1839  scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
1840  }
1841 
1842  /*
1843  * Forcibly apply SRF-free scan/join target to all the Paths for the
1844  * scan/join rel.
1845  *
1846  * In principle we should re-run set_cheapest() here to identify the
1847  * cheapest path, but it seems unlikely that adding the same tlist
1848  * eval costs to all the paths would change that, so we don't bother.
1849  * Instead, just assume that the cheapest-startup and cheapest-total
1850  * paths remain so. (There should be no parameterized paths anymore,
1851  * so we needn't worry about updating cheapest_parameterized_paths.)
1852  */
1853  foreach(lc, current_rel->pathlist)
1854  {
1855  Path *subpath = (Path *) lfirst(lc);
1856  Path *path;
1857 
1858  Assert(subpath->param_info == NULL);
1859  path = apply_projection_to_path(root, current_rel,
1860  subpath, scanjoin_target);
1861  /* If we had to add a Result, path is different from subpath */
1862  if (path != subpath)
1863  {
1864  lfirst(lc) = path;
1865  if (subpath == current_rel->cheapest_startup_path)
1866  current_rel->cheapest_startup_path = path;
1867  if (subpath == current_rel->cheapest_total_path)
1868  current_rel->cheapest_total_path = path;
1869  }
1870  }
1871 
1872  /*
1873  * Upper planning steps which make use of the top scan/join rel's
1874  * partial pathlist will expect partial paths for that rel to produce
1875  * the same output as complete paths ... and we just changed the
1876  * output for the complete paths, so we'll need to do the same thing
1877  * for partial paths. But only parallel-safe expressions can be
1878  * computed by partial paths.
1879  */
1880  if (current_rel->partial_pathlist &&
1881  is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1882  {
1883  /* Apply the scan/join target to each partial path */
1884  foreach(lc, current_rel->partial_pathlist)
1885  {
1886  Path *subpath = (Path *) lfirst(lc);
1887  Path *newpath;
1888 
1889  /* Shouldn't have any parameterized paths anymore */
1890  Assert(subpath->param_info == NULL);
1891 
1892  /*
1893  * Don't use apply_projection_to_path() here, because there
1894  * could be other pointers to these paths, and therefore we
1895  * mustn't modify them in place.
1896  */
1897  newpath = (Path *) create_projection_path(root,
1898  current_rel,
1899  subpath,
1900  scanjoin_target);
1901  lfirst(lc) = newpath;
1902  }
1903  }
1904  else
1905  {
1906  /*
1907  * In the unfortunate event that scanjoin_target is not
1908  * parallel-safe, we can't apply it to the partial paths; in that
1909  * case, we'll need to forget about the partial paths, which
1910  * aren't valid input for upper planning steps.
1911  */
1912  current_rel->partial_pathlist = NIL;
1913  }
1914 
1915  /* Now fix things up if scan/join target contains SRFs */
1916  if (parse->hasTargetSRFs)
1917  adjust_paths_for_srfs(root, current_rel,
1918  scanjoin_targets,
1919  scanjoin_targets_contain_srfs);
1920 
1921  /*
1922  * Save the various upper-rel PathTargets we just computed into
1923  * root->upper_targets[]. The core code doesn't use this, but it
1924  * provides a convenient place for extensions to get at the info. For
1925  * consistency, we save all the intermediate targets, even though some
1926  * of the corresponding upperrels might not be needed for this query.
1927  */
1928  root->upper_targets[UPPERREL_FINAL] = final_target;
1929  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1930  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1931 
1932  /*
1933  * If we have grouping and/or aggregation, consider ways to implement
1934  * that. We build a new upperrel representing the output of this
1935  * phase.
1936  */
1937  if (have_grouping)
1938  {
1939  current_rel = create_grouping_paths(root,
1940  current_rel,
1941  grouping_target,
1942  &agg_costs,
1943  gset_data);
1944  /* Fix things up if grouping_target contains SRFs */
1945  if (parse->hasTargetSRFs)
1946  adjust_paths_for_srfs(root, current_rel,
1947  grouping_targets,
1948  grouping_targets_contain_srfs);
1949  }
1950 
1951  /*
1952  * If we have window functions, consider ways to implement those. We
1953  * build a new upperrel representing the output of this phase.
1954  */
1955  if (activeWindows)
1956  {
1957  current_rel = create_window_paths(root,
1958  current_rel,
1959  grouping_target,
1960  sort_input_target,
1961  tlist,
1962  wflists,
1963  activeWindows);
1964  /* Fix things up if sort_input_target contains SRFs */
1965  if (parse->hasTargetSRFs)
1966  adjust_paths_for_srfs(root, current_rel,
1967  sort_input_targets,
1968  sort_input_targets_contain_srfs);
1969  }
1970 
1971  /*
1972  * If there is a DISTINCT clause, consider ways to implement that. We
1973  * build a new upperrel representing the output of this phase.
1974  */
1975  if (parse->distinctClause)
1976  {
1977  current_rel = create_distinct_paths(root,
1978  current_rel);
1979  }
1980  } /* end of if (setOperations) */
1981 
1982  /*
1983  * If ORDER BY was given, consider ways to implement that, and generate a
1984  * new upperrel containing only paths that emit the correct ordering and
1985  * project the correct final_target. We can apply the original
1986  * limit_tuples limit in sort costing here, but only if there are no
1987  * postponed SRFs.
1988  */
1989  if (parse->sortClause)
1990  {
1991  current_rel = create_ordered_paths(root,
1992  current_rel,
1993  final_target,
1994  have_postponed_srfs ? -1.0 :
1995  limit_tuples);
1996  /* Fix things up if final_target contains SRFs */
1997  if (parse->hasTargetSRFs)
1998  adjust_paths_for_srfs(root, current_rel,
1999  final_targets,
2000  final_targets_contain_srfs);
2001  }
2002 
2003  /*
2004  * Now we are prepared to build the final-output upperrel.
2005  */
2006  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
2007 
2008  /*
2009  * If the input rel is marked consider_parallel and there's nothing that's
2010  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
2011  * consider_parallel as well. Note that if the query has rowMarks or is
2012  * not a SELECT, consider_parallel will be false for every relation in the
2013  * query.
2014  */
2015  if (current_rel->consider_parallel &&
2016  is_parallel_safe(root, parse->limitOffset) &&
2017  is_parallel_safe(root, parse->limitCount))
2018  final_rel->consider_parallel = true;
2019 
2020  /*
2021  * If the current_rel belongs to a single FDW, so does the final_rel.
2022  */
2023  final_rel->serverid = current_rel->serverid;
2024  final_rel->userid = current_rel->userid;
2025  final_rel->useridiscurrent = current_rel->useridiscurrent;
2026  final_rel->fdwroutine = current_rel->fdwroutine;
2027 
2028  /*
2029  * Generate paths for the final_rel. Insert all surviving paths, with
2030  * LockRows, Limit, and/or ModifyTable steps added if needed.
2031  */
2032  foreach(lc, current_rel->pathlist)
2033  {
2034  Path *path = (Path *) lfirst(lc);
2035 
2036  /*
2037  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
2038  * (Note: we intentionally test parse->rowMarks not root->rowMarks
2039  * here. If there are only non-locking rowmarks, they should be
2040  * handled by the ModifyTable node instead. However, root->rowMarks
2041  * is what goes into the LockRows node.)
2042  */
2043  if (parse->rowMarks)
2044  {
2045  path = (Path *) create_lockrows_path(root, final_rel, path,
2046  root->rowMarks,
2047  SS_assign_special_param(root));
2048  }
2049 
2050  /*
2051  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
2052  */
2053  if (limit_needed(parse))
2054  {
2055  path = (Path *) create_limit_path(root, final_rel, path,
2056  parse->limitOffset,
2057  parse->limitCount,
2058  offset_est, count_est);
2059  }
2060 
2061  /*
2062  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
2063  * inheritance_planner, add the ModifyTable node.
2064  */
2065  if (parse->commandType != CMD_SELECT && !inheritance_update)
2066  {
2067  List *withCheckOptionLists;
2068  List *returningLists;
2069  List *rowMarks;
2070 
2071  /*
2072  * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2073  * needed.
2074  */
2075  if (parse->withCheckOptions)
2076  withCheckOptionLists = list_make1(parse->withCheckOptions);
2077  else
2078  withCheckOptionLists = NIL;
2079 
2080  if (parse->returningList)
2081  returningLists = list_make1(parse->returningList);
2082  else
2083  returningLists = NIL;
2084 
2085  /*
2086  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2087  * will have dealt with fetching non-locked marked rows, else we
2088  * need to have ModifyTable do that.
2089  */
2090  if (parse->rowMarks)
2091  rowMarks = NIL;
2092  else
2093  rowMarks = root->rowMarks;
2094 
2095  path = (Path *)
2096  create_modifytable_path(root, final_rel,
2097  parse->commandType,
2098  parse->canSetTag,
2099  parse->resultRelation,
2100  NIL,
2102  list_make1(path),
2103  list_make1(root),
2104  withCheckOptionLists,
2105  returningLists,
2106  rowMarks,
2107  parse->onConflict,
2108  SS_assign_special_param(root));
2109  }
2110 
2111  /* And shove it into final_rel */
2112  add_path(final_rel, path);
2113  }
2114 
2115  /*
2116  * If there is an FDW that's responsible for all baserels of the query,
2117  * let it consider adding ForeignPaths.
2118  */
2119  if (final_rel->fdwroutine &&
2120  final_rel->fdwroutine->GetForeignUpperPaths)
2122  current_rel, final_rel);
2123 
2124  /* Let extensions possibly add some more paths */
2126  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2127  current_rel, final_rel);
2128 
2129  /* Note: currently, we leave it to callers to do set_cheapest() */
2130 }
2131 
2132 /*
2133  * Do preprocessing for groupingSets clause and related data. This handles the
2134  * preliminary steps of expanding the grouping sets, organizing them into lists
2135  * of rollups, and preparing annotations which will later be filled in with
2136  * size estimates.
2137  */
2138 static grouping_sets_data *
2140 {
2141  Query *parse = root->parse;
2142  List *sets;
2143  int maxref = 0;
2144  ListCell *lc;
2145  ListCell *lc_set;
2147 
2148  parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2149 
2150  gd->any_hashable = false;
2151  gd->unhashable_refs = NULL;
2152  gd->unsortable_refs = NULL;
2153  gd->unsortable_sets = NIL;
2154 
2155  if (parse->groupClause)
2156  {
2157  ListCell *lc;
2158 
2159  foreach(lc, parse->groupClause)
2160  {
2162  Index ref = gc->tleSortGroupRef;
2163 
2164  if (ref > maxref)
2165  maxref = ref;
2166 
2167  if (!gc->hashable)
2169 
2170  if (!OidIsValid(gc->sortop))
2172  }
2173  }
2174 
2175  /* Allocate workspace array for remapping */
2176  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2177 
2178  /*
2179  * If we have any unsortable sets, we must extract them before trying to
2180  * prepare rollups. Unsortable sets don't go through
2181  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2182  * here.
2183  */
2184  if (!bms_is_empty(gd->unsortable_refs))
2185  {
2186  List *sortable_sets = NIL;
2187 
2188  foreach(lc, parse->groupingSets)
2189  {
2190  List *gset = (List *) lfirst(lc);
2191 
2192  if (bms_overlap_list(gd->unsortable_refs, gset))
2193  {
2195 
2196  gs->set = gset;
2197  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2198 
2199  /*
2200  * We must enforce here that an unsortable set is hashable;
2201  * later code assumes this. Parse analysis only checks that
2202  * every individual column is either hashable or sortable.
2203  *
2204  * Note that passing this test doesn't guarantee we can
2205  * generate a plan; there might be other showstoppers.
2206  */
2207  if (bms_overlap_list(gd->unhashable_refs, gset))
2208  ereport(ERROR,
2209  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2210  errmsg("could not implement GROUP BY"),
2211  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2212  }
2213  else
2214  sortable_sets = lappend(sortable_sets, gset);
2215  }
2216 
2217  if (sortable_sets)
2218  sets = extract_rollup_sets(sortable_sets);
2219  else
2220  sets = NIL;
2221  }
2222  else
2223  sets = extract_rollup_sets(parse->groupingSets);
2224 
2225  foreach(lc_set, sets)
2226  {
2227  List *current_sets = (List *) lfirst(lc_set);
2228  RollupData *rollup = makeNode(RollupData);
2229  GroupingSetData *gs;
2230 
2231  /*
2232  * Reorder the current list of grouping sets into correct prefix
2233  * order. If only one aggregation pass is needed, try to make the
2234  * list match the ORDER BY clause; if more than one pass is needed, we
2235  * don't bother with that.
2236  *
2237  * Note that this reorders the sets from smallest-member-first to
2238  * largest-member-first, and applies the GroupingSetData annotations,
2239  * though the data will be filled in later.
2240  */
2241  current_sets = reorder_grouping_sets(current_sets,
2242  (list_length(sets) == 1
2243  ? parse->sortClause
2244  : NIL));
2245 
2246  /*
2247  * Get the initial (and therefore largest) grouping set.
2248  */
2249  gs = linitial_node(GroupingSetData, current_sets);
2250 
2251  /*
2252  * Order the groupClause appropriately. If the first grouping set is
2253  * empty, then the groupClause must also be empty; otherwise we have
2254  * to force the groupClause to match that grouping set's order.
2255  *
2256  * (The first grouping set can be empty even though parse->groupClause
2257  * is not empty only if all non-empty grouping sets are unsortable.
2258  * The groupClauses for hashed grouping sets are built later on.)
2259  */
2260  if (gs->set)
2261  rollup->groupClause = preprocess_groupclause(root, gs->set);
2262  else
2263  rollup->groupClause = NIL;
2264 
2265  /*
2266  * Is it hashable? We pretend empty sets are hashable even though we
2267  * actually force them not to be hashed later. But don't bother if
2268  * there's nothing but empty sets (since in that case we can't hash
2269  * anything).
2270  */
2271  if (gs->set &&
2273  {
2274  rollup->hashable = true;
2275  gd->any_hashable = true;
2276  }
2277 
2278  /*
2279  * Now that we've pinned down an order for the groupClause for this
2280  * list of grouping sets, we need to remap the entries in the grouping
2281  * sets from sortgrouprefs to plain indices (0-based) into the
2282  * groupClause for this collection of grouping sets. We keep the
2283  * original form for later use, though.
2284  */
2285  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2286  current_sets,
2287  gd->tleref_to_colnum_map);
2288  rollup->gsets_data = current_sets;
2289 
2290  gd->rollups = lappend(gd->rollups, rollup);
2291  }
2292 
2293  if (gd->unsortable_sets)
2294  {
2295  /*
2296  * We have not yet pinned down a groupclause for this, but we will
2297  * need index-based lists for estimation purposes. Construct
2298  * hash_sets_idx based on the entire original groupclause for now.
2299  */
2301  gd->unsortable_sets,
2302  gd->tleref_to_colnum_map);
2303  gd->any_hashable = true;
2304  }
2305 
2306  return gd;
2307 }
2308 
2309 /*
2310  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2311  * (without annotation) mapped to indexes into the given groupclause.
2312  */
2313 static List *
2315  List *gsets,
2316  int *tleref_to_colnum_map)
2317 {
2318  int ref = 0;
2319  List *result = NIL;
2320  ListCell *lc;
2321 
2322  foreach(lc, groupClause)
2323  {
2325 
2326  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2327  }
2328 
2329  foreach(lc, gsets)
2330  {
2331  List *set = NIL;
2332  ListCell *lc2;
2334 
2335  foreach(lc2, gs->set)
2336  {
2337  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2338  }
2339 
2340  result = lappend(result, set);
2341  }
2342 
2343  return result;
2344 }
2345 
2346 
2347 
2348 /*
2349  * Detect whether a plan node is a "dummy" plan created when a relation
2350  * is deemed not to need scanning due to constraint exclusion.
2351  *
2352  * Currently, such dummy plans are Result nodes with constant FALSE
2353  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2354  *
2355  * XXX this probably ought to be somewhere else, but not clear where.
2356  */
2357 bool
2359 {
2360  if (IsA(plan, Result))
2361  {
2362  List *rcqual = (List *) ((Result *) plan)->resconstantqual;
2363 
2364  if (list_length(rcqual) == 1)
2365  {
2366  Const *constqual = (Const *) linitial(rcqual);
2367 
2368  if (constqual && IsA(constqual, Const))
2369  {
2370  if (!constqual->constisnull &&
2371  !DatumGetBool(constqual->constvalue))
2372  return true;
2373  }
2374  }
2375  }
2376  return false;
2377 }
2378 
2379 /*
2380  * preprocess_rowmarks - set up PlanRowMarks if needed
2381  */
2382 static void
2384 {
2385  Query *parse = root->parse;
2386  Bitmapset *rels;
2387  List *prowmarks;
2388  ListCell *l;
2389  int i;
2390 
2391  if (parse->rowMarks)
2392  {
2393  /*
2394  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2395  * grouping, since grouping renders a reference to individual tuple
2396  * CTIDs invalid. This is also checked at parse time, but that's
2397  * insufficient because of rule substitution, query pullup, etc.
2398  */
2400  parse->rowMarks)->strength);
2401  }
2402  else
2403  {
2404  /*
2405  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2406  * UPDATE/SHARE.
2407  */
2408  if (parse->commandType != CMD_UPDATE &&
2409  parse->commandType != CMD_DELETE)
2410  return;
2411  }
2412 
2413  /*
2414  * We need to have rowmarks for all base relations except the target. We
2415  * make a bitmapset of all base rels and then remove the items we don't
2416  * need or have FOR [KEY] UPDATE/SHARE marks for.
2417  */
2418  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2419  if (parse->resultRelation)
2420  rels = bms_del_member(rels, parse->resultRelation);
2421 
2422  /*
2423  * Convert RowMarkClauses to PlanRowMark representation.
2424  */
2425  prowmarks = NIL;
2426  foreach(l, parse->rowMarks)
2427  {
2429  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2430  PlanRowMark *newrc;
2431 
2432  /*
2433  * Currently, it is syntactically impossible to have FOR UPDATE et al
2434  * applied to an update/delete target rel. If that ever becomes
2435  * possible, we should drop the target from the PlanRowMark list.
2436  */
2437  Assert(rc->rti != parse->resultRelation);
2438 
2439  /*
2440  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2441  * can't support true locking. Subqueries that got flattened into the
2442  * main query should be ignored completely. Any that didn't will get
2443  * ROW_MARK_COPY items in the next loop.
2444  */
2445  if (rte->rtekind != RTE_RELATION)
2446  continue;
2447 
2448  rels = bms_del_member(rels, rc->rti);
2449 
2450  newrc = makeNode(PlanRowMark);
2451  newrc->rti = newrc->prti = rc->rti;
2452  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2453  newrc->markType = select_rowmark_type(rte, rc->strength);
2454  newrc->allMarkTypes = (1 << newrc->markType);
2455  newrc->strength = rc->strength;
2456  newrc->waitPolicy = rc->waitPolicy;
2457  newrc->isParent = false;
2458 
2459  prowmarks = lappend(prowmarks, newrc);
2460  }
2461 
2462  /*
2463  * Now, add rowmarks for any non-target, non-locked base relations.
2464  */
2465  i = 0;
2466  foreach(l, parse->rtable)
2467  {
2469  PlanRowMark *newrc;
2470 
2471  i++;
2472  if (!bms_is_member(i, rels))
2473  continue;
2474 
2475  newrc = makeNode(PlanRowMark);
2476  newrc->rti = newrc->prti = i;
2477  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2478  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2479  newrc->allMarkTypes = (1 << newrc->markType);
2480  newrc->strength = LCS_NONE;
2481  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2482  newrc->isParent = false;
2483 
2484  prowmarks = lappend(prowmarks, newrc);
2485  }
2486 
2487  root->rowMarks = prowmarks;
2488 }
2489 
2490 /*
2491  * Select RowMarkType to use for a given table
2492  */
2495 {
2496  if (rte->rtekind != RTE_RELATION)
2497  {
2498  /* If it's not a table at all, use ROW_MARK_COPY */
2499  return ROW_MARK_COPY;
2500  }
2501  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2502  {
2503  /* Let the FDW select the rowmark type, if it wants to */
2504  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2505 
2506  if (fdwroutine->GetForeignRowMarkType != NULL)
2507  return fdwroutine->GetForeignRowMarkType(rte, strength);
2508  /* Otherwise, use ROW_MARK_COPY by default */
2509  return ROW_MARK_COPY;
2510  }
2511  else
2512  {
2513  /* Regular table, apply the appropriate lock type */
2514  switch (strength)
2515  {
2516  case LCS_NONE:
2517 
2518  /*
2519  * We don't need a tuple lock, only the ability to re-fetch
2520  * the row.
2521  */
2522  return ROW_MARK_REFERENCE;
2523  break;
2524  case LCS_FORKEYSHARE:
2525  return ROW_MARK_KEYSHARE;
2526  break;
2527  case LCS_FORSHARE:
2528  return ROW_MARK_SHARE;
2529  break;
2530  case LCS_FORNOKEYUPDATE:
2531  return ROW_MARK_NOKEYEXCLUSIVE;
2532  break;
2533  case LCS_FORUPDATE:
2534  return ROW_MARK_EXCLUSIVE;
2535  break;
2536  }
2537  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2538  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2539  }
2540 }
2541 
2542 /*
2543  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2544  *
2545  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2546  * results back in *count_est and *offset_est. These variables are set to
2547  * 0 if the corresponding clause is not present, and -1 if it's present
2548  * but we couldn't estimate the value for it. (The "0" convention is OK
2549  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2550  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2551  * usual practice of never estimating less than one row.) These values will
2552  * be passed to create_limit_path, which see if you change this code.
2553  *
2554  * The return value is the suitably adjusted tuple_fraction to use for
2555  * planning the query. This adjustment is not overridable, since it reflects
2556  * plan actions that grouping_planner() will certainly take, not assumptions
2557  * about context.
2558  */
2559 static double
2560 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2561  int64 *offset_est, int64 *count_est)
2562 {
2563  Query *parse = root->parse;
2564  Node *est;
2565  double limit_fraction;
2566 
2567  /* Should not be called unless LIMIT or OFFSET */
2568  Assert(parse->limitCount || parse->limitOffset);
2569 
2570  /*
2571  * Try to obtain the clause values. We use estimate_expression_value
2572  * primarily because it can sometimes do something useful with Params.
2573  */
2574  if (parse->limitCount)
2575  {
2576  est = estimate_expression_value(root, parse->limitCount);
2577  if (est && IsA(est, Const))
2578  {
2579  if (((Const *) est)->constisnull)
2580  {
2581  /* NULL indicates LIMIT ALL, ie, no limit */
2582  *count_est = 0; /* treat as not present */
2583  }
2584  else
2585  {
2586  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2587  if (*count_est <= 0)
2588  *count_est = 1; /* force to at least 1 */
2589  }
2590  }
2591  else
2592  *count_est = -1; /* can't estimate */
2593  }
2594  else
2595  *count_est = 0; /* not present */
2596 
2597  if (parse->limitOffset)
2598  {
2599  est = estimate_expression_value(root, parse->limitOffset);
2600  if (est && IsA(est, Const))
2601  {
2602  if (((Const *) est)->constisnull)
2603  {
2604  /* Treat NULL as no offset; the executor will too */
2605  *offset_est = 0; /* treat as not present */
2606  }
2607  else
2608  {
2609  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2610  if (*offset_est < 0)
2611  *offset_est = 0; /* treat as not present */
2612  }
2613  }
2614  else
2615  *offset_est = -1; /* can't estimate */
2616  }
2617  else
2618  *offset_est = 0; /* not present */
2619 
2620  if (*count_est != 0)
2621  {
2622  /*
2623  * A LIMIT clause limits the absolute number of tuples returned.
2624  * However, if it's not a constant LIMIT then we have to guess; for
2625  * lack of a better idea, assume 10% of the plan's result is wanted.
2626  */
2627  if (*count_est < 0 || *offset_est < 0)
2628  {
2629  /* LIMIT or OFFSET is an expression ... punt ... */
2630  limit_fraction = 0.10;
2631  }
2632  else
2633  {
2634  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2635  limit_fraction = (double) *count_est + (double) *offset_est;
2636  }
2637 
2638  /*
2639  * If we have absolute limits from both caller and LIMIT, use the
2640  * smaller value; likewise if they are both fractional. If one is
2641  * fractional and the other absolute, we can't easily determine which
2642  * is smaller, but we use the heuristic that the absolute will usually
2643  * be smaller.
2644  */
2645  if (tuple_fraction >= 1.0)
2646  {
2647  if (limit_fraction >= 1.0)
2648  {
2649  /* both absolute */
2650  tuple_fraction = Min(tuple_fraction, limit_fraction);
2651  }
2652  else
2653  {
2654  /* caller absolute, limit fractional; use caller's value */
2655  }
2656  }
2657  else if (tuple_fraction > 0.0)
2658  {
2659  if (limit_fraction >= 1.0)
2660  {
2661  /* caller fractional, limit absolute; use limit */
2662  tuple_fraction = limit_fraction;
2663  }
2664  else
2665  {
2666  /* both fractional */
2667  tuple_fraction = Min(tuple_fraction, limit_fraction);
2668  }
2669  }
2670  else
2671  {
2672  /* no info from caller, just use limit */
2673  tuple_fraction = limit_fraction;
2674  }
2675  }
2676  else if (*offset_est != 0 && tuple_fraction > 0.0)
2677  {
2678  /*
2679  * We have an OFFSET but no LIMIT. This acts entirely differently
2680  * from the LIMIT case: here, we need to increase rather than decrease
2681  * the caller's tuple_fraction, because the OFFSET acts to cause more
2682  * tuples to be fetched instead of fewer. This only matters if we got
2683  * a tuple_fraction > 0, however.
2684  *
2685  * As above, use 10% if OFFSET is present but unestimatable.
2686  */
2687  if (*offset_est < 0)
2688  limit_fraction = 0.10;
2689  else
2690  limit_fraction = (double) *offset_est;
2691 
2692  /*
2693  * If we have absolute counts from both caller and OFFSET, add them
2694  * together; likewise if they are both fractional. If one is
2695  * fractional and the other absolute, we want to take the larger, and
2696  * we heuristically assume that's the fractional one.
2697  */
2698  if (tuple_fraction >= 1.0)
2699  {
2700  if (limit_fraction >= 1.0)
2701  {
2702  /* both absolute, so add them together */
2703  tuple_fraction += limit_fraction;
2704  }
2705  else
2706  {
2707  /* caller absolute, limit fractional; use limit */
2708  tuple_fraction = limit_fraction;
2709  }
2710  }
2711  else
2712  {
2713  if (limit_fraction >= 1.0)
2714  {
2715  /* caller fractional, limit absolute; use caller's value */
2716  }
2717  else
2718  {
2719  /* both fractional, so add them together */
2720  tuple_fraction += limit_fraction;
2721  if (tuple_fraction >= 1.0)
2722  tuple_fraction = 0.0; /* assume fetch all */
2723  }
2724  }
2725  }
2726 
2727  return tuple_fraction;
2728 }
2729 
2730 /*
2731  * limit_needed - do we actually need a Limit plan node?
2732  *
2733  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2734  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2735  * locution for an optimization fence. (Because other places in the planner
2736  * merely check whether parse->limitOffset isn't NULL, it will still work as
2737  * an optimization fence --- we're just suppressing unnecessary run-time
2738  * overhead.)
2739  *
2740  * This might look like it could be merged into preprocess_limit, but there's
2741  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2742  * in preprocess_limit it's good enough to consider estimated values.
2743  */
2744 static bool
2746 {
2747  Node *node;
2748 
2749  node = parse->limitCount;
2750  if (node)
2751  {
2752  if (IsA(node, Const))
2753  {
2754  /* NULL indicates LIMIT ALL, ie, no limit */
2755  if (!((Const *) node)->constisnull)
2756  return true; /* LIMIT with a constant value */
2757  }
2758  else
2759  return true; /* non-constant LIMIT */
2760  }
2761 
2762  node = parse->limitOffset;
2763  if (node)
2764  {
2765  if (IsA(node, Const))
2766  {
2767  /* Treat NULL as no offset; the executor would too */
2768  if (!((Const *) node)->constisnull)
2769  {
2770  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2771 
2772  if (offset != 0)
2773  return true; /* OFFSET with a nonzero value */
2774  }
2775  }
2776  else
2777  return true; /* non-constant OFFSET */
2778  }
2779 
2780  return false; /* don't need a Limit plan node */
2781 }
2782 
2783 
2784 /*
2785  * remove_useless_groupby_columns
2786  * Remove any columns in the GROUP BY clause that are redundant due to
2787  * being functionally dependent on other GROUP BY columns.
2788  *
2789  * Since some other DBMSes do not allow references to ungrouped columns, it's
2790  * not unusual to find all columns listed in GROUP BY even though listing the
2791  * primary-key columns would be sufficient. Deleting such excess columns
2792  * avoids redundant sorting work, so it's worth doing. When we do this, we
2793  * must mark the plan as dependent on the pkey constraint (compare the
2794  * parser's check_ungrouped_columns() and check_functional_grouping()).
2795  *
2796  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2797  * index as the determining columns. But as with check_functional_grouping(),
2798  * there's currently no way to represent dependency on a NOT NULL constraint,
2799  * so we consider only the pkey for now.
2800  */
2801 static void
2803 {
2804  Query *parse = root->parse;
2805  Bitmapset **groupbyattnos;
2806  Bitmapset **surplusvars;
2807  ListCell *lc;
2808  int relid;
2809 
2810  /* No chance to do anything if there are less than two GROUP BY items */
2811  if (list_length(parse->groupClause) < 2)
2812  return;
2813 
2814  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2815  if (parse->groupingSets)
2816  return;
2817 
2818  /*
2819  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2820  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2821  * that are GROUP BY items.
2822  */
2823  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2824  (list_length(parse->rtable) + 1));
2825  foreach(lc, parse->groupClause)
2826  {
2828  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2829  Var *var = (Var *) tle->expr;
2830 
2831  /*
2832  * Ignore non-Vars and Vars from other query levels.
2833  *
2834  * XXX in principle, stable expressions containing Vars could also be
2835  * removed, if all the Vars are functionally dependent on other GROUP
2836  * BY items. But it's not clear that such cases occur often enough to
2837  * be worth troubling over.
2838  */
2839  if (!IsA(var, Var) ||
2840  var->varlevelsup > 0)
2841  continue;
2842 
2843  /* OK, remember we have this Var */
2844  relid = var->varno;
2845  Assert(relid <= list_length(parse->rtable));
2846  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2848  }
2849 
2850  /*
2851  * Consider each relation and see if it is possible to remove some of its
2852  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2853  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2854  * of the column attnos of RTE k that are removable GROUP BY items.
2855  */
2856  surplusvars = NULL; /* don't allocate array unless required */
2857  relid = 0;
2858  foreach(lc, parse->rtable)
2859  {
2861  Bitmapset *relattnos;
2862  Bitmapset *pkattnos;
2863  Oid constraintOid;
2864 
2865  relid++;
2866 
2867  /* Only plain relations could have primary-key constraints */
2868  if (rte->rtekind != RTE_RELATION)
2869  continue;
2870 
2871  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2872  relattnos = groupbyattnos[relid];
2873  if (bms_membership(relattnos) != BMS_MULTIPLE)
2874  continue;
2875 
2876  /*
2877  * Can't remove any columns for this rel if there is no suitable
2878  * (i.e., nondeferrable) primary key constraint.
2879  */
2880  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2881  if (pkattnos == NULL)
2882  continue;
2883 
2884  /*
2885  * If the primary key is a proper subset of relattnos then we have
2886  * some items in the GROUP BY that can be removed.
2887  */
2888  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2889  {
2890  /*
2891  * To easily remember whether we've found anything to do, we don't
2892  * allocate the surplusvars[] array until we find something.
2893  */
2894  if (surplusvars == NULL)
2895  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2896  (list_length(parse->rtable) + 1));
2897 
2898  /* Remember the attnos of the removable columns */
2899  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2900 
2901  /* Also, mark the resulting plan as dependent on this constraint */
2902  parse->constraintDeps = lappend_oid(parse->constraintDeps,
2903  constraintOid);
2904  }
2905  }
2906 
2907  /*
2908  * If we found any surplus Vars, build a new GROUP BY clause without them.
2909  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2910  * markings, but that's harmless.)
2911  */
2912  if (surplusvars != NULL)
2913  {
2914  List *new_groupby = NIL;
2915 
2916  foreach(lc, parse->groupClause)
2917  {
2919  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2920  Var *var = (Var *) tle->expr;
2921 
2922  /*
2923  * New list must include non-Vars, outer Vars, and anything not
2924  * marked as surplus.
2925  */
2926  if (!IsA(var, Var) ||
2927  var->varlevelsup > 0 ||
2929  surplusvars[var->varno]))
2930  new_groupby = lappend(new_groupby, sgc);
2931  }
2932 
2933  parse->groupClause = new_groupby;
2934  }
2935 }
2936 
2937 /*
2938  * preprocess_groupclause - do preparatory work on GROUP BY clause
2939  *
2940  * The idea here is to adjust the ordering of the GROUP BY elements
2941  * (which in itself is semantically insignificant) to match ORDER BY,
2942  * thereby allowing a single sort operation to both implement the ORDER BY
2943  * requirement and set up for a Unique step that implements GROUP BY.
2944  *
2945  * In principle it might be interesting to consider other orderings of the
2946  * GROUP BY elements, which could match the sort ordering of other
2947  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2948  * bother with that, though. Hashed grouping will frequently win anyway.
2949  *
2950  * Note: we need no comparable processing of the distinctClause because
2951  * the parser already enforced that that matches ORDER BY.
2952  *
2953  * For grouping sets, the order of items is instead forced to agree with that
2954  * of the grouping set (and items not in the grouping set are skipped). The
2955  * work of sorting the order of grouping set elements to match the ORDER BY if
2956  * possible is done elsewhere.
2957  */
2958 static List *
2960 {
2961  Query *parse = root->parse;
2962  List *new_groupclause = NIL;
2963  bool partial_match;
2964  ListCell *sl;
2965  ListCell *gl;
2966 
2967  /* For grouping sets, we need to force the ordering */
2968  if (force)
2969  {
2970  foreach(sl, force)
2971  {
2972  Index ref = lfirst_int(sl);
2974 
2975  new_groupclause = lappend(new_groupclause, cl);
2976  }
2977 
2978  return new_groupclause;
2979  }
2980 
2981  /* If no ORDER BY, nothing useful to do here */
2982  if (parse->sortClause == NIL)
2983  return parse->groupClause;
2984 
2985  /*
2986  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2987  * items, but only as far as we can make a matching prefix.
2988  *
2989  * This code assumes that the sortClause contains no duplicate items.
2990  */
2991  foreach(sl, parse->sortClause)
2992  {
2994 
2995  foreach(gl, parse->groupClause)
2996  {
2998 
2999  if (equal(gc, sc))
3000  {
3001  new_groupclause = lappend(new_groupclause, gc);
3002  break;
3003  }
3004  }
3005  if (gl == NULL)
3006  break; /* no match, so stop scanning */
3007  }
3008 
3009  /* Did we match all of the ORDER BY list, or just some of it? */
3010  partial_match = (sl != NULL);
3011 
3012  /* If no match at all, no point in reordering GROUP BY */
3013  if (new_groupclause == NIL)
3014  return parse->groupClause;
3015 
3016  /*
3017  * Add any remaining GROUP BY items to the new list, but only if we were
3018  * able to make a complete match. In other words, we only rearrange the
3019  * GROUP BY list if the result is that one list is a prefix of the other
3020  * --- otherwise there's no possibility of a common sort. Also, give up
3021  * if there are any non-sortable GROUP BY items, since then there's no
3022  * hope anyway.
3023  */
3024  foreach(gl, parse->groupClause)
3025  {
3027 
3028  if (list_member_ptr(new_groupclause, gc))
3029  continue; /* it matched an ORDER BY item */
3030  if (partial_match)
3031  return parse->groupClause; /* give up, no common sort possible */
3032  if (!OidIsValid(gc->sortop))
3033  return parse->groupClause; /* give up, GROUP BY can't be sorted */
3034  new_groupclause = lappend(new_groupclause, gc);
3035  }
3036 
3037  /* Success --- install the rearranged GROUP BY list */
3038  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
3039  return new_groupclause;
3040 }
3041 
3042 /*
3043  * Extract lists of grouping sets that can be implemented using a single
3044  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
3045  *
3046  * Input must be sorted with smallest sets first. Result has each sublist
3047  * sorted with smallest sets first.
3048  *
3049  * We want to produce the absolute minimum possible number of lists here to
3050  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
3051  * of finding the minimal partition of a partially-ordered set into chains
3052  * (which is what we need, taking the list of grouping sets as a poset ordered
3053  * by set inclusion) can be mapped to the problem of finding the maximum
3054  * cardinality matching on a bipartite graph, which is solvable in polynomial
3055  * time with a worst case of no worse than O(n^2.5) and usually much
3056  * better. Since our N is at most 4096, we don't need to consider fallbacks to
3057  * heuristic or approximate methods. (Planning time for a 12-d cube is under
3058  * half a second on my modest system even with optimization off and assertions
3059  * on.)
3060  */
3061 static List *
3063 {
3064  int num_sets_raw = list_length(groupingSets);
3065  int num_empty = 0;
3066  int num_sets = 0; /* distinct sets */
3067  int num_chains = 0;
3068  List *result = NIL;
3069  List **results;
3070  List **orig_sets;
3071  Bitmapset **set_masks;
3072  int *chains;
3073  short **adjacency;
3074  short *adjacency_buf;
3076  int i;
3077  int j;
3078  int j_size;
3079  ListCell *lc1 = list_head(groupingSets);
3080  ListCell *lc;
3081 
3082  /*
3083  * Start by stripping out empty sets. The algorithm doesn't require this,
3084  * but the planner currently needs all empty sets to be returned in the
3085  * first list, so we strip them here and add them back after.
3086  */
3087  while (lc1 && lfirst(lc1) == NIL)
3088  {
3089  ++num_empty;
3090  lc1 = lnext(lc1);
3091  }
3092 
3093  /* bail out now if it turns out that all we had were empty sets. */
3094  if (!lc1)
3095  return list_make1(groupingSets);
3096 
3097  /*----------
3098  * We don't strictly need to remove duplicate sets here, but if we don't,
3099  * they tend to become scattered through the result, which is a bit
3100  * confusing (and irritating if we ever decide to optimize them out).
3101  * So we remove them here and add them back after.
3102  *
3103  * For each non-duplicate set, we fill in the following:
3104  *
3105  * orig_sets[i] = list of the original set lists
3106  * set_masks[i] = bitmapset for testing inclusion
3107  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3108  *
3109  * chains[i] will be the result group this set is assigned to.
3110  *
3111  * We index all of these from 1 rather than 0 because it is convenient
3112  * to leave 0 free for the NIL node in the graph algorithm.
3113  *----------
3114  */
3115  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3116  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3117  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3118  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3119 
3120  j_size = 0;
3121  j = 0;
3122  i = 1;
3123 
3124  for_each_cell(lc, lc1)
3125  {
3126  List *candidate = (List *) lfirst(lc);
3127  Bitmapset *candidate_set = NULL;
3128  ListCell *lc2;
3129  int dup_of = 0;
3130 
3131  foreach(lc2, candidate)
3132  {
3133  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3134  }
3135 
3136  /* we can only be a dup if we're the same length as a previous set */
3137  if (j_size == list_length(candidate))
3138  {
3139  int k;
3140 
3141  for (k = j; k < i; ++k)
3142  {
3143  if (bms_equal(set_masks[k], candidate_set))
3144  {
3145  dup_of = k;
3146  break;
3147  }
3148  }
3149  }
3150  else if (j_size < list_length(candidate))
3151  {
3152  j_size = list_length(candidate);
3153  j = i;
3154  }
3155 
3156  if (dup_of > 0)
3157  {
3158  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3159  bms_free(candidate_set);
3160  }
3161  else
3162  {
3163  int k;
3164  int n_adj = 0;
3165 
3166  orig_sets[i] = list_make1(candidate);
3167  set_masks[i] = candidate_set;
3168 
3169  /* fill in adjacency list; no need to compare equal-size sets */
3170 
3171  for (k = j - 1; k > 0; --k)
3172  {
3173  if (bms_is_subset(set_masks[k], candidate_set))
3174  adjacency_buf[++n_adj] = k;
3175  }
3176 
3177  if (n_adj > 0)
3178  {
3179  adjacency_buf[0] = n_adj;
3180  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3181  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3182  }
3183  else
3184  adjacency[i] = NULL;
3185 
3186  ++i;
3187  }
3188  }
3189 
3190  num_sets = i - 1;
3191 
3192  /*
3193  * Apply the graph matching algorithm to do the work.
3194  */
3195  state = BipartiteMatch(num_sets, num_sets, adjacency);
3196 
3197  /*
3198  * Now, the state->pair* fields have the info we need to assign sets to
3199  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3200  * pair_vu[v] = u (both will be true, but we check both so that we can do
3201  * it in one pass)
3202  */
3203  chains = palloc0((num_sets + 1) * sizeof(int));
3204 
3205  for (i = 1; i <= num_sets; ++i)
3206  {
3207  int u = state->pair_vu[i];
3208  int v = state->pair_uv[i];
3209 
3210  if (u > 0 && u < i)
3211  chains[i] = chains[u];
3212  else if (v > 0 && v < i)
3213  chains[i] = chains[v];
3214  else
3215  chains[i] = ++num_chains;
3216  }
3217 
3218  /* build result lists. */
3219  results = palloc0((num_chains + 1) * sizeof(List *));
3220 
3221  for (i = 1; i <= num_sets; ++i)
3222  {
3223  int c = chains[i];
3224 
3225  Assert(c > 0);
3226 
3227  results[c] = list_concat(results[c], orig_sets[i]);
3228  }
3229 
3230  /* push any empty sets back on the first list. */
3231  while (num_empty-- > 0)
3232  results[1] = lcons(NIL, results[1]);
3233 
3234  /* make result list */
3235  for (i = 1; i <= num_chains; ++i)
3236  result = lappend(result, results[i]);
3237 
3238  /*
3239  * Free all the things.
3240  *
3241  * (This is over-fussy for small sets but for large sets we could have
3242  * tied up a nontrivial amount of memory.)
3243  */
3244  BipartiteMatchFree(state);
3245  pfree(results);
3246  pfree(chains);
3247  for (i = 1; i <= num_sets; ++i)
3248  if (adjacency[i])
3249  pfree(adjacency[i]);
3250  pfree(adjacency);
3251  pfree(adjacency_buf);
3252  pfree(orig_sets);
3253  for (i = 1; i <= num_sets; ++i)
3254  bms_free(set_masks[i]);
3255  pfree(set_masks);
3256 
3257  return result;
3258 }
3259 
3260 /*
3261  * Reorder the elements of a list of grouping sets such that they have correct
3262  * prefix relationships. Also inserts the GroupingSetData annotations.
3263  *
3264  * The input must be ordered with smallest sets first; the result is returned
3265  * with largest sets first. Note that the result shares no list substructure
3266  * with the input, so it's safe for the caller to modify it later.
3267  *
3268  * If we're passed in a sortclause, we follow its order of columns to the
3269  * extent possible, to minimize the chance that we add unnecessary sorts.
3270  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3271  * gets implemented in one pass.)
3272  */
3273 static List *
3274 reorder_grouping_sets(List *groupingsets, List *sortclause)
3275 {
3276  ListCell *lc;
3277  ListCell *lc2;
3278  List *previous = NIL;
3279  List *result = NIL;
3280 
3281  foreach(lc, groupingsets)
3282  {
3283  List *candidate = (List *) lfirst(lc);
3284  List *new_elems = list_difference_int(candidate, previous);
3286 
3287  if (list_length(new_elems) > 0)
3288  {
3289  while (list_length(sortclause) > list_length(previous))
3290  {
3291  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3292  int ref = sc->tleSortGroupRef;
3293 
3294  if (list_member_int(new_elems, ref))
3295  {
3296  previous = lappend_int(previous, ref);
3297  new_elems = list_delete_int(new_elems, ref);
3298  }
3299  else
3300  {
3301  /* diverged from the sortclause; give up on it */
3302  sortclause = NIL;
3303  break;
3304  }
3305  }
3306 
3307  foreach(lc2, new_elems)
3308  {
3309  previous = lappend_int(previous, lfirst_int(lc2));
3310  }
3311  }
3312 
3313  gs->set = list_copy(previous);
3314  result = lcons(gs, result);
3315  list_free(new_elems);
3316  }
3317 
3318  list_free(previous);
3319 
3320  return result;
3321 }
3322 
3323 /*
3324  * Compute query_pathkeys and other pathkeys during plan generation
3325  */
3326 static void
3328 {
3329  Query *parse = root->parse;
3330  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3331  List *tlist = qp_extra->tlist;
3332  List *activeWindows = qp_extra->activeWindows;
3333 
3334  /*
3335  * Calculate pathkeys that represent grouping/ordering requirements. The
3336  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3337  * be, in which case we just leave their pathkeys empty.
3338  */
3339  if (qp_extra->groupClause &&
3340  grouping_is_sortable(qp_extra->groupClause))
3341  root->group_pathkeys =
3343  qp_extra->groupClause,
3344  tlist);
3345  else
3346  root->group_pathkeys = NIL;
3347 
3348  /* We consider only the first (bottom) window in pathkeys logic */
3349  if (activeWindows != NIL)
3350  {
3351  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3352 
3354  wc,
3355  tlist);
3356  }
3357  else
3358  root->window_pathkeys = NIL;
3359 
3360  if (parse->distinctClause &&
3362  root->distinct_pathkeys =
3364  parse->distinctClause,
3365  tlist);
3366  else
3367  root->distinct_pathkeys = NIL;
3368 
3369  root->sort_pathkeys =
3371  parse->sortClause,
3372  tlist);
3373 
3374  /*
3375  * Figure out whether we want a sorted result from query_planner.
3376  *
3377  * If we have a sortable GROUP BY clause, then we want a result sorted
3378  * properly for grouping. Otherwise, if we have window functions to
3379  * evaluate, we try to sort for the first window. Otherwise, if there's a
3380  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3381  * we try to produce output that's sufficiently well sorted for the
3382  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3383  * by the ORDER BY clause.
3384  *
3385  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3386  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3387  * that might just leave us failing to exploit an available sort order at
3388  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3389  * much easier, since we know that the parser ensured that one is a
3390  * superset of the other.
3391  */
3392  if (root->group_pathkeys)
3393  root->query_pathkeys = root->group_pathkeys;
3394  else if (root->window_pathkeys)
3395  root->query_pathkeys = root->window_pathkeys;
3396  else if (list_length(root->distinct_pathkeys) >
3397  list_length(root->sort_pathkeys))
3398  root->query_pathkeys = root->distinct_pathkeys;
3399  else if (root->sort_pathkeys)
3400  root->query_pathkeys = root->sort_pathkeys;
3401  else
3402  root->query_pathkeys = NIL;
3403 }
3404 
3405 /*
3406  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3407  *
3408  * path_rows: number of output rows from scan/join step
3409  * gsets: grouping set data, or NULL if not doing grouping sets
3410  *
3411  * If doing grouping sets, we also annotate the gsets data with the estimates
3412  * for each set and each individual rollup list, with a view to later
3413  * determining whether some combination of them could be hashed instead.
3414  */
3415 static double
3417  double path_rows,
3418  grouping_sets_data *gd)
3419 {
3420  Query *parse = root->parse;
3421  double dNumGroups;
3422 
3423  if (parse->groupClause)
3424  {
3425  List *groupExprs;
3426 
3427  if (parse->groupingSets)
3428  {
3429  /* Add up the estimates for each grouping set */
3430  ListCell *lc;
3431  ListCell *lc2;
3432 
3433  Assert(gd); /* keep Coverity happy */
3434 
3435  dNumGroups = 0;
3436 
3437  foreach(lc, gd->rollups)
3438  {
3439  RollupData *rollup = lfirst_node(RollupData, lc);
3440  ListCell *lc;
3441 
3442  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3443  parse->targetList);
3444 
3445  rollup->numGroups = 0.0;
3446 
3447  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3448  {
3449  List *gset = (List *) lfirst(lc);
3451  double numGroups = estimate_num_groups(root,
3452  groupExprs,
3453  path_rows,
3454  &gset);
3455 
3456  gs->numGroups = numGroups;
3457  rollup->numGroups += numGroups;
3458  }
3459 
3460  dNumGroups += rollup->numGroups;
3461  }
3462 
3463  if (gd->hash_sets_idx)
3464  {
3465  ListCell *lc;
3466 
3467  gd->dNumHashGroups = 0;
3468 
3469  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3470  parse->targetList);
3471 
3472  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3473  {
3474  List *gset = (List *) lfirst(lc);
3476  double numGroups = estimate_num_groups(root,
3477  groupExprs,
3478  path_rows,
3479  &gset);
3480 
3481  gs->numGroups = numGroups;
3482  gd->dNumHashGroups += numGroups;
3483  }
3484 
3485  dNumGroups += gd->dNumHashGroups;
3486  }
3487  }
3488  else
3489  {
3490  /* Plain GROUP BY */
3491  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3492  parse->targetList);
3493 
3494  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3495  NULL);
3496  }
3497  }
3498  else if (parse->groupingSets)
3499  {
3500  /* Empty grouping sets ... one result row for each one */
3501  dNumGroups = list_length(parse->groupingSets);
3502  }
3503  else if (parse->hasAggs || root->hasHavingQual)
3504  {
3505  /* Plain aggregation, one result row */
3506  dNumGroups = 1;
3507  }
3508  else
3509  {
3510  /* Not grouping */
3511  dNumGroups = 1;
3512  }
3513 
3514  return dNumGroups;
3515 }
3516 
3517 /*
3518  * estimate_hashagg_tablesize
3519  * estimate the number of bytes that a hash aggregate hashtable will
3520  * require based on the agg_costs, path width and dNumGroups.
3521  *
3522  * XXX this may be over-estimating the size now that hashagg knows to omit
3523  * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3524  * grouping columns not in the hashed set are counted here even though hashagg
3525  * won't store them. Is this a problem?
3526  */
3527 static Size
3529  double dNumGroups)
3530 {
3531  Size hashentrysize;
3532 
3533  /* Estimate per-hash-entry space at tuple width... */
3534  hashentrysize = MAXALIGN(path->pathtarget->width) +
3536 
3537  /* plus space for pass-by-ref transition values... */
3538  hashentrysize += agg_costs->transitionSpace;
3539  /* plus the per-hash-entry overhead */
3540  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3541 
3542  /*
3543  * Note that this disregards the effect of fill-factor and growth policy
3544  * of the hash-table. That's probably ok, given default the default
3545  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3546  * "double-in-size" growth policies here.
3547  */
3548  return hashentrysize * dNumGroups;
3549 }
3550 
3551 /*
3552  * create_grouping_paths
3553  *
3554  * Build a new upperrel containing Paths for grouping and/or aggregation.
3555  *
3556  * input_rel: contains the source-data Paths
3557  * target: the pathtarget for the result Paths to compute
3558  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
3559  * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
3560  * rollup_groupclauses: list of grouping clauses for grouping sets,
3561  * or NIL if not doing grouping sets
3562  *
3563  * Note: all Paths in input_rel are expected to return the target computed
3564  * by make_group_input_target.
3565  *
3566  * We need to consider sorted and hashed aggregation in the same function,
3567  * because otherwise (1) it would be harder to throw an appropriate error
3568  * message if neither way works, and (2) we should not allow hashtable size
3569  * considerations to dissuade us from using hashing if sorting is not possible.
3570  */
3571 static RelOptInfo *
3573  RelOptInfo *input_rel,
3574  PathTarget *target,
3575  const AggClauseCosts *agg_costs,
3576  grouping_sets_data *gd)
3577 {
3578  Query *parse = root->parse;
3579  Path *cheapest_path = input_rel->cheapest_total_path;
3580  RelOptInfo *grouped_rel;
3581  PathTarget *partial_grouping_target = NULL;
3582  AggClauseCosts agg_partial_costs; /* parallel only */
3583  AggClauseCosts agg_final_costs; /* parallel only */
3584  Size hashaggtablesize;
3585  double dNumGroups;
3586  double dNumPartialGroups = 0;
3587  bool can_hash;
3588  bool can_sort;
3589  bool try_parallel_aggregation;
3590 
3591  ListCell *lc;
3592 
3593  /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3594  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3595 
3596  /*
3597  * If the input relation is not parallel-safe, then the grouped relation
3598  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3599  * target list and HAVING quals are parallel-safe.
3600  */
3601  if (input_rel->consider_parallel &&
3602  is_parallel_safe(root, (Node *) target->exprs) &&
3603  is_parallel_safe(root, (Node *) parse->havingQual))
3604  grouped_rel->consider_parallel = true;
3605 
3606  /*
3607  * If the input rel belongs to a single FDW, so does the grouped rel.
3608  */
3609  grouped_rel->serverid = input_rel->serverid;
3610  grouped_rel->userid = input_rel->userid;
3611  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3612  grouped_rel->fdwroutine = input_rel->fdwroutine;
3613 
3614  /*
3615  * Check for degenerate grouping.
3616  */
3617  if ((root->hasHavingQual || parse->groupingSets) &&
3618  !parse->hasAggs && parse->groupClause == NIL)
3619  {
3620  /*
3621  * We have a HAVING qual and/or grouping sets, but no aggregates and
3622  * no GROUP BY (which implies that the grouping sets are all empty).
3623  *
3624  * This is a degenerate case in which we are supposed to emit either
3625  * zero or one row for each grouping set depending on whether HAVING
3626  * succeeds. Furthermore, there cannot be any variables in either
3627  * HAVING or the targetlist, so we actually do not need the FROM table
3628  * at all! We can just throw away the plan-so-far and generate a
3629  * Result node. This is a sufficiently unusual corner case that it's
3630  * not worth contorting the structure of this module to avoid having
3631  * to generate the earlier paths in the first place.
3632  */
3633  int nrows = list_length(parse->groupingSets);
3634  Path *path;
3635 
3636  if (nrows > 1)
3637  {
3638  /*
3639  * Doesn't seem worthwhile writing code to cons up a
3640  * generate_series or a values scan to emit multiple rows. Instead
3641  * just make N clones and append them. (With a volatile HAVING
3642  * clause, this means you might get between 0 and N output rows.
3643  * Offhand I think that's desired.)
3644  */
3645  List *paths = NIL;
3646 
3647  while (--nrows >= 0)
3648  {
3649  path = (Path *)
3650  create_result_path(root, grouped_rel,
3651  target,
3652  (List *) parse->havingQual);
3653  paths = lappend(paths, path);
3654  }
3655  path = (Path *)
3656  create_append_path(grouped_rel,
3657  paths,
3658  NULL,
3659  0,
3660  NIL);
3661  path->pathtarget = target;
3662  }
3663  else
3664  {
3665  /* No grouping sets, or just one, so one output row */
3666  path = (Path *)
3667  create_result_path(root, grouped_rel,
3668  target,
3669  (List *) parse->havingQual);
3670  }
3671 
3672  add_path(grouped_rel, path);
3673 
3674  /* No need to consider any other alternatives. */
3675  set_cheapest(grouped_rel);
3676 
3677  return grouped_rel;
3678  }
3679 
3680  /*
3681  * Estimate number of groups.
3682  */
3683  dNumGroups = get_number_of_groups(root,
3684  cheapest_path->rows,
3685  gd);
3686 
3687  /*
3688  * Determine whether it's possible to perform sort-based implementations
3689  * of grouping. (Note that if groupClause is empty,
3690  * grouping_is_sortable() is trivially true, and all the
3691  * pathkeys_contained_in() tests will succeed too, so that we'll consider
3692  * every surviving input path.)
3693  *
3694  * If we have grouping sets, we might be able to sort some but not all of
3695  * them; in this case, we need can_sort to be true as long as we must
3696  * consider any sorted-input plan.
3697  */
3698  can_sort = (gd && gd->rollups != NIL)
3699  || grouping_is_sortable(parse->groupClause);
3700 
3701  /*
3702  * Determine whether we should consider hash-based implementations of
3703  * grouping.
3704  *
3705  * Hashed aggregation only applies if we're grouping. If we have grouping
3706  * sets, some groups might be hashable but others not; in this case we set
3707  * can_hash true as long as there is nothing globally preventing us from
3708  * hashing (and we should therefore consider plans with hashes).
3709  *
3710  * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3711  * aggregates. (Doing so would imply storing *all* the input values in
3712  * the hash table, and/or running many sorts in parallel, either of which
3713  * seems like a certain loser.) We similarly don't support ordered-set
3714  * aggregates in hashed aggregation, but that case is also included in the
3715  * numOrderedAggs count.
3716  *
3717  * Note: grouping_is_hashable() is much more expensive to check than the
3718  * other gating conditions, so we want to do it last.
3719  */
3720  can_hash = (parse->groupClause != NIL &&
3721  agg_costs->numOrderedAggs == 0 &&
3722  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
3723 
3724  /*
3725  * If grouped_rel->consider_parallel is true, then paths that we generate
3726  * for this grouping relation could be run inside of a worker, but that
3727  * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3728  * execution strategy. Figure that out.
3729  */
3730  if (!grouped_rel->consider_parallel)
3731  {
3732  /* Not even parallel-safe. */
3733  try_parallel_aggregation = false;
3734  }
3735  else if (input_rel->partial_pathlist == NIL)
3736  {
3737  /* Nothing to use as input for partial aggregate. */
3738  try_parallel_aggregation = false;
3739  }
3740  else if (!parse->hasAggs && parse->groupClause == NIL)
3741  {
3742  /*
3743  * We don't know how to do parallel aggregation unless we have either
3744  * some aggregates or a grouping clause.
3745  */
3746  try_parallel_aggregation = false;
3747  }
3748  else if (parse->groupingSets)
3749  {
3750  /* We don't know how to do grouping sets in parallel. */
3751  try_parallel_aggregation = false;
3752  }
3753  else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3754  {
3755  /* Insufficient support for partial mode. */
3756  try_parallel_aggregation = false;
3757  }
3758  else
3759  {
3760  /* Everything looks good. */
3761  try_parallel_aggregation = true;
3762  }
3763 
3764  /*
3765  * Before generating paths for grouped_rel, we first generate any possible
3766  * partial paths; that way, later code can easily consider both parallel
3767  * and non-parallel approaches to grouping. Note that the partial paths
3768  * we generate here are also partially aggregated, so simply pushing a
3769  * Gather node on top is insufficient to create a final path, as would be
3770  * the case for a scan/join rel.
3771  */
3772  if (try_parallel_aggregation)
3773  {
3774  Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3775 
3776  /*
3777  * Build target list for partial aggregate paths. These paths cannot
3778  * just emit the same tlist as regular aggregate paths, because (1) we
3779  * must include Vars and Aggrefs needed in HAVING, which might not
3780  * appear in the result tlist, and (2) the Aggrefs must be set in
3781  * partial mode.
3782  */
3783  partial_grouping_target = make_partial_grouping_target(root, target);
3784 
3785  /* Estimate number of partial groups. */
3786  dNumPartialGroups = get_number_of_groups(root,
3787  cheapest_partial_path->rows,
3788  gd);
3789 
3790  /*
3791  * Collect statistics about aggregates for estimating costs of
3792  * performing aggregation in parallel.
3793  */
3794  MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3795  MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3796  if (parse->hasAggs)
3797  {
3798  /* partial phase */
3799  get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
3801  &agg_partial_costs);
3802 
3803  /* final phase */
3804  get_agg_clause_costs(root, (Node *) target->exprs,
3806  &agg_final_costs);
3807  get_agg_clause_costs(root, parse->havingQual,
3809  &agg_final_costs);
3810  }
3811 
3812  if (can_sort)
3813  {
3814  /* This was checked before setting try_parallel_aggregation */
3815  Assert(parse->hasAggs || parse->groupClause);
3816 
3817  /*
3818  * Use any available suitably-sorted path as input, and also
3819  * consider sorting the cheapest partial path.
3820  */
3821  foreach(lc, input_rel->partial_pathlist)
3822  {
3823  Path *path = (Path *) lfirst(lc);
3824  bool is_sorted;
3825 
3826  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3827  path->pathkeys);
3828  if (path == cheapest_partial_path || is_sorted)
3829  {
3830  /* Sort the cheapest partial path, if it isn't already */
3831  if (!is_sorted)
3832  path = (Path *) create_sort_path(root,
3833  grouped_rel,
3834  path,
3835  root->group_pathkeys,
3836  -1.0);
3837 
3838  if (parse->hasAggs)
3839  add_partial_path(grouped_rel, (Path *)
3840  create_agg_path(root,
3841  grouped_rel,
3842  path,
3843  partial_grouping_target,
3844  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3846  parse->groupClause,
3847  NIL,
3848  &agg_partial_costs,
3849  dNumPartialGroups));
3850  else
3851  add_partial_path(grouped_rel, (Path *)
3852  create_group_path(root,
3853  grouped_rel,
3854  path,
3855  partial_grouping_target,
3856  parse->groupClause,
3857  NIL,
3858  dNumPartialGroups));
3859  }
3860  }
3861  }
3862 
3863  if (can_hash)
3864  {
3865  /* Checked above */
3866  Assert(parse->hasAggs || parse->groupClause);
3867 
3868  hashaggtablesize =
3869  estimate_hashagg_tablesize(cheapest_partial_path,
3870  &agg_partial_costs,
3871  dNumPartialGroups);
3872 
3873  /*
3874  * Tentatively produce a partial HashAgg Path, depending on if it
3875  * looks as if the hash table will fit in work_mem.
3876  */
3877  if (hashaggtablesize < work_mem * 1024L)
3878  {
3879  add_partial_path(grouped_rel, (Path *)
3880  create_agg_path(root,
3881  grouped_rel,
3882  cheapest_partial_path,
3883  partial_grouping_target,
3884  AGG_HASHED,
3886  parse->groupClause,
3887  NIL,
3888  &agg_partial_costs,
3889  dNumPartialGroups));
3890  }
3891  }
3892  }
3893 
3894  /* Build final grouping paths */
3895  if (can_sort)
3896  {
3897  /*
3898  * Use any available suitably-sorted path as input, and also consider
3899  * sorting the cheapest-total path.
3900  */
3901  foreach(lc, input_rel->pathlist)
3902  {
3903  Path *path = (Path *) lfirst(lc);
3904  bool is_sorted;
3905 
3906  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3907  path->pathkeys);
3908  if (path == cheapest_path || is_sorted)
3909  {
3910  /* Sort the cheapest-total path if it isn't already sorted */
3911  if (!is_sorted)
3912  path = (Path *) create_sort_path(root,
3913  grouped_rel,
3914  path,
3915  root->group_pathkeys,
3916  -1.0);
3917 
3918  /* Now decide what to stick atop it */
3919  if (parse->groupingSets)
3920  {
3921  consider_groupingsets_paths(root, grouped_rel,
3922  path, true, can_hash, target,
3923  gd, agg_costs, dNumGroups);
3924  }
3925  else if (parse->hasAggs)
3926  {
3927  /*
3928  * We have aggregation, possibly with plain GROUP BY. Make
3929  * an AggPath.
3930  */
3931  add_path(grouped_rel, (Path *)
3932  create_agg_path(root,
3933  grouped_rel,
3934  path,
3935  target,
3936  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3938  parse->groupClause,
3939  (List *) parse->havingQual,
3940  agg_costs,
3941  dNumGroups));
3942  }
3943  else if (parse->groupClause)
3944  {
3945  /*
3946  * We have GROUP BY without aggregation or grouping sets.
3947  * Make a GroupPath.
3948  */
3949  add_path(grouped_rel, (Path *)
3950  create_group_path(root,
3951  grouped_rel,
3952  path,
3953  target,
3954  parse->groupClause,
3955  (List *) parse->havingQual,
3956  dNumGroups));
3957  }
3958  else
3959  {
3960  /* Other cases should have been handled above */
3961  Assert(false);
3962  }
3963  }
3964  }
3965 
3966  /*
3967  * Now generate a complete GroupAgg Path atop of the cheapest partial
3968  * path. We can do this using either Gather or Gather Merge.
3969  */
3970  if (grouped_rel->partial_pathlist)
3971  {
3972  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
3973  double total_groups = path->rows * path->parallel_workers;
3974 
3975  path = (Path *) create_gather_path(root,
3976  grouped_rel,
3977  path,
3978  partial_grouping_target,
3979  NULL,
3980  &total_groups);
3981 
3982  /*
3983  * Since Gather's output is always unsorted, we'll need to sort,
3984  * unless there's no GROUP BY clause or a degenerate (constant)
3985  * one, in which case there will only be a single group.
3986  */
3987  if (root->group_pathkeys)
3988  path = (Path *) create_sort_path(root,
3989  grouped_rel,
3990  path,
3991  root->group_pathkeys,
3992  -1.0);
3993 
3994  if (parse->hasAggs)
3995  add_path(grouped_rel, (Path *)
3996  create_agg_path(root,
3997  grouped_rel,
3998  path,
3999  target,
4000  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
4002  parse->groupClause,
4003  (List *) parse->havingQual,
4004  &agg_final_costs,
4005  dNumGroups));
4006  else
4007  add_path(grouped_rel, (Path *)
4008  create_group_path(root,
4009  grouped_rel,
4010  path,
4011  target,
4012  parse->groupClause,
4013  (List *) parse->havingQual,
4014  dNumGroups));
4015 
4016  /*
4017  * The point of using Gather Merge rather than Gather is that it
4018  * can preserve the ordering of the input path, so there's no
4019  * reason to try it unless (1) it's possible to produce more than
4020  * one output row and (2) we want the output path to be ordered.
4021  */
4022  if (parse->groupClause != NIL && root->group_pathkeys != NIL)
4023  {
4024  foreach(lc, grouped_rel->partial_pathlist)
4025  {
4026  Path *subpath = (Path *) lfirst(lc);
4027  Path *gmpath;
4028  double total_groups;
4029 
4030  /*
4031  * It's useful to consider paths that are already properly
4032  * ordered for Gather Merge, because those don't need a
4033  * sort. It's also useful to consider the cheapest path,
4034  * because sorting it in parallel and then doing Gather
4035  * Merge may be better than doing an unordered Gather
4036  * followed by a sort. But there's no point in
4037  * considering non-cheapest paths that aren't already
4038  * sorted correctly.
4039  */
4040  if (path != subpath &&
4042  subpath->pathkeys))
4043  continue;
4044 
4045  total_groups = subpath->rows * subpath->parallel_workers;
4046 
4047  gmpath = (Path *)
4049  grouped_rel,
4050  subpath,
4051  partial_grouping_target,
4052  root->group_pathkeys,
4053  NULL,
4054  &total_groups);
4055 
4056  if (parse->hasAggs)
4057  add_path(grouped_rel, (Path *)
4058  create_agg_path(root,
4059  grouped_rel,
4060  gmpath,
4061  target,
4062  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
4064  parse->groupClause,
4065  (List *) parse->havingQual,
4066  &agg_final_costs,
4067  dNumGroups));
4068  else
4069  add_path(grouped_rel, (Path *)
4070  create_group_path(root,
4071  grouped_rel,
4072  gmpath,
4073  target,
4074  parse->groupClause,
4075  (List *) parse->havingQual,
4076  dNumGroups));
4077  }
4078  }
4079  }
4080  }
4081 
4082  if (can_hash)
4083  {
4084  if (parse->groupingSets)
4085  {
4086  /*
4087  * Try for a hash-only groupingsets path over unsorted input.
4088  */
4089  consider_groupingsets_paths(root, grouped_rel,
4090  cheapest_path, false, true, target,
4091  gd, agg_costs, dNumGroups);
4092  }
4093  else
4094  {
4095  hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
4096  agg_costs,
4097  dNumGroups);
4098 
4099  /*
4100  * Provided that the estimated size of the hashtable does not
4101  * exceed work_mem, we'll generate a HashAgg Path, although if we
4102  * were unable to sort above, then we'd better generate a Path, so
4103  * that we at least have one.
4104  */
4105  if (hashaggtablesize < work_mem * 1024L ||
4106  grouped_rel->pathlist == NIL)
4107  {
4108  /*
4109  * We just need an Agg over the cheapest-total input path,
4110  * since input order won't matter.
4111  */
4112  add_path(grouped_rel, (Path *)
4113  create_agg_path(root, grouped_rel,
4114  cheapest_path,
4115  target,
4116  AGG_HASHED,
4118  parse->groupClause,
4119  (List *) parse->havingQual,
4120  agg_costs,
4121  dNumGroups));
4122  }
4123  }
4124 
4125  /*
4126  * Generate a HashAgg Path atop of the cheapest partial path. Once
4127  * again, we'll only do this if it looks as though the hash table
4128  * won't exceed work_mem.
4129  */
4130  if (grouped_rel->partial_pathlist)
4131  {
4132  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4133 
4134  hashaggtablesize = estimate_hashagg_tablesize(path,
4135  &agg_final_costs,
4136  dNumGroups);
4137 
4138  if (hashaggtablesize < work_mem * 1024L)
4139  {
4140  double total_groups = path->rows * path->parallel_workers;
4141 
4142  path = (Path *) create_gather_path(root,
4143  grouped_rel,
4144  path,
4145  partial_grouping_target,
4146  NULL,
4147  &total_groups);
4148 
4149  add_path(grouped_rel, (Path *)
4150  create_agg_path(root,
4151  grouped_rel,
4152  path,
4153  target,
4154  AGG_HASHED,
4156  parse->groupClause,
4157  (List *) parse->havingQual,
4158  &agg_final_costs,
4159  dNumGroups));
4160  }
4161  }
4162  }
4163 
4164  /* Give a helpful error if we failed to find any implementation */
4165  if (grouped_rel->pathlist == NIL)
4166  ereport(ERROR,
4167  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4168  errmsg("could not implement GROUP BY"),
4169  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4170 
4171  /*
4172  * If there is an FDW that's responsible for all baserels of the query,
4173  * let it consider adding ForeignPaths.
4174  */
4175  if (grouped_rel->fdwroutine &&
4176  grouped_rel->fdwroutine->GetForeignUpperPaths)
4178  input_rel, grouped_rel);
4179 
4180  /* Let extensions possibly add some more paths */
4182  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4183  input_rel, grouped_rel);
4184 
4185  /* Now choose the best path(s) */
4186  set_cheapest(grouped_rel);
4187 
4188  /*
4189  * We've been using the partial pathlist for the grouped relation to hold
4190  * partially aggregated paths, but that's actually a little bit bogus
4191  * because it's unsafe for later planning stages -- like ordered_rel ---
4192  * to get the idea that they can use these partial paths as if they didn't
4193  * need a FinalizeAggregate step. Zap the partial pathlist at this stage
4194  * so we don't get confused.
4195  */
4196  grouped_rel->partial_pathlist = NIL;
4197 
4198  return grouped_rel;
4199 }
4200 
4201 
4202 /*
4203  * For a given input path, consider the possible ways of doing grouping sets on
4204  * it, by combinations of hashing and sorting. This can be called multiple
4205  * times, so it's important that it not scribble on input. No result is
4206  * returned, but any generated paths are added to grouped_rel.
4207  */
4208 static void
4210  RelOptInfo *grouped_rel,
4211  Path *path,
4212  bool is_sorted,
4213  bool can_hash,
4214  PathTarget *target,
4215  grouping_sets_data *gd,
4216  const AggClauseCosts *agg_costs,
4217  double dNumGroups)
4218 {
4219  Query *parse = root->parse;
4220 
4221  /*
4222  * If we're not being offered sorted input, then only consider plans that
4223  * can be done entirely by hashing.
4224  *
4225  * We can hash everything if it looks like it'll fit in work_mem. But if
4226  * the input is actually sorted despite not being advertised as such, we
4227  * prefer to make use of that in order to use less memory.
4228  *
4229  * If none of the grouping sets are sortable, then ignore the work_mem
4230  * limit and generate a path anyway, since otherwise we'll just fail.
4231  */
4232  if (!is_sorted)
4233  {
4234  List *new_rollups = NIL;
4235  RollupData *unhashed_rollup = NULL;
4236  List *sets_data;
4237  List *empty_sets_data = NIL;
4238  List *empty_sets = NIL;
4239  ListCell *lc;
4240  ListCell *l_start = list_head(gd->rollups);
4241  AggStrategy strat = AGG_HASHED;
4242  Size hashsize;
4243  double exclude_groups = 0.0;
4244 
4245  Assert(can_hash);
4246 
4247  if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4248  {
4249  unhashed_rollup = lfirst_node(RollupData, l_start);
4250  exclude_groups = unhashed_rollup->numGroups;
4251  l_start = lnext(l_start);
4252  }
4253 
4254  hashsize = estimate_hashagg_tablesize(path,
4255  agg_costs,
4256  dNumGroups - exclude_groups);
4257 
4258  /*
4259  * gd->rollups is empty if we have only unsortable columns to work
4260  * with. Override work_mem in that case; otherwise, we'll rely on the
4261  * sorted-input case to generate usable mixed paths.
4262  */
4263  if (hashsize > work_mem * 1024L && gd->rollups)
4264  return; /* nope, won't fit */
4265 
4266  /*
4267  * We need to burst the existing rollups list into individual grouping
4268  * sets and recompute a groupClause for each set.
4269  */
4270  sets_data = list_copy(gd->unsortable_sets);
4271 
4272  for_each_cell(lc, l_start)
4273  {
4274  RollupData *rollup = lfirst_node(RollupData, lc);
4275 
4276  /*
4277  * If we find an unhashable rollup that's not been skipped by the
4278  * "actually sorted" check above, we can't cope; we'd need sorted
4279  * input (with a different sort order) but we can't get that here.
4280  * So bail out; we'll get a valid path from the is_sorted case
4281  * instead.
4282  *
4283  * The mere presence of empty grouping sets doesn't make a rollup
4284  * unhashable (see preprocess_grouping_sets), we handle those
4285  * specially below.
4286  */
4287  if (!rollup->hashable)
4288  return;
4289  else
4290  sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
4291  }
4292  foreach(lc, sets_data)
4293  {
4295  List *gset = gs->set;
4296  RollupData *rollup;
4297 
4298  if (gset == NIL)
4299  {
4300  /* Empty grouping sets can't be hashed. */
4301  empty_sets_data = lappend(empty_sets_data, gs);
4302  empty_sets = lappend(empty_sets, NIL);
4303  }
4304  else
4305  {
4306  rollup = makeNode(RollupData);
4307 
4308  rollup->groupClause = preprocess_groupclause(root, gset);
4309  rollup->gsets_data = list_make1(gs);
4310  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4311  rollup->gsets_data,
4312  gd->tleref_to_colnum_map);
4313  rollup->numGroups = gs->numGroups;
4314  rollup->hashable = true;
4315  rollup->is_hashed = true;
4316  new_rollups = lappend(new_rollups, rollup);
4317  }
4318  }
4319 
4320  /*
4321  * If we didn't find anything nonempty to hash, then bail. We'll
4322  * generate a path from the is_sorted case.
4323  */
4324  if (new_rollups == NIL)
4325  return;
4326 
4327  /*
4328  * If there were empty grouping sets they should have been in the
4329  * first rollup.
4330  */
4331  Assert(!unhashed_rollup || !empty_sets);
4332 
4333  if (unhashed_rollup)
4334  {
4335  new_rollups = lappend(new_rollups, unhashed_rollup);
4336  strat = AGG_MIXED;
4337  }
4338  else if (empty_sets)
4339  {
4340  RollupData *rollup = makeNode(RollupData);
4341 
4342  rollup->groupClause = NIL;
4343  rollup->gsets_data = empty_sets_data;
4344  rollup->gsets = empty_sets;
4345  rollup->numGroups = list_length(empty_sets);
4346  rollup->hashable = false;
4347  rollup->is_hashed = false;
4348  new_rollups = lappend(new_rollups, rollup);
4349  strat = AGG_MIXED;
4350  }
4351 
4352  add_path(grouped_rel, (Path *)
4354  grouped_rel,
4355  path,
4356  target,
4357  (List *) parse->havingQual,
4358  strat,
4359  new_rollups,
4360  agg_costs,
4361  dNumGroups));
4362  return;
4363  }
4364 
4365  /*
4366  * If we have sorted input but nothing we can do with it, bail.
4367  */
4368  if (list_length(gd->rollups) == 0)
4369  return;
4370 
4371  /*
4372  * Given sorted input, we try and make two paths: one sorted and one mixed
4373  * sort/hash. (We need to try both because hashagg might be disabled, or
4374  * some columns might not be sortable.)
4375  *
4376  * can_hash is passed in as false if some obstacle elsewhere (such as
4377  * ordered aggs) means that we shouldn't consider hashing at all.
4378  */
4379  if (can_hash && gd->any_hashable)
4380  {
4381  List *rollups = NIL;
4382  List *hash_sets = list_copy(gd->unsortable_sets);
4383  double availspace = (work_mem * 1024.0);
4384  ListCell *lc;
4385 
4386  /*
4387  * Account first for space needed for groups we can't sort at all.
4388  */
4389  availspace -= (double) estimate_hashagg_tablesize(path,
4390  agg_costs,
4391  gd->dNumHashGroups);
4392 
4393  if (availspace > 0 && list_length(gd->rollups) > 1)
4394  {
4395  double scale;
4396  int num_rollups = list_length(gd->rollups);
4397  int k_capacity;
4398  int *k_weights = palloc(num_rollups * sizeof(int));
4399  Bitmapset *hash_items = NULL;
4400  int i;
4401 
4402  /*
4403  * We treat this as a knapsack problem: the knapsack capacity
4404  * represents work_mem, the item weights are the estimated memory
4405  * usage of the hashtables needed to implement a single rollup,
4406  * and we really ought to use the cost saving as the item value;
4407  * however, currently the costs assigned to sort nodes don't
4408  * reflect the comparison costs well, and so we treat all items as
4409  * of equal value (each rollup we hash instead saves us one sort).
4410  *
4411  * To use the discrete knapsack, we need to scale the values to a
4412  * reasonably small bounded range. We choose to allow a 5% error
4413  * margin; we have no more than 4096 rollups in the worst possible
4414  * case, which with a 5% error margin will require a bit over 42MB
4415  * of workspace. (Anyone wanting to plan queries that complex had
4416  * better have the memory for it. In more reasonable cases, with
4417  * no more than a couple of dozen rollups, the memory usage will
4418  * be negligible.)
4419  *
4420  * k_capacity is naturally bounded, but we clamp the values for
4421  * scale and weight (below) to avoid overflows or underflows (or
4422  * uselessly trying to use a scale factor less than 1 byte).
4423  */
4424  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4425  k_capacity = (int) floor(availspace / scale);
4426 
4427  /*
4428  * We leave the first rollup out of consideration since it's the
4429  * one that matches the input sort order. We assign indexes "i"
4430  * to only those entries considered for hashing; the second loop,
4431  * below, must use the same condition.
4432  */
4433  i = 0;
4435  {
4436  RollupData *rollup = lfirst_node(RollupData, lc);
4437 
4438  if (rollup->hashable)
4439  {
4440  double sz = estimate_hashagg_tablesize(path,
4441  agg_costs,
4442  rollup->numGroups);
4443 
4444  /*
4445  * If sz is enormous, but work_mem (and hence scale) is
4446  * small, avoid integer overflow here.
4447  */
4448  k_weights[i] = (int) Min(floor(sz / scale),
4449  k_capacity + 1.0);
4450  ++i;
4451  }
4452  }
4453 
4454  /*
4455  * Apply knapsack algorithm; compute the set of items which
4456  * maximizes the value stored (in this case the number of sorts
4457  * saved) while keeping the total size (approximately) within
4458  * capacity.
4459  */
4460  if (i > 0)
4461  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4462 
4463  if (!bms_is_empty(hash_items))
4464  {
4465  rollups = list_make1(linitial(gd->rollups));
4466 
4467  i = 0;
4469  {
4470  RollupData *rollup = lfirst_node(RollupData, lc);
4471 
4472  if (rollup->hashable)
4473  {
4474  if (bms_is_member(i, hash_items))
4475  hash_sets = list_concat(hash_sets,
4476  list_copy(rollup->gsets_data));
4477  else
4478  rollups = lappend(rollups, rollup);
4479  ++i;
4480  }
4481  else
4482  rollups = lappend(rollups, rollup);
4483  }
4484  }
4485  }
4486 
4487  if (!rollups && hash_sets)
4488  rollups = list_copy(gd->rollups);
4489 
4490  foreach(lc, hash_sets)
4491  {
4493  RollupData *rollup = makeNode(RollupData);
4494 
4495  Assert(gs->set != NIL);
4496 
4497  rollup->groupClause = preprocess_groupclause(root, gs->set);
4498  rollup->gsets_data = list_make1(gs);
4499  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4500  rollup->gsets_data,
4501  gd->tleref_to_colnum_map);
4502  rollup->numGroups = gs->numGroups;
4503  rollup->hashable = true;
4504  rollup->is_hashed = true;
4505  rollups = lcons(rollup, rollups);
4506  }
4507 
4508  if (rollups)
4509  {
4510  add_path(grouped_rel, (Path *)
4512  grouped_rel,
4513  path,
4514  target,
4515  (List *) parse->havingQual,
4516  AGG_MIXED,
4517  rollups,
4518  agg_costs,
4519  dNumGroups));
4520  }
4521  }
4522 
4523  /*
4524  * Now try the simple sorted case.
4525  */
4526  if (!gd->unsortable_sets)
4527  add_path(grouped_rel, (Path *)
4529  grouped_rel,
4530  path,
4531  target,
4532  (List *) parse->havingQual,
4533  AGG_SORTED,
4534  gd->rollups,
4535  agg_costs,
4536  dNumGroups));
4537 }
4538 
4539 /*
4540  * create_window_paths
4541  *
4542  * Build a new upperrel containing Paths for window-function evaluation.
4543  *
4544  * input_rel: contains the source-data Paths
4545  * input_target: result of make_window_input_target
4546  * output_target: what the topmost WindowAggPath should return
4547  * tlist: query's target list (needed to look up pathkeys)
4548  * wflists: result of find_window_functions
4549  * activeWindows: result of select_active_windows
4550  *
4551  * Note: all Paths in input_rel are expected to return input_target.
4552  */
4553 static RelOptInfo *
4555  RelOptInfo *input_rel,
4556  PathTarget *input_target,
4557  PathTarget *output_target,
4558  List *tlist,
4559  WindowFuncLists *wflists,
4560  List *activeWindows)
4561 {
4562  RelOptInfo *window_rel;
4563  ListCell *lc;
4564 
4565  /* For now, do all work in the (WINDOW, NULL) upperrel */
4566  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4567 
4568  /*
4569  * If the input relation is not parallel-safe, then the window relation
4570  * can't be parallel-safe, either. Otherwise, we need to examine the
4571  * target list and active windows for non-parallel-safe constructs.
4572  */
4573  if (input_rel->consider_parallel &&
4574  is_parallel_safe(root, (Node *) output_target->exprs) &&
4575  is_parallel_safe(root, (Node *) activeWindows))
4576  window_rel->consider_parallel = true;
4577 
4578  /*
4579  * If the input rel belongs to a single FDW, so does the window rel.
4580  */
4581  window_rel->serverid = input_rel->serverid;
4582  window_rel->userid = input_rel->userid;
4583  window_rel->useridiscurrent = input_rel->useridiscurrent;
4584  window_rel->fdwroutine = input_rel->fdwroutine;
4585 
4586  /*
4587  * Consider computing window functions starting from the existing
4588  * cheapest-total path (which will likely require a sort) as well as any
4589  * existing paths that satisfy root->window_pathkeys (which won't).
4590  */
4591  foreach(lc, input_rel->pathlist)
4592  {
4593  Path *path = (Path *) lfirst(lc);
4594 
4595  if (path == input_rel->cheapest_total_path ||
4598  window_rel,
4599  path,
4600  input_target,
4601  output_target,
4602  tlist,
4603  wflists,
4604  activeWindows);
4605  }
4606 
4607  /*
4608  * If there is an FDW that's responsible for all baserels of the query,
4609  * let it consider adding ForeignPaths.
4610  */
4611  if (window_rel->fdwroutine &&
4612  window_rel->fdwroutine->GetForeignUpperPaths)
4613  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4614  input_rel, window_rel);
4615 
4616  /* Let extensions possibly add some more paths */
4618  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4619  input_rel, window_rel);
4620 
4621  /* Now choose the best path(s) */
4622  set_cheapest(window_rel);
4623 
4624  return window_rel;
4625 }
4626 
4627 /*
4628  * Stack window-function implementation steps atop the given Path, and
4629  * add the result to window_rel.
4630  *
4631  * window_rel: upperrel to contain result
4632  * path: input Path to use (must return input_target)
4633  * input_target: result of make_window_input_target
4634  * output_target: what the topmost WindowAggPath should return
4635  * tlist: query's target list (needed to look up pathkeys)
4636  * wflists: result of find_window_functions
4637  * activeWindows: result of select_active_windows
4638  */
4639 static void
4641  RelOptInfo *window_rel,
4642  Path *path,
4643  PathTarget *input_target,
4644  PathTarget *output_target,
4645  List *tlist,
4646  WindowFuncLists *wflists,
4647  List *activeWindows)
4648 {
4649  PathTarget *window_target;
4650  ListCell *l;
4651 
4652  /*
4653  * Since each window clause could require a different sort order, we stack
4654  * up a WindowAgg node for each clause, with sort steps between them as
4655  * needed. (We assume that select_active_windows chose a good order for
4656  * executing the clauses in.)
4657  *
4658  * input_target should contain all Vars and Aggs needed for the result.
4659  * (In some cases we wouldn't need to propagate all of these all the way
4660  * to the top, since they might only be needed as inputs to WindowFuncs.
4661  * It's probably not worth trying to optimize that though.) It must also
4662  * contain all window partitioning and sorting expressions, to ensure
4663  * they're computed only once at the bottom of the stack (that's critical
4664  * for volatile functions). As we climb up the stack, we'll add outputs
4665  * for the WindowFuncs computed at each level.
4666  */
4667  window_target = input_target;
4668 
4669  foreach(l, activeWindows)
4670  {
4672  List *window_pathkeys;
4673 
4674  window_pathkeys = make_pathkeys_for_window(root,
4675  wc,
4676  tlist);
4677 
4678  /* Sort if necessary */
4679  if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4680  {
4681  path = (Path *) create_sort_path(root, window_rel,
4682  path,
4683  window_pathkeys,
4684  -1.0);
4685  }
4686 
4687  if (lnext(l))
4688  {
4689  /*
4690  * Add the current WindowFuncs to the output target for this
4691  * intermediate WindowAggPath. We must copy window_target to
4692  * avoid changing the previous path's target.
4693  *
4694  * Note: a WindowFunc adds nothing to the target's eval costs; but
4695  * we do need to account for the increase in tlist width.
4696  */
4697  ListCell *lc2;
4698 
4699  window_target = copy_pathtarget(window_target);
4700  foreach(lc2, wflists->windowFuncs[wc->winref])
4701  {
4702  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4703 
4704  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4705  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4706  }
4707  }
4708  else
4709  {
4710  /* Install the goal target in the topmost WindowAgg */
4711  window_target = output_target;
4712  }
4713 
4714  path = (Path *)
4715  create_windowagg_path(root, window_rel, path, window_target,
4716  wflists->windowFuncs[wc->winref],
4717  wc,
4718  window_pathkeys);
4719  }
4720 
4721  add_path(window_rel, path);
4722 }
4723 
4724 /*
4725  * create_distinct_paths
4726  *
4727  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4728  *
4729  * input_rel: contains the source-data Paths
4730  *
4731  * Note: input paths should already compute the desired pathtarget, since
4732  * Sort/Unique won't project anything.
4733  */
4734 static RelOptInfo *
4736  RelOptInfo *input_rel)
4737 {
4738  Query *parse = root->parse;
4739  Path *cheapest_input_path = input_rel->cheapest_total_path;
4740  RelOptInfo *distinct_rel;
4741  double numDistinctRows;
4742  bool allow_hash;
4743  Path *path;
4744  ListCell *lc;
4745 
4746  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4747  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4748 
4749  /*
4750  * We don't compute anything at this level, so distinct_rel will be
4751  * parallel-safe if the input rel is parallel-safe. In particular, if
4752  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4753  * output those expressions, and will not be parallel-safe unless those
4754  * expressions are parallel-safe.
4755  */
4756  distinct_rel->consider_parallel = input_rel->consider_parallel;
4757 
4758  /*
4759  * If the input rel belongs to a single FDW, so does the distinct_rel.
4760  */
4761  distinct_rel->serverid = input_rel->serverid;
4762  distinct_rel->userid = input_rel->userid;
4763  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4764  distinct_rel->fdwroutine = input_rel->fdwroutine;
4765 
4766  /* Estimate number of distinct rows there will be */
4767  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4768  root->hasHavingQual)
4769  {
4770  /*
4771  * If there was grouping or aggregation, use the number of input rows
4772  * as the estimated number of DISTINCT rows (ie, assume the input is
4773  * already mostly unique).
4774  */
4775  numDistinctRows = cheapest_input_path->rows;
4776  }
4777  else
4778  {
4779  /*
4780  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4781  */
4782  List *distinctExprs;
4783 
4784  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4785  parse->targetList);
4786  numDistinctRows = estimate_num_groups(root, distinctExprs,
4787  cheapest_input_path->rows,
4788  NULL);
4789  }
4790 
4791  /*
4792  * Consider sort-based implementations of DISTINCT, if possible.
4793  */
4795  {
4796  /*
4797  * First, if we have any adequately-presorted paths, just stick a
4798  * Unique node on those. Then consider doing an explicit sort of the
4799  * cheapest input path and Unique'ing that.
4800  *
4801  * When we have DISTINCT ON, we must sort by the more rigorous of
4802  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4803  * Also, if we do have to do an explicit sort, we might as well use
4804  * the more rigorous ordering to avoid a second sort later. (Note
4805  * that the parser will have ensured that one clause is a prefix of
4806  * the other.)
4807  */
4808  List *needed_pathkeys;
4809 
4810  if (parse->hasDistinctOn &&
4812  list_length(root->sort_pathkeys))
4813  needed_pathkeys = root->sort_pathkeys;
4814  else
4815  needed_pathkeys = root->distinct_pathkeys;
4816 
4817  foreach(lc, input_rel->pathlist)
4818  {
4819  Path *path = (Path *) lfirst(lc);
4820 
4821  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4822  {
4823  add_path(distinct_rel, (Path *)
4824  create_upper_unique_path(root, distinct_rel,
4825  path,
4827  numDistinctRows));
4828  }
4829  }
4830 
4831  /* For explicit-sort case, always use the more rigorous clause */
4832  if (list_length(root->distinct_pathkeys) <
4833  list_length(root->sort_pathkeys))
4834  {
4835  needed_pathkeys = root->sort_pathkeys;
4836  /* Assert checks that parser didn't mess up... */
4838  needed_pathkeys));
4839  }
4840  else
4841  needed_pathkeys = root->distinct_pathkeys;
4842 
4843  path = cheapest_input_path;
4844  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4845  path = (Path *) create_sort_path(root, distinct_rel,
4846  path,
4847  needed_pathkeys,
4848  -1.0);
4849 
4850  add_path(distinct_rel, (Path *)
4851  create_upper_unique_path(root, distinct_rel,
4852  path,
4854  numDistinctRows));
4855  }
4856 
4857  /*
4858  * Consider hash-based implementations of DISTINCT, if possible.
4859  *
4860  * If we were not able to make any other types of path, we *must* hash or
4861  * die trying. If we do have other choices, there are several things that
4862  * should prevent selection of hashing: if the query uses DISTINCT ON
4863  * (because it won't really have the expected behavior if we hash), or if
4864  * enable_hashagg is off, or if it looks like the hashtable will exceed
4865  * work_mem.
4866  *
4867  * Note: grouping_is_hashable() is much more expensive to check than the
4868  * other gating conditions, so we want to do it last.
4869  */
4870  if (distinct_rel->pathlist == NIL)
4871  allow_hash = true; /* we have no alternatives */
4872  else if (parse->hasDistinctOn || !enable_hashagg)
4873  allow_hash = false; /* policy-based decision not to hash */
4874  else
4875  {
4876  Size hashentrysize;
4877 
4878  /* Estimate per-hash-entry space at tuple width... */
4879  hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4881  /* plus the per-hash-entry overhead */
4882  hashentrysize += hash_agg_entry_size(0);
4883 
4884  /* Allow hashing only if hashtable is predicted to fit in work_mem */
4885  allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4886  }
4887 
4888  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4889  {
4890  /* Generate hashed aggregate path --- no sort needed */
4891  add_path(distinct_rel, (Path *)
4892  create_agg_path(root,
4893  distinct_rel,
4894  cheapest_input_path,
4895  cheapest_input_path->pathtarget,
4896  AGG_HASHED,
4898  parse->distinctClause,
4899  NIL,
4900  NULL,
4901  numDistinctRows));
4902  }
4903 
4904  /* Give a helpful error if we failed to find any implementation */
4905  if (distinct_rel->pathlist == NIL)
4906  ereport(ERROR,
4907  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4908  errmsg("could not implement DISTINCT"),
4909  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4910 
4911  /*
4912  * If there is an FDW that's responsible for all baserels of the query,
4913  * let it consider adding ForeignPaths.
4914  */
4915  if (distinct_rel->fdwroutine &&
4916  distinct_rel->fdwroutine->GetForeignUpperPaths)
4917  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4918  input_rel, distinct_rel);
4919 
4920  /* Let extensions possibly add some more paths */
4922  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4923  input_rel, distinct_rel);
4924 
4925  /* Now choose the best path(s) */
4926  set_cheapest(distinct_rel);
4927 
4928  return distinct_rel;
4929 }
4930 
4931 /*
4932  * create_ordered_paths
4933  *
4934  * Build a new upperrel containing Paths for ORDER BY evaluation.
4935  *
4936  * All paths in the result must satisfy the ORDER BY ordering.
4937  * The only new path we need consider is an explicit sort on the
4938  * cheapest-total existing path.
4939  *
4940  * input_rel: contains the source-data Paths
4941  * target: the output tlist the result Paths must emit
4942  * limit_tuples: estimated bound on the number of output tuples,
4943  * or -1 if no LIMIT or couldn't estimate
4944  */
4945 static RelOptInfo *
4947  RelOptInfo *input_rel,
4948  PathTarget *target,
4949  double limit_tuples)
4950 {
4951  Path *cheapest_input_path = input_rel->cheapest_total_path;
4952  RelOptInfo *ordered_rel;
4953  ListCell *lc;
4954 
4955  /* For now, do all work in the (ORDERED, NULL) upperrel */
4956  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4957 
4958  /*
4959  * If the input relation is not parallel-safe, then the ordered relation
4960  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4961  * target list is parallel-safe.
4962  */
4963  if (input_rel->consider_parallel &&
4964  is_parallel_safe(root, (Node *) target->exprs))
4965  ordered_rel->consider_parallel = true;
4966 
4967  /*
4968  * If the input rel belongs to a single FDW, so does the ordered_rel.
4969  */
4970  ordered_rel->serverid = input_rel->serverid;
4971  ordered_rel->userid = input_rel->userid;
4972  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4973  ordered_rel->fdwroutine = input_rel->fdwroutine;
4974 
4975  foreach(lc, input_rel->pathlist)
4976  {
4977  Path *path = (Path *) lfirst(lc);
4978  bool is_sorted;
4979 
4980  is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4981  path->pathkeys);
4982  if (path == cheapest_input_path || is_sorted)
4983  {
4984  if (!is_sorted)
4985  {
4986  /* An explicit sort here can take advantage of LIMIT */
4987  path = (Path *) create_sort_path(root,
4988  ordered_rel,
4989  path,
4990  root->sort_pathkeys,
4991  limit_tuples);
4992  }
4993 
4994  /* Add projection step if needed */
4995  if (path->pathtarget != target)
4996  path = apply_projection_to_path(root, ordered_rel,
4997  path, target);
4998 
4999  add_path(ordered_rel, path);
5000  }
5001  }
5002 
5003  /*
5004  * generate_gather_paths() will have already generated a simple Gather
5005  * path for the best parallel path, if any, and the loop above will have
5006  * considered sorting it. Similarly, generate_gather_paths() will also
5007  * have generated order-preserving Gather Merge plans which can be used
5008  * without sorting if they happen to match the sort_pathkeys, and the loop
5009  * above will have handled those as well. However, there's one more
5010  * possibility: it may make sense to sort the cheapest partial path
5011  * according to the required output order and then use Gather Merge.
5012  */
5013  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5014  input_rel->partial_pathlist != NIL)
5015  {
5016  Path *cheapest_partial_path;
5017 
5018  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5019 
5020  /*
5021  * If cheapest partial path doesn't need a sort, this is redundant
5022  * with what's already been tried.
5023  */
5025  cheapest_partial_path->pathkeys))
5026  {
5027  Path *path;
5028  double total_groups;
5029 
5030  path = (Path *) create_sort_path(root,
5031  ordered_rel,
5032  cheapest_partial_path,
5033  root->sort_pathkeys,
5034  -1.0);
5035 
5036  total_groups = cheapest_partial_path->rows *
5037  cheapest_partial_path->parallel_workers;
5038  path = (Path *)
5039  create_gather_merge_path(root, ordered_rel,
5040  path,
5041  target, root->sort_pathkeys, NULL,
5042  &total_groups);
5043 
5044  /* Add projection step if needed */
5045  if (path->pathtarget != target)
5046  path = apply_projection_to_path(root, ordered_rel,
5047  path, target);
5048 
5049  add_path(ordered_rel, path);
5050  }
5051  }
5052 
5053  /*
5054  * If there is an FDW that's responsible for all baserels of the query,
5055  * let it consider adding ForeignPaths.
5056  */
5057  if (ordered_rel->fdwroutine &&
5058  ordered_rel->fdwroutine->GetForeignUpperPaths)
5059  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5060  input_rel, ordered_rel);
5061 
5062  /* Let extensions possibly add some more paths */
5064  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5065  input_rel, ordered_rel);
5066 
5067  /*
5068  * No need to bother with set_cheapest here; grouping_planner does not
5069  * need us to do it.
5070  */
5071  Assert(ordered_rel->pathlist != NIL);
5072 
5073  return ordered_rel;
5074 }
5075 
5076 
5077 /*
5078  * make_group_input_target
5079  * Generate appropriate PathTarget for initial input to grouping nodes.
5080  *
5081  * If there is grouping or aggregation, the scan/join subplan cannot emit
5082  * the query's final targetlist; for example, it certainly can't emit any
5083  * aggregate function calls. This routine generates the correct target
5084  * for the scan/join subplan.
5085  *
5086  * The query target list passed from the parser already contains entries
5087  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5088  * for variables used only in HAVING clauses; so we need to add those
5089  * variables to the subplan target list. Also, we flatten all expressions
5090  * except GROUP BY items into their component variables; other expressions
5091  * will be computed by the upper plan nodes rather than by the subplan.
5092  * For example, given a query like
5093  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5094  * we want to pass this targetlist to the subplan:
5095  * a+b,c,d
5096  * where the a+b target will be used by the Sort/Group steps, and the
5097  * other targets will be used for computing the final results.
5098  *
5099  * 'final_target' is the query's final target list (in PathTarget form)
5100  *
5101  * The result is the PathTarget to be computed by the Paths returned from
5102  * query_planner().
5103  */
5104 static PathTarget *
5106 {
5107  Query *parse = root->parse;
5108  PathTarget *input_target;
5109  List *non_group_cols;
5110  List *non_group_vars;
5111  int i;
5112  ListCell *lc;
5113 
5114  /*
5115  * We must build a target containing all grouping columns, plus any other
5116  * Vars mentioned in the query's targetlist and HAVING qual.
5117  */
5118  input_target = create_empty_pathtarget();
5119  non_group_cols = NIL;
5120 
5121  i = 0;
5122  foreach(lc, final_target->exprs)
5123  {
5124  Expr *expr = (Expr *) lfirst(lc);
5125  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5126 
5127  if (sgref && parse->groupClause &&
5128  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5129  {
5130  /*
5131  * It's a grouping column, so add it to the input target as-is.
5132  */
5133  add_column_to_pathtarget(input_target, expr, sgref);
5134  }
5135  else
5136  {
5137  /*
5138  * Non-grouping column, so just remember the expression for later
5139  * call to pull_var_clause.
5140  */
5141  non_group_cols = lappend(non_group_cols, expr);
5142  }
5143 
5144  i++;
5145  }
5146 
5147  /*
5148  * If there's a HAVING clause, we'll need the Vars it uses, too.
5149  */
5150  if (parse->havingQual)
5151  non_group_cols = lappend(non_group_cols, parse->havingQual);
5152 
5153  /*
5154  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5155  * add them to the input target if not already present. (A Var used
5156  * directly as a GROUP BY item will be present already.) Note this
5157  * includes Vars used in resjunk items, so we are covering the needs of
5158  * ORDER BY and window specifications. Vars used within Aggrefs and
5159  * WindowFuncs will be pulled out here, too.
5160  */
5161  non_group_vars = pull_var_clause((Node *) non_group_cols,
5165  add_new_columns_to_pathtarget(input_target, non_group_vars);
5166 
5167  /* clean up cruft */
5168  list_free(non_group_vars);
5169  list_free(non_group_cols);
5170 
5171  /* XXX this causes some redundant cost calculation ... */
5172  return set_pathtarget_cost_width(root, input_target);
5173 }
5174 
5175 /*
5176  * make_partial_grouping_target
5177  * Generate appropriate PathTarget for output of partial aggregate
5178  * (or partial grouping, if there are no aggregates) nodes.
5179  *
5180  * A partial aggregation node needs to emit all the same aggregates that
5181  * a regular aggregation node would, plus any aggregates used in HAVING;
5182  * except that the Aggref nodes should be marked as partial aggregates.
5183  *
5184  * In addition, we'd better emit any Vars and PlaceholderVars that are
5185  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5186  * these would be Vars that are grouped by or used in grouping expressions.)
5187  *
5188  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5189  * We get the HAVING clause out of *root.
5190  */
5191 static PathTarget *
5193 {
5194  Query *parse = root->parse;
5195  PathTarget *partial_target;
5196  List *non_group_cols;
5197  List *non_group_exprs;
5198  int i;
5199  ListCell *lc;
5200 
5201  partial_target = create_empty_pathtarget();
5202  non_group_cols = NIL;
5203 
5204  i = 0;
5205  foreach(lc, grouping_target->exprs)
5206  {
5207  Expr *expr = (Expr *) lfirst(lc);
5208  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5209 
5210  if (sgref && parse->groupClause &&
5211  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5212  {
5213  /*
5214  * It's a grouping column, so add it to the partial_target as-is.
5215  * (This allows the upper agg step to repeat the grouping calcs.)
5216  */
5217  add_column_to_pathtarget(partial_target, expr, sgref);
5218  }
5219  else
5220  {
5221  /*
5222  * Non-grouping column, so just remember the expression for later
5223  * call to pull_var_clause.
5224  */
5225  non_group_cols = lappend(non_group_cols, expr);
5226  }
5227 
5228  i++;
5229  }
5230 
5231  /*
5232  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5233  */
5234  if (parse->havingQual)
5235  non_group_cols = lappend(non_group_cols, parse->havingQual);
5236 
5237  /*
5238  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5239  * non-group cols (plus HAVING), and add them to the partial_target if not
5240  * already present. (An expression used directly as a GROUP BY item will
5241  * be present already.) Note this includes Vars used in resjunk items, so
5242  * we are covering the needs of ORDER BY and window specifications.
5243  */
5244  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5248 
5249  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5250 
5251  /*
5252  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5253  * are at the top level of the target list, so we can just scan the list
5254  * rather than recursing through the expression trees.
5255  */
5256  foreach(lc, partial_target->exprs)
5257  {
5258  Aggref *aggref = (Aggref *) lfirst(lc);
5259 
5260  if (IsA(aggref, Aggref))
5261  {
5262  Aggref *newaggref;
5263 
5264  /*
5265  * We shouldn't need to copy the substructure of the Aggref node,
5266  * but flat-copy the node itself to avoid damaging other trees.
5267  */
5268  newaggref = makeNode(Aggref);
5269  memcpy(newaggref, aggref, sizeof(Aggref));
5270 
5271  /* For now, assume serialization is required */
5273 
5274  lfirst(lc) = newaggref;
5275  }
5276  }
5277 
5278  /* clean up cruft */
5279  list_free(non_group_exprs);
5280  list_free(non_group_cols);
5281 
5282  /* XXX this causes some redundant cost calculation ... */
5283  return set_pathtarget_cost_width(root, partial_target);
5284 }
5285 
5286 /*
5287  * mark_partial_aggref
5288  * Adjust an Aggref to make it represent a partial-aggregation step.
5289  *
5290  * The Aggref node is modified in-place; caller must do any copying required.
5291  */
5292 void
5294 {
5295  /* aggtranstype should be computed by this point */
5297  /* ... but aggsplit should still be as the parser left it */
5298  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5299 
5300  /* Mark the Aggref with the intended partial-aggregation mode */
5301  agg->aggsplit = aggsplit;
5302 
5303  /*
5304  * Adjust result type if needed. Normally, a partial aggregate returns
5305  * the aggregate's transition type; but if that's INTERNAL and we're
5306  * serializing, it returns BYTEA instead.
5307  */
5308  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5309  {
5310  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5311  agg->aggtype = BYTEAOID;
5312  else
5313  agg->aggtype = agg->aggtranstype;
5314  }
5315 }
5316 
5317 /*
5318  * postprocess_setop_tlist
5319  * Fix up targetlist returned by plan_set_operations().
5320  *
5321  * We need to transpose sort key info from the orig_tlist into new_tlist.
5322  * NOTE: this would not be good enough if we supported resjunk sort keys
5323  * for results of set operations --- then, we'd need to project a whole
5324  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5325  * find any resjunk columns in orig_tlist.
5326  */
5327 static List *
5328 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5329 {
5330  ListCell *l;
5331  ListCell *orig_tlist_item = list_head(orig_tlist);
5332 
5333  foreach(l, new_tlist)
5334  {
5335  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5336  TargetEntry *orig_tle;
5337 
5338  /* ignore resjunk columns in setop result */
5339  if (new_tle->resjunk)
5340  continue;
5341 
5342  Assert(orig_tlist_item != NULL);
5343  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5344  orig_tlist_item = lnext(orig_tlist_item);
5345  if (orig_tle->resjunk) /* should not happen */
5346  elog(ERROR, "resjunk output columns are not implemented");
5347  Assert(new_tle->resno == orig_tle->resno);
5348  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5349  }
5350  if (orig_tlist_item != NULL)
5351  elog(ERROR, "resjunk output columns are not implemented");
5352  return new_tlist;
5353 }
5354 
5355 /*
5356  * select_active_windows
5357  * Create a list of the "active" window clauses (ie, those referenced
5358  * by non-deleted WindowFuncs) in the order they are to be executed.
5359  */
5360 static List *
5362 {
5363  List *result;
5364  List *actives;
5365  ListCell *lc;
5366 
5367  /* First, make a list of the active windows */
5368  actives = NIL;
5369  foreach(lc, root->parse->windowClause)
5370  {
5372 
5373  /* It's only active if wflists shows some related WindowFuncs */
5374  Assert(wc->winref <= wflists->maxWinRef);
5375  if (wflists->windowFuncs[wc->winref] != NIL)
5376  actives = lappend(actives, wc);
5377  }
5378 
5379  /*
5380  * Now, ensure that windows with identical partitioning/ordering clauses
5381  * are adjacent in the list. This is required by the SQL standard, which
5382  * says that only one sort is to be used for such windows, even if they
5383  * are otherwise distinct (eg, different names or framing clauses).
5384  *
5385  * There is room to be much smarter here, for example detecting whether
5386  * one window's sort keys are a prefix of another's (so that sorting for
5387  * the latter would do for the former), or putting windows first that
5388  * match a sort order available for the underlying query. For the moment
5389  * we are content with meeting the spec.
5390  */
5391  result = NIL;
5392  while (actives != NIL)
5393  {
5394  WindowClause *wc = linitial_node(WindowClause, actives);
5395  ListCell *prev;
5396  ListCell *next;
5397 
5398  /* Move wc from actives to result */
5399  actives = list_delete_first(actives);
5400  result = lappend(result, wc);
5401 
5402  /* Now move any matching windows from actives to result */
5403  prev = NULL;
5404  for (lc = list_head(actives); lc; lc = next)
5405  {
5407 
5408  next = lnext(lc);
5409  /* framing options are NOT to be compared here! */
5410  if (equal(wc->partitionClause, wc2->partitionClause) &&
5411  equal(wc->orderClause, wc2->orderClause))
5412  {
5413  actives = list_delete_cell(actives, lc, prev);
5414  result = lappend(result, wc2);
5415  }
5416  else
5417  prev = lc;
5418  }
5419  }
5420 
5421  return result;
5422 }
5423 
5424 /*
5425  * make_window_input_target
5426  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5427  *
5428  * When the query has window functions, this function computes the desired
5429  * target to be computed by the node just below the first WindowAgg.
5430  * This tlist must contain all values needed to evaluate the window functions,
5431  * compute the final target list, and perform any required final sort step.
5432  * If multiple WindowAggs are needed, each intermediate one adds its window
5433  * function results onto this base tlist; only the topmost WindowAgg computes
5434  * the actual desired target list.
5435  *
5436  * This function is much like make_group_input_target, though not quite enough
5437  * like it to share code. As in that function, we flatten most expressions
5438  * into their component variables. But we do not want to flatten window
5439  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5440  * evaluations of them, which would be bad (possibly even resulting in
5441  * inconsistent answers, if they contain volatile functions).
5442  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5443  * make_group_input_target, because we may no longer have access to the
5444  * individual Vars in them.
5445  *
5446  * Another key difference from make_group_input_target is that we don't
5447  * flatten Aggref expressions, since those are to be computed below the
5448  * window functions and just referenced like Vars above that.
5449  *
5450  * 'final_target' is the query's final target list (in PathTarget form)
5451  * 'activeWindows' is the list of active windows previously identified by
5452  * select_active_windows.
5453  *
5454  * The result is the PathTarget to be computed by the plan node immediately
5455  * below the first WindowAgg node.
5456  */
5457 static PathTarget *
5459  PathTarget *final_target,
5460  List *activeWindows)
5461 {
5462  Query *parse = root->parse;
5463  PathTarget *input_target;
5464  Bitmapset *sgrefs;
5465  List *flattenable_cols;
5466  List *flattenable_vars;
5467  int i;
5468  ListCell *lc;
5469 
5470  Assert(parse->hasWindowFuncs);
5471 
5472  /*
5473  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5474  * into a bitmapset for convenient reference below.
5475  */
5476  sgrefs = NULL;
5477  foreach(lc, activeWindows)
5478  {
5480  ListCell *lc2;
5481 
5482  foreach(lc2, wc->partitionClause)
5483  {
5485 
5486  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5487  }
5488  foreach(lc2, wc->orderClause)
5489  {
5491 
5492  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5493  }
5494  }
5495 
5496  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5497  foreach(lc, parse->groupClause)
5498  {
5500 
5501  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5502  }
5503 
5504  /*
5505  * Construct a target containing all the non-flattenable targetlist items,
5506  * and save aside the others for a moment.
5507  */
5508  input_target = create_empty_pathtarget();
5509  flattenable_cols = NIL;
5510 
5511  i = 0;
5512  foreach(lc, final_target->exprs)
5513  {
5514  Expr *expr = (Expr *) lfirst(lc);
5515  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5516 
5517  /*
5518  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5519  * that such items can't contain window functions, so it's okay to
5520  * compute them below the WindowAgg nodes.)
5521  */
5522  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5523  {
5524  /*
5525  * Don't want to deconstruct this value, so add it to the input
5526  * target as-is.
5527  */
5528  add_column_to_pathtarget(input_target, expr, sgref);
5529  }
5530  else
5531  {
5532  /*
5533  * Column is to be flattened, so just remember the expression for
5534  * later call to pull_var_clause.
5535  */
5536  flattenable_cols = lappend(flattenable_cols, expr);
5537  }
5538 
5539  i++;
5540  }
5541 
5542  /*
5543  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5544  * add them to the input target if not already present. (Some might be
5545  * there already because they're used directly as window/group clauses.)
5546  *
5547  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5548  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5549  * at higher levels. On the other hand, we should recurse into
5550  * WindowFuncs to make sure their input expressions are available.
5551  */
5552  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5556  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5557 
5558  /* clean up cruft */
5559  list_free(flattenable_vars);
5560  list_free(flattenable_cols);
5561 
5562  /* XXX this causes some redundant cost calculation ... */
5563  return set_pathtarget_cost_width(root, input_target);
5564 }
5565 
5566 /*
5567  * make_pathkeys_for_window
5568  * Create a pathkeys list describing the required input ordering
5569  * for the given WindowClause.
5570  *
5571  * The required ordering is first the PARTITION keys, then the ORDER keys.
5572  * In the future we might try to implement windowing using hashing, in which
5573  * case the ordering could be relaxed, but for now we always sort.
5574  *
5575  * Caution: if you change this, see createplan.c's get_column_info_for_window!
5576  */
5577 static List *
5579  List *tlist)
5580 {
5581  List *window_pathkeys;
5582  List *window_sortclauses;
5583 
5584  /* Throw error if can't sort */
5586  ereport(ERROR,
5587  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5588  errmsg("could not implement window PARTITION BY"),
5589  errdetail("Window partitioning columns must be of sortable datatypes.")));
5591  ereport(ERROR,
5592  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5593  errmsg("could not implement window ORDER BY"),
5594  errdetail("Window ordering columns must be of sortable datatypes.")));
5595 
5596  /* Okay, make the combined pathkeys */
5597  window_sortclauses = list_concat(list_copy(wc->partitionClause),
5598  list_copy(wc->orderClause));
5599  window_pathkeys = make_pathkeys_for_sortclauses(root,
5600  window_sortclauses,
5601  tlist);
5602  list_free(window_sortclauses);
5603  return window_pathkeys;
5604 }
5605 
5606 /*
5607  * make_sort_input_target
5608  * Generate appropriate PathTarget for initial input to Sort step.
5609  *
5610  * If the query has ORDER BY, this function chooses the target to be computed
5611  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5612  * project) steps. This might or might not be identical to the query's final
5613  * output target.
5614  *
5615  * The main argument for keeping the sort-input tlist the same as the final
5616  * is that we avoid a separate projection node (which will be needed if
5617  * they're different, because Sort can't project). However, there are also
5618  * advantages to postponing tlist evaluation till after the Sort: it ensures
5619  * a consistent order of evaluation for any volatile functions in the tlist,
5620  * and if there's also a LIMIT, we can stop the query without ever computing
5621  * tlist functions for later rows, which is beneficial for both volatile and
5622  * expensive functions.
5623  *
5624  * Our current policy is to postpone volatile expressions till after the sort
5625  * unconditionally (assuming that that's possible, ie they are in plain tlist
5626  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5627  * postpone set-returning expressions, because running them beforehand would
5628  * bloat the sort dataset, and because it might cause unexpected output order
5629  * if the sort isn't stable. However there's a constraint on that: all SRFs
5630  * in the tlist should be evaluated at the same plan step, so that they can
5631  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5632  * mustn't postpone any SRFs. (Note that in principle that policy should
5633  * probably get applied to the group/window input targetlists too, but we
5634  * have not done that historically.) Lastly, expensive expressions are
5635  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5636  * partial evaluation of the query is possible (if neither is true, we expect
5637  * to have to evaluate the expressions for every row anyway), or if there are
5638  * any volatile or set-returning expressions (since once we've put in a
5639  * projection at all, it won't cost any more to postpone more stuff).
5640  *
5641  * Another issue that could potentially be considered here is that
5642  * evaluating tlist expressions could result in data that's either wider
5643  * or narrower than the input Vars, thus changing the volume of data that
5644  * has to go through the Sort. However, we usually have only a very bad
5645  * idea of the output width of any expression more complex than a Var,
5646  * so for now it seems too risky to try to optimize on that basis.
5647  *
5648  * Note that if we do produce a modified sort-input target, and then the
5649  * query ends up not using an explicit Sort, no particular harm is done:
5650  * we'll initially use the modified target for the preceding path nodes,
5651  * but then change them to the final target with apply_projection_to_path.
5652  * Moreover, in such a case the guarantees about evaluation order of
5653  * volatile functions still hold, since the rows are sorted already.
5654  *
5655  * This function has some things in common with make_group_input_target and
5656  * make_window_input_target, though the detailed rules for what to do are
5657  * different. We never flatten/postpone any grouping or ordering columns;
5658  * those are needed before the sort. If we do flatten a particular
5659  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5660  * computed earlier.
5661  *
5662  * 'final_target' is the query's final target list (in PathTarget form)
5663  * 'have_postponed_srfs' is an output argument, see below
5664  *
5665  * The result is the PathTarget to be computed by the plan node immediately
5666  * below the Sort step (and the Distinct step, if any). This will be
5667  * exactly final_target if we decide a projection step wouldn't be helpful.
5668  *
5669  * In addition, *have_postponed_srfs is set to TRUE if we choose to postpone
5670  * any set-returning functions to after the Sort.
5671  */
5672 static PathTarget *
5674  PathTarget *final_target,
5675  bool *have_postponed_srfs)
5676 {
5677  Query *parse = root->parse;
5678  PathTarget *input_target;
5679  int ncols;
5680  bool *col_is_srf;
5681  bool *postpone_col;
5682  bool have_srf;
5683  bool have_volatile;
5684  bool have_expensive;
5685  bool have_srf_sortcols;
5686  bool postpone_srfs;
5687  List *postponable_cols;
5688  List *postponable_vars;
5689  int i;
5690  ListCell *lc;
5691 
5692  /* Shouldn't get here unless query has ORDER BY */
5693  Assert(parse->sortClause);
5694 
5695  *have_postponed_srfs = false; /* default result */
5696 
5697  /* Inspect tlist and collect per-column information */
5698  ncols = list_length(final_target->exprs);
5699  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5700  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5701  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5702 
5703  i = 0;
5704  foreach(lc, final_target->exprs)
5705  {
5706  Expr *expr = (Expr *) lfirst(lc);
5707 
5708  /*
5709  * If the column has a sortgroupref, assume it has to be evaluated
5710  * before sorting. Generally such columns would be ORDER BY, GROUP
5711  * BY, etc targets. One exception is columns that were removed from
5712  * GROUP BY by remove_useless_groupby_columns() ... but those would
5713  * only be Vars anyway. There don't seem to be any cases where it
5714  * would be worth the trouble to double-check.
5715  */
5716  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5717  {
5718  /*
5719  * Check for SRF or volatile functions. Check the SRF case first
5720  * because we must know whether we have any postponed SRFs.
5721  */
5722  if (parse->hasTargetSRFs &&
5723  expression_returns_set((Node *) expr))
5724  {
5725  /* We'll decide below whether these are postponable */
5726  col_is_srf[i] = true;
5727  have_srf = true;
5728  }
5729  else if (contain_volatile_functions((Node *) expr))
5730  {
5731  /* Unconditionally postpone */
5732  postpone_col[i] = true;
5733  have_volatile = true;
5734  }
5735  else
5736  {
5737  /*
5738  * Else check the cost. XXX it's annoying to have to do this
5739  * when set_pathtarget_cost_width() just did it. Refactor to
5740  * allow sharing the work?
5741  */
5742  QualCost cost;
5743 
5744  cost_qual_eval_node(&cost, (Node *) expr, root);
5745 
5746  /*
5747  * We arbitrarily define "expensive" as "more than 10X
5748  * cpu_operator_cost". Note this will take in any PL function
5749  * with default cost.
5750  */
5751  if (cost.per_tuple > 10 * cpu_operator_cost)
5752  {
5753  postpone_col[i] = true;
5754  have_expensive = true;
5755  }
5756  }
5757  }
5758  else
5759  {
5760  /* For sortgroupref cols, just check if any contain SRFs */
5761  if (!have_srf_sortcols &&
5762  parse->hasTargetSRFs &&
5763  expression_returns_set((Node *) expr))
5764  have_srf_sortcols = true;
5765  }
5766 
5767  i++;
5768  }
5769 
5770  /*
5771  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5772  */
5773  postpone_srfs = (have_srf && !have_srf_sortcols);
5774 
5775  /*
5776  * If we don't need a post-sort projection, just return final_target.
5777  */
5778  if (!(postpone_srfs || have_volatile ||
5779  (have_expensive &&
5780  (parse->limitCount || root->tuple_fraction > 0))))
5781  return final_target;
5782 
5783  /*
5784  * Report whether the post-sort projection will contain set-returning
5785  * functions. This is important because it affects whether the Sort can
5786  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5787  * to return.
5788  */
5789  *have_postponed_srfs = postpone_srfs;
5790 
5791  /*
5792  * Construct the sort-input target, taking all non-postponable columns and
5793  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5794  * the postponable ones.
5795  */
5796  input_target = create_empty_pathtarget();
5797  postponable_cols = NIL;
5798 
5799  i = 0;
5800  foreach(lc, final_target->exprs)
5801  {
5802  Expr *expr = (Expr *) lfirst(lc);
5803 
5804  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5805  postponable_cols = lappend(postponable_cols, expr);
5806  else
5807  add_column_to_pathtarget(input_target, expr,
5808  get_pathtarget_sortgroupref(final_target, i));
5809 
5810  i++;
5811  }
5812 
5813  /*
5814  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5815  * postponable columns, and add them to the sort-input target if not
5816  * already present. (Some might be there already.) We mustn't
5817  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5818  * would be unable to recompute them.
5819  */
5820  postponable_vars = pull_var_clause((Node *) postponable_cols,
5824  add_new_columns_to_pathtarget(input_target, postponable_vars);
5825 
5826  /* clean up cruft */
5827  list_free(postponable_vars);
5828  list_free(postponable_cols);
5829 
5830  /* XXX this represents even more redundant cost calculation ... */
5831  return set_pathtarget_cost_width(root, input_target);
5832 }
5833 
5834 /*
5835  * get_cheapest_fractional_path
5836  * Find the cheapest path for retrieving a specified fraction of all
5837  * the tuples expected to be returned by the given relation.
5838  *
5839  * We interpret tuple_fraction the same way as grouping_planner.
5840  *
5841  * We assume set_cheapest() has been run on the given rel.
5842  */
5843 Path *
5844 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5845 {
5846  Path *best_path = rel->cheapest_total_path;
5847  ListCell *l;
5848 
5849  /* If all tuples will be retrieved, just return the cheapest-total path */
5850  if (tuple_fraction <= 0.0)
5851  return best_path;
5852 
5853  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5854  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5855  tuple_fraction /= best_path->rows;
5856 
5857  foreach(l, rel->pathlist)
5858  {
5859  Path *path = (Path *) lfirst(l);
5860 
5861  if (path == rel->cheapest_total_path ||
5862  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)