1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/htup_details.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/xact.h"
26 #include "catalog/pg_proc.h"
27 #include "catalog/pg_type.h"
28 #include "executor/executor.h"
29 #include "executor/nodeAgg.h"
30 #include "foreign/fdwapi.h"
31 #include "miscadmin.h"
32 #include "lib/bipartite_match.h"
33 #include "lib/knapsack.h"
34 #include "nodes/makefuncs.h"
35 #include "nodes/nodeFuncs.h"
36 #ifdef OPTIMIZER_DEBUG
37 #include "nodes/print.h"
38 #endif
39 #include "optimizer/clauses.h"
40 #include "optimizer/cost.h"
41 #include "optimizer/pathnode.h"
42 #include "optimizer/paths.h"
43 #include "optimizer/plancat.h"
44 #include "optimizer/planmain.h"
45 #include "optimizer/planner.h"
46 #include "optimizer/prep.h"
47 #include "optimizer/subselect.h"
48 #include "optimizer/tlist.h"
49 #include "optimizer/var.h"
50 #include "parser/analyze.h"
51 #include "parser/parsetree.h"
52 #include "parser/parse_agg.h"
53 #include "rewrite/rewriteManip.h"
54 #include "storage/dsm_impl.h"
55 #include "utils/rel.h"
56 #include "utils/selfuncs.h"
57 #include "utils/lsyscache.h"
58 #include "utils/syscache.h"
59 
60 
61 /* GUC parameters */
62 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
63 int force_parallel_mode = FORCE_PARALLEL_OFF;
65 
66 /* Hook for plugins to get control in planner() */
67 planner_hook_type planner_hook = NULL;
68 
69 /* Hook for plugins to get control when grouping_planner() plans upper rels */
70 create_upper_paths_hook_type create_upper_paths_hook = NULL;
71 
72 
73 /* Expression kind codes for preprocess_expression */
74 #define EXPRKIND_QUAL 0
75 #define EXPRKIND_TARGET 1
76 #define EXPRKIND_RTFUNC 2
77 #define EXPRKIND_RTFUNC_LATERAL 3
78 #define EXPRKIND_VALUES 4
79 #define EXPRKIND_VALUES_LATERAL 5
80 #define EXPRKIND_LIMIT 6
81 #define EXPRKIND_APPINFO 7
82 #define EXPRKIND_PHV 8
83 #define EXPRKIND_TABLESAMPLE 9
84 #define EXPRKIND_ARBITER_ELEM 10
85 #define EXPRKIND_TABLEFUNC 11
86 #define EXPRKIND_TABLEFUNC_LATERAL 12
87 
88 /* Passthrough data for standard_qp_callback */
89 typedef struct
90 {
91  List *tlist; /* preprocessed query targetlist */
92  List *activeWindows; /* active windows, if any */
93  List *groupClause; /* overrides parse->groupClause */
94 } standard_qp_extra;
95 
96 /*
97  * Data specific to grouping sets
98  */
99 
100 typedef struct
101 {
110 } grouping_sets_data;
111 
112 /* Local functions */
113 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
114 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
115 static void inheritance_planner(PlannerInfo *root);
116 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
117  double tuple_fraction);
119 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
120  int *tleref_to_colnum_map);
121 static void preprocess_rowmarks(PlannerInfo *root);
122 static double preprocess_limit(PlannerInfo *root,
123  double tuple_fraction,
124  int64 *offset_est, int64 *count_est);
125 static bool limit_needed(Query *parse);
127 static List *preprocess_groupclause(PlannerInfo *root, List *force);
128 static List *extract_rollup_sets(List *groupingSets);
129 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
130 static void standard_qp_callback(PlannerInfo *root, void *extra);
131 static double get_number_of_groups(PlannerInfo *root,
132  double path_rows,
133  grouping_sets_data *gd);
135  const AggClauseCosts *agg_costs,
136  double dNumGroups);
138  RelOptInfo *input_rel,
139  PathTarget *target,
140  const AggClauseCosts *agg_costs,
141  grouping_sets_data *gd);
142 static void consider_groupingsets_paths(PlannerInfo *root,
143  RelOptInfo *grouped_rel,
144  Path *path,
145  bool is_sorted,
146  bool can_hash,
147  PathTarget *target,
148  grouping_sets_data *gd,
149  const AggClauseCosts *agg_costs,
150  double dNumGroups);
152  RelOptInfo *input_rel,
153  PathTarget *input_target,
154  PathTarget *output_target,
155  List *tlist,
156  WindowFuncLists *wflists,
157  List *activeWindows);
158 static void create_one_window_path(PlannerInfo *root,
159  RelOptInfo *window_rel,
160  Path *path,
161  PathTarget *input_target,
162  PathTarget *output_target,
163  List *tlist,
164  WindowFuncLists *wflists,
165  List *activeWindows);
167  RelOptInfo *input_rel);
169  RelOptInfo *input_rel,
170  PathTarget *target,
171  double limit_tuples);
173  PathTarget *final_target);
175  PathTarget *grouping_target);
176 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
177 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
179  PathTarget *final_target,
180  List *activeWindows);
182  List *tlist);
184  PathTarget *final_target,
185  bool *have_postponed_srfs);
186 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
187  List *targets, List *targets_contain_srfs);
188 
189 
190 /*****************************************************************************
191  *
192  * Query optimizer entry point
193  *
194  * To support loadable plugins that monitor or modify planner behavior,
195  * we provide a hook variable that lets a plugin get control before and
196  * after the standard planning process. The plugin would normally call
197  * standard_planner().
198  *
199  * Note to plugin authors: standard_planner() scribbles on its Query input,
200  * so you'd better copy that data structure if you want to plan more than once.
201  *
202  *****************************************************************************/
203 PlannedStmt *
204 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
205 {
206  PlannedStmt *result;
207 
208  if (planner_hook)
209  result = (*planner_hook) (parse, cursorOptions, boundParams);
210  else
211  result = standard_planner(parse, cursorOptions, boundParams);
212  return result;
213 }
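/*
 * Illustrative sketch of how a plugin might install planner_hook (the names
 * my_planner and prev_planner are hypothetical; an extension would put this
 * in its own source file rather than here):
 *
 *		static planner_hook_type prev_planner = NULL;
 *
 *		static PlannedStmt *
 *		my_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 *		{
 *			if (prev_planner)
 *				return prev_planner(parse, cursorOptions, boundParams);
 *			return standard_planner(parse, cursorOptions, boundParams);
 *		}
 *
 *		void
 *		_PG_init(void)
 *		{
 *			prev_planner = planner_hook;
 *			planner_hook = my_planner;
 *		}
 */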
214 
215 PlannedStmt *
216 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
217 {
218  PlannedStmt *result;
219  PlannerGlobal *glob;
220  double tuple_fraction;
221  PlannerInfo *root;
222  RelOptInfo *final_rel;
223  Path *best_path;
224  Plan *top_plan;
225  ListCell *lp,
226  *lr;
227 
228  /*
229  * Set up global state for this planner invocation. This data is needed
230  * across all levels of sub-Query that might exist in the given command,
231  * so we keep it in a separate struct that's linked to by each per-Query
232  * PlannerInfo.
233  */
234  glob = makeNode(PlannerGlobal);
235 
236  glob->boundParams = boundParams;
237  glob->subplans = NIL;
238  glob->subroots = NIL;
239  glob->rewindPlanIDs = NULL;
240  glob->finalrtable = NIL;
241  glob->finalrowmarks = NIL;
242  glob->resultRelations = NIL;
243  glob->nonleafResultRelations = NIL;
244  glob->rootResultRelations = NIL;
245  glob->relationOids = NIL;
246  glob->invalItems = NIL;
247  glob->paramExecTypes = NIL;
248  glob->lastPHId = 0;
249  glob->lastRowMarkId = 0;
250  glob->lastPlanNodeId = 0;
251  glob->transientPlan = false;
252  glob->dependsOnRole = false;
253 
254  /*
255  * Assess whether it's feasible to use parallel mode for this query. We
256  * can't do this in a standalone backend, or if the command will try to
257  * modify any data, or if this is a cursor operation, or if GUCs are set
258  * to values that don't permit parallelism, or if parallel-unsafe
259  * functions are present in the query tree.
260  *
261  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
262  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
263  * the command is writing into a completely new table which workers won't
264  * be able to see. If the workers could see the table, the fact that
265  * group locking would cause them to ignore the leader's heavyweight
266  * relation extension lock and GIN page locks would make this unsafe.
267  * We'll have to fix that somehow if we want to allow parallel inserts in
268  * general; updates and deletes have additional problems especially around
269  * combo CIDs.)
270  *
271  * For now, we don't try to use parallel mode if we're running inside a
272  * parallel worker. We might eventually be able to relax this
273  * restriction, but for now it seems best not to have parallel workers
274  * trying to create their own parallel workers.
275  *
276  * We can't use parallelism in serializable mode because the predicate
277  * locking code is not parallel-aware. It's not catastrophic if someone
278  * tries to run a parallel plan in serializable mode; it just won't get
279  * any workers and will run serially. But it seems like a good heuristic
280  * to assume that the same serialization level will be in effect at plan
281  * time and execution time, so don't generate a parallel plan if we're in
282  * serializable mode.
283  */
284  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
285  IsUnderPostmaster &&
286  dynamic_shared_memory_type != DSM_IMPL_NONE &&
287  parse->commandType == CMD_SELECT &&
288  !parse->hasModifyingCTE &&
289  max_parallel_workers_per_gather > 0 &&
290  !IsParallelWorker() &&
291  !IsolationIsSerializable())
292  {
293  /* all the cheap tests pass, so scan the query tree */
294  glob->maxParallelHazard = max_parallel_hazard(parse);
295  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
296  }
297  else
298  {
299  /* skip the query tree scan, just assume it's unsafe */
301  glob->parallelModeOK = false;
302  }
303 
304  /*
305  * glob->parallelModeNeeded is normally set to false here and changed to
306  * true during plan creation if a Gather or Gather Merge plan is actually
307  * created (cf. create_gather_plan, create_gather_merge_plan).
308  *
309  * However, if force_parallel_mode = on or force_parallel_mode = regress,
310  * then we impose parallel mode whenever it's safe to do so, even if the
311  * final plan doesn't use parallelism. It's not safe to do so if the
312  * query contains anything parallel-unsafe; parallelModeOK will be false
313  * in that case. Note that parallelModeOK can't change after this point.
314  * Otherwise, everything in the query is either parallel-safe or
315  * parallel-restricted, and in either case it should be OK to impose
316  * parallel-mode restrictions. If that ends up breaking something, then
317  * either some function the user included in the query is incorrectly
318  * labelled as parallel-safe or parallel-restricted when in reality it's
319  * parallel-unsafe, or else the query planner itself has a bug.
320  */
321  glob->parallelModeNeeded = glob->parallelModeOK &&
322  (force_parallel_mode != FORCE_PARALLEL_OFF);
323 
324  /* Determine what fraction of the plan is likely to be scanned */
325  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
326  {
327  /*
328  * We have no real idea how many tuples the user will ultimately FETCH
329  * from a cursor, but it is often the case that not all of them are
330  * wanted, or that a fast-start plan is preferable so that some of the
331  * tuples can be processed sooner. Use a GUC parameter to decide
332  * what fraction to optimize for.
333  */
334  tuple_fraction = cursor_tuple_fraction;
335 
336  /*
337  * We document cursor_tuple_fraction as simply being a fraction, which
338  * means the edge cases 0 and 1 have to be treated specially here. We
339  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
340  */
341  if (tuple_fraction >= 1.0)
342  tuple_fraction = 0.0;
343  else if (tuple_fraction <= 0.0)
344  tuple_fraction = 1e-10;
345  }
346  else
347  {
348  /* Default assumption is we need all the tuples */
349  tuple_fraction = 0.0;
350  }
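 /*
  * For example, with the default cursor_tuple_fraction of 0.1, a plan for
  * DECLARE CURSOR is optimized for fetching roughly the first 10% of the
  * rows; a setting of 1.0 is mapped to 0.0 ("fetch everything") and 0.0 to
  * a tiny nonzero fraction, per the conversions above.
  */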
351 
352  /* primary planning entry point (may recurse for subqueries) */
353  root = subquery_planner(glob, parse, NULL,
354  false, tuple_fraction);
355 
356  /* Select best Path and turn it into a Plan */
357  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
358  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
359 
360  top_plan = create_plan(root, best_path);
361 
362  /*
363  * If creating a plan for a scrollable cursor, make sure it can run
364  * backwards on demand. Add a Material node at the top if needed.
365  */
366  if (cursorOptions & CURSOR_OPT_SCROLL)
367  {
368  if (!ExecSupportsBackwardScan(top_plan))
369  top_plan = materialize_finished_plan(top_plan);
370  }
371 
372  /*
373  * Optionally add a Gather node for testing purposes, provided this is
374  * actually a safe thing to do.
375  */
376  if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
377  {
378  Gather *gather = makeNode(Gather);
379 
380  /*
381  * If there are any initPlans attached to the formerly-top plan node,
382  * move them up to the Gather node; same as we do for Material node in
383  * materialize_finished_plan.
384  */
385  gather->plan.initPlan = top_plan->initPlan;
386  top_plan->initPlan = NIL;
387 
388  gather->plan.targetlist = top_plan->targetlist;
389  gather->plan.qual = NIL;
390  gather->plan.lefttree = top_plan;
391  gather->plan.righttree = NULL;
392  gather->num_workers = 1;
393  gather->single_copy = true;
394  gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
395 
396  /*
397  * Since this Gather has no parallel-aware descendants to signal to,
398  * we don't need a rescan Param.
399  */
400  gather->rescan_param = -1;
401 
402  /*
403  * Ideally we'd use cost_gather here, but setting up dummy path data
404  * to satisfy it doesn't seem much cleaner than knowing what it does.
405  */
406  gather->plan.startup_cost = top_plan->startup_cost +
407  parallel_setup_cost;
408  gather->plan.total_cost = top_plan->total_cost +
409  parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
410  gather->plan.plan_rows = top_plan->plan_rows;
411  gather->plan.plan_width = top_plan->plan_width;
412  gather->plan.parallel_aware = false;
413  gather->plan.parallel_safe = false;
414 
415  /* use parallel mode for parallel plans. */
416  root->glob->parallelModeNeeded = true;
417 
418  top_plan = &gather->plan;
419  }
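 /*
  * Usage note: the block above is reached only when force_parallel_mode is
  * set to "on" or "regress" (e.g. SET force_parallel_mode = regress) and
  * the plan is parallel-safe; with "regress" the single-worker Gather is
  * marked invisible so EXPLAIN output matches a non-forced run.
  */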
420 
421  /*
422  * If any Params were generated, run through the plan tree and compute
423  * each plan node's extParam/allParam sets. Ideally we'd merge this into
424  * set_plan_references' tree traversal, but for now it has to be separate
425  * because we need to visit subplans before, not after, the main plan.
426  */
427  if (glob->paramExecTypes != NIL)
428  {
429  Assert(list_length(glob->subplans) == list_length(glob->subroots));
430  forboth(lp, glob->subplans, lr, glob->subroots)
431  {
432  Plan *subplan = (Plan *) lfirst(lp);
433  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
434 
435  SS_finalize_plan(subroot, subplan);
436  }
437  SS_finalize_plan(root, top_plan);
438  }
439 
440  /* final cleanup of the plan */
441  Assert(glob->finalrtable == NIL);
442  Assert(glob->finalrowmarks == NIL);
443  Assert(glob->resultRelations == NIL);
444  Assert(glob->nonleafResultRelations == NIL);
445  Assert(glob->rootResultRelations == NIL);
446  top_plan = set_plan_references(root, top_plan);
447  /* ... and the subplans (both regular subplans and initplans) */
448  Assert(list_length(glob->subplans) == list_length(glob->subroots));
449  forboth(lp, glob->subplans, lr, glob->subroots)
450  {
451  Plan *subplan = (Plan *) lfirst(lp);
452  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
453 
454  lfirst(lp) = set_plan_references(subroot, subplan);
455  }
456 
457  /* build the PlannedStmt result */
458  result = makeNode(PlannedStmt);
459 
460  result->commandType = parse->commandType;
461  result->queryId = parse->queryId;
462  result->hasReturning = (parse->returningList != NIL);
463  result->hasModifyingCTE = parse->hasModifyingCTE;
464  result->canSetTag = parse->canSetTag;
465  result->transientPlan = glob->transientPlan;
466  result->dependsOnRole = glob->dependsOnRole;
467  result->parallelModeNeeded = glob->parallelModeNeeded;
468  result->planTree = top_plan;
469  result->rtable = glob->finalrtable;
470  result->resultRelations = glob->resultRelations;
471  result->nonleafResultRelations = glob->nonleafResultRelations;
472  result->rootResultRelations = glob->rootResultRelations;
473  result->subplans = glob->subplans;
474  result->rewindPlanIDs = glob->rewindPlanIDs;
475  result->rowMarks = glob->finalrowmarks;
476  result->relationOids = glob->relationOids;
477  result->invalItems = glob->invalItems;
478  result->paramExecTypes = glob->paramExecTypes;
479  /* utilityStmt should be null, but we might as well copy it */
480  result->utilityStmt = parse->utilityStmt;
481  result->stmt_location = parse->stmt_location;
482  result->stmt_len = parse->stmt_len;
483 
484  return result;
485 }
486 
487 
488 /*--------------------
489  * subquery_planner
490  * Invokes the planner on a subquery. We recurse to here for each
491  * sub-SELECT found in the query tree.
492  *
493  * glob is the global state for the current planner run.
494  * parse is the querytree produced by the parser & rewriter.
495  * parent_root is the immediate parent Query's info (NULL at the top level).
496  * hasRecursion is true if this is a recursive WITH query.
497  * tuple_fraction is the fraction of tuples we expect will be retrieved.
498  * tuple_fraction is interpreted as explained for grouping_planner, below.
499  *
500  * Basically, this routine does the stuff that should only be done once
501  * per Query object. It then calls grouping_planner. At one time,
502  * grouping_planner could be invoked recursively on the same Query object;
503  * that's not currently true, but we keep the separation between the two
504  * routines anyway, in case we need it again someday.
505  *
506  * subquery_planner will be called recursively to handle sub-Query nodes
507  * found within the query's expressions and rangetable.
508  *
509  * Returns the PlannerInfo struct ("root") that contains all data generated
510  * while planning the subquery. In particular, the Path(s) attached to
511  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
512  * cheapest way(s) to implement the query. The top level will select the
513  * best Path and pass it through createplan.c to produce a finished Plan.
514  *--------------------
515  */
516 PlannerInfo *
517 subquery_planner(PlannerGlobal *glob, Query *parse,
518  PlannerInfo *parent_root,
519  bool hasRecursion, double tuple_fraction)
520 {
521  PlannerInfo *root;
522  List *newWithCheckOptions;
523  List *newHaving;
524  bool hasOuterJoins;
525  RelOptInfo *final_rel;
526  ListCell *l;
527 
528  /* Create a PlannerInfo data structure for this subquery */
529  root = makeNode(PlannerInfo);
530  root->parse = parse;
531  root->glob = glob;
532  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
533  root->parent_root = parent_root;
534  root->plan_params = NIL;
535  root->outer_params = NULL;
537  root->init_plans = NIL;
538  root->cte_plan_ids = NIL;
539  root->multiexpr_params = NIL;
540  root->eq_classes = NIL;
541  root->append_rel_list = NIL;
542  root->pcinfo_list = NIL;
543  root->rowMarks = NIL;
544  memset(root->upper_rels, 0, sizeof(root->upper_rels));
545  memset(root->upper_targets, 0, sizeof(root->upper_targets));
546  root->processed_tlist = NIL;
547  root->grouping_map = NULL;
548  root->minmax_aggs = NIL;
549  root->qual_security_level = 0;
550  root->hasInheritedTarget = false;
551  root->hasRecursion = hasRecursion;
552  if (hasRecursion)
553  root->wt_param_id = SS_assign_special_param(root);
554  else
555  root->wt_param_id = -1;
556  root->non_recursive_path = NULL;
557 
558  /*
559  * If there is a WITH list, process each WITH query and build an initplan
560  * SubPlan structure for it.
561  */
562  if (parse->cteList)
563  SS_process_ctes(root);
564 
565  /*
566  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
567  * to transform them into joins. Note that this step does not descend
568  * into subqueries; if we pull up any subqueries below, their SubLinks are
569  * processed just before pulling them up.
570  */
571  if (parse->hasSubLinks)
572  pull_up_sublinks(root);
573 
574  /*
575  * Scan the rangetable for set-returning functions, and inline them if
576  * possible (producing subqueries that might get pulled up next).
577  * Recursion issues here are handled in the same way as for SubLinks.
578  */
579  inline_set_returning_functions(root);
580 
581  /*
582  * Check to see if any subqueries in the jointree can be merged into this
583  * query.
584  */
585  pull_up_subqueries(root);
586 
587  /*
588  * If this is a simple UNION ALL query, flatten it into an appendrel. We
589  * do this now because it requires applying pull_up_subqueries to the leaf
590  * queries of the UNION ALL, which weren't touched above because they
591  * weren't referenced by the jointree (they will be after we do this).
592  */
593  if (parse->setOperations)
594  flatten_simple_union_all(root);
595 
596  /*
597  * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
598  * avoid the expense of doing flatten_join_alias_vars(). Also check for
599  * outer joins --- if none, we can skip reduce_outer_joins(). And check
600  * for LATERAL RTEs, too. This must be done after we have done
601  * pull_up_subqueries(), of course.
602  */
603  root->hasJoinRTEs = false;
604  root->hasLateralRTEs = false;
605  hasOuterJoins = false;
606  foreach(l, parse->rtable)
607  {
609 
610  if (rte->rtekind == RTE_JOIN)
611  {
612  root->hasJoinRTEs = true;
613  if (IS_OUTER_JOIN(rte->jointype))
614  hasOuterJoins = true;
615  }
616  if (rte->lateral)
617  root->hasLateralRTEs = true;
618  }
619 
620  /*
621  * Preprocess RowMark information. We need to do this after subquery
622  * pullup (so that all non-inherited RTEs are present) and before
623  * inheritance expansion (so that the info is available for
624  * expand_inherited_tables to examine and modify).
625  */
626  preprocess_rowmarks(root);
627 
628  /*
629  * Expand any rangetable entries that are inheritance sets into "append
630  * relations". This can add entries to the rangetable, but they must be
631  * plain base relations not joins, so it's OK (and marginally more
632  * efficient) to do it after checking for join RTEs. We must do it after
633  * pulling up subqueries, else we'd fail to handle inherited tables in
634  * subqueries.
635  */
636  expand_inherited_tables(root);
637 
638  /*
639  * Set hasHavingQual to remember if HAVING clause is present. Needed
640  * because preprocess_expression will reduce a constant-true condition to
641  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
642  */
643  root->hasHavingQual = (parse->havingQual != NULL);
644 
645  /* Clear this flag; might get set in distribute_qual_to_rels */
646  root->hasPseudoConstantQuals = false;
647 
648  /*
649  * Do expression preprocessing on targetlist and quals, as well as other
650  * random expressions in the querytree. Note that we do not need to
651  * handle sort/group expressions explicitly, because they are actually
652  * part of the targetlist.
653  */
654  parse->targetList = (List *)
655  preprocess_expression(root, (Node *) parse->targetList,
656  EXPRKIND_TARGET);
657 
658  /* Constant-folding might have removed all set-returning functions */
659  if (parse->hasTargetSRFs)
660  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
661 
662  newWithCheckOptions = NIL;
663  foreach(l, parse->withCheckOptions)
664  {
666 
667  wco->qual = preprocess_expression(root, wco->qual,
668  EXPRKIND_QUAL);
669  if (wco->qual != NULL)
670  newWithCheckOptions = lappend(newWithCheckOptions, wco);
671  }
672  parse->withCheckOptions = newWithCheckOptions;
673 
674  parse->returningList = (List *)
675  preprocess_expression(root, (Node *) parse->returningList,
676  EXPRKIND_TARGET);
677 
678  preprocess_qual_conditions(root, (Node *) parse->jointree);
679 
680  parse->havingQual = preprocess_expression(root, parse->havingQual,
681  EXPRKIND_QUAL);
682 
683  foreach(l, parse->windowClause)
684  {
686 
687  /* partitionClause/orderClause are sort/group expressions */
690  wc->endOffset = preprocess_expression(root, wc->endOffset,
692  }
693 
694  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
695  EXPRKIND_LIMIT);
696  parse->limitCount = preprocess_expression(root, parse->limitCount,
697  EXPRKIND_LIMIT);
698 
699  if (parse->onConflict)
700  {
701  parse->onConflict->arbiterElems = (List *)
703  (Node *) parse->onConflict->arbiterElems,
705  parse->onConflict->arbiterWhere =
707  parse->onConflict->arbiterWhere,
708  EXPRKIND_QUAL);
709  parse->onConflict->onConflictSet = (List *)
711  (Node *) parse->onConflict->onConflictSet,
713  parse->onConflict->onConflictWhere =
715  parse->onConflict->onConflictWhere,
716  EXPRKIND_QUAL);
717  /* exclRelTlist contains only Vars, so no preprocessing needed */
718  }
719 
720  root->append_rel_list = (List *)
723 
724  /* Also need to preprocess expressions within RTEs */
725  foreach(l, parse->rtable)
726  {
728  int kind;
729  ListCell *lcsq;
730 
731  if (rte->rtekind == RTE_RELATION)
732  {
733  if (rte->tablesample)
734  rte->tablesample = (TableSampleClause *)
736  (Node *) rte->tablesample,
738  }
739  else if (rte->rtekind == RTE_SUBQUERY)
740  {
741  /*
742  * We don't want to do all preprocessing yet on the subquery's
743  * expressions, since that will happen when we plan it. But if it
744  * contains any join aliases of our level, those have to get
745  * expanded now, because planning of the subquery won't do it.
746  * That's only possible if the subquery is LATERAL.
747  */
748  if (rte->lateral && root->hasJoinRTEs)
749  rte->subquery = (Query *)
750  flatten_join_alias_vars(root, (Node *) rte->subquery);
751  }
752  else if (rte->rtekind == RTE_FUNCTION)
753  {
754  /* Preprocess the function expression(s) fully */
756  rte->functions = (List *)
757  preprocess_expression(root, (Node *) rte->functions, kind);
758  }
759  else if (rte->rtekind == RTE_TABLEFUNC)
760  {
761  /* Preprocess the function expression(s) fully */
763  rte->tablefunc = (TableFunc *)
764  preprocess_expression(root, (Node *) rte->tablefunc, kind);
765  }
766  else if (rte->rtekind == RTE_VALUES)
767  {
768  /* Preprocess the values lists fully */
770  rte->values_lists = (List *)
771  preprocess_expression(root, (Node *) rte->values_lists, kind);
772  }
773 
774  /*
775  * Process each element of the securityQuals list as if it were a
776  * separate qual expression (as indeed it is). We need to do it this
777  * way to get proper canonicalization of AND/OR structure. Note that
778  * this converts each element into an implicit-AND sublist.
779  */
780  foreach(lcsq, rte->securityQuals)
781  {
782  lfirst(lcsq) = preprocess_expression(root,
783  (Node *) lfirst(lcsq),
784  EXPRKIND_QUAL);
785  }
786  }
787 
788  /*
789  * Now that we are done preprocessing expressions, and in particular done
790  * flattening join alias variables, get rid of the joinaliasvars lists.
791  * They no longer match what expressions in the rest of the tree look
792  * like, because we have not preprocessed expressions in those lists (and
793  * do not want to; for example, expanding a SubLink there would result in
794  * a useless unreferenced subplan). Leaving them in place simply creates
795  * a hazard for later scans of the tree. We could try to prevent that by
796  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
797  * but that doesn't sound very reliable.
798  */
799  if (root->hasJoinRTEs)
800  {
801  foreach(l, parse->rtable)
802  {
804 
805  rte->joinaliasvars = NIL;
806  }
807  }
808 
809  /*
810  * In some cases we may want to transfer a HAVING clause into WHERE. We
811  * cannot do so if the HAVING clause contains aggregates (obviously) or
812  * volatile functions (since a HAVING clause is supposed to be executed
813  * only once per group). We also can't do this if there are any nonempty
814  * grouping sets; moving such a clause into WHERE would potentially change
815  * the results, if any referenced column isn't present in all the grouping
816  * sets. (If there are only empty grouping sets, then the HAVING clause
817  * must be degenerate as discussed below.)
818  *
819  * Also, it may be that the clause is so expensive to execute that we're
820  * better off doing it only once per group, despite the loss of
821  * selectivity. This is hard to estimate short of doing the entire
822  * planning process twice, so we use a heuristic: clauses containing
823  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
824  * clause into WHERE, in hopes of eliminating tuples before aggregation
825  * instead of after.
826  *
827  * If the query has explicit grouping then we can simply move such a
828  * clause into WHERE; any group that fails the clause will not be in the
829  * output because none of its tuples will reach the grouping or
830  * aggregation stage. Otherwise we must have a degenerate (variable-free)
831  * HAVING clause, which we put in WHERE so that query_planner() can use it
832  * in a gating Result node, but also keep in HAVING to ensure that we
833  * don't emit a bogus aggregated row. (This could be done better, but it
834  * seems not worth optimizing.)
835  *
836  * Note that both havingQual and parse->jointree->quals are in
837  * implicitly-ANDed-list form at this point, even though they are declared
838  * as Node *.
839  */
840  newHaving = NIL;
841  foreach(l, (List *) parse->havingQual)
842  {
843  Node *havingclause = (Node *) lfirst(l);
844 
845  if ((parse->groupClause && parse->groupingSets) ||
846  contain_agg_clause(havingclause) ||
847  contain_volatile_functions(havingclause) ||
848  contain_subplans(havingclause))
849  {
850  /* keep it in HAVING */
851  newHaving = lappend(newHaving, havingclause);
852  }
853  else if (parse->groupClause && !parse->groupingSets)
854  {
855  /* move it to WHERE */
856  parse->jointree->quals = (Node *)
857  lappend((List *) parse->jointree->quals, havingclause);
858  }
859  else
860  {
861  /* put a copy in WHERE, keep it in HAVING */
862  parse->jointree->quals = (Node *)
863  lappend((List *) parse->jointree->quals,
864  copyObject(havingclause));
865  newHaving = lappend(newHaving, havingclause);
866  }
867  }
868  parse->havingQual = (Node *) newHaving;
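 /*
  * As an illustration of the rules above: for a (hypothetical) query
  *		SELECT deptno, sum(sal) FROM emp GROUP BY deptno HAVING deptno > 10
  * the clause "deptno > 10" has no aggregates, volatile functions, or
  * subplans and the query uses plain GROUP BY, so it is moved into WHERE
  * and filters rows before aggregation; "HAVING sum(sal) > 100" would have
  * to remain in HAVING.
  */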
869 
870  /* Remove any redundant GROUP BY columns */
871  remove_useless_groupby_columns(root);
872 
873  /*
874  * If we have any outer joins, try to reduce them to plain inner joins.
875  * This step is most easily done after we've done expression
876  * preprocessing.
877  */
878  if (hasOuterJoins)
879  reduce_outer_joins(root);
880 
881  /*
882  * Do the main planning. If we have an inherited target relation, that
883  * needs special processing, else go straight to grouping_planner.
884  */
885  if (parse->resultRelation &&
886  rt_fetch(parse->resultRelation, parse->rtable)->inh)
887  inheritance_planner(root);
888  else
889  grouping_planner(root, false, tuple_fraction);
890 
891  /*
892  * Capture the set of outer-level param IDs we have access to, for use in
893  * extParam/allParam calculations later.
894  */
895  SS_identify_outer_params(root);
896 
897  /*
898  * If any initPlans were created in this query level, adjust the surviving
899  * Paths' costs and parallel-safety flags to account for them. The
900  * initPlans won't actually get attached to the plan tree till
901  * create_plan() runs, but we must include their effects now.
902  */
903  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
904  SS_charge_for_initplans(root, final_rel);
905 
906  /*
907  * Make sure we've identified the cheapest Path for the final rel. (By
908  * doing this here not in grouping_planner, we include initPlan costs in
909  * the decision, though it's unlikely that will change anything.)
910  */
911  set_cheapest(final_rel);
912 
913  return root;
914 }
915 
916 /*
917  * preprocess_expression
918  * Do subquery_planner's preprocessing work for an expression,
919  * which can be a targetlist, a WHERE clause (including JOIN/ON
920  * conditions), a HAVING clause, or a few other things.
921  */
922 static Node *
923 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
924 {
925  /*
926  * Fall out quickly if expression is empty. This occurs often enough to
927  * be worth checking. Note that null->null is the correct conversion for
928  * implicit-AND result format, too.
929  */
930  if (expr == NULL)
931  return NULL;
932 
933  /*
934  * If the query has any join RTEs, replace join alias variables with
935  * base-relation variables. We must do this first, since any expressions
936  * we may extract from the joinaliasvars lists have not been preprocessed.
937  * For example, if we did this after sublink processing, sublinks expanded
938  * out from join aliases would not get processed. But we can skip this in
939  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
940  * they can't contain any Vars of the current query level.
941  */
942  if (root->hasJoinRTEs &&
943  !(kind == EXPRKIND_RTFUNC ||
944  kind == EXPRKIND_VALUES ||
945  kind == EXPRKIND_TABLESAMPLE ||
946  kind == EXPRKIND_TABLEFUNC))
947  expr = flatten_join_alias_vars(root, expr);
948 
949  /*
950  * Simplify constant expressions.
951  *
952  * Note: an essential effect of this is to convert named-argument function
953  * calls to positional notation and insert the current actual values of
954  * any default arguments for functions. To ensure that happens, we *must*
955  * process all expressions here. Previous PG versions sometimes skipped
956  * const-simplification if it didn't seem worth the trouble, but we can't
957  * do that anymore.
958  *
959  * Note: this also flattens nested AND and OR expressions into N-argument
960  * form. All processing of a qual expression after this point must be
961  * careful to maintain AND/OR flatness --- that is, do not generate a tree
962  * with AND directly under AND, nor OR directly under OR.
963  */
964  expr = eval_const_expressions(root, expr);
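 /*
  * As a concrete example of what this step does: for a hypothetical
  * function declared as f(a int DEFAULT 1, b int), a call written as
  * f(b => 2) is rewritten into positional form f(1, 2), and an expression
  * such as 2 + 2 is folded to the constant 4.
  */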
965 
966  /*
967  * If it's a qual or havingQual, canonicalize it.
968  */
969  if (kind == EXPRKIND_QUAL)
970  {
971  expr = (Node *) canonicalize_qual((Expr *) expr);
972 
973 #ifdef OPTIMIZER_DEBUG
974  printf("After canonicalize_qual()\n");
975  pprint(expr);
976 #endif
977  }
978 
979  /* Expand SubLinks to SubPlans */
980  if (root->parse->hasSubLinks)
981  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
982 
983  /*
984  * XXX do not insert anything here unless you have grokked the comments in
985  * SS_replace_correlation_vars ...
986  */
987 
988  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
989  if (root->query_level > 1)
990  expr = SS_replace_correlation_vars(root, expr);
991 
992  /*
993  * If it's a qual or havingQual, convert it to implicit-AND format. (We
994  * don't want to do this before eval_const_expressions, since the latter
995  * would be unable to simplify a top-level AND correctly. Also,
996  * SS_process_sublinks expects explicit-AND format.)
997  */
998  if (kind == EXPRKIND_QUAL)
999  expr = (Node *) make_ands_implicit((Expr *) expr);
1000 
1001  return expr;
1002 }
1003 
1004 /*
1005  * preprocess_qual_conditions
1006  * Recursively scan the query's jointree and do subquery_planner's
1007  * preprocessing work on each qual condition found therein.
1008  */
1009 static void
1010 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1011 {
1012  if (jtnode == NULL)
1013  return;
1014  if (IsA(jtnode, RangeTblRef))
1015  {
1016  /* nothing to do here */
1017  }
1018  else if (IsA(jtnode, FromExpr))
1019  {
1020  FromExpr *f = (FromExpr *) jtnode;
1021  ListCell *l;
1022 
1023  foreach(l, f->fromlist)
1024  preprocess_qual_conditions(root, lfirst(l));
1025 
1026  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1027  }
1028  else if (IsA(jtnode, JoinExpr))
1029  {
1030  JoinExpr *j = (JoinExpr *) jtnode;
1031 
1032  preprocess_qual_conditions(root, j->larg);
1033  preprocess_qual_conditions(root, j->rarg);
1034 
1035  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1036  }
1037  else
1038  elog(ERROR, "unrecognized node type: %d",
1039  (int) nodeTag(jtnode));
1040 }
1041 
1042 /*
1043  * preprocess_phv_expression
1044  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1045  *
1046  * If a LATERAL subquery references an output of another subquery, and that
1047  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1048  * join, then we'll push the PlaceHolderVar expression down into the subquery
1049  * and later pull it back up during find_lateral_references, which runs after
1050  * subquery_planner has preprocessed all the expressions that were in the
1051  * current query level to start with. So we need to preprocess it then.
1052  */
1053 Expr *
1054 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1055 {
1056  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1057 }
1058 
1059 /*
1060  * inheritance_planner
1061  * Generate Paths in the case where the result relation is an
1062  * inheritance set.
1063  *
1064  * We have to handle this case differently from cases where a source relation
1065  * is an inheritance set. Source inheritance is expanded at the bottom of the
1066  * plan tree (see allpaths.c), but target inheritance has to be expanded at
1067  * the top. The reason is that for UPDATE, each target relation needs a
1068  * different targetlist matching its own column set. Fortunately,
1069  * the UPDATE/DELETE target can never be the nullable side of an outer join,
1070  * so it's OK to generate the plan this way.
1071  *
1072  * Returns nothing; the useful output is in the Paths we attach to
1073  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1074  *
1075  * Note that we have not done set_cheapest() on the final rel; it's convenient
1076  * to leave this to the caller.
1077  */
1078 static void
1079 inheritance_planner(PlannerInfo *root)
1080 {
1081  Query *parse = root->parse;
1082  int top_parentRTindex = parse->resultRelation;
1083  Bitmapset *subqueryRTindexes;
1084  Bitmapset *modifiableARIindexes;
1085  int nominalRelation = -1;
1086  List *final_rtable = NIL;
1087  int save_rel_array_size = 0;
1088  RelOptInfo **save_rel_array = NULL;
1089  List *subpaths = NIL;
1090  List *subroots = NIL;
1091  List *resultRelations = NIL;
1092  List *withCheckOptionLists = NIL;
1093  List *returningLists = NIL;
1094  List *rowMarks;
1095  RelOptInfo *final_rel;
1096  ListCell *lc;
1097  Index rti;
1098  RangeTblEntry *parent_rte;
1099  List *partitioned_rels = NIL;
1100  PlannerInfo *parent_root;
1101  Query *parent_parse;
1102  Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
1103  PlannerInfo **parent_roots = NULL;
1104 
1105  Assert(parse->commandType != CMD_INSERT);
1106 
1107  /*
1108  * We generate a modified instance of the original Query for each target
1109  * relation, plan that, and put all the plans into a list that will be
1110  * controlled by a single ModifyTable node. All the instances share the
1111  * same rangetable, but each instance must have its own set of subquery
1112  * RTEs within the finished rangetable because (1) they are likely to get
1113  * scribbled on during planning, and (2) it's not inconceivable that
1114  * subqueries could get planned differently in different cases. We need
1115  * not create duplicate copies of other RTE kinds, in particular not the
1116  * target relations, because they don't have either of those issues. Not
1117  * having to duplicate the target relations is important because doing so
1118  * (1) would result in a rangetable of length O(N^2) for N targets, with
1119  * at least O(N^3) work expended here; and (2) would greatly complicate
1120  * management of the rowMarks list.
1121  *
1122  * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1123  */
1124  subqueryRTindexes = NULL;
1125  rti = 1;
1126  foreach(lc, parse->rtable)
1127  {
1129 
1130  if (rte->rtekind == RTE_SUBQUERY)
1131  subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1132  rti++;
1133  }
1134 
1135  /*
1136  * Next, we want to identify which AppendRelInfo items contain references
1137  * to any of the aforesaid subquery RTEs. These items will need to be
1138  * copied and modified to adjust their subquery references; whereas the
1139  * other ones need not be touched. It's worth being tense over this
1140  * because we can usually avoid processing most of the AppendRelInfo
1141  * items, thereby saving O(N^2) space and time when the target is a large
1142  * inheritance tree. We can identify AppendRelInfo items by their
1143  * child_relid, since that should be unique within the list.
1144  */
1145  modifiableARIindexes = NULL;
1146  if (subqueryRTindexes != NULL)
1147  {
1148  foreach(lc, root->append_rel_list)
1149  {
1150  AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
1151 
1152  if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1153  bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
1155  subqueryRTindexes))
1156  modifiableARIindexes = bms_add_member(modifiableARIindexes,
1157  appinfo->child_relid);
1158  }
1159  }
1160 
1161  /*
1162  * If the parent RTE is a partitioned table, we should use that as the
1163  * nominal relation, because the RTEs added for partitioned tables
1164  * (including the root parent) as child members of the inheritance set do
1165  * not appear anywhere else in the plan. The situation is exactly the
1166  * opposite in the case of non-partitioned inheritance parent as described
1167  * below. For the same reason, collect the list of descendant partitioned
1168  * tables to be saved in ModifyTable node, so that executor can lock those
1169  * as well.
1170  */
1171  parent_rte = rt_fetch(top_parentRTindex, root->parse->rtable);
1172  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1173  {
1174  nominalRelation = top_parentRTindex;
1175  partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex);
1176  /* The root partitioned table is included as a child rel */
1177  Assert(list_length(partitioned_rels) >= 1);
1178  }
1179 
1180  /*
1181  * The PlannerInfo for each child is obtained by translating the relevant
1182  * members of the PlannerInfo for its immediate parent, which we find
1183  * using the parent_relid in its AppendRelInfo. We save the PlannerInfo
1184  * for each parent in an array indexed by relid for fast retrieval. Since
1185  * the maximum number of parents is limited by the number of RTEs in the
1186  * query, we use that number to allocate the array. An extra entry is
1187  * needed since relids start from 1.
1188  */
1189  parent_roots = (PlannerInfo **) palloc0((list_length(parse->rtable) + 1) *
1190  sizeof(PlannerInfo *));
1191  parent_roots[top_parentRTindex] = root;
1192 
1193  /*
1194  * And now we can get on with generating a plan for each child table.
1195  */
1196  foreach(lc, root->append_rel_list)
1197  {
1198  AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
1199  PlannerInfo *subroot;
1200  RangeTblEntry *child_rte;
1201  RelOptInfo *sub_final_rel;
1202  Path *subpath;
1203 
1204  /* append_rel_list contains all append rels; ignore others */
1205  if (!bms_is_member(appinfo->parent_relid, parent_relids))
1206  continue;
1207 
1208  /*
1209  * expand_inherited_rtentry() always processes a parent before any of
1210  * that parent's children, so the parent_root for this relation should
1211  * already be available.
1212  */
1213  parent_root = parent_roots[appinfo->parent_relid];
1214  Assert(parent_root != NULL);
1215  parent_parse = parent_root->parse;
1216 
1217  /*
1218  * We need a working copy of the PlannerInfo so that we can control
1219  * propagation of information back to the main copy.
1220  */
1221  subroot = makeNode(PlannerInfo);
1222  memcpy(subroot, parent_root, sizeof(PlannerInfo));
1223 
1224  /*
1225  * Generate modified query with this rel as target. We first apply
1226  * adjust_appendrel_attrs, which copies the Query and changes
1227  * references to the parent RTE to refer to the current child RTE,
1228  * then fool around with subquery RTEs.
1229  */
1230  subroot->parse = (Query *)
1231  adjust_appendrel_attrs(parent_root,
1232  (Node *) parent_parse,
1233  1, &appinfo);
1234 
1235  /*
1236  * If there are securityQuals attached to the parent, move them to the
1237  * child rel (they've already been transformed properly for that).
1238  */
1239  parent_rte = rt_fetch(appinfo->parent_relid, subroot->parse->rtable);
1240  child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1241  child_rte->securityQuals = parent_rte->securityQuals;
1242  parent_rte->securityQuals = NIL;
1243 
1244  /*
1245  * The rowMarks list might contain references to subquery RTEs, so
1246  * make a copy that we can apply ChangeVarNodes to. (Fortunately, the
1247  * executor doesn't need to see the modified copies --- we can just
1248  * pass it the original rowMarks list.)
1249  */
1250  subroot->rowMarks = copyObject(parent_root->rowMarks);
1251 
1252  /*
1253  * The append_rel_list likewise might contain references to subquery
1254  * RTEs (if any subqueries were flattenable UNION ALLs). So prepare
1255  * to apply ChangeVarNodes to that, too. As explained above, we only
1256  * want to copy items that actually contain such references; the rest
1257  * can just get linked into the subroot's append_rel_list.
1258  *
1259  * If we know there are no such references, we can just use the outer
1260  * append_rel_list unmodified.
1261  */
1262  if (modifiableARIindexes != NULL)
1263  {
1264  ListCell *lc2;
1265 
1266  subroot->append_rel_list = NIL;
1267  foreach(lc2, parent_root->append_rel_list)
1268  {
1269  AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
1270 
1271  if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1272  appinfo2 = copyObject(appinfo2);
1273 
1274  subroot->append_rel_list = lappend(subroot->append_rel_list,
1275  appinfo2);
1276  }
1277  }
1278 
1279  /*
1280  * Add placeholders to the child Query's rangetable list to fill the
1281  * RT indexes already reserved for subqueries in previous children.
1282  * These won't be referenced, so there's no need to make them very
1283  * valid-looking.
1284  */
1285  while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1286  subroot->parse->rtable = lappend(subroot->parse->rtable,
1288 
1289  /*
1290  * If this isn't the first child Query, generate duplicates of all
1291  * subquery RTEs, and adjust Var numbering to reference the
1292  * duplicates. To simplify the loop logic, we scan the original rtable
1293  * not the copy just made by adjust_appendrel_attrs; that should be OK
1294  * since subquery RTEs couldn't contain any references to the target
1295  * rel.
1296  */
1297  if (final_rtable != NIL && subqueryRTindexes != NULL)
1298  {
1299  ListCell *lr;
1300 
1301  rti = 1;
1302  foreach(lr, parent_parse->rtable)
1303  {
1305 
1306  if (bms_is_member(rti, subqueryRTindexes))
1307  {
1308  Index newrti;
1309 
1310  /*
1311  * The RTE can't contain any references to its own RT
1312  * index, except in its securityQuals, so we can save a
1313  * few cycles by applying ChangeVarNodes to the rest of
1314  * the rangetable before we append the RTE to it.
1315  */
1316  newrti = list_length(subroot->parse->rtable) + 1;
1317  ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1318  ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1319  /* Skip processing unchanging parts of append_rel_list */
1320  if (modifiableARIindexes != NULL)
1321  {
1322  ListCell *lc2;
1323 
1324  foreach(lc2, subroot->append_rel_list)
1325  {
1326  AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
1327 
1328  if (bms_is_member(appinfo2->child_relid,
1329  modifiableARIindexes))
1330  ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1331  }
1332  }
1333  rte = copyObject(rte);
1334  ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1335  subroot->parse->rtable = lappend(subroot->parse->rtable,
1336  rte);
1337  }
1338  rti++;
1339  }
1340  }
1341 
1342  /* There shouldn't be any OJ info to translate, as yet */
1343  Assert(subroot->join_info_list == NIL);
1344  /* and we haven't created PlaceHolderInfos, either */
1345  Assert(subroot->placeholder_list == NIL);
1346  /* hack to mark target relation as an inheritance partition */
1347  subroot->hasInheritedTarget = true;
1348 
1349  /*
1350  * If the child is further partitioned, remember it as a parent. Since
1351  * a partitioned table does not have any data, we don't need to create
1352  * a plan for it. We do, however, need to remember the PlannerInfo for
1353  * use when processing its children.
1354  */
1355  if (child_rte->inh)
1356  {
1357  Assert(child_rte->relkind == RELKIND_PARTITIONED_TABLE);
1358  parent_relids =
1359  bms_add_member(parent_relids, appinfo->child_relid);
1360  parent_roots[appinfo->child_relid] = subroot;
1361 
1362  continue;
1363  }
1364 
1365  /* Generate Path(s) for accessing this result relation */
1366  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1367 
1368  /*
1369  * Set the nominal target relation of the ModifyTable node if not
1370  * already done. We use the inheritance parent RTE as the nominal
1371  * target relation if it's a partitioned table (see just above this
1372  * loop). In the non-partitioned parent case, we'll use the first
1373  * child relation (even if it's excluded) as the nominal target
1374  * relation. Because of the way expand_inherited_rtentry works, the
1375  * latter should be the RTE representing the parent table in its role
1376  * as a simple member of the inheritance set.
1377  *
1378  * It would be logically cleaner to *always* use the inheritance
1379  * parent RTE as the nominal relation; but that RTE is not otherwise
1380  * referenced in the plan in the non-partitioned inheritance case.
1381  * Instead the duplicate child RTE created by expand_inherited_rtentry
1382  * is used elsewhere in the plan, so using the original parent RTE
1383  * would give rise to confusing use of multiple aliases in EXPLAIN
1384  * output for what the user will think is the "same" table. OTOH,
1385  * it's not a problem in the partitioned inheritance case, because the
1386  * duplicate child RTE added for the parent does not appear anywhere
1387  * else in the plan tree.
1388  */
1389  if (nominalRelation < 0)
1390  nominalRelation = appinfo->child_relid;
1391 
1392  /*
1393  * Select cheapest path in case there's more than one. We always run
1394  * modification queries to conclusion, so we care only for the
1395  * cheapest-total path.
1396  */
1397  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1398  set_cheapest(sub_final_rel);
1399  subpath = sub_final_rel->cheapest_total_path;
1400 
1401  /*
1402  * If this child rel was excluded by constraint exclusion, exclude it
1403  * from the result plan.
1404  */
1405  if (IS_DUMMY_PATH(subpath))
1406  continue;
1407 
1408  /*
1409  * If this is the first non-excluded child, its post-planning rtable
1410  * becomes the initial contents of final_rtable; otherwise, append
1411  * just its modified subquery RTEs to final_rtable.
1412  */
1413  if (final_rtable == NIL)
1414  final_rtable = subroot->parse->rtable;
1415  else
1416  final_rtable = list_concat(final_rtable,
1417  list_copy_tail(subroot->parse->rtable,
1418  list_length(final_rtable)));
1419 
1420  /*
1421  * We need to collect all the RelOptInfos from all child plans into
1422  * the main PlannerInfo, since setrefs.c will need them. We use the
1423  * last child's simple_rel_array (previous ones are too short), so we
1424  * have to propagate forward the RelOptInfos that were already built
1425  * in previous children.
1426  */
1427  Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1428  for (rti = 1; rti < save_rel_array_size; rti++)
1429  {
1430  RelOptInfo *brel = save_rel_array[rti];
1431 
1432  if (brel)
1433  subroot->simple_rel_array[rti] = brel;
1434  }
1435  save_rel_array_size = subroot->simple_rel_array_size;
1436  save_rel_array = subroot->simple_rel_array;
1437 
1438  /* Make sure any initplans from this rel get into the outer list */
1439  root->init_plans = subroot->init_plans;
1440 
1441  /* Build list of sub-paths */
1442  subpaths = lappend(subpaths, subpath);
1443 
1444  /* Build list of modified subroots, too */
1445  subroots = lappend(subroots, subroot);
1446 
1447  /* Build list of target-relation RT indexes */
1448  resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1449 
1450  /* Build lists of per-relation WCO and RETURNING targetlists */
1451  if (parse->withCheckOptions)
1452  withCheckOptionLists = lappend(withCheckOptionLists,
1453  subroot->parse->withCheckOptions);
1454  if (parse->returningList)
1455  returningLists = lappend(returningLists,
1456  subroot->parse->returningList);
1457 
1458  Assert(!parse->onConflict);
1459  }
1460 
1461  /* Result path must go into outer query's FINAL upperrel */
1462  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1463 
1464  /*
1465  * We don't currently worry about setting final_rel's consider_parallel
1466  * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1467  * to get control here.
1468  */
1469 
1470  /*
1471  * If we managed to exclude every child rel, return a dummy plan; it
1472  * doesn't even need a ModifyTable node.
1473  */
1474  if (subpaths == NIL)
1475  {
1476  set_dummy_rel_pathlist(final_rel);
1477  return;
1478  }
1479 
1480  /*
1481  * Put back the final adjusted rtable into the master copy of the Query.
1482  * (We mustn't do this if we found no non-excluded children.)
1483  */
1484  parse->rtable = final_rtable;
1485  root->simple_rel_array_size = save_rel_array_size;
1486  root->simple_rel_array = save_rel_array;
1487  /* Must reconstruct master's simple_rte_array, too */
1488  root->simple_rte_array = (RangeTblEntry **)
1489  palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1490  rti = 1;
1491  foreach(lc, final_rtable)
1492  {
1494 
1495  root->simple_rte_array[rti++] = rte;
1496  }
1497 
1498  /*
1499  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1500  * have dealt with fetching non-locked marked rows, else we need to have
1501  * ModifyTable do that.
1502  */
1503  if (parse->rowMarks)
1504  rowMarks = NIL;
1505  else
1506  rowMarks = root->rowMarks;
1507 
1508  /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1509  add_path(final_rel, (Path *)
1510  create_modifytable_path(root, final_rel,
1511  parse->commandType,
1512  parse->canSetTag,
1513  nominalRelation,
1514  partitioned_rels,
1515  resultRelations,
1516  subpaths,
1517  subroots,
1518  withCheckOptionLists,
1519  returningLists,
1520  rowMarks,
1521  NULL,
1522  SS_assign_special_param(root)));
1523 }
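/*
 * For example (hypothetical relations), "UPDATE parent SET x = x + 1" where
 * parent has inheritance children c1 and c2 is planned here as three
 * per-table plans (parent, c1, c2) under one ModifyTable; if parent were a
 * partitioned table it would contribute no plan of its own, only its leaf
 * partitions would.
 */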
1524 
1525 /*--------------------
1526  * grouping_planner
1527  * Perform planning steps related to grouping, aggregation, etc.
1528  *
1529  * This function adds all required top-level processing to the scan/join
1530  * Path(s) produced by query_planner.
1531  *
1532  * If inheritance_update is true, we're being called from inheritance_planner
1533  * and should not include a ModifyTable step in the resulting Path(s).
1534  * (inheritance_planner will create a single ModifyTable node covering all the
1535  * target tables.)
1536  *
1537  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1538  * tuple_fraction is interpreted as follows:
1539  * 0: expect all tuples to be retrieved (normal case)
1540  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1541  * from the plan to be retrieved
1542  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1543  * expected to be retrieved (ie, a LIMIT specification)
1544  *
1545  * Returns nothing; the useful output is in the Paths we attach to the
1546  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1547  * root->processed_tlist contains the final processed targetlist.
1548  *
1549  * Note that we have not done set_cheapest() on the final rel; it's convenient
1550  * to leave this to the caller.
1551  *--------------------
1552  */
1553 static void
1554 grouping_planner(PlannerInfo *root, bool inheritance_update,
1555  double tuple_fraction)
1556 {
1557  Query *parse = root->parse;
1558  List *tlist;
1559  int64 offset_est = 0;
1560  int64 count_est = 0;
1561  double limit_tuples = -1.0;
1562  bool have_postponed_srfs = false;
1563  PathTarget *final_target;
1564  List *final_targets;
1565  List *final_targets_contain_srfs;
1566  RelOptInfo *current_rel;
1567  RelOptInfo *final_rel;
1568  ListCell *lc;
1569 
1570  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1571  if (parse->limitCount || parse->limitOffset)
1572  {
1573  tuple_fraction = preprocess_limit(root, tuple_fraction,
1574  &offset_est, &count_est);
1575 
1576  /*
1577  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1578  * estimate the effects of using a bounded sort.
1579  */
1580  if (count_est > 0 && offset_est >= 0)
1581  limit_tuples = (double) count_est + (double) offset_est;
1582  }
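 /*
  * For instance, "... OFFSET 20 LIMIT 100" with both values known at plan
  * time gives offset_est = 20 and count_est = 100, so limit_tuples = 120
  * and a bounded sort of just 120 tuples can be assumed when costing.
  */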
1583 
1584  /* Make tuple_fraction accessible to lower-level routines */
1585  root->tuple_fraction = tuple_fraction;
1586 
1587  if (parse->setOperations)
1588  {
1589  /*
1590  * If there's a top-level ORDER BY, assume we have to fetch all the
1591  * tuples. This might be too simplistic given all the hackery below
1592  * to possibly avoid the sort; but the odds of accurate estimates here
1593  * are pretty low anyway. XXX try to get rid of this in favor of
1594  * letting plan_set_operations generate both fast-start and
1595  * cheapest-total paths.
1596  */
1597  if (parse->sortClause)
1598  root->tuple_fraction = 0.0;
1599 
1600  /*
1601  * Construct Paths for set operations. The results will not need any
1602  * work except perhaps a top-level sort and/or LIMIT. Note that any
1603  * special work for recursive unions is the responsibility of
1604  * plan_set_operations.
1605  */
1606  current_rel = plan_set_operations(root);
1607 
1608  /*
1609  * We should not need to call preprocess_targetlist, since we must be
1610  * in a SELECT query node. Instead, use the targetlist returned by
1611  * plan_set_operations (since this tells whether it returned any
1612  * resjunk columns!), and transfer any sort key information from the
1613  * original tlist.
1614  */
1615  Assert(parse->commandType == CMD_SELECT);
1616 
1617  tlist = root->processed_tlist; /* from plan_set_operations */
1618 
1619  /* for safety, copy processed_tlist instead of modifying in-place */
1620  tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1621 
1622  /* Save aside the final decorated tlist */
1623  root->processed_tlist = tlist;
1624 
1625  /* Also extract the PathTarget form of the setop result tlist */
1626  final_target = current_rel->cheapest_total_path->pathtarget;
1627 
1628  /* The setop result tlist couldn't contain any SRFs */
1629  Assert(!parse->hasTargetSRFs);
1630  final_targets = final_targets_contain_srfs = NIL;
1631 
1632  /*
1633  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1634  * checked already, but let's make sure).
1635  */
1636  if (parse->rowMarks)
1637  ereport(ERROR,
1638  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1639  /*------
1640  translator: %s is a SQL row locking clause such as FOR UPDATE */
1641  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1642  LCS_asString(linitial_node(RowMarkClause,
1643  parse->rowMarks)->strength))));
1644 
1645  /*
1646  * Calculate pathkeys that represent result ordering requirements
1647  */
1648  Assert(parse->distinctClause == NIL);
1649  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1650  parse->sortClause,
1651  tlist);
1652  }
1653  else
1654  {
1655  /* No set operations, do regular planning */
1656  PathTarget *sort_input_target;
1657  List *sort_input_targets;
1658  List *sort_input_targets_contain_srfs;
1659  PathTarget *grouping_target;
1660  List *grouping_targets;
1661  List *grouping_targets_contain_srfs;
1662  PathTarget *scanjoin_target;
1663  List *scanjoin_targets;
1664  List *scanjoin_targets_contain_srfs;
1665  bool have_grouping;
1666  AggClauseCosts agg_costs;
1667  WindowFuncLists *wflists = NULL;
1668  List *activeWindows = NIL;
1669  grouping_sets_data *gset_data = NULL;
1670  standard_qp_extra qp_extra;
1671 
1672  /* A recursive query should always have setOperations */
1673  Assert(!root->hasRecursion);
1674 
1675  /* Preprocess grouping sets and GROUP BY clause, if any */
1676  if (parse->groupingSets)
1677  {
1678  gset_data = preprocess_grouping_sets(root);
1679  }
1680  else
1681  {
1682  /* Preprocess regular GROUP BY clause, if any */
1683  if (parse->groupClause)
1684  parse->groupClause = preprocess_groupclause(root, NIL);
1685  }
1686 
1687  /* Preprocess targetlist */
1688  tlist = preprocess_targetlist(root);
1689 
1690  /*
1691  * We are now done hacking up the query's targetlist. Most of the
1692  * remaining planning work will be done with the PathTarget
1693  * representation of tlists, but save aside the full representation so
1694  * that we can transfer its decoration (resnames etc) to the topmost
1695  * tlist of the finished Plan.
1696  */
1697  root->processed_tlist = tlist;
1698 
1699  /*
1700  * Collect statistics about aggregates for estimating costs, and mark
1701  * all the aggregates with resolved aggtranstypes. We must do this
1702  * before slicing and dicing the tlist into various pathtargets, else
1703  * some copies of the Aggref nodes might escape being marked with the
1704  * correct transtypes.
1705  *
1706  * Note: currently, we do not detect duplicate aggregates here. This
1707  * may result in somewhat-overestimated cost, which is fine for our
1708  * purposes since all Paths will get charged the same. But at some
1709  * point we might wish to do that detection in the planner, rather
1710  * than during executor startup.
1711  */
1712  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1713  if (parse->hasAggs)
1714  {
1715  get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1716  &agg_costs);
1717  get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
1718  &agg_costs);
1719  }
1720 
1721  /*
1722  * Locate any window functions in the tlist. (We don't need to look
1723  * anywhere else, since expressions used in ORDER BY will be in there
1724  * too.) Note that they could all have been eliminated by constant
1725  * folding, in which case we don't need to do any more work.
1726  */
1727  if (parse->hasWindowFuncs)
1728  {
1729  wflists = find_window_functions((Node *) tlist,
1730  list_length(parse->windowClause));
1731  if (wflists->numWindowFuncs > 0)
1732  activeWindows = select_active_windows(root, wflists);
1733  else
1734  parse->hasWindowFuncs = false;
1735  }
1736 
1737  /*
1738  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1739  * adding logic between here and the query_planner() call. Anything
1740  * that is needed in MIN/MAX-optimizable cases will have to be
1741  * duplicated in planagg.c.
1742  */
1743  if (parse->hasAggs)
1744  preprocess_minmax_aggregates(root, tlist);
1745 
1746  /*
1747  * Figure out whether there's a hard limit on the number of rows that
1748  * query_planner's result subplan needs to return. Even if we know a
1749  * hard limit overall, it doesn't apply if the query has any
1750  * grouping/aggregation operations, or SRFs in the tlist.
1751  */
1752  if (parse->groupClause ||
1753  parse->groupingSets ||
1754  parse->distinctClause ||
1755  parse->hasAggs ||
1756  parse->hasWindowFuncs ||
1757  parse->hasTargetSRFs ||
1758  root->hasHavingQual)
1759  root->limit_tuples = -1.0;
1760  else
1761  root->limit_tuples = limit_tuples;
1762 
1763  /* Set up data needed by standard_qp_callback */
1764  qp_extra.tlist = tlist;
1765  qp_extra.activeWindows = activeWindows;
1766  qp_extra.groupClause = (gset_data
1767  ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
1768  : parse->groupClause);
1769 
1770  /*
1771  * Generate the best unsorted and presorted paths for the scan/join
1772  * portion of this Query, ie the processing represented by the
1773  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1774  * We also generate (in standard_qp_callback) pathkey representations
1775  * of the query's sort clause, distinct clause, etc.
1776  */
1777  current_rel = query_planner(root, tlist,
1778  standard_qp_callback, &qp_extra);
1779 
1780  /*
1781  * Convert the query's result tlist into PathTarget format.
1782  *
1783  * Note: it's desirable to not do this till after query_planner(),
1784  * because the target width estimates can use per-Var width numbers
1785  * that were obtained within query_planner().
1786  */
1787  final_target = create_pathtarget(root, tlist);
1788 
1789  /*
1790  * If ORDER BY was given, consider whether we should use a post-sort
1791  * projection, and compute the adjusted target for preceding steps if
1792  * so.
1793  */
1794  if (parse->sortClause)
1795  sort_input_target = make_sort_input_target(root,
1796  final_target,
1797  &have_postponed_srfs);
1798  else
1799  sort_input_target = final_target;
1800 
1801  /*
1802  * If we have window functions to deal with, the output from any
1803  * grouping step needs to be what the window functions want;
1804  * otherwise, it should be sort_input_target.
1805  */
1806  if (activeWindows)
1807  grouping_target = make_window_input_target(root,
1808  final_target,
1809  activeWindows);
1810  else
1811  grouping_target = sort_input_target;
1812 
1813  /*
1814  * If we have grouping or aggregation to do, the topmost scan/join
1815  * plan node must emit what the grouping step wants; otherwise, it
1816  * should emit grouping_target.
1817  */
1818  have_grouping = (parse->groupClause || parse->groupingSets ||
1819  parse->hasAggs || root->hasHavingQual);
1820  if (have_grouping)
1821  scanjoin_target = make_group_input_target(root, final_target);
1822  else
1823  scanjoin_target = grouping_target;
1824 
1825  /*
1826  * If there are any SRFs in the targetlist, we must separate each of
1827  * these PathTargets into SRF-computing and SRF-free targets. Replace
1828  * each of the named targets with a SRF-free version, and remember the
1829  * list of additional projection steps we need to add afterwards.
1830  */
1831  if (parse->hasTargetSRFs)
1832  {
1833  /* final_target doesn't recompute any SRFs in sort_input_target */
1834  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1835  &final_targets,
1836  &final_targets_contain_srfs);
1837  final_target = linitial_node(PathTarget, final_targets);
1838  Assert(!linitial_int(final_targets_contain_srfs));
1839  /* likewise for sort_input_target vs. grouping_target */
1840  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1841  &sort_input_targets,
1842  &sort_input_targets_contain_srfs);
1843  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1844  Assert(!linitial_int(sort_input_targets_contain_srfs));
1845  /* likewise for grouping_target vs. scanjoin_target */
1846  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1847  &grouping_targets,
1848  &grouping_targets_contain_srfs);
1849  grouping_target = linitial_node(PathTarget, grouping_targets);
1850  Assert(!linitial_int(grouping_targets_contain_srfs));
1851  /* scanjoin_target will not have any SRFs precomputed for it */
1852  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1853  &scanjoin_targets,
1854  &scanjoin_targets_contain_srfs);
1855  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1856  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1857  }
1858  else
1859  {
1860  /* initialize lists, just to keep compiler quiet */
1861  final_targets = final_targets_contain_srfs = NIL;
1862  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1863  grouping_targets = grouping_targets_contain_srfs = NIL;
1864  scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
1865  }
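 /*
  * Illustrative example (added note, not part of the original source):
  * for "SELECT a, generate_series(1, b) FROM t" (no grouping or ORDER BY),
  * all three upper targets equal the final target, and
  * split_pathtarget_at_srfs() reduces scanjoin_target to the SRF-free
  * expressions {a, b}; the SRF itself ends up in scanjoin_targets /
  * scanjoin_targets_contain_srfs and is evaluated by a ProjectSet step
  * that adjust_paths_for_srfs() adds further below.
  */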
1866 
1867  /*
1868  * Forcibly apply SRF-free scan/join target to all the Paths for the
1869  * scan/join rel.
1870  *
1871  * In principle we should re-run set_cheapest() here to identify the
1872  * cheapest path, but it seems unlikely that adding the same tlist
1873  * eval costs to all the paths would change that, so we don't bother.
1874  * Instead, just assume that the cheapest-startup and cheapest-total
1875  * paths remain so. (There should be no parameterized paths anymore,
1876  * so we needn't worry about updating cheapest_parameterized_paths.)
1877  */
1878  foreach(lc, current_rel->pathlist)
1879  {
1880  Path *subpath = (Path *) lfirst(lc);
1881  Path *path;
1882 
1883  Assert(subpath->param_info == NULL);
1884  path = apply_projection_to_path(root, current_rel,
1885  subpath, scanjoin_target);
1886  /* If we had to add a Result, path is different from subpath */
1887  if (path != subpath)
1888  {
1889  lfirst(lc) = path;
1890  if (subpath == current_rel->cheapest_startup_path)
1891  current_rel->cheapest_startup_path = path;
1892  if (subpath == current_rel->cheapest_total_path)
1893  current_rel->cheapest_total_path = path;
1894  }
1895  }
1896 
1897  /*
1898  * Upper planning steps which make use of the top scan/join rel's
1899  * partial pathlist will expect partial paths for that rel to produce
1900  * the same output as complete paths ... and we just changed the
1901  * output for the complete paths, so we'll need to do the same thing
1902  * for partial paths. But only parallel-safe expressions can be
1903  * computed by partial paths.
1904  */
1905  if (current_rel->partial_pathlist &&
1906  is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1907  {
1908  /* Apply the scan/join target to each partial path */
1909  foreach(lc, current_rel->partial_pathlist)
1910  {
1911  Path *subpath = (Path *) lfirst(lc);
1912  Path *newpath;
1913 
1914  /* Shouldn't have any parameterized paths anymore */
1915  Assert(subpath->param_info == NULL);
1916 
1917  /*
1918  * Don't use apply_projection_to_path() here, because there
1919  * could be other pointers to these paths, and therefore we
1920  * mustn't modify them in place.
1921  */
1922  newpath = (Path *) create_projection_path(root,
1923  current_rel,
1924  subpath,
1925  scanjoin_target);
1926  lfirst(lc) = newpath;
1927  }
1928  }
1929  else
1930  {
1931  /*
1932  * In the unfortunate event that scanjoin_target is not
1933  * parallel-safe, we can't apply it to the partial paths; in that
1934  * case, we'll need to forget about the partial paths, which
1935  * aren't valid input for upper planning steps.
1936  */
1937  current_rel->partial_pathlist = NIL;
1938  }
1939 
1940  /* Now fix things up if scan/join target contains SRFs */
1941  if (parse->hasTargetSRFs)
1942  adjust_paths_for_srfs(root, current_rel,
1943  scanjoin_targets,
1944  scanjoin_targets_contain_srfs);
1945 
1946  /*
1947  * Save the various upper-rel PathTargets we just computed into
1948  * root->upper_targets[]. The core code doesn't use this, but it
1949  * provides a convenient place for extensions to get at the info. For
1950  * consistency, we save all the intermediate targets, even though some
1951  * of the corresponding upperrels might not be needed for this query.
1952  */
1953  root->upper_targets[UPPERREL_FINAL] = final_target;
1954  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1955  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1956 
1957  /*
1958  * If we have grouping and/or aggregation, consider ways to implement
1959  * that. We build a new upperrel representing the output of this
1960  * phase.
1961  */
1962  if (have_grouping)
1963  {
1964  current_rel = create_grouping_paths(root,
1965  current_rel,
1966  grouping_target,
1967  &agg_costs,
1968  gset_data);
1969  /* Fix things up if grouping_target contains SRFs */
1970  if (parse->hasTargetSRFs)
1971  adjust_paths_for_srfs(root, current_rel,
1972  grouping_targets,
1973  grouping_targets_contain_srfs);
1974  }
1975 
1976  /*
1977  * If we have window functions, consider ways to implement those. We
1978  * build a new upperrel representing the output of this phase.
1979  */
1980  if (activeWindows)
1981  {
1982  current_rel = create_window_paths(root,
1983  current_rel,
1984  grouping_target,
1985  sort_input_target,
1986  tlist,
1987  wflists,
1988  activeWindows);
1989  /* Fix things up if sort_input_target contains SRFs */
1990  if (parse->hasTargetSRFs)
1991  adjust_paths_for_srfs(root, current_rel,
1992  sort_input_targets,
1993  sort_input_targets_contain_srfs);
1994  }
1995 
1996  /*
1997  * If there is a DISTINCT clause, consider ways to implement that. We
1998  * build a new upperrel representing the output of this phase.
1999  */
2000  if (parse->distinctClause)
2001  {
2002  current_rel = create_distinct_paths(root,
2003  current_rel);
2004  }
2005  } /* end of if (setOperations) */
2006 
2007  /*
2008  * If ORDER BY was given, consider ways to implement that, and generate a
2009  * new upperrel containing only paths that emit the correct ordering and
2010  * project the correct final_target. We can apply the original
2011  * limit_tuples limit in sort costing here, but only if there are no
2012  * postponed SRFs.
2013  */
2014  if (parse->sortClause)
2015  {
2016  current_rel = create_ordered_paths(root,
2017  current_rel,
2018  final_target,
2019  have_postponed_srfs ? -1.0 :
2020  limit_tuples);
2021  /* Fix things up if final_target contains SRFs */
2022  if (parse->hasTargetSRFs)
2023  adjust_paths_for_srfs(root, current_rel,
2024  final_targets,
2025  final_targets_contain_srfs);
2026  }
2027 
2028  /*
2029  * Now we are prepared to build the final-output upperrel.
2030  */
2031  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
2032 
2033  /*
2034  * If the input rel is marked consider_parallel and there's nothing that's
2035  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
2036  * consider_parallel as well. Note that if the query has rowMarks or is
2037  * not a SELECT, consider_parallel will be false for every relation in the
2038  * query.
2039  */
2040  if (current_rel->consider_parallel &&
2041  is_parallel_safe(root, parse->limitOffset) &&
2042  is_parallel_safe(root, parse->limitCount))
2043  final_rel->consider_parallel = true;
2044 
2045  /*
2046  * If the current_rel belongs to a single FDW, so does the final_rel.
2047  */
2048  final_rel->serverid = current_rel->serverid;
2049  final_rel->userid = current_rel->userid;
2050  final_rel->useridiscurrent = current_rel->useridiscurrent;
2051  final_rel->fdwroutine = current_rel->fdwroutine;
2052 
2053  /*
2054  * Generate paths for the final_rel. Insert all surviving paths, with
2055  * LockRows, Limit, and/or ModifyTable steps added if needed.
2056  */
2057  foreach(lc, current_rel->pathlist)
2058  {
2059  Path *path = (Path *) lfirst(lc);
2060 
2061  /*
2062  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
2063  * (Note: we intentionally test parse->rowMarks not root->rowMarks
2064  * here. If there are only non-locking rowmarks, they should be
2065  * handled by the ModifyTable node instead. However, root->rowMarks
2066  * is what goes into the LockRows node.)
2067  */
2068  if (parse->rowMarks)
2069  {
2070  path = (Path *) create_lockrows_path(root, final_rel, path,
2071  root->rowMarks,
2072  SS_assign_special_param(root));
2073  }
2074 
2075  /*
2076  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
2077  */
2078  if (limit_needed(parse))
2079  {
2080  path = (Path *) create_limit_path(root, final_rel, path,
2081  parse->limitOffset,
2082  parse->limitCount,
2083  offset_est, count_est);
2084  }
2085 
2086  /*
2087  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
2088  * inheritance_planner, add the ModifyTable node.
2089  */
2090  if (parse->commandType != CMD_SELECT && !inheritance_update)
2091  {
2092  List *withCheckOptionLists;
2093  List *returningLists;
2094  List *rowMarks;
2095 
2096  /*
2097  * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2098  * needed.
2099  */
2100  if (parse->withCheckOptions)
2101  withCheckOptionLists = list_make1(parse->withCheckOptions);
2102  else
2103  withCheckOptionLists = NIL;
2104 
2105  if (parse->returningList)
2106  returningLists = list_make1(parse->returningList);
2107  else
2108  returningLists = NIL;
2109 
2110  /*
2111  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2112  * will have dealt with fetching non-locked marked rows, else we
2113  * need to have ModifyTable do that.
2114  */
2115  if (parse->rowMarks)
2116  rowMarks = NIL;
2117  else
2118  rowMarks = root->rowMarks;
2119 
2120  path = (Path *)
2121  create_modifytable_path(root, final_rel,
2122  parse->commandType,
2123  parse->canSetTag,
2124  parse->resultRelation,
2125  NIL,
2126  list_make1_int(parse->resultRelation),
2127  list_make1(path),
2128  list_make1(root),
2129  withCheckOptionLists,
2130  returningLists,
2131  rowMarks,
2132  parse->onConflict,
2133  SS_assign_special_param(root));
2134  }
2135 
2136  /* And shove it into final_rel */
2137  add_path(final_rel, path);
2138  }
2139 
2140  /*
2141  * If there is an FDW that's responsible for all baserels of the query,
2142  * let it consider adding ForeignPaths.
2143  */
2144  if (final_rel->fdwroutine &&
2145  final_rel->fdwroutine->GetForeignUpperPaths)
2146  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2147  current_rel, final_rel);
2148 
2149  /* Let extensions possibly add some more paths */
2150  if (create_upper_paths_hook)
2151  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2152  current_rel, final_rel);
2153 
2154  /* Note: currently, we leave it to callers to do set_cheapest() */
2155 }
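/*
 * Minimal sketch (added illustration, not part of planner.c) of an
 * extension using the create_upper_paths_hook invoked just above.  Such
 * code would live in a separate loadable module; the function and
 * variable names below are hypothetical, while the hook type and
 * UPPERREL_FINAL come from the optimizer headers.
 */
#ifdef ILLUSTRATIVE_EXAMPLE_ONLY
#include "postgres.h"
#include "fmgr.h"
#include "optimizer/planner.h"

PG_MODULE_MAGIC;

static create_upper_paths_hook_type prev_create_upper_paths_hook = NULL;

/* invoked at each upper-planning stage that offers the hook */
static void
example_create_upper_paths(PlannerInfo *root, UpperRelationKind stage,
						   RelOptInfo *input_rel, RelOptInfo *output_rel)
{
	/* chain to any previously installed hook first */
	if (prev_create_upper_paths_hook)
		prev_create_upper_paths_hook(root, stage, input_rel, output_rel);

	/* only look at the final output rel, matching the call site above */
	if (stage != UPPERREL_FINAL)
		return;

	/* a real extension would add_path() its own Paths to output_rel here */
}

void
_PG_init(void)
{
	prev_create_upper_paths_hook = create_upper_paths_hook;
	create_upper_paths_hook = example_create_upper_paths;
}
#endif							/* ILLUSTRATIVE_EXAMPLE_ONLY */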
2156 
2157 /*
2158  * Do preprocessing for groupingSets clause and related data. This handles the
2159  * preliminary steps of expanding the grouping sets, organizing them into lists
2160  * of rollups, and preparing annotations which will later be filled in with
2161  * size estimates.
2162  */
2163 static grouping_sets_data *
2164 preprocess_grouping_sets(PlannerInfo *root)
2165 {
2166  Query *parse = root->parse;
2167  List *sets;
2168  int maxref = 0;
2169  ListCell *lc;
2170  ListCell *lc_set;
2171  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2172 
2173  parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2174 
2175  gd->any_hashable = false;
2176  gd->unhashable_refs = NULL;
2177  gd->unsortable_refs = NULL;
2178  gd->unsortable_sets = NIL;
2179 
2180  if (parse->groupClause)
2181  {
2182  ListCell *lc;
2183 
2184  foreach(lc, parse->groupClause)
2185  {
2186  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2187  Index ref = gc->tleSortGroupRef;
2188 
2189  if (ref > maxref)
2190  maxref = ref;
2191 
2192  if (!gc->hashable)
2193  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2194 
2195  if (!OidIsValid(gc->sortop))
2196  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2197  }
2198  }
2199 
2200  /* Allocate workspace array for remapping */
2201  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2202 
2203  /*
2204  * If we have any unsortable sets, we must extract them before trying to
2205  * prepare rollups. Unsortable sets don't go through
2206  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2207  * here.
2208  */
2209  if (!bms_is_empty(gd->unsortable_refs))
2210  {
2211  List *sortable_sets = NIL;
2212 
2213  foreach(lc, parse->groupingSets)
2214  {
2215  List *gset = (List *) lfirst(lc);
2216 
2217  if (bms_overlap_list(gd->unsortable_refs, gset))
2218  {
2219  GroupingSetData *gs = makeNode(GroupingSetData);
2220 
2221  gs->set = gset;
2222  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2223 
2224  /*
2225  * We must enforce here that an unsortable set is hashable;
2226  * later code assumes this. Parse analysis only checks that
2227  * every individual column is either hashable or sortable.
2228  *
2229  * Note that passing this test doesn't guarantee we can
2230  * generate a plan; there might be other showstoppers.
2231  */
2232  if (bms_overlap_list(gd->unhashable_refs, gset))
2233  ereport(ERROR,
2234  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2235  errmsg("could not implement GROUP BY"),
2236  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2237  }
2238  else
2239  sortable_sets = lappend(sortable_sets, gset);
2240  }
2241 
2242  if (sortable_sets)
2243  sets = extract_rollup_sets(sortable_sets);
2244  else
2245  sets = NIL;
2246  }
2247  else
2248  sets = extract_rollup_sets(parse->groupingSets);
2249 
2250  foreach(lc_set, sets)
2251  {
2252  List *current_sets = (List *) lfirst(lc_set);
2253  RollupData *rollup = makeNode(RollupData);
2254  GroupingSetData *gs;
2255 
2256  /*
2257  * Reorder the current list of grouping sets into correct prefix
2258  * order. If only one aggregation pass is needed, try to make the
2259  * list match the ORDER BY clause; if more than one pass is needed, we
2260  * don't bother with that.
2261  *
2262  * Note that this reorders the sets from smallest-member-first to
2263  * largest-member-first, and applies the GroupingSetData annotations,
2264  * though the data will be filled in later.
2265  */
2266  current_sets = reorder_grouping_sets(current_sets,
2267  (list_length(sets) == 1
2268  ? parse->sortClause
2269  : NIL));
2270 
2271  /*
2272  * Get the initial (and therefore largest) grouping set.
2273  */
2274  gs = linitial_node(GroupingSetData, current_sets);
2275 
2276  /*
2277  * Order the groupClause appropriately. If the first grouping set is
2278  * empty, then the groupClause must also be empty; otherwise we have
2279  * to force the groupClause to match that grouping set's order.
2280  *
2281  * (The first grouping set can be empty even though parse->groupClause
2282  * is not empty only if all non-empty grouping sets are unsortable.
2283  * The groupClauses for hashed grouping sets are built later on.)
2284  */
2285  if (gs->set)
2286  rollup->groupClause = preprocess_groupclause(root, gs->set);
2287  else
2288  rollup->groupClause = NIL;
2289 
2290  /*
2291  * Is it hashable? We pretend empty sets are hashable even though we
2292  * actually force them not to be hashed later. But don't bother if
2293  * there's nothing but empty sets (since in that case we can't hash
2294  * anything).
2295  */
2296  if (gs->set &&
2297  !bms_overlap_list(gd->unhashable_refs, gs->set))
2298  {
2299  rollup->hashable = true;
2300  gd->any_hashable = true;
2301  }
2302 
2303  /*
2304  * Now that we've pinned down an order for the groupClause for this
2305  * list of grouping sets, we need to remap the entries in the grouping
2306  * sets from sortgrouprefs to plain indices (0-based) into the
2307  * groupClause for this collection of grouping sets. We keep the
2308  * original form for later use, though.
2309  */
2310  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2311  current_sets,
2312  gd->tleref_to_colnum_map);
2313  rollup->gsets_data = current_sets;
2314 
2315  gd->rollups = lappend(gd->rollups, rollup);
2316  }
2317 
2318  if (gd->unsortable_sets)
2319  {
2320  /*
2321  * We have not yet pinned down a groupclause for this, but we will
2322  * need index-based lists for estimation purposes. Construct
2323  * hash_sets_idx based on the entire original groupclause for now.
2324  */
2325  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2326  gd->unsortable_sets,
2327  gd->tleref_to_colnum_map);
2328  gd->any_hashable = true;
2329  }
2330 
2331  return gd;
2332 }
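 /*
  * Illustrative example (added note, not part of the original source):
  * GROUP BY GROUPING SETS ((a,b), (a), ()) needs only one aggregation
  * pass, so this produces a single RollupData whose groupClause is
  * ordered (a, b); GROUP BY CUBE (a, b) expands to {a,b}, {a}, {b}, {}
  * and cannot be covered by one chain, so it yields two rollups, e.g.
  * one for {(a,b), (a), ()} and one for {(b)}.
  */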
2333 
2334 /*
2335  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2336  * (without annotation) mapped to indexes into the given groupclause.
2337  */
2338 static List *
2339 remap_to_groupclause_idx(List *groupClause,
2340  List *gsets,
2341  int *tleref_to_colnum_map)
2342 {
2343  int ref = 0;
2344  List *result = NIL;
2345  ListCell *lc;
2346 
2347  foreach(lc, groupClause)
2348  {
2349  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2350 
2351  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2352  }
2353 
2354  foreach(lc, gsets)
2355  {
2356  List *set = NIL;
2357  ListCell *lc2;
2358  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2359 
2360  foreach(lc2, gs->set)
2361  {
2362  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2363  }
2364 
2365  result = lappend(result, set);
2366  }
2367 
2368  return result;
2369 }
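 /*
  * Illustrative worked example (added note, not part of the original
  * source): if the groupClause entries carry tleSortGroupRefs (3, 5, 7),
  * the map becomes 3->0, 5->1, 7->2, so a grouping set stored as the
  * sortgroupref list (7, 3) is returned as the column-index list (2, 0).
  */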
2370 
2371 
2372 
2373 /*
2374  * Detect whether a plan node is a "dummy" plan created when a relation
2375  * is deemed not to need scanning due to constraint exclusion.
2376  *
2377  * Currently, such dummy plans are Result nodes with constant FALSE
2378  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2379  *
2380  * XXX this probably ought to be somewhere else, but not clear where.
2381  */
2382 bool
2383 is_dummy_plan(Plan *plan)
2384 {
2385  if (IsA(plan, Result))
2386  {
2387  List *rcqual = (List *) ((Result *) plan)->resconstantqual;
2388 
2389  if (list_length(rcqual) == 1)
2390  {
2391  Const *constqual = (Const *) linitial(rcqual);
2392 
2393  if (constqual && IsA(constqual, Const))
2394  {
2395  if (!constqual->constisnull &&
2396  !DatumGetBool(constqual->constvalue))
2397  return true;
2398  }
2399  }
2400  }
2401  return false;
2402 }
2403 
2404 /*
2405  * preprocess_rowmarks - set up PlanRowMarks if needed
2406  */
2407 static void
2408 preprocess_rowmarks(PlannerInfo *root)
2409 {
2410  Query *parse = root->parse;
2411  Bitmapset *rels;
2412  List *prowmarks;
2413  ListCell *l;
2414  int i;
2415 
2416  if (parse->rowMarks)
2417  {
2418  /*
2419  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2420  * grouping, since grouping renders a reference to individual tuple
2421  * CTIDs invalid. This is also checked at parse time, but that's
2422  * insufficient because of rule substitution, query pullup, etc.
2423  */
2424  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2425  parse->rowMarks)->strength);
2426  }
2427  else
2428  {
2429  /*
2430  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2431  * UPDATE/SHARE.
2432  */
2433  if (parse->commandType != CMD_UPDATE &&
2434  parse->commandType != CMD_DELETE)
2435  return;
2436  }
2437 
2438  /*
2439  * We need to have rowmarks for all base relations except the target. We
2440  * make a bitmapset of all base rels and then remove the items we don't
2441  * need or have FOR [KEY] UPDATE/SHARE marks for.
2442  */
2443  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2444  if (parse->resultRelation)
2445  rels = bms_del_member(rels, parse->resultRelation);
2446 
2447  /*
2448  * Convert RowMarkClauses to PlanRowMark representation.
2449  */
2450  prowmarks = NIL;
2451  foreach(l, parse->rowMarks)
2452  {
2453  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2454  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2455  PlanRowMark *newrc;
2456 
2457  /*
2458  * Currently, it is syntactically impossible to have FOR UPDATE et al
2459  * applied to an update/delete target rel. If that ever becomes
2460  * possible, we should drop the target from the PlanRowMark list.
2461  */
2462  Assert(rc->rti != parse->resultRelation);
2463 
2464  /*
2465  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2466  * can't support true locking. Subqueries that got flattened into the
2467  * main query should be ignored completely. Any that didn't will get
2468  * ROW_MARK_COPY items in the next loop.
2469  */
2470  if (rte->rtekind != RTE_RELATION)
2471  continue;
2472 
2473  rels = bms_del_member(rels, rc->rti);
2474 
2475  newrc = makeNode(PlanRowMark);
2476  newrc->rti = newrc->prti = rc->rti;
2477  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2478  newrc->markType = select_rowmark_type(rte, rc->strength);
2479  newrc->allMarkTypes = (1 << newrc->markType);
2480  newrc->strength = rc->strength;
2481  newrc->waitPolicy = rc->waitPolicy;
2482  newrc->isParent = false;
2483 
2484  prowmarks = lappend(prowmarks, newrc);
2485  }
2486 
2487  /*
2488  * Now, add rowmarks for any non-target, non-locked base relations.
2489  */
2490  i = 0;
2491  foreach(l, parse->rtable)
2492  {
2493  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2494  PlanRowMark *newrc;
2495 
2496  i++;
2497  if (!bms_is_member(i, rels))
2498  continue;
2499 
2500  newrc = makeNode(PlanRowMark);
2501  newrc->rti = newrc->prti = i;
2502  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2503  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2504  newrc->allMarkTypes = (1 << newrc->markType);
2505  newrc->strength = LCS_NONE;
2506  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2507  newrc->isParent = false;
2508 
2509  prowmarks = lappend(prowmarks, newrc);
2510  }
2511 
2512  root->rowMarks = prowmarks;
2513 }
2514 
2515 /*
2516  * Select RowMarkType to use for a given table
2517  */
2518 RowMarkType
2519 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2520 {
2521  if (rte->rtekind != RTE_RELATION)
2522  {
2523  /* If it's not a table at all, use ROW_MARK_COPY */
2524  return ROW_MARK_COPY;
2525  }
2526  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2527  {
2528  /* Let the FDW select the rowmark type, if it wants to */
2529  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2530 
2531  if (fdwroutine->GetForeignRowMarkType != NULL)
2532  return fdwroutine->GetForeignRowMarkType(rte, strength);
2533  /* Otherwise, use ROW_MARK_COPY by default */
2534  return ROW_MARK_COPY;
2535  }
2536  else
2537  {
2538  /* Regular table, apply the appropriate lock type */
2539  switch (strength)
2540  {
2541  case LCS_NONE:
2542 
2543  /*
2544  * We don't need a tuple lock, only the ability to re-fetch
2545  * the row.
2546  */
2547  return ROW_MARK_REFERENCE;
2548  break;
2549  case LCS_FORKEYSHARE:
2550  return ROW_MARK_KEYSHARE;
2551  break;
2552  case LCS_FORSHARE:
2553  return ROW_MARK_SHARE;
2554  break;
2555  case LCS_FORNOKEYUPDATE:
2556  return ROW_MARK_NOKEYEXCLUSIVE;
2557  break;
2558  case LCS_FORUPDATE:
2559  return ROW_MARK_EXCLUSIVE;
2560  break;
2561  }
2562  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2563  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2564  }
2565 }
2566 
2567 /*
2568  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2569  *
2570  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2571  * results back in *count_est and *offset_est. These variables are set to
2572  * 0 if the corresponding clause is not present, and -1 if it's present
2573  * but we couldn't estimate the value for it. (The "0" convention is OK
2574  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2575  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2576  * usual practice of never estimating less than one row.) These values will
2577  * be passed to create_limit_path, which see if you change this code.
2578  *
2579  * The return value is the suitably adjusted tuple_fraction to use for
2580  * planning the query. This adjustment is not overridable, since it reflects
2581  * plan actions that grouping_planner() will certainly take, not assumptions
2582  * about context.
2583  */
2584 static double
2585 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2586  int64 *offset_est, int64 *count_est)
2587 {
2588  Query *parse = root->parse;
2589  Node *est;
2590  double limit_fraction;
2591 
2592  /* Should not be called unless LIMIT or OFFSET */
2593  Assert(parse->limitCount || parse->limitOffset);
2594 
2595  /*
2596  * Try to obtain the clause values. We use estimate_expression_value
2597  * primarily because it can sometimes do something useful with Params.
2598  */
2599  if (parse->limitCount)
2600  {
2601  est = estimate_expression_value(root, parse->limitCount);
2602  if (est && IsA(est, Const))
2603  {
2604  if (((Const *) est)->constisnull)
2605  {
2606  /* NULL indicates LIMIT ALL, ie, no limit */
2607  *count_est = 0; /* treat as not present */
2608  }
2609  else
2610  {
2611  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2612  if (*count_est <= 0)
2613  *count_est = 1; /* force to at least 1 */
2614  }
2615  }
2616  else
2617  *count_est = -1; /* can't estimate */
2618  }
2619  else
2620  *count_est = 0; /* not present */
2621 
2622  if (parse->limitOffset)
2623  {
2624  est = estimate_expression_value(root, parse->limitOffset);
2625  if (est && IsA(est, Const))
2626  {
2627  if (((Const *) est)->constisnull)
2628  {
2629  /* Treat NULL as no offset; the executor will too */
2630  *offset_est = 0; /* treat as not present */
2631  }
2632  else
2633  {
2634  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2635  if (*offset_est < 0)
2636  *offset_est = 0; /* treat as not present */
2637  }
2638  }
2639  else
2640  *offset_est = -1; /* can't estimate */
2641  }
2642  else
2643  *offset_est = 0; /* not present */
2644 
2645  if (*count_est != 0)
2646  {
2647  /*
2648  * A LIMIT clause limits the absolute number of tuples returned.
2649  * However, if it's not a constant LIMIT then we have to guess; for
2650  * lack of a better idea, assume 10% of the plan's result is wanted.
2651  */
2652  if (*count_est < 0 || *offset_est < 0)
2653  {
2654  /* LIMIT or OFFSET is an expression ... punt ... */
2655  limit_fraction = 0.10;
2656  }
2657  else
2658  {
2659  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2660  limit_fraction = (double) *count_est + (double) *offset_est;
2661  }
2662 
2663  /*
2664  * If we have absolute limits from both caller and LIMIT, use the
2665  * smaller value; likewise if they are both fractional. If one is
2666  * fractional and the other absolute, we can't easily determine which
2667  * is smaller, but we use the heuristic that the absolute will usually
2668  * be smaller.
2669  */
2670  if (tuple_fraction >= 1.0)
2671  {
2672  if (limit_fraction >= 1.0)
2673  {
2674  /* both absolute */
2675  tuple_fraction = Min(tuple_fraction, limit_fraction);
2676  }
2677  else
2678  {
2679  /* caller absolute, limit fractional; use caller's value */
2680  }
2681  }
2682  else if (tuple_fraction > 0.0)
2683  {
2684  if (limit_fraction >= 1.0)
2685  {
2686  /* caller fractional, limit absolute; use limit */
2687  tuple_fraction = limit_fraction;
2688  }
2689  else
2690  {
2691  /* both fractional */
2692  tuple_fraction = Min(tuple_fraction, limit_fraction);
2693  }
2694  }
2695  else
2696  {
2697  /* no info from caller, just use limit */
2698  tuple_fraction = limit_fraction;
2699  }
2700  }
2701  else if (*offset_est != 0 && tuple_fraction > 0.0)
2702  {
2703  /*
2704  * We have an OFFSET but no LIMIT. This acts entirely differently
2705  * from the LIMIT case: here, we need to increase rather than decrease
2706  * the caller's tuple_fraction, because the OFFSET acts to cause more
2707  * tuples to be fetched instead of fewer. This only matters if we got
2708  * a tuple_fraction > 0, however.
2709  *
2710  * As above, use 10% if OFFSET is present but unestimatable.
2711  */
2712  if (*offset_est < 0)
2713  limit_fraction = 0.10;
2714  else
2715  limit_fraction = (double) *offset_est;
2716 
2717  /*
2718  * If we have absolute counts from both caller and OFFSET, add them
2719  * together; likewise if they are both fractional. If one is
2720  * fractional and the other absolute, we want to take the larger, and
2721  * we heuristically assume that's the fractional one.
2722  */
2723  if (tuple_fraction >= 1.0)
2724  {
2725  if (limit_fraction >= 1.0)
2726  {
2727  /* both absolute, so add them together */
2728  tuple_fraction += limit_fraction;
2729  }
2730  else
2731  {
2732  /* caller absolute, limit fractional; use limit */
2733  tuple_fraction = limit_fraction;
2734  }
2735  }
2736  else
2737  {
2738  if (limit_fraction >= 1.0)
2739  {
2740  /* caller fractional, limit absolute; use caller's value */
2741  }
2742  else
2743  {
2744  /* both fractional, so add them together */
2745  tuple_fraction += limit_fraction;
2746  if (tuple_fraction >= 1.0)
2747  tuple_fraction = 0.0; /* assume fetch all */
2748  }
2749  }
2750  }
2751 
2752  return tuple_fraction;
2753 }
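 /*
  * Illustrative worked examples (added note, not part of the original
  * source): if the LIMIT argument cannot be reduced to a constant (say a
  * volatile expression or an unknown parameter), *count_est is set to -1
  * and limit_fraction falls back to 0.10, i.e. we assume 10% of the rows
  * will be fetched.  For "OFFSET 100" with no LIMIT and a hypothetical
  * caller-supplied tuple_fraction of 10, both values are absolute counts,
  * so they are added and the function returns 110.
  */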
2754 
2755 /*
2756  * limit_needed - do we actually need a Limit plan node?
2757  *
2758  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2759  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2760  * locution for an optimization fence. (Because other places in the planner
2761  * merely check whether parse->limitOffset isn't NULL, it will still work as
2762  * an optimization fence --- we're just suppressing unnecessary run-time
2763  * overhead.)
2764  *
2765  * This might look like it could be merged into preprocess_limit, but there's
2766  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2767  * in preprocess_limit it's good enough to consider estimated values.
2768  */
2769 static bool
2770 limit_needed(Query *parse)
2771 {
2772  Node *node;
2773 
2774  node = parse->limitCount;
2775  if (node)
2776  {
2777  if (IsA(node, Const))
2778  {
2779  /* NULL indicates LIMIT ALL, ie, no limit */
2780  if (!((Const *) node)->constisnull)
2781  return true; /* LIMIT with a constant value */
2782  }
2783  else
2784  return true; /* non-constant LIMIT */
2785  }
2786 
2787  node = parse->limitOffset;
2788  if (node)
2789  {
2790  if (IsA(node, Const))
2791  {
2792  /* Treat NULL as no offset; the executor would too */
2793  if (!((Const *) node)->constisnull)
2794  {
2795  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2796 
2797  if (offset != 0)
2798  return true; /* OFFSET with a nonzero value */
2799  }
2800  }
2801  else
2802  return true; /* non-constant OFFSET */
2803  }
2804 
2805  return false; /* don't need a Limit plan node */
2806 }
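 /*
  * Illustrative example (added note, not part of the original source):
  * "... OFFSET 0" with no LIMIT still acts as an optimization fence but
  * returns false here, so no Limit node is emitted; "... LIMIT NULL
  * OFFSET 0" likewise needs no Limit node, while "... LIMIT 0" or an
  * OFFSET that is not a simple constant does.
  */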
2807 
2808 
2809 /*
2810  * remove_useless_groupby_columns
2811  * Remove any columns in the GROUP BY clause that are redundant due to
2812  * being functionally dependent on other GROUP BY columns.
2813  *
2814  * Since some other DBMSes do not allow references to ungrouped columns, it's
2815  * not unusual to find all columns listed in GROUP BY even though listing the
2816  * primary-key columns would be sufficient. Deleting such excess columns
2817  * avoids redundant sorting work, so it's worth doing. When we do this, we
2818  * must mark the plan as dependent on the pkey constraint (compare the
2819  * parser's check_ungrouped_columns() and check_functional_grouping()).
2820  *
2821  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2822  * index as the determining columns. But as with check_functional_grouping(),
2823  * there's currently no way to represent dependency on a NOT NULL constraint,
2824  * so we consider only the pkey for now.
2825  */
2826 static void
2827 remove_useless_groupby_columns(PlannerInfo *root)
2828 {
2829  Query *parse = root->parse;
2830  Bitmapset **groupbyattnos;
2831  Bitmapset **surplusvars;
2832  ListCell *lc;
2833  int relid;
2834 
2835  /* No chance to do anything if there are less than two GROUP BY items */
2836  if (list_length(parse->groupClause) < 2)
2837  return;
2838 
2839  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2840  if (parse->groupingSets)
2841  return;
2842 
2843  /*
2844  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2845  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2846  * that are GROUP BY items.
2847  */
2848  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2849  (list_length(parse->rtable) + 1));
2850  foreach(lc, parse->groupClause)
2851  {
2852  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2853  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2854  Var *var = (Var *) tle->expr;
2855 
2856  /*
2857  * Ignore non-Vars and Vars from other query levels.
2858  *
2859  * XXX in principle, stable expressions containing Vars could also be
2860  * removed, if all the Vars are functionally dependent on other GROUP
2861  * BY items. But it's not clear that such cases occur often enough to
2862  * be worth troubling over.
2863  */
2864  if (!IsA(var, Var) ||
2865  var->varlevelsup > 0)
2866  continue;
2867 
2868  /* OK, remember we have this Var */
2869  relid = var->varno;
2870  Assert(relid <= list_length(parse->rtable));
2871  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2872  var->varattno - FirstLowInvalidHeapAttributeNumber);
2873  }
2874 
2875  /*
2876  * Consider each relation and see if it is possible to remove some of its
2877  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2878  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2879  * of the column attnos of RTE k that are removable GROUP BY items.
2880  */
2881  surplusvars = NULL; /* don't allocate array unless required */
2882  relid = 0;
2883  foreach(lc, parse->rtable)
2884  {
2884  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2886  Bitmapset *relattnos;
2887  Bitmapset *pkattnos;
2888  Oid constraintOid;
2889 
2890  relid++;
2891 
2892  /* Only plain relations could have primary-key constraints */
2893  if (rte->rtekind != RTE_RELATION)
2894  continue;
2895 
2896  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2897  relattnos = groupbyattnos[relid];
2898  if (bms_membership(relattnos) != BMS_MULTIPLE)
2899  continue;
2900 
2901  /*
2902  * Can't remove any columns for this rel if there is no suitable
2903  * (i.e., nondeferrable) primary key constraint.
2904  */
2905  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2906  if (pkattnos == NULL)
2907  continue;
2908 
2909  /*
2910  * If the primary key is a proper subset of relattnos then we have
2911  * some items in the GROUP BY that can be removed.
2912  */
2913  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2914  {
2915  /*
2916  * To easily remember whether we've found anything to do, we don't
2917  * allocate the surplusvars[] array until we find something.
2918  */
2919  if (surplusvars == NULL)
2920  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2921  (list_length(parse->rtable) + 1));
2922 
2923  /* Remember the attnos of the removable columns */
2924  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2925 
2926  /* Also, mark the resulting plan as dependent on this constraint */
2927  parse->constraintDeps = lappend_oid(parse->constraintDeps,
2928  constraintOid);
2929  }
2930  }
2931 
2932  /*
2933  * If we found any surplus Vars, build a new GROUP BY clause without them.
2934  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2935  * markings, but that's harmless.)
2936  */
2937  if (surplusvars != NULL)
2938  {
2939  List *new_groupby = NIL;
2940 
2941  foreach(lc, parse->groupClause)
2942  {
2944  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2945  Var *var = (Var *) tle->expr;
2946 
2947  /*
2948  * New list must include non-Vars, outer Vars, and anything not
2949  * marked as surplus.
2950  */
2951  if (!IsA(var, Var) ||
2952  var->varlevelsup > 0 ||
2953  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2954  surplusvars[var->varno]))
2955  new_groupby = lappend(new_groupby, sgc);
2956  }
2957 
2958  parse->groupClause = new_groupby;
2959  }
2960 }
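 /*
  * Illustrative example (added note, not part of the original source):
  * given "CREATE TABLE t (id int PRIMARY KEY, name text, val int)", the
  * query "SELECT id, name, sum(val) FROM t GROUP BY id, name" can be
  * simplified here to group by id alone, since name is functionally
  * dependent on the primary key; the resulting plan is then marked as
  * depending on that pkey constraint via parse->constraintDeps.
  */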
2961 
2962 /*
2963  * preprocess_groupclause - do preparatory work on GROUP BY clause
2964  *
2965  * The idea here is to adjust the ordering of the GROUP BY elements
2966  * (which in itself is semantically insignificant) to match ORDER BY,
2967  * thereby allowing a single sort operation to both implement the ORDER BY
2968  * requirement and set up for a Unique step that implements GROUP BY.
2969  *
2970  * In principle it might be interesting to consider other orderings of the
2971  * GROUP BY elements, which could match the sort ordering of other
2972  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2973  * bother with that, though. Hashed grouping will frequently win anyway.
2974  *
2975  * Note: we need no comparable processing of the distinctClause because
2976  * the parser already enforced that that matches ORDER BY.
2977  *
2978  * For grouping sets, the order of items is instead forced to agree with that
2979  * of the grouping set (and items not in the grouping set are skipped). The
2980  * work of sorting the order of grouping set elements to match the ORDER BY if
2981  * possible is done elsewhere.
2982  */
2983 static List *
2984 preprocess_groupclause(PlannerInfo *root, List *force)
2985 {
2986  Query *parse = root->parse;
2987  List *new_groupclause = NIL;
2988  bool partial_match;
2989  ListCell *sl;
2990  ListCell *gl;
2991 
2992  /* For grouping sets, we need to force the ordering */
2993  if (force)
2994  {
2995  foreach(sl, force)
2996  {
2997  Index ref = lfirst_int(sl);
2998  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2999 
3000  new_groupclause = lappend(new_groupclause, cl);
3001  }
3002 
3003  return new_groupclause;
3004  }
3005 
3006  /* If no ORDER BY, nothing useful to do here */
3007  if (parse->sortClause == NIL)
3008  return parse->groupClause;
3009 
3010  /*
3011  * Scan the ORDER BY clause and construct a list of matching GROUP BY
3012  * items, but only as far as we can make a matching prefix.
3013  *
3014  * This code assumes that the sortClause contains no duplicate items.
3015  */
3016  foreach(sl, parse->sortClause)
3017  {
3018  SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
3019 
3020  foreach(gl, parse->groupClause)
3021  {
3022  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
3023 
3024  if (equal(gc, sc))
3025  {
3026  new_groupclause = lappend(new_groupclause, gc);
3027  break;
3028  }
3029  }
3030  if (gl == NULL)
3031  break; /* no match, so stop scanning */
3032  }
3033 
3034  /* Did we match all of the ORDER BY list, or just some of it? */
3035  partial_match = (sl != NULL);
3036 
3037  /* If no match at all, no point in reordering GROUP BY */
3038  if (new_groupclause == NIL)
3039  return parse->groupClause;
3040 
3041  /*
3042  * Add any remaining GROUP BY items to the new list, but only if we were
3043  * able to make a complete match. In other words, we only rearrange the
3044  * GROUP BY list if the result is that one list is a prefix of the other
3045  * --- otherwise there's no possibility of a common sort. Also, give up
3046  * if there are any non-sortable GROUP BY items, since then there's no
3047  * hope anyway.
3048  */
3049  foreach(gl, parse->groupClause)
3050  {
3051  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
3052 
3053  if (list_member_ptr(new_groupclause, gc))
3054  continue; /* it matched an ORDER BY item */
3055  if (partial_match)
3056  return parse->groupClause; /* give up, no common sort possible */
3057  if (!OidIsValid(gc->sortop))
3058  return parse->groupClause; /* give up, GROUP BY can't be sorted */
3059  new_groupclause = lappend(new_groupclause, gc);
3060  }
3061 
3062  /* Success --- install the rearranged GROUP BY list */
3063  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
3064  return new_groupclause;
3065 }
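 /*
  * Illustrative example (added note, not part of the original source):
  * for "SELECT a, b, count(*) FROM t GROUP BY b, a ORDER BY a, b" the
  * GROUP BY list is reordered to (a, b) so one sort can serve both the
  * grouping and the ORDER BY; if the query instead had ORDER BY a, c,
  * only a partial prefix matches and the original GROUP BY order is
  * returned unchanged.
  */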
3066 
3067 /*
3068  * Extract lists of grouping sets that can be implemented using a single
3069  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
3070  *
3071  * Input must be sorted with smallest sets first. Result has each sublist
3072  * sorted with smallest sets first.
3073  *
3074  * We want to produce the absolute minimum possible number of lists here to
3075  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
3076  * of finding the minimal partition of a partially-ordered set into chains
3077  * (which is what we need, taking the list of grouping sets as a poset ordered
3078  * by set inclusion) can be mapped to the problem of finding the maximum
3079  * cardinality matching on a bipartite graph, which is solvable in polynomial
3080  * time with a worst case of no worse than O(n^2.5) and usually much
3081  * better. Since our N is at most 4096, we don't need to consider fallbacks to
3082  * heuristic or approximate methods. (Planning time for a 12-d cube is under
3083  * half a second on my modest system even with optimization off and assertions
3084  * on.)
3085  */
3086 static List *
3087 extract_rollup_sets(List *groupingSets)
3088 {
3089  int num_sets_raw = list_length(groupingSets);
3090  int num_empty = 0;
3091  int num_sets = 0; /* distinct sets */
3092  int num_chains = 0;
3093  List *result = NIL;
3094  List **results;
3095  List **orig_sets;
3096  Bitmapset **set_masks;
3097  int *chains;
3098  short **adjacency;
3099  short *adjacency_buf;
3100  BipartiteMatchState *state;
3101  int i;
3102  int j;
3103  int j_size;
3104  ListCell *lc1 = list_head(groupingSets);
3105  ListCell *lc;
3106 
3107  /*
3108  * Start by stripping out empty sets. The algorithm doesn't require this,
3109  * but the planner currently needs all empty sets to be returned in the
3110  * first list, so we strip them here and add them back after.
3111  */
3112  while (lc1 && lfirst(lc1) == NIL)
3113  {
3114  ++num_empty;
3115  lc1 = lnext(lc1);
3116  }
3117 
3118  /* bail out now if it turns out that all we had were empty sets. */
3119  if (!lc1)
3120  return list_make1(groupingSets);
3121 
3122  /*----------
3123  * We don't strictly need to remove duplicate sets here, but if we don't,
3124  * they tend to become scattered through the result, which is a bit
3125  * confusing (and irritating if we ever decide to optimize them out).
3126  * So we remove them here and add them back after.
3127  *
3128  * For each non-duplicate set, we fill in the following:
3129  *
3130  * orig_sets[i] = list of the original set lists
3131  * set_masks[i] = bitmapset for testing inclusion
3132  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3133  *
3134  * chains[i] will be the result group this set is assigned to.
3135  *
3136  * We index all of these from 1 rather than 0 because it is convenient
3137  * to leave 0 free for the NIL node in the graph algorithm.
3138  *----------
3139  */
3140  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3141  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3142  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3143  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3144 
3145  j_size = 0;
3146  j = 0;
3147  i = 1;
3148 
3149  for_each_cell(lc, lc1)
3150  {
3151  List *candidate = (List *) lfirst(lc);
3152  Bitmapset *candidate_set = NULL;
3153  ListCell *lc2;
3154  int dup_of = 0;
3155 
3156  foreach(lc2, candidate)
3157  {
3158  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3159  }
3160 
3161  /* we can only be a dup if we're the same length as a previous set */
3162  if (j_size == list_length(candidate))
3163  {
3164  int k;
3165 
3166  for (k = j; k < i; ++k)
3167  {
3168  if (bms_equal(set_masks[k], candidate_set))
3169  {
3170  dup_of = k;
3171  break;
3172  }
3173  }
3174  }
3175  else if (j_size < list_length(candidate))
3176  {
3177  j_size = list_length(candidate);
3178  j = i;
3179  }
3180 
3181  if (dup_of > 0)
3182  {
3183  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3184  bms_free(candidate_set);
3185  }
3186  else
3187  {
3188  int k;
3189  int n_adj = 0;
3190 
3191  orig_sets[i] = list_make1(candidate);
3192  set_masks[i] = candidate_set;
3193 
3194  /* fill in adjacency list; no need to compare equal-size sets */
3195 
3196  for (k = j - 1; k > 0; --k)
3197  {
3198  if (bms_is_subset(set_masks[k], candidate_set))
3199  adjacency_buf[++n_adj] = k;
3200  }
3201 
3202  if (n_adj > 0)
3203  {
3204  adjacency_buf[0] = n_adj;
3205  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3206  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3207  }
3208  else
3209  adjacency[i] = NULL;
3210 
3211  ++i;
3212  }
3213  }
3214 
3215  num_sets = i - 1;
3216 
3217  /*
3218  * Apply the graph matching algorithm to do the work.
3219  */
3220  state = BipartiteMatch(num_sets, num_sets, adjacency);
3221 
3222  /*
3223  * Now, the state->pair* fields have the info we need to assign sets to
3224  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3225  * pair_vu[v] = u (both will be true, but we check both so that we can do
3226  * it in one pass)
3227  */
3228  chains = palloc0((num_sets + 1) * sizeof(int));
3229 
3230  for (i = 1; i <= num_sets; ++i)
3231  {
3232  int u = state->pair_vu[i];
3233  int v = state->pair_uv[i];
3234 
3235  if (u > 0 && u < i)
3236  chains[i] = chains[u];
3237  else if (v > 0 && v < i)
3238  chains[i] = chains[v];
3239  else
3240  chains[i] = ++num_chains;
3241  }
3242 
3243  /* build result lists. */
3244  results = palloc0((num_chains + 1) * sizeof(List *));
3245 
3246  for (i = 1; i <= num_sets; ++i)
3247  {
3248  int c = chains[i];
3249 
3250  Assert(c > 0);
3251 
3252  results[c] = list_concat(results[c], orig_sets[i]);
3253  }
3254 
3255  /* push any empty sets back on the first list. */
3256  while (num_empty-- > 0)
3257  results[1] = lcons(NIL, results[1]);
3258 
3259  /* make result list */
3260  for (i = 1; i <= num_chains; ++i)
3261  result = lappend(result, results[i]);
3262 
3263  /*
3264  * Free all the things.
3265  *
3266  * (This is over-fussy for small sets but for large sets we could have
3267  * tied up a nontrivial amount of memory.)
3268  */
3269  BipartiteMatchFree(state);
3270  pfree(results);
3271  pfree(chains);
3272  for (i = 1; i <= num_sets; ++i)
3273  if (adjacency[i])
3274  pfree(adjacency[i]);
3275  pfree(adjacency);
3276  pfree(adjacency_buf);
3277  pfree(orig_sets);
3278  for (i = 1; i <= num_sets; ++i)
3279  bms_free(set_masks[i]);
3280  pfree(set_masks);
3281 
3282  return result;
3283 }
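 /*
  * Illustrative example (added note, not part of the original source):
  * for the input sets (a), (a,b), (b,c) -- already sorted smallest
  * first -- the only inclusion is (a) within (a,b), so the matching
  * yields two chains, e.g. {(a), (a,b)} and {(b,c)}, and two rollup
  * passes suffice; any empty sets are stripped before matching and
  * pushed back onto the first result list afterwards.
  */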
3284 
3285 /*
3286  * Reorder the elements of a list of grouping sets such that they have correct
3287  * prefix relationships. Also inserts the GroupingSetData annotations.
3288  *
3289  * The input must be ordered with smallest sets first; the result is returned
3290  * with largest sets first. Note that the result shares no list substructure
3291  * with the input, so it's safe for the caller to modify it later.
3292  *
3293  * If we're passed in a sortclause, we follow its order of columns to the
3294  * extent possible, to minimize the chance that we add unnecessary sorts.
3295  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3296  * gets implemented in one pass.)
3297  */
3298 static List *
3299 reorder_grouping_sets(List *groupingsets, List *sortclause)
3300 {
3301  ListCell *lc;
3302  ListCell *lc2;
3303  List *previous = NIL;
3304  List *result = NIL;
3305 
3306  foreach(lc, groupingsets)
3307  {
3308  List *candidate = (List *) lfirst(lc);
3309  List *new_elems = list_difference_int(candidate, previous);
3310  GroupingSetData *gs = makeNode(GroupingSetData);
3311 
3312  if (list_length(new_elems) > 0)
3313  {
3314  while (list_length(sortclause) > list_length(previous))
3315  {
3316  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3317  int ref = sc->tleSortGroupRef;
3318 
3319  if (list_member_int(new_elems, ref))
3320  {
3321  previous = lappend_int(previous, ref);
3322  new_elems = list_delete_int(new_elems, ref);
3323  }
3324  else
3325  {
3326  /* diverged from the sortclause; give up on it */
3327  sortclause = NIL;
3328  break;
3329  }
3330  }
3331 
3332  foreach(lc2, new_elems)
3333  {
3334  previous = lappend_int(previous, lfirst_int(lc2));
3335  }
3336  }
3337 
3338  gs->set = list_copy(previous);
3339  result = lcons(gs, result);
3340  list_free(new_elems);
3341  }
3342 
3343  list_free(previous);
3344 
3345  return result;
3346 }
3347 
3348 /*
3349  * Compute query_pathkeys and other pathkeys during plan generation
3350  */
3351 static void
3352 standard_qp_callback(PlannerInfo *root, void *extra)
3353 {
3354  Query *parse = root->parse;
3355  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3356  List *tlist = qp_extra->tlist;
3357  List *activeWindows = qp_extra->activeWindows;
3358 
3359  /*
3360  * Calculate pathkeys that represent grouping/ordering requirements. The
3361  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3362  * be, in which case we just leave their pathkeys empty.
3363  */
3364  if (qp_extra->groupClause &&
3365  grouping_is_sortable(qp_extra->groupClause))
3366  root->group_pathkeys =
 3367  make_pathkeys_for_sortclauses(root,
 3368  qp_extra->groupClause,
3369  tlist);
3370  else
3371  root->group_pathkeys = NIL;
3372 
3373  /* We consider only the first (bottom) window in pathkeys logic */
3374  if (activeWindows != NIL)
3375  {
3376  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3377 
 3378  root->window_pathkeys = make_pathkeys_for_window(root,
 3379  wc,
3380  tlist);
3381  }
3382  else
3383  root->window_pathkeys = NIL;
3384 
3385  if (parse->distinctClause &&
 3386  grouping_is_sortable(parse->distinctClause))
 3387  root->distinct_pathkeys =
 3388  make_pathkeys_for_sortclauses(root,
 3389  parse->distinctClause,
3390  tlist);
3391  else
3392  root->distinct_pathkeys = NIL;
3393 
3394  root->sort_pathkeys =
 3395  make_pathkeys_for_sortclauses(root,
 3396  parse->sortClause,
3397  tlist);
3398 
3399  /*
3400  * Figure out whether we want a sorted result from query_planner.
3401  *
3402  * If we have a sortable GROUP BY clause, then we want a result sorted
3403  * properly for grouping. Otherwise, if we have window functions to
3404  * evaluate, we try to sort for the first window. Otherwise, if there's a
3405  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3406  * we try to produce output that's sufficiently well sorted for the
3407  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3408  * by the ORDER BY clause.
3409  *
3410  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3411  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3412  * that might just leave us failing to exploit an available sort order at
3413  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3414  * much easier, since we know that the parser ensured that one is a
3415  * superset of the other.
3416  */
3417  if (root->group_pathkeys)
3418  root->query_pathkeys = root->group_pathkeys;
3419  else if (root->window_pathkeys)
3420  root->query_pathkeys = root->window_pathkeys;
3421  else if (list_length(root->distinct_pathkeys) >
3422  list_length(root->sort_pathkeys))
3423  root->query_pathkeys = root->distinct_pathkeys;
3424  else if (root->sort_pathkeys)
3425  root->query_pathkeys = root->sort_pathkeys;
3426  else
3427  root->query_pathkeys = NIL;
3428 }
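/*
 * Example of the precedence rules above (hypothetical query, shown only for
 * illustration):
 *
 *		SELECT a, count(*) FROM tab GROUP BY a ORDER BY a, count(*);
 *
 * Here group_pathkeys is (a) and sort_pathkeys is (a, count(*)).  Because a
 * sortable GROUP BY exists, query_pathkeys is set to group_pathkeys, so
 * query_planner need only produce output sorted by a; the full ORDER BY sort
 * is performed above the aggregation step.
 */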
3429 
3430 /*
3431  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3432  *
3433  * path_rows: number of output rows from scan/join step
3434  * gsets: grouping set data, or NULL if not doing grouping sets
3435  *
3436  * If doing grouping sets, we also annotate the gsets data with the estimates
3437  * for each set and each individual rollup list, with a view to later
3438  * determining whether some combination of them could be hashed instead.
3439  */
3440 static double
 3441 get_number_of_groups(PlannerInfo *root,
 3442  double path_rows,
3443  grouping_sets_data *gd)
3444 {
3445  Query *parse = root->parse;
3446  double dNumGroups;
3447 
3448  if (parse->groupClause)
3449  {
3450  List *groupExprs;
3451 
3452  if (parse->groupingSets)
3453  {
3454  /* Add up the estimates for each grouping set */
3455  ListCell *lc;
3456  ListCell *lc2;
3457 
3458  Assert(gd); /* keep Coverity happy */
3459 
3460  dNumGroups = 0;
3461 
3462  foreach(lc, gd->rollups)
3463  {
3464  RollupData *rollup = lfirst_node(RollupData, lc);
3465  ListCell *lc;
3466 
3467  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3468  parse->targetList);
3469 
3470  rollup->numGroups = 0.0;
3471 
3472  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3473  {
3474  List *gset = (List *) lfirst(lc);
 3475  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
 3476  double numGroups = estimate_num_groups(root,
3477  groupExprs,
3478  path_rows,
3479  &gset);
3480 
3481  gs->numGroups = numGroups;
3482  rollup->numGroups += numGroups;
3483  }
3484 
3485  dNumGroups += rollup->numGroups;
3486  }
3487 
3488  if (gd->hash_sets_idx)
3489  {
3490  ListCell *lc;
3491 
3492  gd->dNumHashGroups = 0;
3493 
3494  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3495  parse->targetList);
3496 
3497  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3498  {
3499  List *gset = (List *) lfirst(lc);
 3500  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
 3501  double numGroups = estimate_num_groups(root,
3502  groupExprs,
3503  path_rows,
3504  &gset);
3505 
3506  gs->numGroups = numGroups;
3507  gd->dNumHashGroups += numGroups;
3508  }
3509 
3510  dNumGroups += gd->dNumHashGroups;
3511  }
3512  }
3513  else
3514  {
3515  /* Plain GROUP BY */
3516  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3517  parse->targetList);
3518 
3519  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3520  NULL);
3521  }
3522  }
3523  else if (parse->groupingSets)
3524  {
3525  /* Empty grouping sets ... one result row for each one */
3526  dNumGroups = list_length(parse->groupingSets);
3527  }
3528  else if (parse->hasAggs || root->hasHavingQual)
3529  {
3530  /* Plain aggregation, one result row */
3531  dNumGroups = 1;
3532  }
3533  else
3534  {
3535  /* Not grouping */
3536  dNumGroups = 1;
3537  }
3538 
3539  return dNumGroups;
3540 }
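/*
 * For instance (illustrative only): with GROUP BY GROUPING SETS ((a), (b)),
 * estimate_num_groups() is consulted once per grouping set (the set's
 * columns being passed via its final argument), each per-set estimate is
 * stashed in the corresponding GroupingSetData and RollupData, and
 * dNumGroups is simply the sum of those estimates.
 */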
3541 
3542 /*
3543  * estimate_hashagg_tablesize
3544  * estimate the number of bytes that a hash aggregate hashtable will
3545  * require based on the agg_costs, path width and dNumGroups.
3546  *
3547  * XXX this may be over-estimating the size now that hashagg knows to omit
3548  * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3549  * grouping columns not in the hashed set are counted here even though hashagg
3550  * won't store them. Is this a problem?
3551  */
3552 static Size
 3553 estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
 3554  double dNumGroups)
3555 {
3556  Size hashentrysize;
3557 
3558  /* Estimate per-hash-entry space at tuple width... */
3559  hashentrysize = MAXALIGN(path->pathtarget->width) +
 3560  MAXALIGN(SizeofMinimalTupleHeader);
 3561 
3562  /* plus space for pass-by-ref transition values... */
3563  hashentrysize += agg_costs->transitionSpace;
3564  /* plus the per-hash-entry overhead */
3565  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3566 
3567  /*
3568  * Note that this disregards the effect of fill-factor and growth policy
 3569  * of the hash-table. That's probably ok, given that the default
 3570  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3571  * "double-in-size" growth policies here.
3572  */
3573  return hashentrysize * dNumGroups;
3574 }
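/*
 * Rough worked example of the arithmetic above (the numbers are illustrative;
 * the header and hash_agg_entry_size overheads vary by build): if the
 * MAXALIGN'd tuple width plus transition space plus per-entry overhead comes
 * to ~112 bytes, then 100,000 groups need about 11MB, which fails the
 * "hashaggtablesize < work_mem * 1024L" checks made by the callers below at
 * the default work_mem of 4MB but passes at 16MB.
 */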
3575 
3576 /*
3577  * create_grouping_paths
3578  *
3579  * Build a new upperrel containing Paths for grouping and/or aggregation.
3580  *
3581  * input_rel: contains the source-data Paths
3582  * target: the pathtarget for the result Paths to compute
3583  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
 3584  * gd: grouping sets data (see the grouping_sets_data struct above),
 3585  * including the list of grouping sets and their clauses, or NULL
 3586  * if not doing grouping sets
3587  *
3588  * Note: all Paths in input_rel are expected to return the target computed
3589  * by make_group_input_target.
3590  *
3591  * We need to consider sorted and hashed aggregation in the same function,
3592  * because otherwise (1) it would be harder to throw an appropriate error
3593  * message if neither way works, and (2) we should not allow hashtable size
3594  * considerations to dissuade us from using hashing if sorting is not possible.
3595  */
3596 static RelOptInfo *
3598  RelOptInfo *input_rel,
3599  PathTarget *target,
3600  const AggClauseCosts *agg_costs,
3601  grouping_sets_data *gd)
3602 {
3603  Query *parse = root->parse;
3604  Path *cheapest_path = input_rel->cheapest_total_path;
3605  RelOptInfo *grouped_rel;
3606  PathTarget *partial_grouping_target = NULL;
3607  AggClauseCosts agg_partial_costs; /* parallel only */
3608  AggClauseCosts agg_final_costs; /* parallel only */
3609  Size hashaggtablesize;
3610  double dNumGroups;
3611  double dNumPartialGroups = 0;
3612  bool can_hash;
3613  bool can_sort;
3614  bool try_parallel_aggregation;
3615 
3616  ListCell *lc;
3617 
3618  /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3619  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3620 
3621  /*
3622  * If the input relation is not parallel-safe, then the grouped relation
3623  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3624  * target list and HAVING quals are parallel-safe.
3625  */
3626  if (input_rel->consider_parallel &&
3627  is_parallel_safe(root, (Node *) target->exprs) &&
3628  is_parallel_safe(root, (Node *) parse->havingQual))
3629  grouped_rel->consider_parallel = true;
3630 
3631  /*
3632  * If the input rel belongs to a single FDW, so does the grouped rel.
3633  */
3634  grouped_rel->serverid = input_rel->serverid;
3635  grouped_rel->userid = input_rel->userid;
3636  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3637  grouped_rel->fdwroutine = input_rel->fdwroutine;
3638 
3639  /*
3640  * Check for degenerate grouping.
3641  */
3642  if ((root->hasHavingQual || parse->groupingSets) &&
3643  !parse->hasAggs && parse->groupClause == NIL)
3644  {
3645  /*
3646  * We have a HAVING qual and/or grouping sets, but no aggregates and
3647  * no GROUP BY (which implies that the grouping sets are all empty).
3648  *
3649  * This is a degenerate case in which we are supposed to emit either
3650  * zero or one row for each grouping set depending on whether HAVING
3651  * succeeds. Furthermore, there cannot be any variables in either
3652  * HAVING or the targetlist, so we actually do not need the FROM table
3653  * at all! We can just throw away the plan-so-far and generate a
3654  * Result node. This is a sufficiently unusual corner case that it's
3655  * not worth contorting the structure of this module to avoid having
3656  * to generate the earlier paths in the first place.
3657  */
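		/*
		 * For instance (a contrived example): "SELECT 1 FROM tab HAVING 2 > 1"
		 * has no aggregates and no GROUP BY, so it lands here and emits a
		 * single Result row (or zero rows, had the HAVING been false), no
		 * matter how many rows tab contains.
		 */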
3658  int nrows = list_length(parse->groupingSets);
3659  Path *path;
3660 
3661  if (nrows > 1)
3662  {
3663  /*
3664  * Doesn't seem worthwhile writing code to cons up a
3665  * generate_series or a values scan to emit multiple rows. Instead
3666  * just make N clones and append them. (With a volatile HAVING
3667  * clause, this means you might get between 0 and N output rows.
3668  * Offhand I think that's desired.)
3669  */
3670  List *paths = NIL;
3671 
3672  while (--nrows >= 0)
3673  {
3674  path = (Path *)
3675  create_result_path(root, grouped_rel,
3676  target,
3677  (List *) parse->havingQual);
3678  paths = lappend(paths, path);
3679  }
3680  path = (Path *)
3681  create_append_path(grouped_rel,
3682  paths,
3683  NIL,
3684  NULL,
3685  0,
3686  false,
3687  NIL,
3688  -1);
3689  path->pathtarget = target;
3690  }
3691  else
3692  {
3693  /* No grouping sets, or just one, so one output row */
3694  path = (Path *)
3695  create_result_path(root, grouped_rel,
3696  target,
3697  (List *) parse->havingQual);
3698  }
3699 
3700  add_path(grouped_rel, path);
3701 
3702  /* No need to consider any other alternatives. */
3703  set_cheapest(grouped_rel);
3704 
3705  return grouped_rel;
3706  }
3707 
3708  /*
3709  * Estimate number of groups.
3710  */
3711  dNumGroups = get_number_of_groups(root,
3712  cheapest_path->rows,
3713  gd);
3714 
3715  /*
3716  * Determine whether it's possible to perform sort-based implementations
3717  * of grouping. (Note that if groupClause is empty,
3718  * grouping_is_sortable() is trivially true, and all the
3719  * pathkeys_contained_in() tests will succeed too, so that we'll consider
3720  * every surviving input path.)
3721  *
3722  * If we have grouping sets, we might be able to sort some but not all of
3723  * them; in this case, we need can_sort to be true as long as we must
3724  * consider any sorted-input plan.
3725  */
3726  can_sort = (gd && gd->rollups != NIL)
3727  || grouping_is_sortable(parse->groupClause);
3728 
3729  /*
3730  * Determine whether we should consider hash-based implementations of
3731  * grouping.
3732  *
3733  * Hashed aggregation only applies if we're grouping. If we have grouping
3734  * sets, some groups might be hashable but others not; in this case we set
3735  * can_hash true as long as there is nothing globally preventing us from
3736  * hashing (and we should therefore consider plans with hashes).
3737  *
3738  * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3739  * aggregates. (Doing so would imply storing *all* the input values in
3740  * the hash table, and/or running many sorts in parallel, either of which
3741  * seems like a certain loser.) We similarly don't support ordered-set
3742  * aggregates in hashed aggregation, but that case is also included in the
3743  * numOrderedAggs count.
3744  *
3745  * Note: grouping_is_hashable() is much more expensive to check than the
3746  * other gating conditions, so we want to do it last.
3747  */
3748  can_hash = (parse->groupClause != NIL &&
3749  agg_costs->numOrderedAggs == 0 &&
3750  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
3751 
3752  /*
3753  * If grouped_rel->consider_parallel is true, then paths that we generate
3754  * for this grouping relation could be run inside of a worker, but that
3755  * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3756  * execution strategy. Figure that out.
3757  */
3758  if (!grouped_rel->consider_parallel)
3759  {
3760  /* Not even parallel-safe. */
3761  try_parallel_aggregation = false;
3762  }
3763  else if (input_rel->partial_pathlist == NIL)
3764  {
3765  /* Nothing to use as input for partial aggregate. */
3766  try_parallel_aggregation = false;
3767  }
3768  else if (!parse->hasAggs && parse->groupClause == NIL)
3769  {
3770  /*
3771  * We don't know how to do parallel aggregation unless we have either
3772  * some aggregates or a grouping clause.
3773  */
3774  try_parallel_aggregation = false;
3775  }
3776  else if (parse->groupingSets)
3777  {
3778  /* We don't know how to do grouping sets in parallel. */
3779  try_parallel_aggregation = false;
3780  }
3781  else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3782  {
3783  /* Insufficient support for partial mode. */
3784  try_parallel_aggregation = false;
3785  }
3786  else
3787  {
3788  /* Everything looks good. */
3789  try_parallel_aggregation = true;
3790  }
3791 
3792  /*
3793  * Before generating paths for grouped_rel, we first generate any possible
3794  * partial paths; that way, later code can easily consider both parallel
3795  * and non-parallel approaches to grouping. Note that the partial paths
3796  * we generate here are also partially aggregated, so simply pushing a
3797  * Gather node on top is insufficient to create a final path, as would be
3798  * the case for a scan/join rel.
3799  */
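	/*
	 * For illustration, the partially aggregated paths built here, once
	 * finished by the code further below, typically produce plans shaped
	 * like (hypothetical EXPLAIN output):
	 *
	 *		Finalize GroupAggregate
	 *		  ->  Gather Merge
	 *				->  Partial GroupAggregate
	 *					  ->  Sort
	 *							->  Parallel Seq Scan
	 *
	 * or the hashed equivalent with a plain Gather, where the Partial and
	 * Finalize steps correspond to AGGSPLIT_INITIAL_SERIAL and
	 * AGGSPLIT_FINAL_DESERIAL respectively.
	 */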
3800  if (try_parallel_aggregation)
3801  {
3802  Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3803 
3804  /*
3805  * Build target list for partial aggregate paths. These paths cannot
3806  * just emit the same tlist as regular aggregate paths, because (1) we
3807  * must include Vars and Aggrefs needed in HAVING, which might not
3808  * appear in the result tlist, and (2) the Aggrefs must be set in
3809  * partial mode.
3810  */
3811  partial_grouping_target = make_partial_grouping_target(root, target);
3812 
3813  /* Estimate number of partial groups. */
3814  dNumPartialGroups = get_number_of_groups(root,
3815  cheapest_partial_path->rows,
3816  gd);
3817 
3818  /*
3819  * Collect statistics about aggregates for estimating costs of
3820  * performing aggregation in parallel.
3821  */
3822  MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3823  MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3824  if (parse->hasAggs)
3825  {
3826  /* partial phase */
3827  get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
 3828  AGGSPLIT_INITIAL_SERIAL,
 3829  &agg_partial_costs);
3830 
3831  /* final phase */
3832  get_agg_clause_costs(root, (Node *) target->exprs,
 3833  AGGSPLIT_FINAL_DESERIAL,
 3834  &agg_final_costs);
3835  get_agg_clause_costs(root, parse->havingQual,
 3836  AGGSPLIT_FINAL_DESERIAL,
 3837  &agg_final_costs);
3838  }
3839 
3840  if (can_sort)
3841  {
3842  /* This was checked before setting try_parallel_aggregation */
3843  Assert(parse->hasAggs || parse->groupClause);
3844 
3845  /*
3846  * Use any available suitably-sorted path as input, and also
3847  * consider sorting the cheapest partial path.
3848  */
3849  foreach(lc, input_rel->partial_pathlist)
3850  {
3851  Path *path = (Path *) lfirst(lc);
3852  bool is_sorted;
3853 
3854  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3855  path->pathkeys);
3856  if (path == cheapest_partial_path || is_sorted)
3857  {
3858  /* Sort the cheapest partial path, if it isn't already */
3859  if (!is_sorted)
3860  path = (Path *) create_sort_path(root,
3861  grouped_rel,
3862  path,
3863  root->group_pathkeys,
3864  -1.0);
3865 
3866  if (parse->hasAggs)
3867  add_partial_path(grouped_rel, (Path *)
3868  create_agg_path(root,
3869  grouped_rel,
3870  path,
3871  partial_grouping_target,
3872  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 3873  AGGSPLIT_INITIAL_SERIAL,
 3874  parse->groupClause,
3875  NIL,
3876  &agg_partial_costs,
3877  dNumPartialGroups));
3878  else
3879  add_partial_path(grouped_rel, (Path *)
3880  create_group_path(root,
3881  grouped_rel,
3882  path,
3883  partial_grouping_target,
3884  parse->groupClause,
3885  NIL,
3886  dNumPartialGroups));
3887  }
3888  }
3889  }
3890 
3891  if (can_hash)
3892  {
3893  /* Checked above */
3894  Assert(parse->hasAggs || parse->groupClause);
3895 
3896  hashaggtablesize =
3897  estimate_hashagg_tablesize(cheapest_partial_path,
3898  &agg_partial_costs,
3899  dNumPartialGroups);
3900 
3901  /*
 3902  * Tentatively produce a partial HashAgg Path, if it looks as though
 3903  * the hash table will fit in work_mem.
3904  */
3905  if (hashaggtablesize < work_mem * 1024L)
3906  {
3907  add_partial_path(grouped_rel, (Path *)
3908  create_agg_path(root,
3909  grouped_rel,
3910  cheapest_partial_path,
3911  partial_grouping_target,
3912  AGG_HASHED,
 3913  AGGSPLIT_INITIAL_SERIAL,
 3914  parse->groupClause,
3915  NIL,
3916  &agg_partial_costs,
3917  dNumPartialGroups));
3918  }
3919  }
3920  }
3921 
3922  /* Build final grouping paths */
3923  if (can_sort)
3924  {
3925  /*
3926  * Use any available suitably-sorted path as input, and also consider
3927  * sorting the cheapest-total path.
3928  */
3929  foreach(lc, input_rel->pathlist)
3930  {
3931  Path *path = (Path *) lfirst(lc);
3932  bool is_sorted;
3933 
3934  is_sorted = pathkeys_contained_in(root->group_pathkeys,
3935  path->pathkeys);
3936  if (path == cheapest_path || is_sorted)
3937  {
3938  /* Sort the cheapest-total path if it isn't already sorted */
3939  if (!is_sorted)
3940  path = (Path *) create_sort_path(root,
3941  grouped_rel,
3942  path,
3943  root->group_pathkeys,
3944  -1.0);
3945 
3946  /* Now decide what to stick atop it */
3947  if (parse->groupingSets)
3948  {
3949  consider_groupingsets_paths(root, grouped_rel,
3950  path, true, can_hash, target,
3951  gd, agg_costs, dNumGroups);
3952  }
3953  else if (parse->hasAggs)
3954  {
3955  /*
3956  * We have aggregation, possibly with plain GROUP BY. Make
3957  * an AggPath.
3958  */
3959  add_path(grouped_rel, (Path *)
3960  create_agg_path(root,
3961  grouped_rel,
3962  path,
3963  target,
3964  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 3965  AGGSPLIT_SIMPLE,
 3966  parse->groupClause,
3967  (List *) parse->havingQual,
3968  agg_costs,
3969  dNumGroups));
3970  }
3971  else if (parse->groupClause)
3972  {
3973  /*
3974  * We have GROUP BY without aggregation or grouping sets.
3975  * Make a GroupPath.
3976  */
3977  add_path(grouped_rel, (Path *)
3978  create_group_path(root,
3979  grouped_rel,
3980  path,
3981  target,
3982  parse->groupClause,
3983  (List *) parse->havingQual,
3984  dNumGroups));
3985  }
3986  else
3987  {
3988  /* Other cases should have been handled above */
3989  Assert(false);
3990  }
3991  }
3992  }
3993 
3994  /*
3995  * Now generate a complete GroupAgg Path atop of the cheapest partial
3996  * path. We can do this using either Gather or Gather Merge.
3997  */
3998  if (grouped_rel->partial_pathlist)
3999  {
4000  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4001  double total_groups = path->rows * path->parallel_workers;
4002 
4003  path = (Path *) create_gather_path(root,
4004  grouped_rel,
4005  path,
4006  partial_grouping_target,
4007  NULL,
4008  &total_groups);
4009 
4010  /*
4011  * Since Gather's output is always unsorted, we'll need to sort,
4012  * unless there's no GROUP BY clause or a degenerate (constant)
4013  * one, in which case there will only be a single group.
4014  */
4015  if (root->group_pathkeys)
4016  path = (Path *) create_sort_path(root,
4017  grouped_rel,
4018  path,
4019  root->group_pathkeys,
4020  -1.0);
4021 
4022  if (parse->hasAggs)
4023  add_path(grouped_rel, (Path *)
4024  create_agg_path(root,
4025  grouped_rel,
4026  path,
4027  target,
4028  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 4029  AGGSPLIT_FINAL_DESERIAL,
 4030  parse->groupClause,
4031  (List *) parse->havingQual,
4032  &agg_final_costs,
4033  dNumGroups));
4034  else
4035  add_path(grouped_rel, (Path *)
4036  create_group_path(root,
4037  grouped_rel,
4038  path,
4039  target,
4040  parse->groupClause,
4041  (List *) parse->havingQual,
4042  dNumGroups));
4043 
4044  /*
4045  * The point of using Gather Merge rather than Gather is that it
4046  * can preserve the ordering of the input path, so there's no
4047  * reason to try it unless (1) it's possible to produce more than
4048  * one output row and (2) we want the output path to be ordered.
4049  */
4050  if (parse->groupClause != NIL && root->group_pathkeys != NIL)
4051  {
4052  foreach(lc, grouped_rel->partial_pathlist)
4053  {
4054  Path *subpath = (Path *) lfirst(lc);
4055  Path *gmpath;
4056  double total_groups;
4057 
4058  /*
4059  * It's useful to consider paths that are already properly
4060  * ordered for Gather Merge, because those don't need a
4061  * sort. It's also useful to consider the cheapest path,
4062  * because sorting it in parallel and then doing Gather
4063  * Merge may be better than doing an unordered Gather
4064  * followed by a sort. But there's no point in
4065  * considering non-cheapest paths that aren't already
4066  * sorted correctly.
4067  */
4068  if (path != subpath &&
 4069  !pathkeys_contained_in(root->group_pathkeys,
 4070  subpath->pathkeys))
4071  continue;
4072 
4073  total_groups = subpath->rows * subpath->parallel_workers;
4074 
4075  gmpath = (Path *)
 4076  create_gather_merge_path(root,
 4077  grouped_rel,
4078  subpath,
4079  partial_grouping_target,
4080  root->group_pathkeys,
4081  NULL,
4082  &total_groups);
4083 
4084  if (parse->hasAggs)
4085  add_path(grouped_rel, (Path *)
4086  create_agg_path(root,
4087  grouped_rel,
4088  gmpath,
4089  target,
4090  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 4091  AGGSPLIT_FINAL_DESERIAL,
 4092  parse->groupClause,
4093  (List *) parse->havingQual,
4094  &agg_final_costs,
4095  dNumGroups));
4096  else
4097  add_path(grouped_rel, (Path *)
4098  create_group_path(root,
4099  grouped_rel,
4100  gmpath,
4101  target,
4102  parse->groupClause,
4103  (List *) parse->havingQual,
4104  dNumGroups));
4105  }
4106  }
4107  }
4108  }
4109 
4110  if (can_hash)
4111  {
4112  if (parse->groupingSets)
4113  {
4114  /*
4115  * Try for a hash-only groupingsets path over unsorted input.
4116  */
4117  consider_groupingsets_paths(root, grouped_rel,
4118  cheapest_path, false, true, target,
4119  gd, agg_costs, dNumGroups);
4120  }
4121  else
4122  {
4123  hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
4124  agg_costs,
4125  dNumGroups);
4126 
4127  /*
4128  * Provided that the estimated size of the hashtable does not
4129  * exceed work_mem, we'll generate a HashAgg Path, although if we
4130  * were unable to sort above, then we'd better generate a Path, so
4131  * that we at least have one.
4132  */
4133  if (hashaggtablesize < work_mem * 1024L ||
4134  grouped_rel->pathlist == NIL)
4135  {
4136  /*
4137  * We just need an Agg over the cheapest-total input path,
4138  * since input order won't matter.
4139  */
4140  add_path(grouped_rel, (Path *)
4141  create_agg_path(root, grouped_rel,
4142  cheapest_path,
4143  target,
4144  AGG_HASHED,
 4145  AGGSPLIT_SIMPLE,
 4146  parse->groupClause,
4147  (List *) parse->havingQual,
4148  agg_costs,
4149  dNumGroups));
4150  }
4151  }
4152 
4153  /*
4154  * Generate a HashAgg Path atop of the cheapest partial path. Once
4155  * again, we'll only do this if it looks as though the hash table
4156  * won't exceed work_mem.
4157  */
4158  if (grouped_rel->partial_pathlist)
4159  {
4160  Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4161 
4162  hashaggtablesize = estimate_hashagg_tablesize(path,
4163  &agg_final_costs,
4164  dNumGroups);
4165 
4166  if (hashaggtablesize < work_mem * 1024L)
4167  {
4168  double total_groups = path->rows * path->parallel_workers;
4169 
4170  path = (Path *) create_gather_path(root,
4171  grouped_rel,
4172  path,
4173  partial_grouping_target,
4174  NULL,
4175  &total_groups);
4176 
4177  add_path(grouped_rel, (Path *)
4178  create_agg_path(root,
4179  grouped_rel,
4180  path,
4181  target,
4182  AGG_HASHED,
 4183  AGGSPLIT_FINAL_DESERIAL,
 4184  parse->groupClause,
4185  (List *) parse->havingQual,
4186  &agg_final_costs,
4187  dNumGroups));
4188  }
4189  }
4190  }
4191 
4192  /* Give a helpful error if we failed to find any implementation */
4193  if (grouped_rel->pathlist == NIL)
4194  ereport(ERROR,
4195  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4196  errmsg("could not implement GROUP BY"),
4197  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4198 
4199  /*
4200  * If there is an FDW that's responsible for all baserels of the query,
4201  * let it consider adding ForeignPaths.
4202  */
4203  if (grouped_rel->fdwroutine &&
4204  grouped_rel->fdwroutine->GetForeignUpperPaths)
 4205  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
 4206  input_rel, grouped_rel);
4207 
4208  /* Let extensions possibly add some more paths */
 4209  if (create_upper_paths_hook)
 4210  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4211  input_rel, grouped_rel);
4212 
4213  /* Now choose the best path(s) */
4214  set_cheapest(grouped_rel);
4215 
4216  /*
4217  * We've been using the partial pathlist for the grouped relation to hold
4218  * partially aggregated paths, but that's actually a little bit bogus
4219  * because it's unsafe for later planning stages -- like ordered_rel ---
4220  * to get the idea that they can use these partial paths as if they didn't
4221  * need a FinalizeAggregate step. Zap the partial pathlist at this stage
4222  * so we don't get confused.
4223  */
4224  grouped_rel->partial_pathlist = NIL;
4225 
4226  return grouped_rel;
4227 }
4228 
4229 
4230 /*
4231  * For a given input path, consider the possible ways of doing grouping sets on
4232  * it, by combinations of hashing and sorting. This can be called multiple
4233  * times, so it's important that it not scribble on input. No result is
4234  * returned, but any generated paths are added to grouped_rel.
4235  */
4236 static void
 4237 consider_groupingsets_paths(PlannerInfo *root,
 4238  RelOptInfo *grouped_rel,
4239  Path *path,
4240  bool is_sorted,
4241  bool can_hash,
4242  PathTarget *target,
4243  grouping_sets_data *gd,
4244  const AggClauseCosts *agg_costs,
4245  double dNumGroups)
4246 {
4247  Query *parse = root->parse;
4248 
4249  /*
4250  * If we're not being offered sorted input, then only consider plans that
4251  * can be done entirely by hashing.
4252  *
4253  * We can hash everything if it looks like it'll fit in work_mem. But if
4254  * the input is actually sorted despite not being advertised as such, we
4255  * prefer to make use of that in order to use less memory.
4256  *
4257  * If none of the grouping sets are sortable, then ignore the work_mem
4258  * limit and generate a path anyway, since otherwise we'll just fail.
4259  */
4260  if (!is_sorted)
4261  {
4262  List *new_rollups = NIL;
4263  RollupData *unhashed_rollup = NULL;
4264  List *sets_data;
4265  List *empty_sets_data = NIL;
4266  List *empty_sets = NIL;
4267  ListCell *lc;
4268  ListCell *l_start = list_head(gd->rollups);
4269  AggStrategy strat = AGG_HASHED;
4270  Size hashsize;
4271  double exclude_groups = 0.0;
4272 
4273  Assert(can_hash);
4274 
4275  if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4276  {
4277  unhashed_rollup = lfirst_node(RollupData, l_start);
4278  exclude_groups = unhashed_rollup->numGroups;
4279  l_start = lnext(l_start);
4280  }
4281 
4282  hashsize = estimate_hashagg_tablesize(path,
4283  agg_costs,
4284  dNumGroups - exclude_groups);
4285 
4286  /*
4287  * gd->rollups is empty if we have only unsortable columns to work
4288  * with. Override work_mem in that case; otherwise, we'll rely on the
4289  * sorted-input case to generate usable mixed paths.
4290  */
4291  if (hashsize > work_mem * 1024L && gd->rollups)
4292  return; /* nope, won't fit */
4293 
4294  /*
4295  * We need to burst the existing rollups list into individual grouping
4296  * sets and recompute a groupClause for each set.
4297  */
4298  sets_data = list_copy(gd->unsortable_sets);
4299 
4300  for_each_cell(lc, l_start)
4301  {
4302  RollupData *rollup = lfirst_node(RollupData, lc);
4303 
4304  /*
4305  * If we find an unhashable rollup that's not been skipped by the
4306  * "actually sorted" check above, we can't cope; we'd need sorted
4307  * input (with a different sort order) but we can't get that here.
4308  * So bail out; we'll get a valid path from the is_sorted case
4309  * instead.
4310  *
4311  * The mere presence of empty grouping sets doesn't make a rollup
 4312  * unhashable (see preprocess_grouping_sets); we handle those
 4313  * specially below.
4314  */
4315  if (!rollup->hashable)
4316  return;
4317  else
4318  sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
4319  }
4320  foreach(lc, sets_data)
4321  {
 4322  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
 4323  List *gset = gs->set;
4324  RollupData *rollup;
4325 
4326  if (gset == NIL)
4327  {
4328  /* Empty grouping sets can't be hashed. */
4329  empty_sets_data = lappend(empty_sets_data, gs);
4330  empty_sets = lappend(empty_sets, NIL);
4331  }
4332  else
4333  {
4334  rollup = makeNode(RollupData);
4335 
4336  rollup->groupClause = preprocess_groupclause(root, gset);
4337  rollup->gsets_data = list_make1(gs);
4338  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4339  rollup->gsets_data,
4340  gd->tleref_to_colnum_map);
4341  rollup->numGroups = gs->numGroups;
4342  rollup->hashable = true;
4343  rollup->is_hashed = true;
4344  new_rollups = lappend(new_rollups, rollup);
4345  }
4346  }
4347 
4348  /*
4349  * If we didn't find anything nonempty to hash, then bail. We'll
4350  * generate a path from the is_sorted case.
4351  */
4352  if (new_rollups == NIL)
4353  return;
4354 
4355  /*
4356  * If there were empty grouping sets they should have been in the
4357  * first rollup.
4358  */
4359  Assert(!unhashed_rollup || !empty_sets);
4360 
4361  if (unhashed_rollup)
4362  {
4363  new_rollups = lappend(new_rollups, unhashed_rollup);
4364  strat = AGG_MIXED;
4365  }
4366  else if (empty_sets)
4367  {
4368  RollupData *rollup = makeNode(RollupData);
4369 
4370  rollup->groupClause = NIL;
4371  rollup->gsets_data = empty_sets_data;
4372  rollup->gsets = empty_sets;
4373  rollup->numGroups = list_length(empty_sets);
4374  rollup->hashable = false;
4375  rollup->is_hashed = false;
4376  new_rollups = lappend(new_rollups, rollup);
4377  strat = AGG_MIXED;
4378  }
4379 
4380  add_path(grouped_rel, (Path *)
 4381  create_groupingsets_path(root,
 4382  grouped_rel,
4383  path,
4384  target,
4385  (List *) parse->havingQual,
4386  strat,
4387  new_rollups,
4388  agg_costs,
4389  dNumGroups));
4390  return;
4391  }
4392 
4393  /*
4394  * If we have sorted input but nothing we can do with it, bail.
4395  */
4396  if (list_length(gd->rollups) == 0)
4397  return;
4398 
4399  /*
 4400  * Given sorted input, we try to make two paths: one sorted and one mixed
4401  * sort/hash. (We need to try both because hashagg might be disabled, or
4402  * some columns might not be sortable.)
4403  *
4404  * can_hash is passed in as false if some obstacle elsewhere (such as
4405  * ordered aggs) means that we shouldn't consider hashing at all.
4406  */
4407  if (can_hash && gd->any_hashable)
4408  {
4409  List *rollups = NIL;
4410  List *hash_sets = list_copy(gd->unsortable_sets);
4411  double availspace = (work_mem * 1024.0);
4412  ListCell *lc;
4413 
4414  /*
4415  * Account first for space needed for groups we can't sort at all.
4416  */
4417  availspace -= (double) estimate_hashagg_tablesize(path,
4418  agg_costs,
4419  gd->dNumHashGroups);
4420 
4421  if (availspace > 0 && list_length(gd->rollups) > 1)
4422  {
4423  double scale;
4424  int num_rollups = list_length(gd->rollups);
4425  int k_capacity;
4426  int *k_weights = palloc(num_rollups * sizeof(int));
4427  Bitmapset *hash_items = NULL;
4428  int i;
4429 
4430  /*
4431  * We treat this as a knapsack problem: the knapsack capacity
4432  * represents work_mem, the item weights are the estimated memory
4433  * usage of the hashtables needed to implement a single rollup,
4434  * and we really ought to use the cost saving as the item value;
4435  * however, currently the costs assigned to sort nodes don't
4436  * reflect the comparison costs well, and so we treat all items as
4437  * of equal value (each rollup we hash instead saves us one sort).
4438  *
4439  * To use the discrete knapsack, we need to scale the values to a
4440  * reasonably small bounded range. We choose to allow a 5% error
4441  * margin; we have no more than 4096 rollups in the worst possible
4442  * case, which with a 5% error margin will require a bit over 42MB
4443  * of workspace. (Anyone wanting to plan queries that complex had
4444  * better have the memory for it. In more reasonable cases, with
4445  * no more than a couple of dozen rollups, the memory usage will
4446  * be negligible.)
4447  *
4448  * k_capacity is naturally bounded, but we clamp the values for
4449  * scale and weight (below) to avoid overflows or underflows (or
4450  * uselessly trying to use a scale factor less than 1 byte).
4451  */
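			/*
			 * To make the scaling concrete (illustrative numbers only): with
			 * work_mem = 4MB, no unsortable sets and 4 rollups, availspace is
			 * about 4,194,304 bytes, scale becomes Max(4194304 / 80, 1.0) =
			 * 52428.8, and k_capacity is floor(4194304 / 52428.8) = 80; a
			 * rollup whose hashtable is estimated at 1MB then gets weight
			 * floor(1048576 / 52428.8) = 20 knapsack units.
			 */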
4452  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4453  k_capacity = (int) floor(availspace / scale);
4454 
4455  /*
4456  * We leave the first rollup out of consideration since it's the
4457  * one that matches the input sort order. We assign indexes "i"
4458  * to only those entries considered for hashing; the second loop,
4459  * below, must use the same condition.
4460  */
4461  i = 0;
 4462  for_each_cell(lc, lnext(list_head(gd->rollups)))
 4463  {
4464  RollupData *rollup = lfirst_node(RollupData, lc);
4465 
4466  if (rollup->hashable)
4467  {
4468  double sz = estimate_hashagg_tablesize(path,
4469  agg_costs,
4470  rollup->numGroups);
4471 
4472  /*
4473  * If sz is enormous, but work_mem (and hence scale) is
4474  * small, avoid integer overflow here.
4475  */
4476  k_weights[i] = (int) Min(floor(sz / scale),
4477  k_capacity + 1.0);
4478  ++i;
4479  }
4480  }
4481 
4482  /*
4483  * Apply knapsack algorithm; compute the set of items which
4484  * maximizes the value stored (in this case the number of sorts
4485  * saved) while keeping the total size (approximately) within
4486  * capacity.
4487  */
4488  if (i > 0)
4489  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4490 
4491  if (!bms_is_empty(hash_items))
4492  {
4493  rollups = list_make1(linitial(gd->rollups));
4494 
4495  i = 0;
 4496  for_each_cell(lc, lnext(list_head(gd->rollups)))
 4497  {
4498  RollupData *rollup = lfirst_node(RollupData, lc);
4499 
4500  if (rollup->hashable)
4501  {
4502  if (bms_is_member(i, hash_items))
4503  hash_sets = list_concat(hash_sets,
4504  list_copy(rollup->gsets_data));
4505  else
4506  rollups = lappend(rollups, rollup);
4507  ++i;
4508  }
4509  else
4510  rollups = lappend(rollups, rollup);
4511  }
4512  }
4513  }
4514 
4515  if (!rollups && hash_sets)
4516  rollups = list_copy(gd->rollups);
4517 
4518  foreach(lc, hash_sets)
4519  {
 4520  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
 4521  RollupData *rollup = makeNode(RollupData);
4522 
4523  Assert(gs->set != NIL);
4524 
4525  rollup->groupClause = preprocess_groupclause(root, gs->set);
4526  rollup->gsets_data = list_make1(gs);
4527  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4528  rollup->gsets_data,
4529  gd->tleref_to_colnum_map);
4530  rollup->numGroups = gs->numGroups;
4531  rollup->hashable = true;
4532  rollup->is_hashed = true;
4533  rollups = lcons(rollup, rollups);
4534  }
4535 
4536  if (rollups)
4537  {
4538  add_path(grouped_rel, (Path *)
 4539  create_groupingsets_path(root,
 4540  grouped_rel,
4541  path,
4542  target,
4543  (List *) parse->havingQual,
4544  AGG_MIXED,
4545  rollups,
4546  agg_costs,
4547  dNumGroups));
4548  }
4549  }
4550 
4551  /*
4552  * Now try the simple sorted case.
4553  */
4554  if (!gd->unsortable_sets)
4555  add_path(grouped_rel, (Path *)
 4556  create_groupingsets_path(root,
 4557  grouped_rel,
4558  path,
4559  target,
4560  (List *) parse->havingQual,
4561  AGG_SORTED,
4562  gd->rollups,
4563  agg_costs,
4564  dNumGroups));
4565 }
4566 
4567 /*
4568  * create_window_paths
4569  *
4570  * Build a new upperrel containing Paths for window-function evaluation.
4571  *
4572  * input_rel: contains the source-data Paths
4573  * input_target: result of make_window_input_target
4574  * output_target: what the topmost WindowAggPath should return
4575  * tlist: query's target list (needed to look up pathkeys)
4576  * wflists: result of find_window_functions
4577  * activeWindows: result of select_active_windows
4578  *
4579  * Note: all Paths in input_rel are expected to return input_target.
4580  */
4581 static RelOptInfo *
 4582 create_window_paths(PlannerInfo *root,
 4583  RelOptInfo *input_rel,
4584  PathTarget *input_target,
4585  PathTarget *output_target,
4586  List *tlist,
4587  WindowFuncLists *wflists,
4588  List *activeWindows)
4589 {
4590  RelOptInfo *window_rel;
4591  ListCell *lc;
4592 
4593  /* For now, do all work in the (WINDOW, NULL) upperrel */
4594  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4595 
4596  /*
4597  * If the input relation is not parallel-safe, then the window relation
4598  * can't be parallel-safe, either. Otherwise, we need to examine the
4599  * target list and active windows for non-parallel-safe constructs.
4600  */
4601  if (input_rel->consider_parallel &&
4602  is_parallel_safe(root, (Node *) output_target->exprs) &&
4603  is_parallel_safe(root, (Node *) activeWindows))
4604  window_rel->consider_parallel = true;
4605 
4606  /*
4607  * If the input rel belongs to a single FDW, so does the window rel.
4608  */
4609  window_rel->serverid = input_rel->serverid;
4610  window_rel->userid = input_rel->userid;
4611  window_rel->useridiscurrent = input_rel->useridiscurrent;
4612  window_rel->fdwroutine = input_rel->fdwroutine;
4613 
4614  /*
4615  * Consider computing window functions starting from the existing
4616  * cheapest-total path (which will likely require a sort) as well as any
4617  * existing paths that satisfy root->window_pathkeys (which won't).
4618  */
4619  foreach(lc, input_rel->pathlist)
4620  {
4621  Path *path = (Path *) lfirst(lc);
4622 
4623  if (path == input_rel->cheapest_total_path ||
 4624  pathkeys_contained_in(root->window_pathkeys, path->pathkeys))
 4625  create_one_window_path(root,
 4626  window_rel,
4627  path,
4628  input_target,
4629  output_target,
4630  tlist,
4631  wflists,
4632  activeWindows);
4633  }
4634 
4635  /*
4636  * If there is an FDW that's responsible for all baserels of the query,
4637  * let it consider adding ForeignPaths.
4638  */
4639  if (window_rel->fdwroutine &&
4640  window_rel->fdwroutine->GetForeignUpperPaths)
4641  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4642  input_rel, window_rel);
4643 
4644  /* Let extensions possibly add some more paths */
 4645  if (create_upper_paths_hook)
 4646  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4647  input_rel, window_rel);
4648 
4649  /* Now choose the best path(s) */
4650  set_cheapest(window_rel);
4651 
4652  return window_rel;
4653 }
4654 
4655 /*
4656  * Stack window-function implementation steps atop the given Path, and
4657  * add the result to window_rel.
4658  *
4659  * window_rel: upperrel to contain result
4660  * path: input Path to use (must return input_target)
4661  * input_target: result of make_window_input_target
4662  * output_target: what the topmost WindowAggPath should return
4663  * tlist: query's target list (needed to look up pathkeys)
4664  * wflists: result of find_window_functions
4665  * activeWindows: result of select_active_windows
4666  */
4667 static void
 4668 create_one_window_path(PlannerInfo *root,
 4669  RelOptInfo *window_rel,
4670  Path *path,
4671  PathTarget *input_target,
4672  PathTarget *output_target,
4673  List *tlist,
4674  WindowFuncLists *wflists,
4675  List *activeWindows)
4676 {
4677  PathTarget *window_target;
4678  ListCell *l;
4679 
4680  /*
4681  * Since each window clause could require a different sort order, we stack
4682  * up a WindowAgg node for each clause, with sort steps between them as
4683  * needed. (We assume that select_active_windows chose a good order for
4684  * executing the clauses in.)
4685  *
4686  * input_target should contain all Vars and Aggs needed for the result.
4687  * (In some cases we wouldn't need to propagate all of these all the way
4688  * to the top, since they might only be needed as inputs to WindowFuncs.
4689  * It's probably not worth trying to optimize that though.) It must also
4690  * contain all window partitioning and sorting expressions, to ensure
4691  * they're computed only once at the bottom of the stack (that's critical
4692  * for volatile functions). As we climb up the stack, we'll add outputs
4693  * for the WindowFuncs computed at each level.
4694  */
4695  window_target = input_target;
4696 
4697  foreach(l, activeWindows)
4698  {
 4699  WindowClause *wc = lfirst_node(WindowClause, l);
 4700  List *window_pathkeys;
4701 
4702  window_pathkeys = make_pathkeys_for_window(root,
4703  wc,
4704  tlist);
4705 
4706  /* Sort if necessary */
4707  if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4708  {
4709  path = (Path *) create_sort_path(root, window_rel,
4710  path,
4711  window_pathkeys,
4712  -1.0);
4713  }
4714 
4715  if (lnext(l))
4716  {
4717  /*
4718  * Add the current WindowFuncs to the output target for this
4719  * intermediate WindowAggPath. We must copy window_target to
4720  * avoid changing the previous path's target.
4721  *
4722  * Note: a WindowFunc adds nothing to the target's eval costs; but
4723  * we do need to account for the increase in tlist width.
4724  */
4725  ListCell *lc2;
4726 
4727  window_target = copy_pathtarget(window_target);
4728  foreach(lc2, wflists->windowFuncs[wc->winref])
4729  {
4730  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4731 
4732  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4733  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4734  }
4735  }
4736  else
4737  {
4738  /* Install the goal target in the topmost WindowAgg */
4739  window_target = output_target;
4740  }
4741 
4742  path = (Path *)
4743  create_windowagg_path(root, window_rel, path, window_target,
4744  wflists->windowFuncs[wc->winref],
4745  wc,
4746  window_pathkeys);
4747  }
4748 
4749  add_path(window_rel, path);
4750 }
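/*
 * For example (hypothetical query, to illustrate the stacking above):
 *
 *		SELECT sum(x) OVER (PARTITION BY a),
 *			   row_number() OVER (ORDER BY b)
 *		FROM tab;
 *
 * assuming select_active_windows put the PARTITION BY a window first, the
 * generated path is roughly
 *		Sort(a) -> WindowAgg(sum) -> Sort(b) -> WindowAgg(row_number)
 * with input_target computed once below the bottom sort, and the sum()
 * WindowFunc added to the intermediate target before the second sort.
 */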
4751 
4752 /*
4753  * create_distinct_paths
4754  *
4755  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4756  *
4757  * input_rel: contains the source-data Paths
4758  *
4759  * Note: input paths should already compute the desired pathtarget, since
4760  * Sort/Unique won't project anything.
4761  */
4762 static RelOptInfo *
 4763 create_distinct_paths(PlannerInfo *root,
 4764  RelOptInfo *input_rel)
4765 {
4766  Query *parse = root->parse;
4767  Path *cheapest_input_path = input_rel->cheapest_total_path;
4768  RelOptInfo *distinct_rel;
4769  double numDistinctRows;
4770  bool allow_hash;
4771  Path *path;
4772  ListCell *lc;
4773 
4774  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4775  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4776 
4777  /*
4778  * We don't compute anything at this level, so distinct_rel will be
4779  * parallel-safe if the input rel is parallel-safe. In particular, if
4780  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4781  * output those expressions, and will not be parallel-safe unless those
4782  * expressions are parallel-safe.
4783  */
4784  distinct_rel->consider_parallel = input_rel->consider_parallel;
4785 
4786  /*
4787  * If the input rel belongs to a single FDW, so does the distinct_rel.
4788  */
4789  distinct_rel->serverid = input_rel->serverid;
4790  distinct_rel->userid = input_rel->userid;
4791  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4792  distinct_rel->fdwroutine = input_rel->fdwroutine;
4793 
4794  /* Estimate number of distinct rows there will be */
4795  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4796  root->hasHavingQual)
4797  {
4798  /*
4799  * If there was grouping or aggregation, use the number of input rows
4800  * as the estimated number of DISTINCT rows (ie, assume the input is
4801  * already mostly unique).
4802  */
4803  numDistinctRows = cheapest_input_path->rows;
4804  }
4805  else
4806  {
4807  /*
4808  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4809  */
4810  List *distinctExprs;
4811 
4812  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4813  parse->targetList);
4814  numDistinctRows = estimate_num_groups(root, distinctExprs,
4815  cheapest_input_path->rows,
4816  NULL);
4817  }
4818 
4819  /*
4820  * Consider sort-based implementations of DISTINCT, if possible.
4821  */
 4822  if (grouping_is_sortable(parse->distinctClause))
 4823  {
4824  /*
4825  * First, if we have any adequately-presorted paths, just stick a
4826  * Unique node on those. Then consider doing an explicit sort of the
4827  * cheapest input path and Unique'ing that.
4828  *
4829  * When we have DISTINCT ON, we must sort by the more rigorous of
4830  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4831  * Also, if we do have to do an explicit sort, we might as well use
4832  * the more rigorous ordering to avoid a second sort later. (Note
4833  * that the parser will have ensured that one clause is a prefix of
4834  * the other.)
4835  */
4836  List *needed_pathkeys;
4837 
4838  if (parse->hasDistinctOn &&
 4839  list_length(root->distinct_pathkeys) <
 4840  list_length(root->sort_pathkeys))
4841  needed_pathkeys = root->sort_pathkeys;
4842  else
4843  needed_pathkeys = root->distinct_pathkeys;
4844 
4845  foreach(lc, input_rel->pathlist)
4846  {
4847  Path *path = (Path *) lfirst(lc);
4848 
4849  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4850  {
4851  add_path(distinct_rel, (Path *)
4852  create_upper_unique_path(root, distinct_rel,
4853  path,
 4854  list_length(root->distinct_pathkeys),
 4855  numDistinctRows));
4856  }
4857  }
4858 
4859  /* For explicit-sort case, always use the more rigorous clause */
4860  if (list_length(root->distinct_pathkeys) <
4861  list_length(root->sort_pathkeys))
4862  {
4863  needed_pathkeys = root->sort_pathkeys;
4864  /* Assert checks that parser didn't mess up... */
 4865  Assert(pathkeys_contained_in(root->distinct_pathkeys,
 4866  needed_pathkeys));
4867  }
4868  else
4869  needed_pathkeys = root->distinct_pathkeys;
4870 
4871  path = cheapest_input_path;
4872  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4873  path = (Path *) create_sort_path(root, distinct_rel,
4874  path,
4875  needed_pathkeys,
4876  -1.0);
4877 
4878  add_path(distinct_rel, (Path *)
4879  create_upper_unique_path(root, distinct_rel,
4880  path,
 4881  list_length(root->distinct_pathkeys),
 4882  numDistinctRows));
4883  }
4884 
4885  /*
4886  * Consider hash-based implementations of DISTINCT, if possible.
4887  *
4888  * If we were not able to make any other types of path, we *must* hash or
4889  * die trying. If we do have other choices, there are several things that
4890  * should prevent selection of hashing: if the query uses DISTINCT ON
4891  * (because it won't really have the expected behavior if we hash), or if
4892  * enable_hashagg is off, or if it looks like the hashtable will exceed
4893  * work_mem.
4894  *
4895  * Note: grouping_is_hashable() is much more expensive to check than the
4896  * other gating conditions, so we want to do it last.
4897  */
4898  if (distinct_rel->pathlist == NIL)
4899  allow_hash = true; /* we have no alternatives */
4900  else if (parse->hasDistinctOn || !enable_hashagg)
4901  allow_hash = false; /* policy-based decision not to hash */
4902  else
4903  {
4904  Size hashentrysize;
4905 
4906  /* Estimate per-hash-entry space at tuple width... */
4907  hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
 4908  MAXALIGN(SizeofMinimalTupleHeader);
 4909  /* plus the per-hash-entry overhead */
4910  hashentrysize += hash_agg_entry_size(0);
4911 
4912  /* Allow hashing only if hashtable is predicted to fit in work_mem */
4913  allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4914  }
4915 
4916  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4917  {
4918  /* Generate hashed aggregate path --- no sort needed */
4919  add_path(distinct_rel, (Path *)
4920  create_agg_path(root,
4921  distinct_rel,
4922  cheapest_input_path,
4923  cheapest_input_path->pathtarget,
4924  AGG_HASHED,
 4925  AGGSPLIT_SIMPLE,
 4926  parse->distinctClause,
4927  NIL,
4928  NULL,
4929  numDistinctRows));
4930  }
4931 
4932  /* Give a helpful error if we failed to find any implementation */
4933  if (distinct_rel->pathlist == NIL)
4934  ereport(ERROR,
4935  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4936  errmsg("could not implement DISTINCT"),
4937  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4938 
4939  /*
4940  * If there is an FDW that's responsible for all baserels of the query,
4941  * let it consider adding ForeignPaths.
4942  */
4943  if (distinct_rel->fdwroutine &&
4944  distinct_rel->fdwroutine->GetForeignUpperPaths)
4945  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4946  input_rel, distinct_rel);
4947 
4948  /* Let extensions possibly add some more paths */
 4949  if (create_upper_paths_hook)
 4950  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4951  input_rel, distinct_rel);
4952 
4953  /* Now choose the best path(s) */
4954  set_cheapest(distinct_rel);
4955 
4956  return distinct_rel;
4957 }
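/*
 * As a concrete illustration of the choices above (hypothetical query): for
 * "SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b", the sort-based branch
 * must sort by the full ORDER BY pathkeys (a, b) rather than just (a), so
 * that the row retained for each value of a is the one with the smallest b;
 * and because hasDistinctOn is set, the hash-based branch is only considered
 * if no sorted path could be built at all.
 */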
4958 
4959 /*
4960  * create_ordered_paths
4961  *
4962  * Build a new upperrel containing Paths for ORDER BY evaluation.
4963  *
4964  * All paths in the result must satisfy the ORDER BY ordering.
4965  * The only new path we need consider is an explicit sort on the
4966  * cheapest-total existing path.
4967  *
4968  * input_rel: contains the source-data Paths
4969  * target: the output tlist the result Paths must emit
4970  * limit_tuples: estimated bound on the number of output tuples,
4971  * or -1 if no LIMIT or couldn't estimate
4972  */
4973 static RelOptInfo *
 4974 create_ordered_paths(PlannerInfo *root,
 4975  RelOptInfo *input_rel,
4976  PathTarget *target,
4977  double limit_tuples)
4978 {
4979  Path *cheapest_input_path = input_rel->cheapest_total_path;
4980  RelOptInfo *ordered_rel;
4981  ListCell *lc;
4982 
4983  /* For now, do all work in the (ORDERED, NULL) upperrel */
4984  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4985 
4986  /*
4987  * If the input relation is not parallel-safe, then the ordered relation
4988  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4989  * target list is parallel-safe.
4990  */
4991  if (input_rel->consider_parallel &&
4992  is_parallel_safe(root, (Node *) target->exprs))
4993  ordered_rel->consider_parallel = true;
4994 
4995  /*
4996  * If the input rel belongs to a single FDW, so does the ordered_rel.
4997  */
4998  ordered_rel->serverid = input_rel->serverid;
4999  ordered_rel->userid = input_rel->userid;
5000  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5001  ordered_rel->fdwroutine = input_rel->fdwroutine;
5002 
5003  foreach(lc, input_rel->pathlist)
5004  {
5005  Path *path = (Path *) lfirst(lc);
5006  bool is_sorted;
5007 
5008  is_sorted = pathkeys_contained_in(root->sort_pathkeys,
5009  path->pathkeys);
5010  if (path == cheapest_input_path || is_sorted)
5011  {
5012  if (!is_sorted)
5013  {
5014  /* An explicit sort here can take advantage of LIMIT */
5015  path = (Path *) create_sort_path(root,
5016  ordered_rel,
5017  path,
5018  root->sort_pathkeys,
5019  limit_tuples);
5020  }
5021 
5022  /* Add projection step if needed */
5023  if (path->pathtarget != target)
5024  path = apply_projection_to_path(root, ordered_rel,
5025  path, target);
5026 
5027  add_path(ordered_rel, path);
5028  }
5029  }
5030 
5031  /*
5032  * generate_gather_paths() will have already generated a simple Gather
5033  * path for the best parallel path, if any, and the loop above will have
5034  * considered sorting it. Similarly, generate_gather_paths() will also
5035  * have generated order-preserving Gather Merge plans which can be used
5036  * without sorting if they happen to match the sort_pathkeys, and the loop
5037  * above will have handled those as well. However, there's one more
5038  * possibility: it may make sense to sort the cheapest partial path
5039  * according to the required output order and then use Gather Merge.
5040  */
5041  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5042  input_rel->partial_pathlist != NIL)
5043  {
5044  Path *cheapest_partial_path;
5045 
5046  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5047 
5048  /*
5049  * If cheapest partial path doesn't need a sort, this is redundant
5050  * with what's already been tried.
5051  */
 5052  if (!pathkeys_contained_in(root->sort_pathkeys,
 5053  cheapest_partial_path->pathkeys))
5054  {
5055  Path *path;
5056  double total_groups;
5057 
5058  path = (Path *) create_sort_path(root,
5059  ordered_rel,
5060  cheapest_partial_path,
5061  root->sort_pathkeys,
5062  -1.0);
5063 
5064  total_groups = cheapest_partial_path->rows *
5065  cheapest_partial_path->parallel_workers;
5066  path = (Path *)
5067  create_gather_merge_path(root, ordered_rel,
5068  path,
5069  path->pathtarget,
5070  root->sort_pathkeys, NULL,
5071  &total_groups);
5072 
5073  /* Add projection step if needed */
5074  if (path->pathtarget != target)
5075  path = apply_projection_to_path(root, ordered_rel,
5076  path, target);
5077 
5078  add_path(ordered_rel, path);
5079  }
5080  }
5081 
5082  /*
5083  * If there is an FDW that's responsible for all baserels of the query,
5084  * let it consider adding ForeignPaths.
5085  */
5086  if (ordered_rel->fdwroutine &&
5087  ordered_rel->fdwroutine->GetForeignUpperPaths)
5088  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5089  input_rel, ordered_rel);
5090 
5091  /* Let extensions possibly add some more paths */
 5092  if (create_upper_paths_hook)
 5093  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5094  input_rel, ordered_rel);
5095 
5096  /*
5097  * No need to bother with set_cheapest here; grouping_planner does not
5098  * need us to do it.
5099  */
5100  Assert(ordered_rel->pathlist != NIL);
5101 
5102  return ordered_rel;
5103 }
5104 
5105 
5106 /*
5107  * make_group_input_target
5108  * Generate appropriate PathTarget for initial input to grouping nodes.
5109  *
5110  * If there is grouping or aggregation, the scan/join subplan cannot emit
5111  * the query's final targetlist; for example, it certainly can't emit any
5112  * aggregate function calls. This routine generates the correct target
5113  * for the scan/join subplan.
5114  *
5115  * The query target list passed from the parser already contains entries
5116  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5117  * for variables used only in HAVING clauses; so we need to add those
5118  * variables to the subplan target list. Also, we flatten all expressions
5119  * except GROUP BY items into their component variables; other expressions
5120  * will be computed by the upper plan nodes rather than by the subplan.
5121  * For example, given a query like
5122  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5123  * we want to pass this targetlist to the subplan:
5124  * a+b,c,d
5125  * where the a+b target will be used by the Sort/Group steps, and the
5126  * other targets will be used for computing the final results.
5127  *
5128  * 'final_target' is the query's final target list (in PathTarget form)
5129  *
5130  * The result is the PathTarget to be computed by the Paths returned from
5131  * query_planner().
5132  */
5133 static PathTarget *
 5134 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 5135 {
5136  Query *parse = root->parse;
5137  PathTarget *input_target;
5138  List *non_group_cols;
5139  List *non_group_vars;
5140  int i;
5141  ListCell *lc;
5142 
5143  /*
5144  * We must build a target containing all grouping columns, plus any other
5145  * Vars mentioned in the query's targetlist and HAVING qual.
5146  */
5147  input_target = create_empty_pathtarget();
5148  non_group_cols = NIL;
5149 
5150  i = 0;
5151  foreach(lc, final_target->exprs)
5152  {
5153  Expr *expr = (Expr *) lfirst(lc);
5154  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5155 
5156  if (sgref && parse->groupClause &&
5157  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5158  {
5159  /*
5160  * It's a grouping column, so add it to the input target as-is.
5161  */
5162  add_column_to_pathtarget(input_target, expr, sgref);
5163  }
5164  else
5165  {
5166  /*
5167  * Non-grouping column, so just remember the expression for later
5168  * call to pull_var_clause.
5169  */
5170  non_group_cols = lappend(non_group_cols, expr);
5171  }
5172 
5173  i++;
5174  }
5175 
5176  /*
5177  * If there's a HAVING clause, we'll need the Vars it uses, too.
5178  */
5179  if (parse->havingQual)
5180  non_group_cols = lappend(non_group_cols, parse->havingQual);
5181 
5182  /*
5183  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5184  * add them to the input target if not already present. (A Var used
5185  * directly as a GROUP BY item will be present already.) Note this
5186  * includes Vars used in resjunk items, so we are covering the needs of
5187  * ORDER BY and window specifications. Vars used within Aggrefs and
5188  * WindowFuncs will be pulled out here, too.
5189  */
5190  non_group_vars = pull_var_clause((Node *) non_group_cols,
5191  PVC_RECURSE_AGGREGATES |
5192  PVC_RECURSE_WINDOWFUNCS |
5193  PVC_INCLUDE_PLACEHOLDERS);
5194  add_new_columns_to_pathtarget(input_target, non_group_vars);
5195 
5196  /* clean up cruft */
5197  list_free(non_group_vars);
5198  list_free(non_group_cols);
5199 
5200  /* XXX this causes some redundant cost calculation ... */
5201  return set_pathtarget_cost_width(root, input_target);
5202 }
5203 
5204 /*
5205  * make_partial_grouping_target
5206  * Generate appropriate PathTarget for output of partial aggregate
5207  * (or partial grouping, if there are no aggregates) nodes.
5208  *
5209  * A partial aggregation node needs to emit all the same aggregates that
5210  * a regular aggregation node would, plus any aggregates used in HAVING;
5211  * except that the Aggref nodes should be marked as partial aggregates.
5212  *
5213  * In addition, we'd better emit any Vars and PlaceholderVars that are
5214  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5215  * these would be Vars that are grouped by or used in grouping expressions.)
5216  *
5217  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5218  * We get the HAVING clause out of *root.
5219  */
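/*
 * Illustrative sketch (hypothetical names): for
 *		SELECT a, SUM(b) FROM tab GROUP BY a
 * the partial-aggregation target would be roughly
 *		a, PARTIAL SUM(b)
 * i.e. the grouping column plus an Aggref in partial mode, whose transition
 * state is later combined and finalized by the upper aggregation step.
 */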
5220 static PathTarget *
5221 make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target)
5222 {
5223  Query *parse = root->parse;
5224  PathTarget *partial_target;
5225  List *non_group_cols;
5226  List *non_group_exprs;
5227  int i;
5228  ListCell *lc;
5229 
5230  partial_target = create_empty_pathtarget();
5231  non_group_cols = NIL;
5232 
5233  i = 0;
5234  foreach(lc, grouping_target->exprs)
5235  {
5236  Expr *expr = (Expr *) lfirst(lc);
5237  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5238 
5239  if (sgref && parse->groupClause &&
5240  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5241  {
5242  /*
5243  * It's a grouping column, so add it to the partial_target as-is.
5244  * (This allows the upper agg step to repeat the grouping calcs.)
5245  */
5246  add_column_to_pathtarget(partial_target, expr, sgref);
5247  }
5248  else
5249  {
5250  /*
5251  * Non-grouping column, so just remember the expression for later
5252  * call to pull_var_clause.
5253  */
5254  non_group_cols = lappend(non_group_cols, expr);
5255  }
5256 
5257  i++;
5258  }
5259 
5260  /*
5261  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5262  */
5263  if (parse->havingQual)
5264  non_group_cols = lappend(non_group_cols, parse->havingQual);
5265 
5266  /*
5267  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5268  * non-group cols (plus HAVING), and add them to the partial_target if not
5269  * already present. (An expression used directly as a GROUP BY item will
5270  * be present already.) Note this includes Vars used in resjunk items, so
5271  * we are covering the needs of ORDER BY and window specifications.
5272  */
5273  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5274  PVC_INCLUDE_AGGREGATES |
5275  PVC_RECURSE_WINDOWFUNCS |
5276  PVC_INCLUDE_PLACEHOLDERS);
5277 
5278  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5279 
5280  /*
5281  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5282  * are at the top level of the target list, so we can just scan the list
5283  * rather than recursing through the expression trees.
5284  */
5285  foreach(lc, partial_target->exprs)
5286  {
5287  Aggref *aggref = (Aggref *) lfirst(lc);
5288 
5289  if (IsA(aggref, Aggref))
5290  {
5291  Aggref *newaggref;
5292 
5293  /*
5294  * We shouldn't need to copy the substructure of the Aggref node,
5295  * but flat-copy the node itself to avoid damaging other trees.
5296  */
5297  newaggref = makeNode(Aggref);
5298  memcpy(newaggref, aggref, sizeof(Aggref));
5299 
5300  /* For now, assume serialization is required */
5301  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5302 
5303  lfirst(lc) = newaggref;
5304  }
5305  }
5306 
5307  /* clean up cruft */
5308  list_free(non_group_exprs);
5309  list_free(non_group_cols);
5310 
5311  /* XXX this causes some redundant cost calculation ... */
5312  return set_pathtarget_cost_width(root, partial_target);
5313 }
5314 
5315 /*
5316  * mark_partial_aggref
5317  * Adjust an Aggref to make it represent a partial-aggregation step.
5318  *
5319  * The Aggref node is modified in-place; caller must do any copying required.
5320  */
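/*
 * For instance, the partial step of a parallel aggregate is marked
 * AGGSPLIT_INITIAL_SERIAL (skip the final function, serialize the transition
 * state), while the matching finalize step runs with AGGSPLIT_FINAL_DESERIAL.
 */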
5321 void
5322 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5323 {
5324  /* aggtranstype should be computed by this point */
5325  Assert(OidIsValid(agg->aggtranstype));
5326  /* ... but aggsplit should still be as the parser left it */
5327  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5328 
5329  /* Mark the Aggref with the intended partial-aggregation mode */
5330  agg->aggsplit = aggsplit;
5331 
5332  /*
5333  * Adjust result type if needed. Normally, a partial aggregate returns
5334  * the aggregate's transition type; but if that's INTERNAL and we're
5335  * serializing, it returns BYTEA instead.
5336  */
5337  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5338  {
5339  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5340  agg->aggtype = BYTEAOID;
5341  else
5342  agg->aggtype = agg->aggtranstype;
5343  }
5344 }
5345 
5346 /*
5347  * postprocess_setop_tlist
5348  * Fix up targetlist returned by plan_set_operations().
5349  *
5350  * We need to transpose sort key info from the orig_tlist into new_tlist.
5351  * NOTE: this would not be good enough if we supported resjunk sort keys
5352  * for results of set operations --- then, we'd need to project a whole
5353  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5354  * find any resjunk columns in orig_tlist.
5355  */
5356 static List *
5357 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5358 {
5359  ListCell *l;
5360  ListCell *orig_tlist_item = list_head(orig_tlist);
5361 
5362  foreach(l, new_tlist)
5363  {
5364  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5365  TargetEntry *orig_tle;
5366 
5367  /* ignore resjunk columns in setop result */
5368  if (new_tle->resjunk)
5369  continue;
5370 
5371  Assert(orig_tlist_item != NULL);
5372  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5373  orig_tlist_item = lnext(orig_tlist_item);
5374  if (orig_tle->resjunk) /* should not happen */
5375  elog(ERROR, "resjunk output columns are not implemented");
5376  Assert(new_tle->resno == orig_tle->resno);
5377  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5378  }
5379  if (orig_tlist_item != NULL)
5380  elog(ERROR, "resjunk output columns are not implemented");
5381  return new_tlist;
5382 }
5383 
5384 /*
5385  * select_active_windows
5386  * Create a list of the "active" window clauses (ie, those referenced
5387  * by non-deleted WindowFuncs) in the order they are to be executed.
5388  */
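/*
 * Illustrative sketch (hypothetical names): in
 *		SELECT SUM(x) OVER (PARTITION BY g ORDER BY t),
 *		       AVG(x) OVER (PARTITION BY g ORDER BY t ROWS 2 PRECEDING)
 *		FROM tab
 * both windows share the same partitioning/ordering, so they end up adjacent
 * in the result list and a single sort on (g, t) can feed both WindowAggs.
 */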
5389 static List *
5390 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5391 {
5392  List *result;
5393  List *actives;
5394  ListCell *lc;
5395 
5396  /* First, make a list of the active windows */
5397  actives = NIL;
5398  foreach(lc, root->parse->windowClause)
5399  {
5400  WindowClause *wc = lfirst_node(WindowClause, lc);
5401 
5402  /* It's only active if wflists shows some related WindowFuncs */
5403  Assert(wc->winref <= wflists->maxWinRef);
5404  if (wflists->windowFuncs[wc->winref] != NIL)
5405  actives = lappend(actives, wc);
5406  }
5407 
5408  /*
5409  * Now, ensure that windows with identical partitioning/ordering clauses
5410  * are adjacent in the list. This is required by the SQL standard, which
5411  * says that only one sort is to be used for such windows, even if they
5412  * are otherwise distinct (eg, different names or framing clauses).
5413  *
5414  * There is room to be much smarter here, for example detecting whether
5415  * one window's sort keys are a prefix of another's (so that sorting for
5416  * the latter would do for the former), or putting windows first that
5417  * match a sort order available for the underlying query. For the moment
5418  * we are content with meeting the spec.
5419  */
5420  result = NIL;
5421  while (actives != NIL)
5422  {
5423  WindowClause *wc = linitial_node(WindowClause, actives);
5424  ListCell *prev;
5425  ListCell *next;
5426 
5427  /* Move wc from actives to result */
5428  actives = list_delete_first(actives);
5429  result = lappend(result, wc);
5430 
5431  /* Now move any matching windows from actives to result */
5432  prev = NULL;
5433  for (lc = list_head(actives); lc; lc = next)
5434  {
5435  WindowClause *wc2 = lfirst_node(WindowClause, lc);
5436 
5437  next = lnext(lc);
5438  /* framing options are NOT to be compared here! */
5439  if (equal(wc->partitionClause, wc2->partitionClause) &&
5440  equal(wc->orderClause, wc2->orderClause))
5441  {
5442  actives = list_delete_cell(actives, lc, prev);
5443  result = lappend(result, wc2);
5444  }
5445  else
5446  prev = lc;
5447  }
5448  }
5449 
5450  return result;
5451 }
5452 
5453 /*
5454  * make_window_input_target
5455  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5456  *
5457  * When the query has window functions, this function computes the desired
5458  * target to be computed by the node just below the first WindowAgg.
5459  * This tlist must contain all values needed to evaluate the window functions,
5460  * compute the final target list, and perform any required final sort step.
5461  * If multiple WindowAggs are needed, each intermediate one adds its window
5462  * function results onto this base tlist; only the topmost WindowAgg computes
5463  * the actual desired target list.
5464  *
5465  * This function is much like make_group_input_target, though not quite enough
5466  * like it to share code. As in that function, we flatten most expressions
5467  * into their component variables. But we do not want to flatten window
5468  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5469  * evaluations of them, which would be bad (possibly even resulting in
5470  * inconsistent answers, if they contain volatile functions).
5471  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5472  * make_group_input_target, because we may no longer have access to the
5473  * individual Vars in them.
5474  *
5475  * Another key difference from make_group_input_target is that we don't
5476  * flatten Aggref expressions, since those are to be computed below the
5477  * window functions and just referenced like Vars above that.
5478  *
5479  * 'final_target' is the query's final target list (in PathTarget form)
5480  * 'activeWindows' is the list of active windows previously identified by
5481  * select_active_windows.
5482  *
5483  * The result is the PathTarget to be computed by the plan node immediately
5484  * below the first WindowAgg node.
5485  */
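/*
 * Illustrative sketch (hypothetical names): for
 *		SELECT a+1, f(b), RANK() OVER (PARTITION BY a+1 ORDER BY f(b)), c+d
 *		FROM tab
 * the WindowAgg input target would keep a+1 and f(b) as whole expressions
 * (they are window PARTITION/ORDER BY keys), while c+d is flattened to c and
 * d and computed above the WindowAgg.
 */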
5486 static PathTarget *
5487 make_window_input_target(PlannerInfo *root,
5488  PathTarget *final_target,
5489  List *activeWindows)
5490 {
5491  Query *parse = root->parse;
5492  PathTarget *input_target;
5493  Bitmapset *sgrefs;
5494  List *flattenable_cols;
5495  List *flattenable_vars;
5496  int i;
5497  ListCell *lc;
5498 
5499  Assert(parse->hasWindowFuncs);
5500 
5501  /*
5502  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5503  * into a bitmapset for convenient reference below.
5504  */
5505  sgrefs = NULL;
5506  foreach(lc, activeWindows)
5507  {
5508  WindowClause *wc = lfirst_node(WindowClause, lc);
5509  ListCell *lc2;
5510 
5511  foreach(lc2, wc->partitionClause)
5512  {
5513  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5514 
5515  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5516  }
5517  foreach(lc2, wc->orderClause)
5518  {
5519  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5520 
5521  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5522  }
5523  }
5524 
5525  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5526  foreach(lc, parse->groupClause)
5527  {
5528  SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
5529 
5530  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5531  }
5532 
5533  /*
5534  * Construct a target containing all the non-flattenable targetlist items,
5535  * and save aside the others for a moment.
5536  */
5537  input_target = create_empty_pathtarget();
5538  flattenable_cols = NIL;
5539 
5540  i = 0;
5541  foreach(lc, final_target->exprs)
5542  {
5543  Expr *expr = (Expr *) lfirst(lc);
5544  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5545 
5546  /*
5547  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5548  * that such items can't contain window functions, so it's okay to
5549  * compute them below the WindowAgg nodes.)
5550  */
5551  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5552  {
5553  /*
5554  * Don't want to deconstruct this value, so add it to the input
5555  * target as-is.
5556  */
5557  add_column_to_pathtarget(input_target, expr, sgref);
5558  }
5559  else
5560  {
5561  /*
5562  * Column is to be flattened, so just remember the expression for
5563  * later call to pull_var_clause.
5564  */
5565  flattenable_cols = lappend(flattenable_cols, expr);
5566  }
5567 
5568  i++;
5569  }
5570 
5571  /*
5572  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5573  * add them to the input target if not already present. (Some might be
5574  * there already because they're used directly as window/group clauses.)
5575  *
5576  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5577  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5578  * at higher levels. On the other hand, we should recurse into
5579  * WindowFuncs to make sure their input expressions are available.
5580  */
5581  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5582  PVC_INCLUDE_AGGREGATES |
5583  PVC_RECURSE_WINDOWFUNCS |
5584  PVC_INCLUDE_PLACEHOLDERS);
5585  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5586 
5587  /* clean up cruft */
5588  list_free(flattenable_vars);
5589  list_free(flattenable_cols);
5590 
5591  /* XXX this causes some redundant cost calculation ... */
5592  return set_pathtarget_cost_width(root, input_target);
5593 }
5594 
5595 /*
5596  * make_pathkeys_for_window
5597  * Create a pathkeys list describing the required input ordering
5598  * for the given WindowClause.
5599  *
5600  * The required ordering is first the PARTITION keys, then the ORDER keys.
5601  * In the future we might try to implement windowing using hashing, in which
5602  * case the ordering could be relaxed, but for now we always sort.
5603  *
5604  * Caution: if you change this, see createplan.c's get_column_info_for_window!
5605  */
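/*
 * For example (hypothetical column names), OVER (PARTITION BY a ORDER BY b)
 * requires input sorted by (a, b), i.e. the pathkeys built from the
 * concatenated partition and order clauses.
 */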
5606 static List *
5607 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
5608  List *tlist)
5609 {
5610  List *window_pathkeys;
5611  List *window_sortclauses;
5612 
5613  /* Throw error if can't sort */
5614  if (!grouping_is_sortable(wc->partitionClause))
5615  ereport(ERROR,
5616  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5617  errmsg("could not implement window PARTITION BY"),
5618  errdetail("Window partitioning columns must be of sortable datatypes.")));
5619  if (!grouping_is_sortable(wc->orderClause))
5620  ereport(ERROR,
5621  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5622  errmsg("could not implement window ORDER BY"),
5623  errdetail("Window ordering columns must be of sortable datatypes.")));
5624 
5625  /* Okay, make the combined pathkeys */
5626  window_sortclauses = list_concat(list_copy(wc->partitionClause),
5627  list_copy(wc->orderClause));
5628  window_pathkeys = make_pathkeys_for_sortclauses(root,
5629  window_sortclauses,
5630  tlist);
5631  list_free(window_sortclauses);
5632  return window_pathkeys;
5633 }
5634 
5635 /*
5636  * make_sort_input_target
5637  * Generate appropriate PathTarget for initial input to Sort step.
5638  *
5639  * If the query has ORDER BY, this function chooses the target to be computed
5640  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5641  * project) steps. This might or might not be identical to the query's final
5642  * output target.
5643  *
5644  * The main argument for keeping the sort-input tlist the same as the final
5645  * is that we avoid a separate projection node (which will be needed if
5646  * they're different, because Sort can't project). However, there are also
5647  * advantages to postponing tlist evaluation till after the Sort: it ensures
5648  * a consistent order of evaluation for any volatile functions in the tlist,
5649  * and if there's also a LIMIT, we can stop the query without ever computing
5650  * tlist functions for later rows, which is beneficial for both volatile and
5651  * expensive functions.
5652  *
5653  * Our current policy is to postpone volatile expressions till after the sort
5654  * unconditionally (assuming that that's possible, ie they are in plain tlist
5655  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5656  * postpone set-returning expressions, because running them beforehand would
5657  * bloat the sort dataset, and because it might cause unexpected output order
5658  * if the sort isn't stable. However there's a constraint on that: all SRFs
5659  * in the tlist should be evaluated at the same plan step, so that they can
5660  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5661  * mustn't postpone any SRFs. (Note that in principle that policy should
5662  * probably get applied to the group/window input targetlists too, but we
5663  * have not done that historically.) Lastly, expensive expressions are
5664  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5665  * partial evaluation of the query is possible (if neither is true, we expect
5666  * to have to evaluate the expressions for every row anyway), or if there are
5667  * any volatile or set-returning expressions (since once we've put in a
5668  * projection at all, it won't cost any more to postpone more stuff).
5669  *
5670  * Another issue that could potentially be considered here is that
5671  * evaluating tlist expressions could result in data that's either wider
5672  * or narrower than the input Vars, thus changing the volume of data that
5673  * has to go through the Sort. However, we usually have only a very bad
5674  * idea of the output width of any expression more complex than a Var,
5675  * so for now it seems too risky to try to optimize on that basis.
5676  *
5677  * Note that if we do produce a modified sort-input target, and then the
5678  * query ends up not using an explicit Sort, no particular harm is done:
5679  * we'll initially use the modified target for the preceding path nodes,
5680  * but then change them to the final target with apply_projection_to_path.
5681  * Moreover, in such a case the guarantees about evaluation order of
5682  * volatile functions still hold, since the rows are sorted already.
5683  *
5684  * This function has some things in common with make_group_input_target and
5685  * make_window_input_target, though the detailed rules for what to do are
5686  * different. We never flatten/postpone any grouping or ordering columns;
5687  * those are needed before the sort. If we do flatten a particular
5688  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5689  * computed earlier.
5690  *
5691  * 'final_target' is the query's final target list (in PathTarget form)
5692  * 'have_postponed_srfs' is an output argument, see below
5693  *
5694  * The result is the PathTarget to be computed by the plan node immediately
5695  * below the Sort step (and the Distinct step, if any). This will be
5696  * exactly final_target if we decide a projection step wouldn't be helpful.
5697  *
5698  * In addition, *have_postponed_srfs is set to true if we choose to postpone
5699  * any set-returning functions to after the Sort.
5700  */
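/*
 * Illustrative sketch (hypothetical names; slow_func assumed expensive): in
 *		SELECT x, random(), slow_func(y) FROM tab ORDER BY x LIMIT 10
 * random() is postponed because it is volatile and slow_func(y) because the
 * LIMIT means few rows will need it; the sort-input target is then roughly
 * (x, y), with the postponed expressions evaluated above the Sort.
 */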
5701 static PathTarget *
5702 make_sort_input_target(PlannerInfo *root,
5703  PathTarget *final_target,
5704  bool *have_postponed_srfs)
5705 {
5706  Query *parse = root->parse;
5707  PathTarget *input_target;
5708  int ncols;
5709  bool *col_is_srf;
5710  bool *postpone_col;
5711  bool have_srf;
5712  bool have_volatile;
5713  bool have_expensive;
5714  bool have_srf_sortcols;
5715  bool postpone_srfs;
5716  List *postponable_cols;
5717  List *postponable_vars;
5718  int i;
5719  ListCell *lc;
5720 
5721  /* Shouldn't get here unless query has ORDER BY */
5722  Assert(parse->sortClause);
5723 
5724  *have_postponed_srfs = false; /* default result */
5725 
5726  /* Inspect tlist and collect per-column information */
5727  ncols = list_length(final_target->exprs);
5728  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5729  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5730  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5731 
5732  i = 0;
5733  foreach(lc, final_target->exprs)
5734  {
5735  Expr *expr = (Expr *) lfirst(lc);
5736 
5737  /*
5738  * If the column has a sortgroupref, assume it has to be evaluated
5739  * before sorting. Generally such columns would be ORDER BY, GROUP
5740  * BY, etc targets. One exception is columns that were removed from
5741  * GROUP BY by remove_useless_groupby_columns() ... but those would
5742  * only be Vars anyway. There don't seem to be any cases where it
5743  * would be worth the trouble to double-check.
5744  */
5745  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5746  {
5747  /*
5748  * Check for SRF or volatile functions. Check the SRF case first
5749  * because we must know whether we have any postponed SRFs.
5750  */
5751  if (parse->hasTargetSRFs &&
5752  expression_returns_set((Node *) expr))
5753  {
5754  /* We'll decide below whether these are postponable */
5755  col_is_srf[i] = true;
5756  have_srf = true;
5757  }
5758  else if (contain_volatile_functions((Node *) expr))
5759  {
5760  /* Unconditionally postpone */
5761  postpone_col[i] = true;
5762  have_volatile = true;
5763  }
5764  else
5765  {
5766  /*
5767  * Else check the cost. XXX it's annoying to have to do this
5768  * when set_pathtarget_cost_width() just did it. Refactor to
5769  * allow sharing the work?
5770  */
5771  QualCost cost;
5772 
5773  cost_qual_eval_node(&cost, (Node *) expr, root);
5774 
5775  /*
5776  * We arbitrarily define "expensive" as "more than 10X
5777  * cpu_operator_cost". Note this will take in any PL function
5778  * with default cost.
5779  */
5780  if (cost.per_tuple > 10 * cpu_operator_cost)
5781  {
5782  postpone_col[i] = true;
5783  have_expensive = true;
5784  }
5785  }
5786  }
5787  else
5788  {
5789  /* For sortgroupref cols, just check if any contain SRFs */
5790  if (!have_srf_sortcols &&
5791  parse->hasTargetSRFs &&
5792  expression_returns_set((Node *) expr))
5793  have_srf_sortcols = true;
5794  }
5795 
5796  i++;
5797  }
5798 
5799  /*
5800  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5801  */
5802  postpone_srfs = (have_srf && !have_srf_sortcols);
5803 
5804  /*
5805  * If we don't need a post-sort projection, just return final_target.
5806  */
5807  if (!(postpone_srfs || have_volatile ||
5808  (have_expensive &&
5809  (parse->limitCount || root->tuple_fraction > 0))))
5810  return final_target;
5811 
5812  /*
5813  * Report whether the post-sort projection will contain set-returning
5814  * functions. This is important because it affects whether the Sort can
5815  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5816  * to return.
5817  */
5818  *have_postponed_srfs = postpone_srfs;
5819 
5820  /*
5821  * Construct the sort-input target, taking all non-postponable columns and
5822  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5823  * the postponable ones.
5824  */
5825  input_target = create_empty_pathtarget();
5826  postponable_cols = NIL;
5827 
5828  i = 0;
5829  foreach(lc, final_target->exprs)
5830  {
5831  Expr *expr = (Expr *) lfirst(lc);
5832 
5833  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5834  postponable_cols = lappend(postponable_cols, expr);
5835  else
5836  add_column_to_pathtarget(input_target, expr,
5837  get_pathtarget_sortgroupref(final_target, i));
5838 
5839  i++;
5840  }
5841 
5842  /*
5843  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5844  * postponable columns, and add them to the sort-input target if not
5845  * already present. (Some might be there already.) We mustn't
5846  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5847  * would be unable to recompute them.
5848  */
5849  postponable_vars = pull_var_clause((Node *) postponable_cols,
5850  PVC_INCLUDE_AGGREGATES |
5851  PVC_INCLUDE_WINDOWFUNCS |
5852  PVC_INCLUDE_PLACEHOLDERS);
5853  add_new_columns_to_pathtarget(input_target, postponable_vars);
5854 
5855  /* clean up cruft */
5856  list_free(postponable_vars);
5857  list_free(postponable_cols);
5858 
5859  /* XXX this represents even more redundant cost calculation ... */
5860  return set_pathtarget_cost_width(root, input_target);
5861 }
5862 
5863 /*
5864  * get_cheapest_fractional_path
5865  * Find the cheapest path for retrieving a specified fraction of all
5866  * the tuples expected to be returned by the given relation.
5867  *
5868  * We interpret tuple_fraction the same way as grouping_planner.
5869  *
5870  * We assume set_cheapest() has been run on the given rel.
5871  */
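/*
 * To recap that interpretation: tuple_fraction <= 0 means fetch all tuples,
 * tuple_fraction >= 1 is an absolute row count (converted to a fraction
 * below), and a value strictly between 0 and 1 is the expected fraction of
 * the result actually retrieved.
 */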
5872 Path *
5873 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5874 {
5875  Path *best_path = rel->cheapest_total_path;
5876  ListCell *l;
5877 
5878  /* If all tuples will be retrieved, just return the cheapest-total path */
5879  if (tuple_fraction <= 0.0)
5880  return best_path;
5881 
5882  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5883  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5884  tuple_fraction /= best_path->rows;
5885 
5886  foreach(l, rel->pathlist)
5887  {
5888  Path *path = (Path *) lfirst(l);
5889 
5890  if (path == rel->cheapest_total_path ||
5891  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5892  continue;
5893 
5894  best_path = path;
5895  }
5896 
5897  return best_path;
5898 }
5899 
5900 /*
5901  * adjust_paths_for_srfs
5902  * Fix up the Paths of the given upperrel to handle tSRFs properly.
5903  *
5904  * The executor can only handle set-returning functions that appear at the
5905  * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5906  * that are not at top level, we need to split up the evaluation into multiple
5907  * plan levels in which each level satisfies this constraint. This function
5908  * modifies each Path of an upperrel that (might) compute any SRFs in its
5909  * output tlist to insert appropriate projection steps.
5910  *
5911  * The given targets and targets_contain_srfs lists are from
5912  * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5913  * target in targets.
5914  */
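/*
 * Illustrative sketch: a nested SRF such as
 *		SELECT generate_series(1, generate_series(1, 3))
 * is split so that a lower ProjectSet evaluates g = generate_series(1, 3)
 * and an upper ProjectSet evaluates generate_series(1, g), keeping each SRF
 * at the top level of its own targetlist.
 */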
5915 static