planner.c
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/table.h"
25 #include "catalog/pg_aggregate.h"
26 #include "catalog/pg_constraint.h"
27 #include "catalog/pg_inherits.h"
28 #include "catalog/pg_proc.h"
29 #include "catalog/pg_type.h"
30 #include "executor/executor.h"
31 #include "foreign/fdwapi.h"
32 #include "jit/jit.h"
33 #include "lib/bipartite_match.h"
34 #include "lib/knapsack.h"
35 #include "miscadmin.h"
36 #include "nodes/makefuncs.h"
37 #include "nodes/nodeFuncs.h"
38 #ifdef OPTIMIZER_DEBUG
39 #include "nodes/print.h"
40 #endif
41 #include "nodes/supportnodes.h"
42 #include "optimizer/appendinfo.h"
43 #include "optimizer/clauses.h"
44 #include "optimizer/cost.h"
45 #include "optimizer/optimizer.h"
46 #include "optimizer/paramassign.h"
47 #include "optimizer/pathnode.h"
48 #include "optimizer/paths.h"
49 #include "optimizer/plancat.h"
50 #include "optimizer/planmain.h"
51 #include "optimizer/planner.h"
52 #include "optimizer/prep.h"
53 #include "optimizer/subselect.h"
54 #include "optimizer/tlist.h"
55 #include "parser/analyze.h"
56 #include "parser/parse_agg.h"
57 #include "parser/parse_clause.h"
58 #include "parser/parse_relation.h"
59 #include "parser/parsetree.h"
60 #include "partitioning/partdesc.h"
61 #include "rewrite/rewriteManip.h"
62 #include "utils/lsyscache.h"
63 #include "utils/rel.h"
64 #include "utils/selfuncs.h"
65 
66 /* GUC parameters */
67 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
68 int debug_parallel_query = DEBUG_PARALLEL_OFF;
69 bool parallel_leader_participation = true;
70 
71 /* Hook for plugins to get control in planner() */
72 planner_hook_type planner_hook = NULL;
73 
74 /* Hook for plugins to get control when grouping_planner() plans upper rels */
75 create_upper_paths_hook_type create_upper_paths_hook = NULL;
76 
77 
78 /* Expression kind codes for preprocess_expression */
79 #define EXPRKIND_QUAL 0
80 #define EXPRKIND_TARGET 1
81 #define EXPRKIND_RTFUNC 2
82 #define EXPRKIND_RTFUNC_LATERAL 3
83 #define EXPRKIND_VALUES 4
84 #define EXPRKIND_VALUES_LATERAL 5
85 #define EXPRKIND_LIMIT 6
86 #define EXPRKIND_APPINFO 7
87 #define EXPRKIND_PHV 8
88 #define EXPRKIND_TABLESAMPLE 9
89 #define EXPRKIND_ARBITER_ELEM 10
90 #define EXPRKIND_TABLEFUNC 11
91 #define EXPRKIND_TABLEFUNC_LATERAL 12
92 #define EXPRKIND_GROUPEXPR 13
93 
94 /*
95  * Data specific to grouping sets
96  */
97 typedef struct
98 {
107 } grouping_sets_data;
108 
109 /*
110  * Temporary structure for use during WindowClause reordering in order to be
111  * able to sort WindowClauses on partitioning/ordering prefix.
112  */
113 typedef struct
114 {
115  WindowClause *wc; /* The WindowClause being sorted on */
116  List *uniqueOrder; /* A List of unique ordering/partitioning
117  * clauses per Window */
118 } WindowClauseSortData;
119 
120 /* Passthrough data for standard_qp_callback */
121 typedef struct
122 {
123  List *activeWindows; /* active windows, if any */
124  grouping_sets_data *gset_data; /* grouping sets data, if any */
125  SetOperationStmt *setop; /* parent set operation or NULL if not a
126  * subquery belonging to a set operation */
127 } standard_qp_extra;
128 
129 /* Local functions */
130 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
131 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
132 static void grouping_planner(PlannerInfo *root, double tuple_fraction,
133  SetOperationStmt *setops);
135 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
136  int *tleref_to_colnum_map);
137 static void preprocess_rowmarks(PlannerInfo *root);
138 static double preprocess_limit(PlannerInfo *root,
139  double tuple_fraction,
140  int64 *offset_est, int64 *count_est);
143 static List *extract_rollup_sets(List *groupingSets);
144 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
145 static void standard_qp_callback(PlannerInfo *root, void *extra);
146 static double get_number_of_groups(PlannerInfo *root,
147  double path_rows,
148  grouping_sets_data *gd,
149  List *target_list);
151  RelOptInfo *input_rel,
152  PathTarget *target,
153  bool target_parallel_safe,
154  grouping_sets_data *gd);
157  RelOptInfo *input_rel,
158  RelOptInfo *grouped_rel);
160  PathTarget *target, bool target_parallel_safe,
161  Node *havingQual);
163  RelOptInfo *input_rel,
164  RelOptInfo *grouped_rel,
165  const AggClauseCosts *agg_costs,
166  grouping_sets_data *gd,
167  GroupPathExtraData *extra,
168  RelOptInfo **partially_grouped_rel_p);
170  RelOptInfo *grouped_rel,
171  Path *path,
172  bool is_sorted,
173  bool can_hash,
174  grouping_sets_data *gd,
175  const AggClauseCosts *agg_costs,
176  double dNumGroups);
178  RelOptInfo *input_rel,
179  PathTarget *input_target,
180  PathTarget *output_target,
181  bool output_target_parallel_safe,
182  WindowFuncLists *wflists,
183  List *activeWindows);
185  RelOptInfo *window_rel,
186  Path *path,
187  PathTarget *input_target,
188  PathTarget *output_target,
189  WindowFuncLists *wflists,
190  List *activeWindows);
192  RelOptInfo *input_rel,
193  PathTarget *target);
195  RelOptInfo *input_rel,
196  RelOptInfo *final_distinct_rel,
197  PathTarget *target);
199  RelOptInfo *input_rel,
200  RelOptInfo *distinct_rel);
202  RelOptInfo *input_rel,
203  PathTarget *target,
204  bool target_parallel_safe,
205  double limit_tuples);
207  PathTarget *final_target);
209  PathTarget *grouping_target,
210  Node *havingQual);
211 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
213  WindowFuncLists *wflists);
216  PathTarget *final_target,
217  List *activeWindows);
219  List *tlist);
221  PathTarget *final_target,
222  bool *have_postponed_srfs);
224  List *targets, List *targets_contain_srfs);
225 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
226  RelOptInfo *grouped_rel,
227  RelOptInfo *partially_grouped_rel,
228  const AggClauseCosts *agg_costs,
229  grouping_sets_data *gd,
230  double dNumGroups,
231  GroupPathExtraData *extra);
233  RelOptInfo *grouped_rel,
234  RelOptInfo *input_rel,
235  grouping_sets_data *gd,
236  GroupPathExtraData *extra,
237  bool force_rel_creation);
239 static bool can_partial_agg(PlannerInfo *root);
241  RelOptInfo *rel,
242  List *scanjoin_targets,
243  List *scanjoin_targets_contain_srfs,
244  bool scanjoin_target_parallel_safe,
245  bool tlist_same_exprs);
247  RelOptInfo *input_rel,
248  RelOptInfo *grouped_rel,
249  RelOptInfo *partially_grouped_rel,
250  const AggClauseCosts *agg_costs,
251  grouping_sets_data *gd,
253  GroupPathExtraData *extra);
254 static bool group_by_has_partkey(RelOptInfo *input_rel,
255  List *targetList,
256  List *groupClause);
257 static int common_prefix_cmp(const void *a, const void *b);
259  List *targetlist);
260 
261 
262 /*****************************************************************************
263  *
264  * Query optimizer entry point
265  *
266  * To support loadable plugins that monitor or modify planner behavior,
267  * we provide a hook variable that lets a plugin get control before and
268  * after the standard planning process. The plugin would normally call
269  * standard_planner().
270  *
271  * Note to plugin authors: standard_planner() scribbles on its Query input,
272  * so you'd better copy that data structure if you want to plan more than once.
273  *
274  *****************************************************************************/
275 PlannedStmt *
276 planner(Query *parse, const char *query_string, int cursorOptions,
277  ParamListInfo boundParams)
278 {
279  PlannedStmt *result;
280 
281  if (planner_hook)
282  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
283  else
284  result = standard_planner(parse, query_string, cursorOptions, boundParams);
285  return result;
286 }
287 
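As a hedged illustration of the plugin hook described in the header comment above (not part of planner.c): an extension typically saves any previously installed hook and falls back to standard_planner. Everything below other than planner_hook, planner_hook_type, standard_planner, and _PG_init is a hypothetical name.

/* illustrative sketch only; this would live in an extension, not in planner.c */
static planner_hook_type prev_planner_hook = NULL;

static PlannedStmt *
my_planner(Query *parse, const char *query_string, int cursorOptions,
           ParamListInfo boundParams)
{
    /* inspect or adjust the Query here, then delegate */
    if (prev_planner_hook)
        return prev_planner_hook(parse, query_string, cursorOptions, boundParams);
    return standard_planner(parse, query_string, cursorOptions, boundParams);
}

void
_PG_init(void)
{
    /* chain onto any hook installed by another module */
    prev_planner_hook = planner_hook;
    planner_hook = my_planner;
}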
288 PlannedStmt *
289 standard_planner(Query *parse, const char *query_string, int cursorOptions,
290  ParamListInfo boundParams)
291 {
292  PlannedStmt *result;
293  PlannerGlobal *glob;
294  double tuple_fraction;
295  PlannerInfo *root;
296  RelOptInfo *final_rel;
297  Path *best_path;
298  Plan *top_plan;
299  ListCell *lp,
300  *lr;
301 
302  /*
303  * Set up global state for this planner invocation. This data is needed
304  * across all levels of sub-Query that might exist in the given command,
305  * so we keep it in a separate struct that's linked to by each per-Query
306  * PlannerInfo.
307  */
308  glob = makeNode(PlannerGlobal);
309 
310  glob->boundParams = boundParams;
311  glob->subplans = NIL;
312  glob->subpaths = NIL;
313  glob->subroots = NIL;
314  glob->rewindPlanIDs = NULL;
315  glob->finalrtable = NIL;
316  glob->finalrteperminfos = NIL;
317  glob->finalrowmarks = NIL;
318  glob->resultRelations = NIL;
319  glob->appendRelations = NIL;
320  glob->relationOids = NIL;
321  glob->invalItems = NIL;
322  glob->paramExecTypes = NIL;
323  glob->lastPHId = 0;
324  glob->lastRowMarkId = 0;
325  glob->lastPlanNodeId = 0;
326  glob->transientPlan = false;
327  glob->dependsOnRole = false;
328 
329  /*
330  * Assess whether it's feasible to use parallel mode for this query. We
331  * can't do this in a standalone backend, or if the command will try to
332  * modify any data, or if this is a cursor operation, or if GUCs are set
333  * to values that don't permit parallelism, or if parallel-unsafe
334  * functions are present in the query tree.
335  *
336  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
337  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
338  * the command is writing into a completely new table which workers won't
339  * be able to see. If the workers could see the table, the fact that
340  * group locking would cause them to ignore the leader's heavyweight GIN
341  * page locks would make this unsafe. We'll have to fix that somehow if
342  * we want to allow parallel inserts in general; updates and deletes have
343  * additional problems especially around combo CIDs.)
344  *
345  * For now, we don't try to use parallel mode if we're running inside a
346  * parallel worker. We might eventually be able to relax this
347  * restriction, but for now it seems best not to have parallel workers
348  * trying to create their own parallel workers.
349  */
350  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
351  IsUnderPostmaster &&
352  parse->commandType == CMD_SELECT &&
353  !parse->hasModifyingCTE &&
354  max_parallel_workers_per_gather > 0 &&
355  !IsParallelWorker())
356  {
357  /* all the cheap tests pass, so scan the query tree */
358  glob->maxParallelHazard = max_parallel_hazard(parse);
359  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
360  }
361  else
362  {
363  /* skip the query tree scan, just assume it's unsafe */
364  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
365  glob->parallelModeOK = false;
366  }
367 
368  /*
369  * glob->parallelModeNeeded is normally set to false here and changed to
370  * true during plan creation if a Gather or Gather Merge plan is actually
371  * created (cf. create_gather_plan, create_gather_merge_plan).
372  *
373  * However, if debug_parallel_query = on or debug_parallel_query =
374  * regress, then we impose parallel mode whenever it's safe to do so, even
375  * if the final plan doesn't use parallelism. It's not safe to do so if
376  * the query contains anything parallel-unsafe; parallelModeOK will be
377  * false in that case. Note that parallelModeOK can't change after this
378  * point. Otherwise, everything in the query is either parallel-safe or
379  * parallel-restricted, and in either case it should be OK to impose
380  * parallel-mode restrictions. If that ends up breaking something, then
381  * either some function the user included in the query is incorrectly
382  * labeled as parallel-safe or parallel-restricted when in reality it's
383  * parallel-unsafe, or else the query planner itself has a bug.
384  */
385  glob->parallelModeNeeded = glob->parallelModeOK &&
386  (debug_parallel_query != DEBUG_PARALLEL_OFF);
387 
388  /* Determine what fraction of the plan is likely to be scanned */
389  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
390  {
391  /*
392  * We have no real idea how many tuples the user will ultimately FETCH
393  * from a cursor, but it is often the case that he doesn't want 'em
394  * all, or would prefer a fast-start plan anyway so that he can
395  * process some of the tuples sooner. Use a GUC parameter to decide
396  * what fraction to optimize for.
397  */
398  tuple_fraction = cursor_tuple_fraction;
399 
400  /*
401  * We document cursor_tuple_fraction as simply being a fraction, which
402  * means the edge cases 0 and 1 have to be treated specially here. We
403  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
404  */
405  if (tuple_fraction >= 1.0)
406  tuple_fraction = 0.0;
407  else if (tuple_fraction <= 0.0)
408  tuple_fraction = 1e-10;
409  }
410  else
411  {
412  /* Default assumption is we need all the tuples */
413  tuple_fraction = 0.0;
414  }
415 
416  /* primary planning entry point (may recurse for subqueries) */
417  root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
418 
419  /* Select best Path and turn it into a Plan */
420  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
421  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
422 
423  top_plan = create_plan(root, best_path);
424 
425  /*
426  * If creating a plan for a scrollable cursor, make sure it can run
427  * backwards on demand. Add a Material node at the top at need.
428  */
429  if (cursorOptions & CURSOR_OPT_SCROLL)
430  {
431  if (!ExecSupportsBackwardScan(top_plan))
432  top_plan = materialize_finished_plan(top_plan);
433  }
434 
435  /*
436  * Optionally add a Gather node for testing purposes, provided this is
437  * actually a safe thing to do.
438  *
439  * We can add Gather even when top_plan has parallel-safe initPlans, but
440  * then we have to move the initPlans to the Gather node because of
441  * SS_finalize_plan's limitations. That would cause cosmetic breakage of
442  * regression tests when debug_parallel_query = regress, because initPlans
443  * that would normally appear on the top_plan move to the Gather, causing
444  * them to disappear from EXPLAIN output. That doesn't seem worth kluging
445  * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
446  */
447  if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
448  top_plan->parallel_safe &&
449  (top_plan->initPlan == NIL ||
450  debug_parallel_query != DEBUG_PARALLEL_REGRESS))
451  {
452  Gather *gather = makeNode(Gather);
453  Cost initplan_cost;
454  bool unsafe_initplans;
455 
456  gather->plan.targetlist = top_plan->targetlist;
457  gather->plan.qual = NIL;
458  gather->plan.lefttree = top_plan;
459  gather->plan.righttree = NULL;
460  gather->num_workers = 1;
461  gather->single_copy = true;
462  gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
463 
464  /* Transfer any initPlans to the new top node */
465  gather->plan.initPlan = top_plan->initPlan;
466  top_plan->initPlan = NIL;
467 
468  /*
469  * Since this Gather has no parallel-aware descendants to signal to,
470  * we don't need a rescan Param.
471  */
472  gather->rescan_param = -1;
473 
474  /*
475  * Ideally we'd use cost_gather here, but setting up dummy path data
476  * to satisfy it doesn't seem much cleaner than knowing what it does.
477  */
478  gather->plan.startup_cost = top_plan->startup_cost +
479  parallel_setup_cost;
480  gather->plan.total_cost = top_plan->total_cost +
481  parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
482  gather->plan.plan_rows = top_plan->plan_rows;
483  gather->plan.plan_width = top_plan->plan_width;
484  gather->plan.parallel_aware = false;
485  gather->plan.parallel_safe = false;
486 
487  /*
488  * Delete the initplans' cost from top_plan. We needn't add it to the
489  * Gather node, since the above coding already included it there.
490  */
491  SS_compute_initplan_cost(gather->plan.initPlan,
492  &initplan_cost, &unsafe_initplans);
493  top_plan->startup_cost -= initplan_cost;
494  top_plan->total_cost -= initplan_cost;
495 
496  /* use parallel mode for parallel plans. */
497  root->glob->parallelModeNeeded = true;
498 
499  top_plan = &gather->plan;
500  }
501 
502  /*
503  * If any Params were generated, run through the plan tree and compute
504  * each plan node's extParam/allParam sets. Ideally we'd merge this into
505  * set_plan_references' tree traversal, but for now it has to be separate
506  * because we need to visit subplans before not after main plan.
507  */
508  if (glob->paramExecTypes != NIL)
509  {
510  Assert(list_length(glob->subplans) == list_length(glob->subroots));
511  forboth(lp, glob->subplans, lr, glob->subroots)
512  {
513  Plan *subplan = (Plan *) lfirst(lp);
514  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
515 
516  SS_finalize_plan(subroot, subplan);
517  }
518  SS_finalize_plan(root, top_plan);
519  }
520 
521  /* final cleanup of the plan */
522  Assert(glob->finalrtable == NIL);
523  Assert(glob->finalrteperminfos == NIL);
524  Assert(glob->finalrowmarks == NIL);
525  Assert(glob->resultRelations == NIL);
526  Assert(glob->appendRelations == NIL);
527  top_plan = set_plan_references(root, top_plan);
528  /* ... and the subplans (both regular subplans and initplans) */
529  Assert(list_length(glob->subplans) == list_length(glob->subroots));
530  forboth(lp, glob->subplans, lr, glob->subroots)
531  {
532  Plan *subplan = (Plan *) lfirst(lp);
533  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
534 
535  lfirst(lp) = set_plan_references(subroot, subplan);
536  }
537 
538  /* build the PlannedStmt result */
539  result = makeNode(PlannedStmt);
540 
541  result->commandType = parse->commandType;
542  result->queryId = parse->queryId;
543  result->hasReturning = (parse->returningList != NIL);
544  result->hasModifyingCTE = parse->hasModifyingCTE;
545  result->canSetTag = parse->canSetTag;
546  result->transientPlan = glob->transientPlan;
547  result->dependsOnRole = glob->dependsOnRole;
548  result->parallelModeNeeded = glob->parallelModeNeeded;
549  result->planTree = top_plan;
550  result->rtable = glob->finalrtable;
551  result->permInfos = glob->finalrteperminfos;
552  result->resultRelations = glob->resultRelations;
553  result->appendRelations = glob->appendRelations;
554  result->subplans = glob->subplans;
555  result->rewindPlanIDs = glob->rewindPlanIDs;
556  result->rowMarks = glob->finalrowmarks;
557  result->relationOids = glob->relationOids;
558  result->invalItems = glob->invalItems;
559  result->paramExecTypes = glob->paramExecTypes;
560  /* utilityStmt should be null, but we might as well copy it */
561  result->utilityStmt = parse->utilityStmt;
562  result->stmt_location = parse->stmt_location;
563  result->stmt_len = parse->stmt_len;
564 
565  result->jitFlags = PGJIT_NONE;
566  if (jit_enabled && jit_above_cost >= 0 &&
567  top_plan->total_cost > jit_above_cost)
568  {
569  result->jitFlags |= PGJIT_PERFORM;
570 
571  /*
572  * Decide how much effort should be put into generating better code.
573  */
574  if (jit_optimize_above_cost >= 0 &&
575  top_plan->total_cost > jit_optimize_above_cost)
576  result->jitFlags |= PGJIT_OPT3;
577  if (jit_inline_above_cost >= 0 &&
578  top_plan->total_cost > jit_inline_above_cost)
579  result->jitFlags |= PGJIT_INLINE;
580 
581  /*
582  * Decide which operations should be JITed.
583  */
584  if (jit_expressions)
585  result->jitFlags |= PGJIT_EXPR;
586  if (jit_tuple_deforming)
587  result->jitFlags |= PGJIT_DEFORM;
588  }
589 
590  if (glob->partition_directory != NULL)
591  DestroyPartitionDirectory(glob->partition_directory);
592 
593  return result;
594 }
595 
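For orientation, a minimal sketch of how this entry point is reached (in the backend this normally happens via pg_plan_query, after parse analysis and rewriting; the query variable below is hypothetical and assumed to hold an already analyzed and rewritten tree):

Query      *query;          /* an analyzed and rewritten Query tree */
PlannedStmt *stmt;

/* permit parallel plans; no bound parameters */
stmt = planner(query, "SELECT ...", CURSOR_OPT_PARALLEL_OK, NULL);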
596 
597 /*--------------------
598  * subquery_planner
599  * Invokes the planner on a subquery. We recurse to here for each
600  * sub-SELECT found in the query tree.
601  *
602  * glob is the global state for the current planner run.
603  * parse is the querytree produced by the parser & rewriter.
604  * parent_root is the immediate parent Query's info (NULL at the top level).
605  * hasRecursion is true if this is a recursive WITH query.
606  * tuple_fraction is the fraction of tuples we expect will be retrieved.
607  * tuple_fraction is interpreted as explained for grouping_planner, below.
608  * setops is used for set operation subqueries to provide the subquery with
609  * the context in which it's being used so that Paths correctly sorted for the
610  * set operation can be generated. NULL when not planning a set operation
611  * child.
612  *
613  * Basically, this routine does the stuff that should only be done once
614  * per Query object. It then calls grouping_planner. At one time,
615  * grouping_planner could be invoked recursively on the same Query object;
616  * that's not currently true, but we keep the separation between the two
617  * routines anyway, in case we need it again someday.
618  *
619  * subquery_planner will be called recursively to handle sub-Query nodes
620  * found within the query's expressions and rangetable.
621  *
622  * Returns the PlannerInfo struct ("root") that contains all data generated
623  * while planning the subquery. In particular, the Path(s) attached to
624  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
625  * cheapest way(s) to implement the query. The top level will select the
626  * best Path and pass it through createplan.c to produce a finished Plan.
627  *--------------------
628  */
629 PlannerInfo *
630 subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
631  bool hasRecursion, double tuple_fraction,
632  SetOperationStmt *setops)
633 {
634  PlannerInfo *root;
635  List *newWithCheckOptions;
636  List *newHaving;
637  bool hasOuterJoins;
638  bool hasResultRTEs;
639  RelOptInfo *final_rel;
640  ListCell *l;
641 
642  /* Create a PlannerInfo data structure for this subquery */
643  root = makeNode(PlannerInfo);
644  root->parse = parse;
645  root->glob = glob;
646  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
647  root->parent_root = parent_root;
648  root->plan_params = NIL;
649  root->outer_params = NULL;
650  root->planner_cxt = CurrentMemoryContext;
651  root->init_plans = NIL;
652  root->cte_plan_ids = NIL;
653  root->multiexpr_params = NIL;
654  root->join_domains = NIL;
655  root->eq_classes = NIL;
656  root->ec_merging_done = false;
657  root->last_rinfo_serial = 0;
658  root->all_result_relids =
659  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
660  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
661  root->append_rel_list = NIL;
662  root->row_identity_vars = NIL;
663  root->rowMarks = NIL;
664  memset(root->upper_rels, 0, sizeof(root->upper_rels));
665  memset(root->upper_targets, 0, sizeof(root->upper_targets));
666  root->processed_groupClause = NIL;
667  root->processed_distinctClause = NIL;
668  root->processed_tlist = NIL;
669  root->update_colnos = NIL;
670  root->grouping_map = NULL;
671  root->minmax_aggs = NIL;
672  root->qual_security_level = 0;
673  root->hasPseudoConstantQuals = false;
674  root->hasAlternativeSubPlans = false;
675  root->placeholdersFrozen = false;
676  root->hasRecursion = hasRecursion;
677  if (hasRecursion)
678  root->wt_param_id = assign_special_exec_param(root);
679  else
680  root->wt_param_id = -1;
681  root->non_recursive_path = NULL;
682  root->partColsUpdated = false;
683 
684  /*
685  * Create the top-level join domain. This won't have valid contents until
686  * deconstruct_jointree fills it in, but the node needs to exist before
687  * that so we can build EquivalenceClasses referencing it.
688  */
689  root->join_domains = list_make1(makeNode(JoinDomain));
690 
691  /*
692  * If there is a WITH list, process each WITH query and either convert it
693  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
694  */
695  if (parse->cteList)
696  SS_process_ctes(root);
697 
698  /*
699  * If it's a MERGE command, transform the joinlist as appropriate.
700  */
702 
703  /*
704  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
705  * that we don't need so many special cases to deal with that situation.
706  */
707  replace_empty_jointree(parse);
708 
709  /*
710  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
711  * to transform them into joins. Note that this step does not descend
712  * into subqueries; if we pull up any subqueries below, their SubLinks are
713  * processed just before pulling them up.
714  */
715  if (parse->hasSubLinks)
716  pull_up_sublinks(root);
717 
718  /*
719  * Scan the rangetable for function RTEs, do const-simplification on them,
720  * and then inline them if possible (producing subqueries that might get
721  * pulled up next). Recursion issues here are handled in the same way as
722  * for SubLinks.
723  */
724  preprocess_function_rtes(root);
725 
726  /*
727  * Check to see if any subqueries in the jointree can be merged into this
728  * query.
729  */
730  pull_up_subqueries(root);
731 
732  /*
733  * If this is a simple UNION ALL query, flatten it into an appendrel. We
734  * do this now because it requires applying pull_up_subqueries to the leaf
735  * queries of the UNION ALL, which weren't touched above because they
736  * weren't referenced by the jointree (they will be after we do this).
737  */
738  if (parse->setOperations)
739  flatten_simple_union_all(root);
740 
741  /*
742  * Survey the rangetable to see what kinds of entries are present. We can
743  * skip some later processing if relevant SQL features are not used; for
744  * example if there are no JOIN RTEs we can avoid the expense of doing
745  * flatten_join_alias_vars(). This must be done after we have finished
746  * adding rangetable entries, of course. (Note: actually, processing of
747  * inherited or partitioned rels can cause RTEs for their child tables to
748  * get added later; but those must all be RTE_RELATION entries, so they
749  * don't invalidate the conclusions drawn here.)
750  */
751  root->hasJoinRTEs = false;
752  root->hasLateralRTEs = false;
753  root->group_rtindex = 0;
754  hasOuterJoins = false;
755  hasResultRTEs = false;
756  foreach(l, parse->rtable)
757  {
758  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
759 
760  switch (rte->rtekind)
761  {
762  case RTE_RELATION:
763  if (rte->inh)
764  {
765  /*
766  * Check to see if the relation actually has any children;
767  * if not, clear the inh flag so we can treat it as a
768  * plain base relation.
769  *
770  * Note: this could give a false-positive result, if the
771  * rel once had children but no longer does. We used to
772  * be able to clear rte->inh later on when we discovered
773  * that, but no more; we have to handle such cases as
774  * full-fledged inheritance.
775  */
776  rte->inh = has_subclass(rte->relid);
777  }
778  break;
779  case RTE_JOIN:
780  root->hasJoinRTEs = true;
781  if (IS_OUTER_JOIN(rte->jointype))
782  hasOuterJoins = true;
783  break;
784  case RTE_RESULT:
785  hasResultRTEs = true;
786  break;
787  case RTE_GROUP:
788  Assert(parse->hasGroupRTE);
789  root->group_rtindex = list_cell_number(parse->rtable, l) + 1;
790  break;
791  default:
792  /* No work here for other RTE types */
793  break;
794  }
795 
796  if (rte->lateral)
797  root->hasLateralRTEs = true;
798 
799  /*
800  * We can also determine the maximum security level required for any
801  * securityQuals now. Addition of inheritance-child RTEs won't affect
802  * this, because child tables don't have their own securityQuals; see
803  * expand_single_inheritance_child().
804  */
805  if (rte->securityQuals)
806  root->qual_security_level = Max(root->qual_security_level,
807  list_length(rte->securityQuals));
808  }
809 
810  /*
811  * If we have now verified that the query target relation is
812  * non-inheriting, mark it as a leaf target.
813  */
814  if (parse->resultRelation)
815  {
816  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
817 
818  if (!rte->inh)
819  root->leaf_result_relids =
820  bms_make_singleton(parse->resultRelation);
821  }
822 
823  /*
824  * Preprocess RowMark information. We need to do this after subquery
825  * pullup, so that all base relations are present.
826  */
827  preprocess_rowmarks(root);
828 
829  /*
830  * Set hasHavingQual to remember if HAVING clause is present. Needed
831  * because preprocess_expression will reduce a constant-true condition to
832  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
833  */
834  root->hasHavingQual = (parse->havingQual != NULL);
835 
836  /*
837  * Do expression preprocessing on targetlist and quals, as well as other
838  * random expressions in the querytree. Note that we do not need to
839  * handle sort/group expressions explicitly, because they are actually
840  * part of the targetlist.
841  */
842  parse->targetList = (List *)
843  preprocess_expression(root, (Node *) parse->targetList,
844  EXPRKIND_TARGET);
845 
846  newWithCheckOptions = NIL;
847  foreach(l, parse->withCheckOptions)
848  {
849  WithCheckOption *wco = lfirst_node(WithCheckOption, l);
850 
851  wco->qual = preprocess_expression(root, wco->qual,
852  EXPRKIND_QUAL);
853  if (wco->qual != NULL)
854  newWithCheckOptions = lappend(newWithCheckOptions, wco);
855  }
856  parse->withCheckOptions = newWithCheckOptions;
857 
858  parse->returningList = (List *)
859  preprocess_expression(root, (Node *) parse->returningList,
860  EXPRKIND_TARGET);
861 
862  preprocess_qual_conditions(root, (Node *) parse->jointree);
863 
864  parse->havingQual = preprocess_expression(root, parse->havingQual,
865  EXPRKIND_QUAL);
866 
867  foreach(l, parse->windowClause)
868  {
869  WindowClause *wc = lfirst_node(WindowClause, l);
870 
871  /* partitionClause/orderClause are sort/group expressions */
872  wc->startOffset = preprocess_expression(root, wc->startOffset,
873  EXPRKIND_LIMIT);
874  wc->endOffset = preprocess_expression(root, wc->endOffset,
875  EXPRKIND_LIMIT);
876  }
877 
878  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
879  EXPRKIND_LIMIT);
880  parse->limitCount = preprocess_expression(root, parse->limitCount,
881  EXPRKIND_LIMIT);
882 
883  if (parse->onConflict)
884  {
885  parse->onConflict->arbiterElems = (List *)
886  preprocess_expression(root,
887  (Node *) parse->onConflict->arbiterElems,
888  EXPRKIND_ARBITER_ELEM);
889  parse->onConflict->arbiterWhere =
890  preprocess_expression(root,
891  parse->onConflict->arbiterWhere,
892  EXPRKIND_QUAL);
893  parse->onConflict->onConflictSet = (List *)
894  preprocess_expression(root,
895  (Node *) parse->onConflict->onConflictSet,
896  EXPRKIND_TARGET);
897  parse->onConflict->onConflictWhere =
898  preprocess_expression(root,
899  parse->onConflict->onConflictWhere,
900  EXPRKIND_QUAL);
901  /* exclRelTlist contains only Vars, so no preprocessing needed */
902  }
903 
904  foreach(l, parse->mergeActionList)
905  {
906  MergeAction *action = lfirst_node(MergeAction, l);
907 
908  action->targetList = (List *)
909  preprocess_expression(root,
910  (Node *) action->targetList,
911  EXPRKIND_TARGET);
912  action->qual =
913  preprocess_expression(root,
914  (Node *) action->qual,
915  EXPRKIND_QUAL);
916  }
917 
918  parse->mergeJoinCondition =
919  preprocess_expression(root, parse->mergeJoinCondition, EXPRKIND_QUAL);
920 
921  root->append_rel_list = (List *)
922  preprocess_expression(root, (Node *) root->append_rel_list,
923  EXPRKIND_APPINFO);
924 
925  /* Also need to preprocess expressions within RTEs */
926  foreach(l, parse->rtable)
927  {
928  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
929  int kind;
930  ListCell *lcsq;
931 
932  if (rte->rtekind == RTE_RELATION)
933  {
934  if (rte->tablesample)
935  rte->tablesample = (TableSampleClause *)
936  preprocess_expression(root,
937  (Node *) rte->tablesample,
938  EXPRKIND_TABLESAMPLE);
939  }
940  else if (rte->rtekind == RTE_SUBQUERY)
941  {
942  /*
943  * We don't want to do all preprocessing yet on the subquery's
944  * expressions, since that will happen when we plan it. But if it
945  * contains any join aliases of our level, those have to get
946  * expanded now, because planning of the subquery won't do it.
947  * That's only possible if the subquery is LATERAL.
948  */
949  if (rte->lateral && root->hasJoinRTEs)
950  rte->subquery = (Query *)
951  flatten_join_alias_vars(root, root->parse,
952  (Node *) rte->subquery);
953  }
954  else if (rte->rtekind == RTE_FUNCTION)
955  {
956  /* Preprocess the function expression(s) fully */
957  kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
958  rte->functions = (List *)
959  preprocess_expression(root, (Node *) rte->functions, kind);
960  }
961  else if (rte->rtekind == RTE_TABLEFUNC)
962  {
963  /* Preprocess the function expression(s) fully */
964  kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
965  rte->tablefunc = (TableFunc *)
966  preprocess_expression(root, (Node *) rte->tablefunc, kind);
967  }
968  else if (rte->rtekind == RTE_VALUES)
969  {
970  /* Preprocess the values lists fully */
971  kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
972  rte->values_lists = (List *)
973  preprocess_expression(root, (Node *) rte->values_lists, kind);
974  }
975  else if (rte->rtekind == RTE_GROUP)
976  {
977  /* Preprocess the groupexprs list fully */
978  rte->groupexprs = (List *)
979  preprocess_expression(root, (Node *) rte->groupexprs,
980  EXPRKIND_GROUPEXPR);
981  }
982 
983  /*
984  * Process each element of the securityQuals list as if it were a
985  * separate qual expression (as indeed it is). We need to do it this
986  * way to get proper canonicalization of AND/OR structure. Note that
987  * this converts each element into an implicit-AND sublist.
988  */
989  foreach(lcsq, rte->securityQuals)
990  {
991  lfirst(lcsq) = preprocess_expression(root,
992  (Node *) lfirst(lcsq),
993  EXPRKIND_QUAL);
994  }
995  }
996 
997  /*
998  * Now that we are done preprocessing expressions, and in particular done
999  * flattening join alias variables, get rid of the joinaliasvars lists.
1000  * They no longer match what expressions in the rest of the tree look
1001  * like, because we have not preprocessed expressions in those lists (and
1002  * do not want to; for example, expanding a SubLink there would result in
1003  * a useless unreferenced subplan). Leaving them in place simply creates
1004  * a hazard for later scans of the tree. We could try to prevent that by
1005  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
1006  * but that doesn't sound very reliable.
1007  */
1008  if (root->hasJoinRTEs)
1009  {
1010  foreach(l, parse->rtable)
1011  {
1012  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1013 
1014  rte->joinaliasvars = NIL;
1015  }
1016  }
1017 
1018  /*
1019  * Replace any Vars in the subquery's targetlist and havingQual that
1020  * reference GROUP outputs with the underlying grouping expressions.
1021  *
1022  * Note that we need to perform this replacement after we've preprocessed
1023  * the grouping expressions. This is to ensure that there is only one
1024  * instance of SubPlan for each SubLink contained within the grouping
1025  * expressions.
1026  */
1027  if (parse->hasGroupRTE)
1028  {
1029  parse->targetList = (List *)
1030  flatten_group_exprs(root, root->parse, (Node *) parse->targetList);
1031  parse->havingQual =
1032  flatten_group_exprs(root, root->parse, parse->havingQual);
1033  }
1034 
1035  /* Constant-folding might have removed all set-returning functions */
1036  if (parse->hasTargetSRFs)
1037  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
1038 
1039  /*
1040  * In some cases we may want to transfer a HAVING clause into WHERE. We
1041  * cannot do so if the HAVING clause contains aggregates (obviously) or
1042  * volatile functions (since a HAVING clause is supposed to be executed
1043  * only once per group). We also can't do this if there are any nonempty
1044  * grouping sets; moving such a clause into WHERE would potentially change
1045  * the results, if any referenced column isn't present in all the grouping
1046  * sets. (If there are only empty grouping sets, then the HAVING clause
1047  * must be degenerate as discussed below.)
1048  *
1049  * Also, it may be that the clause is so expensive to execute that we're
1050  * better off doing it only once per group, despite the loss of
1051  * selectivity. This is hard to estimate short of doing the entire
1052  * planning process twice, so we use a heuristic: clauses containing
1053  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1054  * clause into WHERE, in hopes of eliminating tuples before aggregation
1055  * instead of after.
1056  *
1057  * If the query has explicit grouping then we can simply move such a
1058  * clause into WHERE; any group that fails the clause will not be in the
1059  * output because none of its tuples will reach the grouping or
1060  * aggregation stage. Otherwise we must have a degenerate (variable-free)
1061  * HAVING clause, which we put in WHERE so that query_planner() can use it
1062  * in a gating Result node, but also keep in HAVING to ensure that we
1063  * don't emit a bogus aggregated row. (This could be done better, but it
1064  * seems not worth optimizing.)
1065  *
1066  * Note that a HAVING clause may contain expressions that are not fully
1067  * preprocessed. This can happen if these expressions are part of
1068  * grouping items. In such cases, they are replaced with GROUP Vars in
1069  * the parser and then replaced back after we've done with expression
1070  * preprocessing on havingQual. This is not an issue if the clause
1071  * remains in HAVING, because these expressions will be matched to lower
1072  * target items in setrefs.c. However, if the clause is moved or copied
1073  * into WHERE, we need to ensure that these expressions are fully
1074  * preprocessed.
1075  *
1076  * Note that both havingQual and parse->jointree->quals are in
1077  * implicitly-ANDed-list form at this point, even though they are declared
1078  * as Node *.
1079  */
1080  newHaving = NIL;
1081  foreach(l, (List *) parse->havingQual)
1082  {
1083  Node *havingclause = (Node *) lfirst(l);
1084 
1085  if ((parse->groupClause && parse->groupingSets) ||
1086  contain_agg_clause(havingclause) ||
1087  contain_volatile_functions(havingclause) ||
1088  contain_subplans(havingclause))
1089  {
1090  /* keep it in HAVING */
1091  newHaving = lappend(newHaving, havingclause);
1092  }
1093  else if (parse->groupClause && !parse->groupingSets)
1094  {
1095  Node *whereclause;
1096 
1097  /* Preprocess the HAVING clause fully */
1098  whereclause = preprocess_expression(root, havingclause,
1099  EXPRKIND_QUAL);
1100  /* ... and move it to WHERE */
1101  parse->jointree->quals = (Node *)
1102  list_concat((List *) parse->jointree->quals,
1103  (List *) whereclause);
1104  }
1105  else
1106  {
1107  Node *whereclause;
1108 
1109  /* Preprocess the HAVING clause fully */
1110  whereclause = preprocess_expression(root, copyObject(havingclause),
1111  EXPRKIND_QUAL);
1112  /* ... and put a copy in WHERE */
1113  parse->jointree->quals = (Node *)
1114  list_concat((List *) parse->jointree->quals,
1115  (List *) whereclause);
1116  /* ... and also keep it in HAVING */
1117  newHaving = lappend(newHaving, havingclause);
1118  }
1119  }
1120  parse->havingQual = (Node *) newHaving;
1121 
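A brief illustration of the rule this loop applies (hypothetical table and column names, not from the source):

/*
 * Given:
 *     SELECT category, sum(amount)
 *     FROM sales
 *     GROUP BY category
 *     HAVING category <> 'internal' AND sum(amount) > 100;
 *
 * the clause "category <> 'internal'" contains no aggregates, volatile
 * functions, or subplans, so it is preprocessed and moved into WHERE,
 * filtering rows before aggregation.  "sum(amount) > 100" contains an
 * aggregate and therefore stays in HAVING.
 */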
1122  /*
1123  * If we have any outer joins, try to reduce them to plain inner joins.
1124  * This step is most easily done after we've done expression
1125  * preprocessing.
1126  */
1127  if (hasOuterJoins)
1128  reduce_outer_joins(root);
1129 
1130  /*
1131  * If we have any RTE_RESULT relations, see if they can be deleted from
1132  * the jointree. We also rely on this processing to flatten single-child
1133  * FromExprs underneath outer joins. This step is most effectively done
1134  * after we've done expression preprocessing and outer join reduction.
1135  */
1136  if (hasResultRTEs || hasOuterJoins)
1137  remove_useless_result_rtes(root);
1138 
1139  /*
1140  * Do the main planning.
1141  */
1142  grouping_planner(root, tuple_fraction, setops);
1143 
1144  /*
1145  * Capture the set of outer-level param IDs we have access to, for use in
1146  * extParam/allParam calculations later.
1147  */
1148  SS_identify_outer_params(root);
1149 
1150  /*
1151  * If any initPlans were created in this query level, adjust the surviving
1152  * Paths' costs and parallel-safety flags to account for them. The
1153  * initPlans won't actually get attached to the plan tree till
1154  * create_plan() runs, but we must include their effects now.
1155  */
1156  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1157  SS_charge_for_initplans(root, final_rel);
1158 
1159  /*
1160  * Make sure we've identified the cheapest Path for the final rel. (By
1161  * doing this here not in grouping_planner, we include initPlan costs in
1162  * the decision, though it's unlikely that will change anything.)
1163  */
1164  set_cheapest(final_rel);
1165 
1166  return root;
1167 }
1168 
1169 /*
1170  * preprocess_expression
1171  * Do subquery_planner's preprocessing work for an expression,
1172  * which can be a targetlist, a WHERE clause (including JOIN/ON
1173  * conditions), a HAVING clause, or a few other things.
1174  */
1175 static Node *
1176 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1177 {
1178  /*
1179  * Fall out quickly if expression is empty. This occurs often enough to
1180  * be worth checking. Note that null->null is the correct conversion for
1181  * implicit-AND result format, too.
1182  */
1183  if (expr == NULL)
1184  return NULL;
1185 
1186  /*
1187  * If the query has any join RTEs, replace join alias variables with
1188  * base-relation variables. We must do this first, since any expressions
1189  * we may extract from the joinaliasvars lists have not been preprocessed.
1190  * For example, if we did this after sublink processing, sublinks expanded
1191  * out from join aliases would not get processed. But we can skip this in
1192  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1193  * they can't contain any Vars of the current query level.
1194  */
1195  if (root->hasJoinRTEs &&
1196  !(kind == EXPRKIND_RTFUNC ||
1197  kind == EXPRKIND_VALUES ||
1198  kind == EXPRKIND_TABLESAMPLE ||
1199  kind == EXPRKIND_TABLEFUNC))
1200  expr = flatten_join_alias_vars(root, root->parse, expr);
1201 
1202  /*
1203  * Simplify constant expressions. For function RTEs, this was already
1204  * done by preprocess_function_rtes. (But note we must do it again for
1205  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1206  * un-simplified subexpressions inserted by flattening of subqueries or
1207  * join alias variables.)
1208  *
1209  * Note: an essential effect of this is to convert named-argument function
1210  * calls to positional notation and insert the current actual values of
1211  * any default arguments for functions. To ensure that happens, we *must*
1212  * process all expressions here. Previous PG versions sometimes skipped
1213  * const-simplification if it didn't seem worth the trouble, but we can't
1214  * do that anymore.
1215  *
1216  * Note: this also flattens nested AND and OR expressions into N-argument
1217  * form. All processing of a qual expression after this point must be
1218  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1219  * with AND directly under AND, nor OR directly under OR.
1220  */
1221  if (kind != EXPRKIND_RTFUNC)
1222  expr = eval_const_expressions(root, expr);
1223 
1224  /*
1225  * If it's a qual or havingQual, canonicalize it.
1226  */
1227  if (kind == EXPRKIND_QUAL)
1228  {
1229  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1230 
1231 #ifdef OPTIMIZER_DEBUG
1232  printf("After canonicalize_qual()\n");
1233  pprint(expr);
1234 #endif
1235  }
1236 
1237  /*
1238  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1239  * hashfuncid of any that might execute more quickly by using hash lookups
1240  * instead of a linear search.
1241  */
1242  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1243  {
1244  convert_saop_to_hashed_saop(expr);
1245  }
1246 
1247  /* Expand SubLinks to SubPlans */
1248  if (root->parse->hasSubLinks)
1249  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1250 
1251  /*
1252  * XXX do not insert anything here unless you have grokked the comments in
1253  * SS_replace_correlation_vars ...
1254  */
1255 
1256  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1257  if (root->query_level > 1)
1258  expr = SS_replace_correlation_vars(root, expr);
1259 
1260  /*
1261  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1262  * don't want to do this before eval_const_expressions, since the latter
1263  * would be unable to simplify a top-level AND correctly. Also,
1264  * SS_process_sublinks expects explicit-AND format.)
1265  */
1266  if (kind == EXPRKIND_QUAL)
1267  expr = (Node *) make_ands_implicit((Expr *) expr);
1268 
1269  return expr;
1270 }
1271 
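A short illustration of the implicit-AND format produced above (an editorial sketch, not part of the source):

/*
 * make_ands_implicit turns the explicit boolean tree
 *     BoolExpr(AND_EXPR, {a = 1, b > 2, c IS NULL})
 * into the flat List
 *     (a = 1, b > 2, c IS NULL)
 * and NIL (the empty list) stands for a constant-true qual, which is why
 * the NULL -> NULL shortcut at the top of preprocess_expression is correct.
 */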
1272 /*
1273  * preprocess_qual_conditions
1274  * Recursively scan the query's jointree and do subquery_planner's
1275  * preprocessing work on each qual condition found therein.
1276  */
1277 static void
1278 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1279 {
1280  if (jtnode == NULL)
1281  return;
1282  if (IsA(jtnode, RangeTblRef))
1283  {
1284  /* nothing to do here */
1285  }
1286  else if (IsA(jtnode, FromExpr))
1287  {
1288  FromExpr *f = (FromExpr *) jtnode;
1289  ListCell *l;
1290 
1291  foreach(l, f->fromlist)
1292  preprocess_qual_conditions(root, (Node *) lfirst(l));
1293 
1294  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1295  }
1296  else if (IsA(jtnode, JoinExpr))
1297  {
1298  JoinExpr *j = (JoinExpr *) jtnode;
1299 
1300  preprocess_qual_conditions(root, j->larg);
1301  preprocess_qual_conditions(root, j->rarg);
1302 
1303  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1304  }
1305  else
1306  elog(ERROR, "unrecognized node type: %d",
1307  (int) nodeTag(jtnode));
1308 }
1309 
1310 /*
1311  * preprocess_phv_expression
1312  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1313  *
1314  * If a LATERAL subquery references an output of another subquery, and that
1315  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1316  * join, then we'll push the PlaceHolderVar expression down into the subquery
1317  * and later pull it back up during find_lateral_references, which runs after
1318  * subquery_planner has preprocessed all the expressions that were in the
1319  * current query level to start with. So we need to preprocess it then.
1320  */
1321 Expr *
1322 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1323 {
1324  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1325 }
1326 
1327 /*--------------------
1328  * grouping_planner
1329  * Perform planning steps related to grouping, aggregation, etc.
1330  *
1331  * This function adds all required top-level processing to the scan/join
1332  * Path(s) produced by query_planner.
1333  *
1334  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1335  * tuple_fraction is interpreted as follows:
1336  * 0: expect all tuples to be retrieved (normal case)
1337  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1338  * from the plan to be retrieved
1339  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1340  * expected to be retrieved (ie, a LIMIT specification).
1341  * setops is used for set operation subqueries to provide the subquery with
1342  * the context in which it's being used so that Paths correctly sorted for the
1343  * set operation can be generated. NULL when not planning a set operation
1344  * child.
1345  *
1346  * Returns nothing; the useful output is in the Paths we attach to the
1347  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1348  * root->processed_tlist contains the final processed targetlist.
1349  *
1350  * Note that we have not done set_cheapest() on the final rel; it's convenient
1351  * to leave this to the caller.
1352  *--------------------
1353  */
1354 static void
1355 grouping_planner(PlannerInfo *root, double tuple_fraction,
1356  SetOperationStmt *setops)
1357 {
1358  Query *parse = root->parse;
1359  int64 offset_est = 0;
1360  int64 count_est = 0;
1361  double limit_tuples = -1.0;
1362  bool have_postponed_srfs = false;
1363  PathTarget *final_target;
1364  List *final_targets;
1365  List *final_targets_contain_srfs;
1366  bool final_target_parallel_safe;
1367  RelOptInfo *current_rel;
1368  RelOptInfo *final_rel;
1369  FinalPathExtraData extra;
1370  ListCell *lc;
1371 
1372  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1373  if (parse->limitCount || parse->limitOffset)
1374  {
1375  tuple_fraction = preprocess_limit(root, tuple_fraction,
1376  &offset_est, &count_est);
1377 
1378  /*
1379  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1380  * estimate the effects of using a bounded sort.
1381  */
1382  if (count_est > 0 && offset_est >= 0)
1383  limit_tuples = (double) count_est + (double) offset_est;
1384  }
1385 
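A small worked example of the values computed above (hedged, assuming both clauses are evaluable constants and the caller passed the default tuple_fraction of 0): for "... LIMIT 10 OFFSET 5", preprocess_limit reports count_est = 10 and offset_est = 5, and normally hands back the absolute row count as the new tuple_fraction (per the ">= 1 means an absolute count" convention in the header comment), while limit_tuples = 10 + 5 = 15 bounds any sort that feeds the LIMIT.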
1386  /* Make tuple_fraction accessible to lower-level routines */
1387  root->tuple_fraction = tuple_fraction;
1388 
1389  if (parse->setOperations)
1390  {
1391  /*
1392  * Construct Paths for set operations. The results will not need any
1393  * work except perhaps a top-level sort and/or LIMIT. Note that any
1394  * special work for recursive unions is the responsibility of
1395  * plan_set_operations.
1396  */
1397  current_rel = plan_set_operations(root);
1398 
1399  /*
1400  * We should not need to call preprocess_targetlist, since we must be
1401  * in a SELECT query node. Instead, use the processed_tlist returned
1402  * by plan_set_operations (since this tells whether it returned any
1403  * resjunk columns!), and transfer any sort key information from the
1404  * original tlist.
1405  */
1406  Assert(parse->commandType == CMD_SELECT);
1407 
1408  /* for safety, copy processed_tlist instead of modifying in-place */
1409  root->processed_tlist =
1410  postprocess_setop_tlist(copyObject(root->processed_tlist),
1411  parse->targetList);
1412 
1413  /* Also extract the PathTarget form of the setop result tlist */
1414  final_target = current_rel->cheapest_total_path->pathtarget;
1415 
1416  /* And check whether it's parallel safe */
1417  final_target_parallel_safe =
1418  is_parallel_safe(root, (Node *) final_target->exprs);
1419 
1420  /* The setop result tlist couldn't contain any SRFs */
1421  Assert(!parse->hasTargetSRFs);
1422  final_targets = final_targets_contain_srfs = NIL;
1423 
1424  /*
1425  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1426  * checked already, but let's make sure).
1427  */
1428  if (parse->rowMarks)
1429  ereport(ERROR,
1430  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1431  /*------
1432  translator: %s is a SQL row locking clause such as FOR UPDATE */
1433  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1434  LCS_asString(linitial_node(RowMarkClause,
1435  parse->rowMarks)->strength))));
1436 
1437  /*
1438  * Calculate pathkeys that represent result ordering requirements
1439  */
1440  Assert(parse->distinctClause == NIL);
1441  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1442  parse->sortClause,
1443  root->processed_tlist);
1444  }
1445  else
1446  {
1447  /* No set operations, do regular planning */
1448  PathTarget *sort_input_target;
1449  List *sort_input_targets;
1450  List *sort_input_targets_contain_srfs;
1451  bool sort_input_target_parallel_safe;
1452  PathTarget *grouping_target;
1453  List *grouping_targets;
1454  List *grouping_targets_contain_srfs;
1455  bool grouping_target_parallel_safe;
1456  PathTarget *scanjoin_target;
1457  List *scanjoin_targets;
1458  List *scanjoin_targets_contain_srfs;
1459  bool scanjoin_target_parallel_safe;
1460  bool scanjoin_target_same_exprs;
1461  bool have_grouping;
1462  WindowFuncLists *wflists = NULL;
1463  List *activeWindows = NIL;
1464  grouping_sets_data *gset_data = NULL;
1465  standard_qp_extra qp_extra;
1466 
1467  /* A recursive query should always have setOperations */
1468  Assert(!root->hasRecursion);
1469 
1470  /* Preprocess grouping sets and GROUP BY clause, if any */
1471  if (parse->groupingSets)
1472  {
1473  gset_data = preprocess_grouping_sets(root);
1474  }
1475  else if (parse->groupClause)
1476  {
1477  /* Preprocess regular GROUP BY clause, if any */
1478  root->processed_groupClause = preprocess_groupclause(root, NIL);
1479  /* Remove any redundant GROUP BY columns */
1480  remove_useless_groupby_columns(root);
1481  }
1482 
1483  /*
1484  * Preprocess targetlist. Note that much of the remaining planning
1485  * work will be done with the PathTarget representation of tlists, but
1486  * we must also maintain the full representation of the final tlist so
1487  * that we can transfer its decoration (resnames etc) to the topmost
1488  * tlist of the finished Plan. This is kept in processed_tlist.
1489  */
1490  preprocess_targetlist(root);
1491 
1492  /*
1493  * Mark all the aggregates with resolved aggtranstypes, and detect
1494  * aggregates that are duplicates or can share transition state. We
1495  * must do this before slicing and dicing the tlist into various
1496  * pathtargets, else some copies of the Aggref nodes might escape
1497  * being marked.
1498  */
1499  if (parse->hasAggs)
1500  {
1501  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1502  preprocess_aggrefs(root, (Node *) parse->havingQual);
1503  }
1504 
1505  /*
1506  * Locate any window functions in the tlist. (We don't need to look
1507  * anywhere else, since expressions used in ORDER BY will be in there
1508  * too.) Note that they could all have been eliminated by constant
1509  * folding, in which case we don't need to do any more work.
1510  */
1511  if (parse->hasWindowFuncs)
1512  {
1513  wflists = find_window_functions((Node *) root->processed_tlist,
1514  list_length(parse->windowClause));
1515  if (wflists->numWindowFuncs > 0)
1516  {
1517  /*
1518  * See if any modifications can be made to each WindowClause
1519  * to allow the executor to execute the WindowFuncs more
1520  * quickly.
1521  */
1522  optimize_window_clauses(root, wflists);
1523 
1524  activeWindows = select_active_windows(root, wflists);
1525  }
1526  else
1527  parse->hasWindowFuncs = false;
1528  }
1529 
1530  /*
1531  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1532  * adding logic between here and the query_planner() call. Anything
1533  * that is needed in MIN/MAX-optimizable cases will have to be
1534  * duplicated in planagg.c.
1535  */
1536  if (parse->hasAggs)
1537  preprocess_minmax_aggregates(root);
1538 
1539  /*
1540  * Figure out whether there's a hard limit on the number of rows that
1541  * query_planner's result subplan needs to return. Even if we know a
1542  * hard limit overall, it doesn't apply if the query has any
1543  * grouping/aggregation operations, or SRFs in the tlist.
1544  */
1545  if (parse->groupClause ||
1546  parse->groupingSets ||
1547  parse->distinctClause ||
1548  parse->hasAggs ||
1549  parse->hasWindowFuncs ||
1550  parse->hasTargetSRFs ||
1551  root->hasHavingQual)
1552  root->limit_tuples = -1.0;
1553  else
1554  root->limit_tuples = limit_tuples;
1555 
1556  /* Set up data needed by standard_qp_callback */
1557  qp_extra.activeWindows = activeWindows;
1558  qp_extra.gset_data = gset_data;
1559 
1560  /*
1561  * If we're a subquery for a set operation, store the SetOperationStmt
1562  * in qp_extra.
1563  */
1564  qp_extra.setop = setops;
1565 
1566  /*
1567  * Generate the best unsorted and presorted paths for the scan/join
1568  * portion of this Query, ie the processing represented by the
1569  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1570  * We also generate (in standard_qp_callback) pathkey representations
1571  * of the query's sort clause, distinct clause, etc.
1572  */
1573  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1574 
1575  /*
1576  * Convert the query's result tlist into PathTarget format.
1577  *
1578  * Note: this cannot be done before query_planner() has performed
1579  * appendrel expansion, because that might add resjunk entries to
1580  * root->processed_tlist. Waiting till afterwards is also helpful
1581  * because the target width estimates can use per-Var width numbers
1582  * that were obtained within query_planner().
1583  */
1584  final_target = create_pathtarget(root, root->processed_tlist);
1585  final_target_parallel_safe =
1586  is_parallel_safe(root, (Node *) final_target->exprs);
1587 
1588  /*
1589  * If ORDER BY was given, consider whether we should use a post-sort
1590  * projection, and compute the adjusted target for preceding steps if
1591  * so.
1592  */
1593  if (parse->sortClause)
1594  {
1595  sort_input_target = make_sort_input_target(root,
1596  final_target,
1597  &have_postponed_srfs);
1598  sort_input_target_parallel_safe =
1599  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1600  }
1601  else
1602  {
1603  sort_input_target = final_target;
1604  sort_input_target_parallel_safe = final_target_parallel_safe;
1605  }
1606 
1607  /*
1608  * If we have window functions to deal with, the output from any
1609  * grouping step needs to be what the window functions want;
1610  * otherwise, it should be sort_input_target.
1611  */
1612  if (activeWindows)
1613  {
1614  grouping_target = make_window_input_target(root,
1615  final_target,
1616  activeWindows);
1617  grouping_target_parallel_safe =
1618  is_parallel_safe(root, (Node *) grouping_target->exprs);
1619  }
1620  else
1621  {
1622  grouping_target = sort_input_target;
1623  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1624  }
1625 
1626  /*
1627  * If we have grouping or aggregation to do, the topmost scan/join
1628  * plan node must emit what the grouping step wants; otherwise, it
1629  * should emit grouping_target.
1630  */
1631  have_grouping = (parse->groupClause || parse->groupingSets ||
1632  parse->hasAggs || root->hasHavingQual);
1633  if (have_grouping)
1634  {
1635  scanjoin_target = make_group_input_target(root, final_target);
1636  scanjoin_target_parallel_safe =
1637  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1638  }
1639  else
1640  {
1641  scanjoin_target = grouping_target;
1642  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1643  }
1644 
1645  /*
1646  * If there are any SRFs in the targetlist, we must separate each of
1647  * these PathTargets into SRF-computing and SRF-free targets. Replace
1648  * each of the named targets with a SRF-free version, and remember the
1649  * list of additional projection steps we need to add afterwards.
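 * For example, with a targetlist of (x, generate_series(1, 3)), the
 * SRF-free version computes just x, and the set-returning function is
 * evaluated afterwards in a separate ProjectSet projection step added by
 * adjust_paths_for_srfs.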
1650  */
1651  if (parse->hasTargetSRFs)
1652  {
1653  /* final_target doesn't recompute any SRFs in sort_input_target */
1654  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1655  &final_targets,
1656  &final_targets_contain_srfs);
1657  final_target = linitial_node(PathTarget, final_targets);
1658  Assert(!linitial_int(final_targets_contain_srfs));
1659  /* likewise for sort_input_target vs. grouping_target */
1660  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1661  &sort_input_targets,
1662  &sort_input_targets_contain_srfs);
1663  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1664  Assert(!linitial_int(sort_input_targets_contain_srfs));
1665  /* likewise for grouping_target vs. scanjoin_target */
1666  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1667  &grouping_targets,
1668  &grouping_targets_contain_srfs);
1669  grouping_target = linitial_node(PathTarget, grouping_targets);
1670  Assert(!linitial_int(grouping_targets_contain_srfs));
1671  /* scanjoin_target will not have any SRFs precomputed for it */
1672  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1673  &scanjoin_targets,
1674  &scanjoin_targets_contain_srfs);
1675  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1676  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1677  }
1678  else
1679  {
1680  /* initialize lists; for most of these, dummy values are OK */
1681  final_targets = final_targets_contain_srfs = NIL;
1682  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1683  grouping_targets = grouping_targets_contain_srfs = NIL;
1684  scanjoin_targets = list_make1(scanjoin_target);
1685  scanjoin_targets_contain_srfs = NIL;
1686  }
1687 
1688  /* Apply scan/join target. */
1689  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1690  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1691  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1692  scanjoin_targets_contain_srfs,
1693  scanjoin_target_parallel_safe,
1694  scanjoin_target_same_exprs);
1695 
1696  /*
1697  * Save the various upper-rel PathTargets we just computed into
1698  * root->upper_targets[]. The core code doesn't use this, but it
1699  * provides a convenient place for extensions to get at the info. For
1700  * consistency, we save all the intermediate targets, even though some
1701  * of the corresponding upperrels might not be needed for this query.
1702  */
1703  root->upper_targets[UPPERREL_FINAL] = final_target;
1704  root->upper_targets[UPPERREL_ORDERED] = final_target;
1705  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1706  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1707  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1708  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1709 
1710  /*
1711  * If we have grouping and/or aggregation, consider ways to implement
1712  * that. We build a new upperrel representing the output of this
1713  * phase.
1714  */
1715  if (have_grouping)
1716  {
1717  current_rel = create_grouping_paths(root,
1718  current_rel,
1719  grouping_target,
1720  grouping_target_parallel_safe,
1721  gset_data);
1722  /* Fix things up if grouping_target contains SRFs */
1723  if (parse->hasTargetSRFs)
1724  adjust_paths_for_srfs(root, current_rel,
1725  grouping_targets,
1726  grouping_targets_contain_srfs);
1727  }
1728 
1729  /*
1730  * If we have window functions, consider ways to implement those. We
1731  * build a new upperrel representing the output of this phase.
1732  */
1733  if (activeWindows)
1734  {
1735  current_rel = create_window_paths(root,
1736  current_rel,
1737  grouping_target,
1738  sort_input_target,
1739  sort_input_target_parallel_safe,
1740  wflists,
1741  activeWindows);
1742  /* Fix things up if sort_input_target contains SRFs */
1743  if (parse->hasTargetSRFs)
1744  adjust_paths_for_srfs(root, current_rel,
1745  sort_input_targets,
1746  sort_input_targets_contain_srfs);
1747  }
1748 
1749  /*
1750  * If there is a DISTINCT clause, consider ways to implement that. We
1751  * build a new upperrel representing the output of this phase.
1752  */
1753  if (parse->distinctClause)
1754  {
1755  current_rel = create_distinct_paths(root,
1756  current_rel,
1757  sort_input_target);
1758  }
1759  } /* end of if (setOperations) */
1760 
1761  /*
1762  * If ORDER BY was given, consider ways to implement that, and generate a
1763  * new upperrel containing only paths that emit the correct ordering and
1764  * project the correct final_target. We can apply the original
1765  * limit_tuples limit in sort costing here, but only if there are no
1766  * postponed SRFs.
1767  */
1768  if (parse->sortClause)
1769  {
1770  current_rel = create_ordered_paths(root,
1771  current_rel,
1772  final_target,
1773  final_target_parallel_safe,
1774  have_postponed_srfs ? -1.0 :
1775  limit_tuples);
1776  /* Fix things up if final_target contains SRFs */
1777  if (parse->hasTargetSRFs)
1778  adjust_paths_for_srfs(root, current_rel,
1779  final_targets,
1780  final_targets_contain_srfs);
1781  }
1782 
1783  /*
1784  * Now we are prepared to build the final-output upperrel.
1785  */
1786  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1787 
1788  /*
1789  * If the input rel is marked consider_parallel and there's nothing that's
1790  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1791  * consider_parallel as well. Note that if the query has rowMarks or is
1792  * not a SELECT, consider_parallel will be false for every relation in the
1793  * query.
1794  */
1795  if (current_rel->consider_parallel &&
1796  is_parallel_safe(root, parse->limitOffset) &&
1797  is_parallel_safe(root, parse->limitCount))
1798  final_rel->consider_parallel = true;
1799 
1800  /*
1801  * If the current_rel belongs to a single FDW, so does the final_rel.
1802  */
1803  final_rel->serverid = current_rel->serverid;
1804  final_rel->userid = current_rel->userid;
1805  final_rel->useridiscurrent = current_rel->useridiscurrent;
1806  final_rel->fdwroutine = current_rel->fdwroutine;
1807 
1808  /*
1809  * Generate paths for the final_rel. Insert all surviving paths, with
1810  * LockRows, Limit, and/or ModifyTable steps added if needed.
1811  */
1812  foreach(lc, current_rel->pathlist)
1813  {
1814  Path *path = (Path *) lfirst(lc);
1815 
1816  /*
1817  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1818  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1819  * here. If there are only non-locking rowmarks, they should be
1820  * handled by the ModifyTable node instead. However, root->rowMarks
1821  * is what goes into the LockRows node.)
1822  */
1823  if (parse->rowMarks)
1824  {
1825  path = (Path *) create_lockrows_path(root, final_rel, path,
1826  root->rowMarks,
1827  assign_special_exec_param(root));
1828  }
1829 
1830  /*
1831  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1832  */
1833  if (limit_needed(parse))
1834  {
1835  path = (Path *) create_limit_path(root, final_rel, path,
1836  parse->limitOffset,
1837  parse->limitCount,
1838  parse->limitOption,
1839  offset_est, count_est);
1840  }
1841 
1842  /*
1843  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1844  */
1845  if (parse->commandType != CMD_SELECT)
1846  {
1847  Index rootRelation;
1848  List *resultRelations = NIL;
1849  List *updateColnosLists = NIL;
1850  List *withCheckOptionLists = NIL;
1851  List *returningLists = NIL;
1852  List *mergeActionLists = NIL;
1853  List *mergeJoinConditions = NIL;
1854  List *rowMarks;
1855 
1856  if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1857  {
1858  /* Inherited UPDATE/DELETE/MERGE */
1859  RelOptInfo *top_result_rel = find_base_rel(root,
1860  parse->resultRelation);
1861  int resultRelation = -1;
1862 
1863  /* Pass the root result rel forward to the executor. */
1864  rootRelation = parse->resultRelation;
1865 
1866  /* Add only leaf children to ModifyTable. */
1867  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1868  resultRelation)) >= 0)
1869  {
1870  RelOptInfo *this_result_rel = find_base_rel(root,
1871  resultRelation);
1872 
1873  /*
1874  * Also exclude any leaf rels that have turned dummy since
1875  * being added to the list, for example, by being excluded
1876  * by constraint exclusion.
1877  */
1878  if (IS_DUMMY_REL(this_result_rel))
1879  continue;
1880 
1881  /* Build per-target-rel lists needed by ModifyTable */
1882  resultRelations = lappend_int(resultRelations,
1883  resultRelation);
1884  if (parse->commandType == CMD_UPDATE)
1885  {
1886  List *update_colnos = root->update_colnos;
1887 
1888  if (this_result_rel != top_result_rel)
1889  update_colnos =
1890  adjust_inherited_attnums_multilevel(root,
1891  update_colnos,
1892  this_result_rel->relid,
1893  top_result_rel->relid);
1894  updateColnosLists = lappend(updateColnosLists,
1895  update_colnos);
1896  }
1897  if (parse->withCheckOptions)
1898  {
1899  List *withCheckOptions = parse->withCheckOptions;
1900 
1901  if (this_result_rel != top_result_rel)
1902  withCheckOptions = (List *)
1903  adjust_appendrel_attrs_multilevel(root,
1904  (Node *) withCheckOptions,
1905  this_result_rel,
1906  top_result_rel);
1907  withCheckOptionLists = lappend(withCheckOptionLists,
1908  withCheckOptions);
1909  }
1910  if (parse->returningList)
1911  {
1912  List *returningList = parse->returningList;
1913 
1914  if (this_result_rel != top_result_rel)
1915  returningList = (List *)
1916  adjust_appendrel_attrs_multilevel(root,
1917  (Node *) returningList,
1918  this_result_rel,
1919  top_result_rel);
1920  returningLists = lappend(returningLists,
1921  returningList);
1922  }
1923  if (parse->mergeActionList)
1924  {
1925  ListCell *l;
1926  List *mergeActionList = NIL;
1927 
1928  /*
1929  * Copy MergeActions and translate stuff that
1930  * references attribute numbers.
1931  */
1932  foreach(l, parse->mergeActionList)
1933  {
1934  MergeAction *action = lfirst(l),
1935  *leaf_action = copyObject(action);
1936 
1937  leaf_action->qual =
1938  adjust_appendrel_attrs_multilevel(root,
1939  (Node *) action->qual,
1940  this_result_rel,
1941  top_result_rel);
1942  leaf_action->targetList = (List *)
1943  adjust_appendrel_attrs_multilevel(root,
1944  (Node *) action->targetList,
1945  this_result_rel,
1946  top_result_rel);
1947  if (leaf_action->commandType == CMD_UPDATE)
1948  leaf_action->updateColnos =
1949  adjust_inherited_attnums_multilevel(root,
1950  action->updateColnos,
1951  this_result_rel->relid,
1952  top_result_rel->relid);
1953  mergeActionList = lappend(mergeActionList,
1954  leaf_action);
1955  }
1956 
1957  mergeActionLists = lappend(mergeActionLists,
1958  mergeActionList);
1959  }
1960  if (parse->commandType == CMD_MERGE)
1961  {
1962  Node *mergeJoinCondition = parse->mergeJoinCondition;
1963 
1964  if (this_result_rel != top_result_rel)
1965  mergeJoinCondition =
1966  adjust_appendrel_attrs_multilevel(root,
1967  mergeJoinCondition,
1968  this_result_rel,
1969  top_result_rel);
1970  mergeJoinConditions = lappend(mergeJoinConditions,
1971  mergeJoinCondition);
1972  }
1973  }
1974 
1975  if (resultRelations == NIL)
1976  {
1977  /*
1978  * We managed to exclude every child rel, so generate a
1979  * dummy one-relation plan using info for the top target
1980  * rel (even though that may not be a leaf target).
1981  * Although it's clear that no data will be updated or
1982  * deleted, we still need to have a ModifyTable node so
1983  * that any statement triggers will be executed. (This
1984  * could be cleaner if we fixed nodeModifyTable.c to allow
1985  * zero target relations, but that probably wouldn't be a
1986  * net win.)
1987  */
1988  resultRelations = list_make1_int(parse->resultRelation);
1989  if (parse->commandType == CMD_UPDATE)
1990  updateColnosLists = list_make1(root->update_colnos);
1991  if (parse->withCheckOptions)
1992  withCheckOptionLists = list_make1(parse->withCheckOptions);
1993  if (parse->returningList)
1994  returningLists = list_make1(parse->returningList);
1995  if (parse->mergeActionList)
1996  mergeActionLists = list_make1(parse->mergeActionList);
1997  if (parse->commandType == CMD_MERGE)
1998  mergeJoinConditions = list_make1(parse->mergeJoinCondition);
1999  }
2000  }
2001  else
2002  {
2003  /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
2004  rootRelation = 0; /* there's no separate root rel */
2005  resultRelations = list_make1_int(parse->resultRelation);
2006  if (parse->commandType == CMD_UPDATE)
2007  updateColnosLists = list_make1(root->update_colnos);
2008  if (parse->withCheckOptions)
2009  withCheckOptionLists = list_make1(parse->withCheckOptions);
2010  if (parse->returningList)
2011  returningLists = list_make1(parse->returningList);
2012  if (parse->mergeActionList)
2013  mergeActionLists = list_make1(parse->mergeActionList);
2014  if (parse->commandType == CMD_MERGE)
2015  mergeJoinConditions = list_make1(parse->mergeJoinCondition);
2016  }
2017 
2018  /*
2019  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2020  * will have dealt with fetching non-locked marked rows, else we
2021  * need to have ModifyTable do that.
2022  */
2023  if (parse->rowMarks)
2024  rowMarks = NIL;
2025  else
2026  rowMarks = root->rowMarks;
2027 
2028  path = (Path *)
2029  create_modifytable_path(root, final_rel,
2030  path,
2031  parse->commandType,
2032  parse->canSetTag,
2033  parse->resultRelation,
2034  rootRelation,
2035  root->partColsUpdated,
2036  resultRelations,
2037  updateColnosLists,
2038  withCheckOptionLists,
2039  returningLists,
2040  rowMarks,
2041  parse->onConflict,
2042  mergeActionLists,
2043  mergeJoinConditions,
2044  assign_special_exec_param(root));
2045  }
2046 
2047  /* And shove it into final_rel */
2048  add_path(final_rel, path);
2049  }
2050 
2051  /*
2052  * Generate partial paths for final_rel, too, if outer query levels might
2053  * be able to make use of them.
2054  */
2055  if (final_rel->consider_parallel && root->query_level > 1 &&
2056  !limit_needed(parse))
2057  {
2058  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2059  foreach(lc, current_rel->partial_pathlist)
2060  {
2061  Path *partial_path = (Path *) lfirst(lc);
2062 
2063  add_partial_path(final_rel, partial_path);
2064  }
2065  }
2066 
2067  extra.limit_needed = limit_needed(parse);
2068  extra.limit_tuples = limit_tuples;
2069  extra.count_est = count_est;
2070  extra.offset_est = offset_est;
2071 
2072  /*
2073  * If there is an FDW that's responsible for all baserels of the query,
2074  * let it consider adding ForeignPaths.
2075  */
2076  if (final_rel->fdwroutine &&
2077  final_rel->fdwroutine->GetForeignUpperPaths)
2078  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2079  current_rel, final_rel,
2080  &extra);
2081 
2082  /* Let extensions possibly add some more paths */
2083  if (create_upper_paths_hook)
2084  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2085  current_rel, final_rel, &extra);
2086 
2087  /* Note: currently, we leave it to callers to do set_cheapest() */
2088 }
2089 
2090 /*
2091  * Do preprocessing for groupingSets clause and related data. This handles the
2092  * preliminary steps of expanding the grouping sets, organizing them into lists
2093  * of rollups, and preparing annotations which will later be filled in with
2094  * size estimates.
2095  */
2096 static grouping_sets_data *
2097 preprocess_grouping_sets(PlannerInfo *root)
2098 {
2099  Query *parse = root->parse;
2100  List *sets;
2101  int maxref = 0;
2102  ListCell *lc_set;
2103  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2104 
2105  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
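 /*
  * For example, ROLLUP(a, b) expands at this point into the grouping sets
  * (a, b), (a) and (), and CUBE(a, b) into (a, b), (a), (b) and (); the
  * expanded list comes back with the smallest sets first, which
  * extract_rollup_sets and reorder_grouping_sets below rely on.
  */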
2106 
2107  gd->any_hashable = false;
2108  gd->unhashable_refs = NULL;
2109  gd->unsortable_refs = NULL;
2110  gd->unsortable_sets = NIL;
2111 
2112  /*
2113  * We don't currently make any attempt to optimize the groupClause when
2114  * there are grouping sets, so just duplicate it in processed_groupClause.
2115  */
2116  root->processed_groupClause = parse->groupClause;
2117 
2118  if (parse->groupClause)
2119  {
2120  ListCell *lc;
2121 
2122  foreach(lc, parse->groupClause)
2123  {
2124  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2125  Index ref = gc->tleSortGroupRef;
2126 
2127  if (ref > maxref)
2128  maxref = ref;
2129 
2130  if (!gc->hashable)
2131  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2132 
2133  if (!OidIsValid(gc->sortop))
2134  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2135  }
2136  }
2137 
2138  /* Allocate workspace array for remapping */
2139  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2140 
2141  /*
2142  * If we have any unsortable sets, we must extract them before trying to
2143  * prepare rollups. Unsortable sets don't go through
2144  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2145  * here.
2146  */
2147  if (!bms_is_empty(gd->unsortable_refs))
2148  {
2149  List *sortable_sets = NIL;
2150  ListCell *lc;
2151 
2152  foreach(lc, parse->groupingSets)
2153  {
2154  List *gset = (List *) lfirst(lc);
2155 
2156  if (bms_overlap_list(gd->unsortable_refs, gset))
2157  {
2158  GroupingSetData *gs = makeNode(GroupingSetData);
2159 
2160  gs->set = gset;
2161  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2162 
2163  /*
2164  * We must enforce here that an unsortable set is hashable;
2165  * later code assumes this. Parse analysis only checks that
2166  * every individual column is either hashable or sortable.
2167  *
2168  * Note that passing this test doesn't guarantee we can
2169  * generate a plan; there might be other showstoppers.
2170  */
2171  if (bms_overlap_list(gd->unhashable_refs, gset))
2172  ereport(ERROR,
2173  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2174  errmsg("could not implement GROUP BY"),
2175  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2176  }
2177  else
2178  sortable_sets = lappend(sortable_sets, gset);
2179  }
2180 
2181  if (sortable_sets)
2182  sets = extract_rollup_sets(sortable_sets);
2183  else
2184  sets = NIL;
2185  }
2186  else
2187  sets = extract_rollup_sets(parse->groupingSets);
2188 
2189  foreach(lc_set, sets)
2190  {
2191  List *current_sets = (List *) lfirst(lc_set);
2192  RollupData *rollup = makeNode(RollupData);
2193  GroupingSetData *gs;
2194 
2195  /*
2196  * Reorder the current list of grouping sets into correct prefix
2197  * order. If only one aggregation pass is needed, try to make the
2198  * list match the ORDER BY clause; if more than one pass is needed, we
2199  * don't bother with that.
2200  *
2201  * Note that this reorders the sets from smallest-member-first to
2202  * largest-member-first, and applies the GroupingSetData annotations,
2203  * though the data will be filled in later.
2204  */
2205  current_sets = reorder_grouping_sets(current_sets,
2206  (list_length(sets) == 1
2207  ? parse->sortClause
2208  : NIL));
2209 
2210  /*
2211  * Get the initial (and therefore largest) grouping set.
2212  */
2213  gs = linitial_node(GroupingSetData, current_sets);
2214 
2215  /*
2216  * Order the groupClause appropriately. If the first grouping set is
2217  * empty, then the groupClause must also be empty; otherwise we have
2218  * to force the groupClause to match that grouping set's order.
2219  *
2220  * (The first grouping set can be empty even though parse->groupClause
2221  * is not empty only if all non-empty grouping sets are unsortable.
2222  * The groupClauses for hashed grouping sets are built later on.)
2223  */
2224  if (gs->set)
2225  rollup->groupClause = preprocess_groupclause(root, gs->set);
2226  else
2227  rollup->groupClause = NIL;
2228 
2229  /*
2230  * Is it hashable? We pretend empty sets are hashable even though we
2231  * actually force them not to be hashed later. But don't bother if
2232  * there's nothing but empty sets (since in that case we can't hash
2233  * anything).
2234  */
2235  if (gs->set &&
2236  !bms_overlap_list(gd->unhashable_refs, gs->set))
2237  {
2238  rollup->hashable = true;
2239  gd->any_hashable = true;
2240  }
2241 
2242  /*
2243  * Now that we've pinned down an order for the groupClause for this
2244  * list of grouping sets, we need to remap the entries in the grouping
2245  * sets from sortgrouprefs to plain indices (0-based) into the
2246  * groupClause for this collection of grouping sets. We keep the
2247  * original form for later use, though.
2248  */
2249  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2250  current_sets,
2251  gd->tleref_to_colnum_map);
2252  rollup->gsets_data = current_sets;
2253 
2254  gd->rollups = lappend(gd->rollups, rollup);
2255  }
2256 
2257  if (gd->unsortable_sets)
2258  {
2259  /*
2260  * We have not yet pinned down a groupclause for this, but we will
2261  * need index-based lists for estimation purposes. Construct
2262  * hash_sets_idx based on the entire original groupclause for now.
2263  */
2264  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2265  gd->unsortable_sets,
2266  gd->tleref_to_colnum_map);
2267  gd->any_hashable = true;
2268  }
2269 
2270  return gd;
2271 }
2272 
2273 /*
2274  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2275  * (without annotation) mapped to indexes into the given groupclause.
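 *
 * For example, if the groupClause entries carry sortgrouprefs 5, 3 and 7
 * (in that order), a grouping set stored as (3, 7) comes back as the
 * index list (1, 2).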
2276  */
2277 static List *
2278 remap_to_groupclause_idx(List *groupClause,
2279  List *gsets,
2280  int *tleref_to_colnum_map)
2281 {
2282  int ref = 0;
2283  List *result = NIL;
2284  ListCell *lc;
2285 
2286  foreach(lc, groupClause)
2287  {
2288  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2289 
2290  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2291  }
2292 
2293  foreach(lc, gsets)
2294  {
2295  List *set = NIL;
2296  ListCell *lc2;
2297  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2298 
2299  foreach(lc2, gs->set)
2300  {
2301  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2302  }
2303 
2304  result = lappend(result, set);
2305  }
2306 
2307  return result;
2308 }
2309 
2310 
2311 /*
2312  * preprocess_rowmarks - set up PlanRowMarks if needed
2313  */
2314 static void
2315 preprocess_rowmarks(PlannerInfo *root)
2316 {
2317  Query *parse = root->parse;
2318  Bitmapset *rels;
2319  List *prowmarks;
2320  ListCell *l;
2321  int i;
2322 
2323  if (parse->rowMarks)
2324  {
2325  /*
2326  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2327  * grouping, since grouping renders a reference to individual tuple
2328  * CTIDs invalid. This is also checked at parse time, but that's
2329  * insufficient because of rule substitution, query pullup, etc.
2330  */
2331  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2332  parse->rowMarks)->strength);
2333  }
2334  else
2335  {
2336  /*
2337  * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2338  * UPDATE/SHARE.
2339  */
2340  if (parse->commandType != CMD_UPDATE &&
2341  parse->commandType != CMD_DELETE &&
2342  parse->commandType != CMD_MERGE)
2343  return;
2344  }
2345 
2346  /*
2347  * We need to have rowmarks for all base relations except the target. We
2348  * make a bitmapset of all base rels and then remove the items we don't
2349  * need or have FOR [KEY] UPDATE/SHARE marks for.
2350  */
2351  rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2352  if (parse->resultRelation)
2353  rels = bms_del_member(rels, parse->resultRelation);
2354 
2355  /*
2356  * Convert RowMarkClauses to PlanRowMark representation.
2357  */
2358  prowmarks = NIL;
2359  foreach(l, parse->rowMarks)
2360  {
2361  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2362  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2363  PlanRowMark *newrc;
2364 
2365  /*
2366  * Currently, it is syntactically impossible to have FOR UPDATE et al
2367  * applied to an update/delete target rel. If that ever becomes
2368  * possible, we should drop the target from the PlanRowMark list.
2369  */
2370  Assert(rc->rti != parse->resultRelation);
2371 
2372  /*
2373  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2374  * can't support true locking. Subqueries that got flattened into the
2375  * main query should be ignored completely. Any that didn't will get
2376  * ROW_MARK_COPY items in the next loop.
2377  */
2378  if (rte->rtekind != RTE_RELATION)
2379  continue;
2380 
2381  rels = bms_del_member(rels, rc->rti);
2382 
2383  newrc = makeNode(PlanRowMark);
2384  newrc->rti = newrc->prti = rc->rti;
2385  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2386  newrc->markType = select_rowmark_type(rte, rc->strength);
2387  newrc->allMarkTypes = (1 << newrc->markType);
2388  newrc->strength = rc->strength;
2389  newrc->waitPolicy = rc->waitPolicy;
2390  newrc->isParent = false;
2391 
2392  prowmarks = lappend(prowmarks, newrc);
2393  }
2394 
2395  /*
2396  * Now, add rowmarks for any non-target, non-locked base relations.
2397  */
2398  i = 0;
2399  foreach(l, parse->rtable)
2400  {
2401  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2402  PlanRowMark *newrc;
2403 
2404  i++;
2405  if (!bms_is_member(i, rels))
2406  continue;
2407 
2408  newrc = makeNode(PlanRowMark);
2409  newrc->rti = newrc->prti = i;
2410  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2411  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2412  newrc->allMarkTypes = (1 << newrc->markType);
2413  newrc->strength = LCS_NONE;
2414  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2415  newrc->isParent = false;
2416 
2417  prowmarks = lappend(prowmarks, newrc);
2418  }
2419 
2420  root->rowMarks = prowmarks;
2421 }
2422 
2423 /*
2424  * Select RowMarkType to use for a given table
2425  */
2426 static RowMarkType
2427 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2428 {
2429  if (rte->rtekind != RTE_RELATION)
2430  {
2431  /* If it's not a table at all, use ROW_MARK_COPY */
2432  return ROW_MARK_COPY;
2433  }
2434  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2435  {
2436  /* Let the FDW select the rowmark type, if it wants to */
2437  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2438 
2439  if (fdwroutine->GetForeignRowMarkType != NULL)
2440  return fdwroutine->GetForeignRowMarkType(rte, strength);
2441  /* Otherwise, use ROW_MARK_COPY by default */
2442  return ROW_MARK_COPY;
2443  }
2444  else
2445  {
2446  /* Regular table, apply the appropriate lock type */
2447  switch (strength)
2448  {
2449  case LCS_NONE:
2450 
2451  /*
2452  * We don't need a tuple lock, only the ability to re-fetch
2453  * the row.
2454  */
2455  return ROW_MARK_REFERENCE;
2456  break;
2457  case LCS_FORKEYSHARE:
2458  return ROW_MARK_KEYSHARE;
2459  break;
2460  case LCS_FORSHARE:
2461  return ROW_MARK_SHARE;
2462  break;
2463  case LCS_FORNOKEYUPDATE:
2464  return ROW_MARK_NOKEYEXCLUSIVE;
2465  break;
2466  case LCS_FORUPDATE:
2467  return ROW_MARK_EXCLUSIVE;
2468  break;
2469  }
2470  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2471  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2472  }
2473 }
2474 
2475 /*
2476  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2477  *
2478  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2479  * results back in *count_est and *offset_est. These variables are set to
2480  * 0 if the corresponding clause is not present, and -1 if it's present
2481  * but we couldn't estimate the value for it. (The "0" convention is OK
2482  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2483  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2484  * usual practice of never estimating less than one row.) These values will
2485  * be passed to create_limit_path, which see if you change this code.
2486  *
2487  * The return value is the suitably adjusted tuple_fraction to use for
2488  * planning the query. This adjustment is not overridable, since it reflects
2489  * plan actions that grouping_planner() will certainly take, not assumptions
2490  * about context.
2491  */
2492 static double
2493 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2494  int64 *offset_est, int64 *count_est)
2495 {
2496  Query *parse = root->parse;
2497  Node *est;
2498  double limit_fraction;
2499 
2500  /* Should not be called unless LIMIT or OFFSET */
2501  Assert(parse->limitCount || parse->limitOffset);
2502 
2503  /*
2504  * Try to obtain the clause values. We use estimate_expression_value
2505  * primarily because it can sometimes do something useful with Params.
2506  */
2507  if (parse->limitCount)
2508  {
2509  est = estimate_expression_value(root, parse->limitCount);
2510  if (est && IsA(est, Const))
2511  {
2512  if (((Const *) est)->constisnull)
2513  {
2514  /* NULL indicates LIMIT ALL, ie, no limit */
2515  *count_est = 0; /* treat as not present */
2516  }
2517  else
2518  {
2519  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2520  if (*count_est <= 0)
2521  *count_est = 1; /* force to at least 1 */
2522  }
2523  }
2524  else
2525  *count_est = -1; /* can't estimate */
2526  }
2527  else
2528  *count_est = 0; /* not present */
2529 
2530  if (parse->limitOffset)
2531  {
2532  est = estimate_expression_value(root, parse->limitOffset);
2533  if (est && IsA(est, Const))
2534  {
2535  if (((Const *) est)->constisnull)
2536  {
2537  /* Treat NULL as no offset; the executor will too */
2538  *offset_est = 0; /* treat as not present */
2539  }
2540  else
2541  {
2542  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2543  if (*offset_est < 0)
2544  *offset_est = 0; /* treat as not present */
2545  }
2546  }
2547  else
2548  *offset_est = -1; /* can't estimate */
2549  }
2550  else
2551  *offset_est = 0; /* not present */
2552 
2553  if (*count_est != 0)
2554  {
2555  /*
2556  * A LIMIT clause limits the absolute number of tuples returned.
2557  * However, if it's not a constant LIMIT then we have to guess; for
2558  * lack of a better idea, assume 10% of the plan's result is wanted.
2559  */
2560  if (*count_est < 0 || *offset_est < 0)
2561  {
2562  /* LIMIT or OFFSET is an expression ... punt ... */
2563  limit_fraction = 0.10;
2564  }
2565  else
2566  {
2567  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2568  limit_fraction = (double) *count_est + (double) *offset_est;
2569  }
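 /*
  * For example, a constant "LIMIT 20 OFFSET 100" gives limit_fraction =
  * 120, i.e. at most 120 rows have to be produced below the Limit node.
  */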
2570 
2571  /*
2572  * If we have absolute limits from both caller and LIMIT, use the
2573  * smaller value; likewise if they are both fractional. If one is
2574  * fractional and the other absolute, we can't easily determine which
2575  * is smaller, but we use the heuristic that the absolute will usually
2576  * be smaller.
2577  */
2578  if (tuple_fraction >= 1.0)
2579  {
2580  if (limit_fraction >= 1.0)
2581  {
2582  /* both absolute */
2583  tuple_fraction = Min(tuple_fraction, limit_fraction);
2584  }
2585  else
2586  {
2587  /* caller absolute, limit fractional; use caller's value */
2588  }
2589  }
2590  else if (tuple_fraction > 0.0)
2591  {
2592  if (limit_fraction >= 1.0)
2593  {
2594  /* caller fractional, limit absolute; use limit */
2595  tuple_fraction = limit_fraction;
2596  }
2597  else
2598  {
2599  /* both fractional */
2600  tuple_fraction = Min(tuple_fraction, limit_fraction);
2601  }
2602  }
2603  else
2604  {
2605  /* no info from caller, just use limit */
2606  tuple_fraction = limit_fraction;
2607  }
2608  }
2609  else if (*offset_est != 0 && tuple_fraction > 0.0)
2610  {
2611  /*
2612  * We have an OFFSET but no LIMIT. This acts entirely differently
2613  * from the LIMIT case: here, we need to increase rather than decrease
2614  * the caller's tuple_fraction, because the OFFSET acts to cause more
2615  * tuples to be fetched instead of fewer. This only matters if we got
2616  * a tuple_fraction > 0, however.
2617  *
2618  * As above, use 10% if OFFSET is present but unestimatable.
2619  */
2620  if (*offset_est < 0)
2621  limit_fraction = 0.10;
2622  else
2623  limit_fraction = (double) *offset_est;
2624 
2625  /*
2626  * If we have absolute counts from both caller and OFFSET, add them
2627  * together; likewise if they are both fractional. If one is
2628  * fractional and the other absolute, we want to take the larger, and
2629  * we heuristically assume that's the fractional one.
2630  */
2631  if (tuple_fraction >= 1.0)
2632  {
2633  if (limit_fraction >= 1.0)
2634  {
2635  /* both absolute, so add them together */
2636  tuple_fraction += limit_fraction;
2637  }
2638  else
2639  {
2640  /* caller absolute, limit fractional; use limit */
2641  tuple_fraction = limit_fraction;
2642  }
2643  }
2644  else
2645  {
2646  if (limit_fraction >= 1.0)
2647  {
2648  /* caller fractional, limit absolute; use caller's value */
2649  }
2650  else
2651  {
2652  /* both fractional, so add them together */
2653  tuple_fraction += limit_fraction;
2654  if (tuple_fraction >= 1.0)
2655  tuple_fraction = 0.0; /* assume fetch all */
2656  }
2657  }
2658  }
2659 
2660  return tuple_fraction;
2661 }
2662 
2663 /*
2664  * limit_needed - do we actually need a Limit plan node?
2665  *
2666  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2667  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2668  * locution for an optimization fence. (Because other places in the planner
2669  * merely check whether parse->limitOffset isn't NULL, it will still work as
2670  * an optimization fence --- we're just suppressing unnecessary run-time
2671  * overhead.)
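 *
 * For example, "SELECT * FROM (SELECT ... OFFSET 0) ss" still acts as an
 * optimization fence but gets no Limit node, whereas "OFFSET 1" or a
 * non-constant OFFSET expression does need one.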
2672  *
2673  * This might look like it could be merged into preprocess_limit, but there's
2674  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2675  * in preprocess_limit it's good enough to consider estimated values.
2676  */
2677 bool
2678 limit_needed(Query *parse)
2679 {
2680  Node *node;
2681 
2682  node = parse->limitCount;
2683  if (node)
2684  {
2685  if (IsA(node, Const))
2686  {
2687  /* NULL indicates LIMIT ALL, ie, no limit */
2688  if (!((Const *) node)->constisnull)
2689  return true; /* LIMIT with a constant value */
2690  }
2691  else
2692  return true; /* non-constant LIMIT */
2693  }
2694 
2695  node = parse->limitOffset;
2696  if (node)
2697  {
2698  if (IsA(node, Const))
2699  {
2700  /* Treat NULL as no offset; the executor would too */
2701  if (!((Const *) node)->constisnull)
2702  {
2703  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2704 
2705  if (offset != 0)
2706  return true; /* OFFSET with a nonzero value */
2707  }
2708  }
2709  else
2710  return true; /* non-constant OFFSET */
2711  }
2712 
2713  return false; /* don't need a Limit plan node */
2714 }
2715 
2716 
2717 /*
2718  * remove_useless_groupby_columns
2719  * Remove any columns in the GROUP BY clause that are redundant due to
2720  * being functionally dependent on other GROUP BY columns.
2721  *
2722  * Since some other DBMSes do not allow references to ungrouped columns, it's
2723  * not unusual to find all columns listed in GROUP BY even though listing the
2724  * primary-key columns would be sufficient. Deleting such excess columns
2725  * avoids redundant sorting work, so it's worth doing.
2726  *
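 *
 * For example, if table t has primary key (id), then "GROUP BY t.id,
 * t.name" can be reduced to grouping on t.id alone, since t.name is
 * functionally dependent on the primary key.
 *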
2727  * Relcache invalidations will ensure that cached plans become invalidated
2728  * when the underlying index of the pkey constraint is dropped.
2729  *
2730  * Currently, we only make use of pkey constraints for this; however, we may
2731  * wish to take this further in the future and also use unique constraints
2732  * which have NOT NULL columns. In that case, plan invalidation will still
2733  * work since relations will receive a relcache invalidation when a NOT NULL
2734  * constraint is dropped.
2735  */
2736 static void
2737 remove_useless_groupby_columns(PlannerInfo *root)
2738 {
2739  Query *parse = root->parse;
2740  Bitmapset **groupbyattnos;
2741  Bitmapset **surplusvars;
2742  ListCell *lc;
2743  int relid;
2744 
2745  /* No chance to do anything if there are less than two GROUP BY items */
2746  if (list_length(root->processed_groupClause) < 2)
2747  return;
2748 
2749  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2750  if (parse->groupingSets)
2751  return;
2752 
2753  /*
2754  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2755  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2756  * that are GROUP BY items.
2757  */
2758  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2759  (list_length(parse->rtable) + 1));
2760  foreach(lc, root->processed_groupClause)
2761  {
2762  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2763  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2764  Var *var = (Var *) tle->expr;
2765 
2766  /*
2767  * Ignore non-Vars and Vars from other query levels.
2768  *
2769  * XXX in principle, stable expressions containing Vars could also be
2770  * removed, if all the Vars are functionally dependent on other GROUP
2771  * BY items. But it's not clear that such cases occur often enough to
2772  * be worth troubling over.
2773  */
2774  if (!IsA(var, Var) ||
2775  var->varlevelsup > 0)
2776  continue;
2777 
2778  /* OK, remember we have this Var */
2779  relid = var->varno;
2780  Assert(relid <= list_length(parse->rtable));
2781  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2782  var->varattno - FirstLowInvalidHeapAttributeNumber);
2783  }
2784 
2785  /*
2786  * Consider each relation and see if it is possible to remove some of its
2787  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2788  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2789  * of the column attnos of RTE k that are removable GROUP BY items.
2790  */
2791  surplusvars = NULL; /* don't allocate array unless required */
2792  relid = 0;
2793  foreach(lc, parse->rtable)
2794  {
2795  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2796  Bitmapset *relattnos;
2797  Bitmapset *pkattnos;
2798  Oid constraintOid;
2799 
2800  relid++;
2801 
2802  /* Only plain relations could have primary-key constraints */
2803  if (rte->rtekind != RTE_RELATION)
2804  continue;
2805 
2806  /*
2807  * We must skip inheritance parent tables as some of the child rels
2808  * may cause duplicate rows. This cannot happen with partitioned
2809  * tables, however.
2810  */
2811  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2812  continue;
2813 
2814  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2815  relattnos = groupbyattnos[relid];
2816  if (bms_membership(relattnos) != BMS_MULTIPLE)
2817  continue;
2818 
2819  /*
2820  * Can't remove any columns for this rel if there is no suitable
2821  * (i.e., nondeferrable) primary key constraint.
2822  */
2823  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2824  if (pkattnos == NULL)
2825  continue;
2826 
2827  /*
2828  * If the primary key is a proper subset of relattnos then we have
2829  * some items in the GROUP BY that can be removed.
2830  */
2831  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2832  {
2833  /*
2834  * To easily remember whether we've found anything to do, we don't
2835  * allocate the surplusvars[] array until we find something.
2836  */
2837  if (surplusvars == NULL)
2838  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2839  (list_length(parse->rtable) + 1));
2840 
2841  /* Remember the attnos of the removable columns */
2842  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2843  }
2844  }
2845 
2846  /*
2847  * If we found any surplus Vars, build a new GROUP BY clause without them.
2848  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2849  * markings, but that's harmless.)
2850  */
2851  if (surplusvars != NULL)
2852  {
2853  List *new_groupby = NIL;
2854 
2855  foreach(lc, root->processed_groupClause)
2856  {
2857  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2858  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2859  Var *var = (Var *) tle->expr;
2860 
2861  /*
2862  * New list must include non-Vars, outer Vars, and anything not
2863  * marked as surplus.
2864  */
2865  if (!IsA(var, Var) ||
2866  var->varlevelsup > 0 ||
2867  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2868  surplusvars[var->varno]))
2869  new_groupby = lappend(new_groupby, sgc);
2870  }
2871 
2872  root->processed_groupClause = new_groupby;
2873  }
2874 }
2875 
2876 /*
2877  * preprocess_groupclause - do preparatory work on GROUP BY clause
2878  *
2879  * The idea here is to adjust the ordering of the GROUP BY elements
2880  * (which in itself is semantically insignificant) to match ORDER BY,
2881  * thereby allowing a single sort operation to both implement the ORDER BY
2882  * requirement and set up for a Unique step that implements GROUP BY.
2883  * We also consider a partial match between the GROUP BY and ORDER BY elements,
2884  * which can allow ORDER BY to be implemented using an incremental sort.
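 *
 * For example, "GROUP BY b, a ... ORDER BY a, b" is treated here as if it
 * were "GROUP BY a, b", so that one sort on (a, b) can serve both the
 * grouping and the ordering.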
2885  *
2886  * We also consider other orderings of the GROUP BY elements, which could
2887  * match the sort ordering of other possible plans (e.g. an indexscan) and
2888  * thereby reduce cost. This is implemented during the generation of grouping
2889  * paths. See get_useful_group_keys_orderings() for details.
2890  *
2891  * Note: we need no comparable processing of the distinctClause because
2892  * the parser already enforced that that matches ORDER BY.
2893  *
2894  * Note: we return a fresh List, but its elements are the same
2895  * SortGroupClauses appearing in parse->groupClause. This is important
2896  * because later processing may modify the processed_groupClause list.
2897  *
2898  * For grouping sets, the order of items is instead forced to agree with that
2899  * of the grouping set (and items not in the grouping set are skipped). The
2900  * work of sorting the order of grouping set elements to match the ORDER BY if
2901  * possible is done elsewhere.
2902  */
2903 static List *
2904 preprocess_groupclause(PlannerInfo *root, List *force)
2905 {
2906  Query *parse = root->parse;
2907  List *new_groupclause = NIL;
2908  ListCell *sl;
2909  ListCell *gl;
2910 
2911  /* For grouping sets, we need to force the ordering */
2912  if (force)
2913  {
2914  foreach(sl, force)
2915  {
2916  Index ref = lfirst_int(sl);
2917  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2918 
2919  new_groupclause = lappend(new_groupclause, cl);
2920  }
2921 
2922  return new_groupclause;
2923  }
2924 
2925  /* If no ORDER BY, nothing useful to do here */
2926  if (parse->sortClause == NIL)
2927  return list_copy(parse->groupClause);
2928 
2929  /*
2930  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2931  * items, but only as far as we can make a matching prefix.
2932  *
2933  * This code assumes that the sortClause contains no duplicate items.
2934  */
2935  foreach(sl, parse->sortClause)
2936  {
2937  SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2938 
2939  foreach(gl, parse->groupClause)
2940  {
2941  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2942 
2943  if (equal(gc, sc))
2944  {
2945  new_groupclause = lappend(new_groupclause, gc);
2946  break;
2947  }
2948  }
2949  if (gl == NULL)
2950  break; /* no match, so stop scanning */
2951  }
2952 
2953 
2954  /* If no match at all, no point in reordering GROUP BY */
2955  if (new_groupclause == NIL)
2956  return list_copy(parse->groupClause);
2957 
2958  /*
2959  * Add any remaining GROUP BY items to the new list. We don't require a
2960  * complete match, because even partial match allows ORDER BY to be
2961  * implemented using incremental sort. Also, give up if there are any
2962  * non-sortable GROUP BY items, since then there's no hope anyway.
2963  */
2964  foreach(gl, parse->groupClause)
2965  {
2966  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2967 
2968  if (list_member_ptr(new_groupclause, gc))
2969  continue; /* it matched an ORDER BY item */
2970  if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2971  return list_copy(parse->groupClause);
2972  new_groupclause = lappend(new_groupclause, gc);
2973  }
2974 
2975  /* Success --- install the rearranged GROUP BY list */
2976  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2977  return new_groupclause;
2978 }
2979 
2980 /*
2981  * Extract lists of grouping sets that can be implemented using a single
2982  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2983  *
2984  * Input must be sorted with smallest sets first. Result has each sublist
2985  * sorted with smallest sets first.
2986  *
2987  * We want to produce the absolute minimum possible number of lists here to
2988  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2989  * of finding the minimal partition of a partially-ordered set into chains
2990  * (which is what we need, taking the list of grouping sets as a poset ordered
2991  * by set inclusion) can be mapped to the problem of finding the maximum
2992  * cardinality matching on a bipartite graph, which is solvable in polynomial
2993  * time, with a worst case no worse than O(n^2.5) and usually much
2994  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2995  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2996  * half a second on my modest system even with optimization off and assertions
2997  * on.)
2998  */
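 *
 * For example, GROUPING SETS ((a,b,c), (a,b), (c), ()) can be covered by
 * two chains, one running () -> (a,b) -> (a,b,c) and one holding just
 * (c), so only two separate aggregation passes are needed.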
 */
2999 static List *
3000 extract_rollup_sets(List *groupingSets)
3001 {
3002  int num_sets_raw = list_length(groupingSets);
3003  int num_empty = 0;
3004  int num_sets = 0; /* distinct sets */
3005  int num_chains = 0;
3006  List *result = NIL;
3007  List **results;
3008  List **orig_sets;
3009  Bitmapset **set_masks;
3010  int *chains;
3011  short **adjacency;
3012  short *adjacency_buf;
3013  BipartiteMatchState *state;
3014  int i;
3015  int j;
3016  int j_size;
3017  ListCell *lc1 = list_head(groupingSets);
3018  ListCell *lc;
3019 
3020  /*
3021  * Start by stripping out empty sets. The algorithm doesn't require this,
3022  * but the planner currently needs all empty sets to be returned in the
3023  * first list, so we strip them here and add them back after.
3024  */
3025  while (lc1 && lfirst(lc1) == NIL)
3026  {
3027  ++num_empty;
3028  lc1 = lnext(groupingSets, lc1);
3029  }
3030 
3031  /* bail out now if it turns out that all we had were empty sets. */
3032  if (!lc1)
3033  return list_make1(groupingSets);
3034 
3035  /*----------
3036  * We don't strictly need to remove duplicate sets here, but if we don't,
3037  * they tend to become scattered through the result, which is a bit
3038  * confusing (and irritating if we ever decide to optimize them out).
3039  * So we remove them here and add them back after.
3040  *
3041  * For each non-duplicate set, we fill in the following:
3042  *
3043  * orig_sets[i] = list of the original set lists
3044  * set_masks[i] = bitmapset for testing inclusion
3045  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3046  *
3047  * chains[i] will be the result group this set is assigned to.
3048  *
3049  * We index all of these from 1 rather than 0 because it is convenient
3050  * to leave 0 free for the NIL node in the graph algorithm.
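 *
 * For example, adjacency[4] = [2, 3, 1] records that two of the earlier
 * (smaller) sets, numbers 3 and 1, are subsets of set 4.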
3051  *----------
3052  */
3053  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3054  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3055  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3056  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3057 
3058  j_size = 0;
3059  j = 0;
3060  i = 1;
3061 
3062  for_each_cell(lc, groupingSets, lc1)
3063  {
3064  List *candidate = (List *) lfirst(lc);
3065  Bitmapset *candidate_set = NULL;
3066  ListCell *lc2;
3067  int dup_of = 0;
3068 
3069  foreach(lc2, candidate)
3070  {
3071  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3072  }
3073 
3074  /* we can only be a dup if we're the same length as a previous set */
3075  if (j_size == list_length(candidate))
3076  {
3077  int k;
3078 
3079  for (k = j; k < i; ++k)
3080  {
3081  if (bms_equal(set_masks[k], candidate_set))
3082  {
3083  dup_of = k;
3084  break;
3085  }
3086  }
3087  }
3088  else if (j_size < list_length(candidate))
3089  {
3090  j_size = list_length(candidate);
3091  j = i;
3092  }
3093 
3094  if (dup_of > 0)
3095  {
3096  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3097  bms_free(candidate_set);
3098  }
3099  else
3100  {
3101  int k;
3102  int n_adj = 0;
3103 
3104  orig_sets[i] = list_make1(candidate);
3105  set_masks[i] = candidate_set;
3106 
3107  /* fill in adjacency list; no need to compare equal-size sets */
3108 
3109  for (k = j - 1; k > 0; --k)
3110  {
3111  if (bms_is_subset(set_masks[k], candidate_set))
3112  adjacency_buf[++n_adj] = k;
3113  }
3114 
3115  if (n_adj > 0)
3116  {
3117  adjacency_buf[0] = n_adj;
3118  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3119  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3120  }
3121  else
3122  adjacency[i] = NULL;
3123 
3124  ++i;
3125  }
3126  }
3127 
3128  num_sets = i - 1;
3129 
3130  /*
3131  * Apply the graph matching algorithm to do the work.
3132  */
3133  state = BipartiteMatch(num_sets, num_sets, adjacency);
3134 
3135  /*
3136  * Now, the state->pair* fields have the info we need to assign sets to
3137  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3138  * pair_vu[v] = u (both will be true, but we check both so that we can do
3139  * it in one pass)
3140  */
3141  chains = palloc0((num_sets + 1) * sizeof(int));
3142 
3143  for (i = 1; i <= num_sets; ++i)
3144  {
3145  int u = state->pair_vu[i];
3146  int v = state->pair_uv[i];
3147 
3148  if (u > 0 && u < i)
3149  chains[i] = chains[u];
3150  else if (v > 0 && v < i)
3151  chains[i] = chains[v];
3152  else
3153  chains[i] = ++num_chains;
3154  }
3155 
3156  /* build result lists. */
3157  results = palloc0((num_chains + 1) * sizeof(List *));
3158 
3159  for (i = 1; i <= num_sets; ++i)
3160  {
3161  int c = chains[i];
3162 
3163  Assert(c > 0);
3164 
3165  results[c] = list_concat(results[c], orig_sets[i]);
3166  }
3167 
3168  /* push any empty sets back on the first list. */
3169  while (num_empty-- > 0)
3170  results[1] = lcons(NIL, results[1]);
3171 
3172  /* make result list */
3173  for (i = 1; i <= num_chains; ++i)
3174  result = lappend(result, results[i]);
3175 
3176  /*
3177  * Free all the things.
3178  *
3179  * (This is over-fussy for small sets but for large sets we could have
3180  * tied up a nontrivial amount of memory.)
3181  */
3182  BipartiteMatchFree(state);
3183  pfree(results);
3184  pfree(chains);
3185  for (i = 1; i <= num_sets; ++i)
3186  if (adjacency[i])
3187  pfree(adjacency[i]);
3188  pfree(adjacency);
3189  pfree(adjacency_buf);
3190  pfree(orig_sets);
3191  for (i = 1; i <= num_sets; ++i)
3192  bms_free(set_masks[i]);
3193  pfree(set_masks);
3194 
3195  return result;
3196 }
3197 
3198 /*
3199  * Reorder the elements of a list of grouping sets such that they have correct
3200  * prefix relationships. Also inserts the GroupingSetData annotations.
3201  *
3202  * The input must be ordered with smallest sets first; the result is returned
3203  * with largest sets first. Note that the result shares no list substructure
3204  * with the input, so it's safe for the caller to modify it later.
3205  *
3206  * If we're passed in a sortclause, we follow its order of columns to the
3207  * extent possible, to minimize the chance that we add unnecessary sorts.
3208  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3209  * gets implemented in one pass.)
3210  */
3211 static List *
3212 reorder_grouping_sets(List *groupingSets, List *sortclause)
3213 {
3214  ListCell *lc;
3215  List *previous = NIL;
3216  List *result = NIL;
3217 
3218  foreach(lc, groupingSets)
3219  {
3220  List *candidate = (List *) lfirst(lc);
3221  List *new_elems = list_difference_int(candidate, previous);
3222  GroupingSetData *gs = makeNode(GroupingSetData);
3223 
3224  while (list_length(sortclause) > list_length(previous) &&
3225  new_elems != NIL)
3226  {
3227  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3228  int ref = sc->tleSortGroupRef;
3229 
3230  if (list_member_int(new_elems, ref))
3231  {
3232  previous = lappend_int(previous, ref);
3233  new_elems = list_delete_int(new_elems, ref);
3234  }
3235  else
3236  {
3237  /* diverged from the sortclause; give up on it */
3238  sortclause = NIL;
3239  break;
3240  }
3241  }
3242 
3243  previous = list_concat(previous, new_elems);
3244 
3245  gs->set = list_copy(previous);
3246  result = lcons(gs, result);
3247  }
3248 
3249  list_free(previous);
3250 
3251  return result;
3252 }
3253 
3254 /*
3255  * has_volatile_pathkey
3256  * Returns true if any PathKey in 'keys' has an EquivalenceClass
3257  * containing a volatile function. Otherwise returns false.
3258  */
3259 static bool
3260 has_volatile_pathkey(List *keys)
3261 {
3262  ListCell *lc;
3263 
3264  foreach(lc, keys)
3265  {
3266  PathKey *pathkey = lfirst_node(PathKey, lc);
3267 
3268  if (pathkey->pk_eclass->ec_has_volatile)
3269  return true;
3270  }
3271 
3272  return false;
3273 }
3274 
3275 /*
3276  * adjust_group_pathkeys_for_groupagg
3277  * Add pathkeys to root->group_pathkeys to reflect the best set of
3278  * pre-ordered input for ordered aggregates.
3279  *
3280  * We define "best" as the pathkeys that suit the largest number of
3281  * aggregate functions. We find these by looking at the first ORDER BY /
3282  * DISTINCT aggregate and take the pathkeys for that before searching for
3283  * other aggregates that require the same or a more strict variation of the
3284  * same pathkeys. We then repeat that process for any remaining aggregates
3285  * with different pathkeys and if we find another set of pathkeys that suits a
3286  * larger number of aggregates then we select those pathkeys instead.
3287  *
3288  * When the best pathkeys are found we also mark each Aggref that can use
3289  * those pathkeys as aggpresorted = true.
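 *
 * For example, with one aggregate ordered by (y) and another ordered by
 * (y, z), the pathkeys for (y, z) satisfy both, so both aggregates can be
 * marked aggpresorted and share a single query-level sort.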
3290  *
3291  * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3292  * volatile functions, we never make use of these pathkeys. We want to ensure
3293  * that sorts using volatile functions are done independently in each Aggref
3294  * rather than once at the query level. If we were to allow this then Aggrefs
3295  * with compatible sort orders would all transition their rows in the same
3296  * order if those pathkeys were deemed to be the best pathkeys to sort on.
3297  * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3298  * better pathkeys to sort on, then the volatile function Aggrefs would be
3299  * left to perform their sorts individually. To avoid this inconsistent
3300  * behavior which could make Aggref results depend on what other Aggrefs the
3301  * query contains, we always force Aggrefs with volatile functions to perform
3302  * their own sorts.
3303  */
3304 static void
3305 adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3306 {
3307  List *grouppathkeys = root->group_pathkeys;
3308  List *bestpathkeys;
3309  Bitmapset *bestaggs;
3310  Bitmapset *unprocessed_aggs;
3311  ListCell *lc;
3312  int i;
3313 
3314  /* Shouldn't be here if there are grouping sets */
3315  Assert(root->parse->groupingSets == NIL);
3316  /* Shouldn't be here unless there are some ordered aggregates */
3317  Assert(root->numOrderedAggs > 0);
3318 
3319  /* Do nothing if disabled */
3320  if (!enable_presorted_aggregate)
3321  return;
3322 
3323  /*
3324  * Make a first pass over all AggInfos to collect a Bitmapset containing
3325  * the indexes of all AggInfos to be processed below.
3326  */
3327  unprocessed_aggs = NULL;
3328  foreach(lc, root->agginfos)
3329  {
3330  AggInfo *agginfo = lfirst_node(AggInfo, lc);
3331  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3332 
3333  if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3334  continue;
3335 
3336  /* only add aggregates with a DISTINCT or ORDER BY */
3337  if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3338  unprocessed_aggs = bms_add_member(unprocessed_aggs,
3339  foreach_current_index(lc));
3340  }
3341 
3342  /*
3343  * Now process all the unprocessed_aggs to find the best set of pathkeys
3344  * for the given set of aggregates.
3345  *
3346  * On the first outer loop here 'bestaggs' will be empty. We'll populate
3347  * this during the first loop using the pathkeys for the very first
3348  * AggInfo then taking any stronger pathkeys from any other AggInfos with
3349  * a more strict set of compatible pathkeys. Once the outer loop is
3350  * complete, we mark off all the aggregates with compatible pathkeys then
3351  * remove those from the unprocessed_aggs and repeat the process to try to
3352  * find another set of pathkeys that are suitable for a larger number of
3353  * aggregates. The outer loop will stop when there are not enough
3354  * unprocessed aggregates for it to be possible to find a set of pathkeys
3355  * to suit a larger number of aggregates.
3356  */
3357  bestpathkeys = NIL;
3358  bestaggs = NULL;
3359  while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3360  {
3361  Bitmapset *aggindexes = NULL;
3362  List *currpathkeys = NIL;
3363 
3364  i = -1;
3365  while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3366  {
3367  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3368  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3369  List *sortlist;
3370  List *pathkeys;
3371 
3372  if (aggref->aggdistinct != NIL)
3373  sortlist = aggref->aggdistinct;
3374  else
3375  sortlist = aggref->aggorder;
3376 
3377  pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3378  aggref->args);
3379 
3380  /*
3381  * Ignore Aggrefs which have volatile functions in their ORDER BY
3382  * or DISTINCT clause.
3383  */
3384  if (has_volatile_pathkey(pathkeys))
3385  {
3386  unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3387  continue;
3388  }
3389 
3390  /*
3391  * When not set yet, take the pathkeys from the first unprocessed
3392  * aggregate.
3393  */
3394  if (currpathkeys == NIL)
3395  {
3396  currpathkeys = pathkeys;
3397 
3398  /* include the GROUP BY pathkeys, if they exist */
3399  if (grouppathkeys != NIL)
3400  currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3401  currpathkeys);
3402 
3403  /* record that we found pathkeys for this aggregate */
3404  aggindexes = bms_add_member(aggindexes, i);
3405  }
3406  else
3407  {
3408  /* now look for a stronger set of matching pathkeys */
3409 
3410  /* include the GROUP BY pathkeys, if they exist */
3411  if (grouppathkeys != NIL)
3412  pathkeys = append_pathkeys(list_copy(grouppathkeys),
3413  pathkeys);
3414 
3415  /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3416  switch (compare_pathkeys(currpathkeys, pathkeys))
3417  {
3418  case PATHKEYS_BETTER2:
3419  /* 'pathkeys' are stronger, use these ones instead */
3420  currpathkeys = pathkeys;
3421  /* FALLTHROUGH */
3422 
3423  case PATHKEYS_BETTER1:
3424  /* 'pathkeys' are less strict */
3425  /* FALLTHROUGH */
3426 
3427  case PATHKEYS_EQUAL:
3428  /* mark this aggregate as covered by 'currpathkeys' */
3429  aggindexes = bms_add_member(aggindexes, i);
3430  break;
3431 
3432  case PATHKEYS_DIFFERENT:
3433  break;
3434  }
3435  }
3436  }
3437 
3438  /* remove the aggregates that we've just processed */
3439  unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3440 
3441  /*
3442  * If this pass included more aggregates than the previous best then
3443  * use these ones as the best set.
3444  */
3445  if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3446  {
3447  bestaggs = aggindexes;
3448  bestpathkeys = currpathkeys;
3449  }
3450  }
3451 
3452  /*
3453  * If we found any ordered aggregates, update root->group_pathkeys to add
3454  * the best set of aggregate pathkeys. Note that bestpathkeys includes
3455  * the original GROUP BY pathkeys already.
3456  */
3457  if (bestpathkeys != NIL)
3458  root->group_pathkeys = bestpathkeys;
3459 
3460  /*
3461  * Now that we've found the best set of aggregates we can set the
3462  * presorted flag to indicate to the executor that it needn't bother
3463  * performing a sort for these Aggrefs. We're able to do this now as
3464  * there's no chance of a Hash Aggregate plan as create_grouping_paths
3465  * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3466  * of ordered aggregates.
3467  */
3468  i = -1;
3469  while ((i = bms_next_member(bestaggs, i)) >= 0)
3470  {
3471  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3472 
3473  foreach(lc, agginfo->aggrefs)
3474  {
3475  Aggref *aggref = lfirst_node(Aggref, lc);
3476 
3477  aggref->aggpresorted = true;
3478  }
3479  }
3480 }
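/*
 * Illustrative example (editorial sketch; "tab" is a stand-in table): given
 *
 *     SELECT x, array_agg(y ORDER BY y) FROM tab GROUP BY x;
 *
 * the logic above can extend root->group_pathkeys from (x) to (x, y), so a
 * single sort on (x, y) satisfies both the grouping step and the ordered
 * aggregate, and setting aggpresorted lets the executor skip its own
 * per-aggregate sort.
 */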
3481 
3482 /*
3483  * Compute query_pathkeys and other pathkeys during plan generation
3484  */
3485 static void
3486 standard_qp_callback(PlannerInfo *root, void *extra)
3487 {
3488  Query *parse = root->parse;
3489  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3490  List *tlist = root->processed_tlist;
3491  List *activeWindows = qp_extra->activeWindows;
3492 
3493  /*
3494  * Calculate pathkeys that represent grouping/ordering and/or ordered
3495  * aggregate requirements.
3496  */
3497  if (qp_extra->gset_data)
3498  {
3499  /*
3500  * With grouping sets, just use the first RollupData's groupClause. We
3501  * don't make any effort to optimize grouping clauses when there are
3502  * grouping sets, nor can we combine aggregate ordering keys with
3503  * grouping.
3504  */
3505  List *rollups = qp_extra->gset_data->rollups;
3506  List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3507 
3508  if (grouping_is_sortable(groupClause))
3509  {
3510  bool sortable;
3511 
3512  /*
3513  * The groupClause is logically below the grouping step. So if
3514  * there is an RTE entry for the grouping step, we need to remove
3515  * its RT index from the sort expressions before we make PathKeys
3516  * for them.
3517  */
3518  root->group_pathkeys =
3519  make_pathkeys_for_sortclauses_extended(root,
3520  &groupClause,
3521  tlist,
3522  false,
3523  parse->hasGroupRTE,
3524  &sortable,
3525  false);
3526  Assert(sortable);
3527  root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3528  }
3529  else
3530  {
3531  root->group_pathkeys = NIL;
3532  root->num_groupby_pathkeys = 0;
3533  }
3534  }
3535  else if (parse->groupClause || root->numOrderedAggs > 0)
3536  {
3537  /*
3538  * With a plain GROUP BY list, we can remove any grouping items that
3539  * are proven redundant by EquivalenceClass processing. For example,
3540  * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3541  * especially common cases, but they're nearly free to detect. Note
3542  * that we remove redundant items from processed_groupClause but not
3543  * the original parse->groupClause.
3544  */
3545  bool sortable;
3546 
3547  /*
3548  * Convert group clauses into pathkeys. Set the ec_sortref field of
3549  * EquivalenceClass'es if it's not set yet.
3550  */
3551  root->group_pathkeys =
3552  make_pathkeys_for_sortclauses_extended(root,
3553  &root->processed_groupClause,
3554  tlist,
3555  true,
3556  false,
3557  &sortable,
3558  true);
3559  if (!sortable)
3560  {
3561  /* Can't sort; no point in considering aggregate ordering either */
3562  root->group_pathkeys = NIL;
3563  root->num_groupby_pathkeys = 0;
3564  }
3565  else
3566  {
3567  root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3568  /* If we have ordered aggs, consider adding onto group_pathkeys */
3569  if (root->numOrderedAggs > 0)
3570  adjust_group_pathkeys_for_groupagg(root);
3571  }
3572  }
3573  else
3574  {
3575  root->group_pathkeys = NIL;
3576  root->num_groupby_pathkeys = 0;
3577  }
3578 
3579  /* We consider only the first (bottom) window in pathkeys logic */
3580  if (activeWindows != NIL)
3581  {
3582  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3583 
3584  root->window_pathkeys = make_pathkeys_for_window(root,
3585  wc,
3586  tlist);
3587  }
3588  else
3589  root->window_pathkeys = NIL;
3590 
3591  /*
3592  * As with GROUP BY, we can discard any DISTINCT items that are proven
3593  * redundant by EquivalenceClass processing. The non-redundant list is
3594  * kept in root->processed_distinctClause, leaving the original
3595  * parse->distinctClause alone.
3596  */
3597  if (parse->distinctClause)
3598  {
3599  bool sortable;
3600 
3601  /* Make a copy since pathkey processing can modify the list */
3602  root->processed_distinctClause = list_copy(parse->distinctClause);
3603  root->distinct_pathkeys =
3604  make_pathkeys_for_sortclauses_extended(root,
3605  &root->processed_distinctClause,
3606  tlist,
3607  true,
3608  false,
3609  &sortable,
3610  false);
3611  if (!sortable)
3612  root->distinct_pathkeys = NIL;
3613  }
3614  else
3615  root->distinct_pathkeys = NIL;
3616 
3617  root->sort_pathkeys =
3618  make_pathkeys_for_sortclauses(root,
3619  parse->sortClause,
3620  tlist);
3621 
3622  /* setting setop_pathkeys might be useful to the union planner */
3623  if (qp_extra->setop != NULL &&
3624  set_operation_ordered_results_useful(qp_extra->setop))
3625  {
3626  List *groupClauses;
3627  bool sortable;
3628 
3629  groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3630 
3631  root->setop_pathkeys =
3632  make_pathkeys_for_sortclauses_extended(root,
3633  &groupClauses,
3634  tlist,
3635  false,
3636  false,
3637  &sortable,
3638  false);
3639  if (!sortable)
3640  root->setop_pathkeys = NIL;
3641  }
3642  else
3643  root->setop_pathkeys = NIL;
3644 
3645  /*
3646  * Figure out whether we want a sorted result from query_planner.
3647  *
3648  * If we have a sortable GROUP BY clause, then we want a result sorted
3649  * properly for grouping. Otherwise, if we have window functions to
3650  * evaluate, we try to sort for the first window. Otherwise, if there's a
3651  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3652  * we try to produce output that's sufficiently well sorted for the
3653  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3654  * by the ORDER BY clause. Otherwise, if we're a subquery being planned
3655  * for a set operation which can benefit from presorted results and have a
3656  * sortable targetlist, we want to sort by the target list.
3657  *
3658  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3659  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3660  * that might just leave us failing to exploit an available sort order at
3661  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3662  * much easier, since we know that the parser ensured that one is a
3663  * superset of the other.
3664  */
3665  if (root->group_pathkeys)
3666  root->query_pathkeys = root->group_pathkeys;
3667  else if (root->window_pathkeys)
3668  root->query_pathkeys = root->window_pathkeys;
3669  else if (list_length(root->distinct_pathkeys) >
3670  list_length(root->sort_pathkeys))
3671  root->query_pathkeys = root->distinct_pathkeys;
3672  else if (root->sort_pathkeys)
3673  root->query_pathkeys = root->sort_pathkeys;
3674  else if (root->setop_pathkeys != NIL)
3675  root->query_pathkeys = root->setop_pathkeys;
3676  else
3677  root->query_pathkeys = NIL;
3678 }
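/*
 * Illustrative example: for
 *
 *     SELECT DISTINCT x, y FROM tab ORDER BY x;
 *
 * distinct_pathkeys is (x, y) while sort_pathkeys is only (x), so the logic
 * above sets query_pathkeys to distinct_pathkeys; a path sorted on (x, y)
 * then satisfies both the DISTINCT and the ORDER BY.
 */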
3679 
3680 /*
3681  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3682  *
3683  * path_rows: number of output rows from scan/join step
3684  * gd: grouping sets data including list of grouping sets and their clauses
3685  * target_list: target list containing group clause references
3686  *
3687  * If doing grouping sets, we also annotate the gsets data with the estimates
3688  * for each set and each individual rollup list, with a view to later
3689  * determining whether some combination of them could be hashed instead.
3690  */
3691 static double
3692 get_number_of_groups(PlannerInfo *root,
3693  double path_rows,
3694  grouping_sets_data *gd,
3695  List *target_list)
3696 {
3697  Query *parse = root->parse;
3698  double dNumGroups;
3699 
3700  if (parse->groupClause)
3701  {
3702  List *groupExprs;
3703 
3704  if (parse->groupingSets)
3705  {
3706  /* Add up the estimates for each grouping set */
3707  ListCell *lc;
3708 
3709  Assert(gd); /* keep Coverity happy */
3710 
3711  dNumGroups = 0;
3712 
3713  foreach(lc, gd->rollups)
3714  {
3715  RollupData *rollup = lfirst_node(RollupData, lc);
3716  ListCell *lc2;
3717  ListCell *lc3;
3718 
3719  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3720  target_list);
3721 
3722  rollup->numGroups = 0.0;
3723 
3724  forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3725  {
3726  List *gset = (List *) lfirst(lc2);
3727  GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3728  double numGroups = estimate_num_groups(root,
3729  groupExprs,
3730  path_rows,
3731  &gset,
3732  NULL);
3733 
3734  gs->numGroups = numGroups;
3735  rollup->numGroups += numGroups;
3736  }
3737 
3738  dNumGroups += rollup->numGroups;
3739  }
3740 
3741  if (gd->hash_sets_idx)
3742  {
3743  ListCell *lc2;
3744 
3745  gd->dNumHashGroups = 0;
3746 
3747  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3748  target_list);
3749 
3750  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3751  {
3752  List *gset = (List *) lfirst(lc);
3753  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3754  double numGroups = estimate_num_groups(root,
3755  groupExprs,
3756  path_rows,
3757  &gset,
3758  NULL);
3759 
3760  gs->numGroups = numGroups;
3761  gd->dNumHashGroups += numGroups;
3762  }
3763 
3764  dNumGroups += gd->dNumHashGroups;
3765  }
3766  }
3767  else
3768  {
3769  /* Plain GROUP BY -- estimate based on optimized groupClause */
3770  groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3771  target_list);
3772 
3773  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3774  NULL, NULL);
3775  }
3776  }
3777  else if (parse->groupingSets)
3778  {
3779  /* Empty grouping sets ... one result row for each one */
3780  dNumGroups = list_length(parse->groupingSets);
3781  }
3782  else if (parse->hasAggs || root->hasHavingQual)
3783  {
3784  /* Plain aggregation, one result row */
3785  dNumGroups = 1;
3786  }
3787  else
3788  {
3789  /* Not grouping */
3790  dNumGroups = 1;
3791  }
3792 
3793  return dNumGroups;
3794 }
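/*
 * Illustrative example: for
 *
 *     ... GROUP BY GROUPING SETS ((a), (b), ())
 *
 * the estimate is the sum of the per-set estimates; if a is expected to have
 * about 100 distinct values and b about 20, dNumGroups comes out near 121,
 * since the empty grouping set always contributes exactly one group.
 */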
3795 
3796 /*
3797  * create_grouping_paths
3798  *
3799  * Build a new upperrel containing Paths for grouping and/or aggregation.
3800  * Along the way, we also build an upperrel for Paths which are partially
3801  * grouped and/or aggregated. A partially grouped and/or aggregated path
3802  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3803  * the only partially grouped paths we build are also partial paths; that
3804  * is, they need a Gather and then a FinalizeAggregate.
3805  *
3806  * input_rel: contains the source-data Paths
3807  * target: the pathtarget for the result Paths to compute
3808  * gd: grouping sets data including list of grouping sets and their clauses
3809  *
3810  * Note: all Paths in input_rel are expected to return the target computed
3811  * by make_group_input_target.
3812  */
3813 static RelOptInfo *
3814 create_grouping_paths(PlannerInfo *root,
3815  RelOptInfo *input_rel,
3816  PathTarget *target,
3817  bool target_parallel_safe,
3818  grouping_sets_data *gd)
3819 {
3820  Query *parse = root->parse;
3821  RelOptInfo *grouped_rel;
3822  RelOptInfo *partially_grouped_rel;
3823  AggClauseCosts agg_costs;
3824 
3825  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3826  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3827 
3828  /*
3829  * Create grouping relation to hold fully aggregated grouping and/or
3830  * aggregation paths.
3831  */
3832  grouped_rel = make_grouping_rel(root, input_rel, target,
3833  target_parallel_safe, parse->havingQual);
3834 
3835  /*
3836  * Create either paths for a degenerate grouping or paths for ordinary
3837  * grouping, as appropriate.
3838  */
3839  if (is_degenerate_grouping(root))
3840  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3841  else
3842  {
3843  int flags = 0;
3844  GroupPathExtraData extra;
3845 
3846  /*
3847  * Determine whether it's possible to perform sort-based
3848  * implementations of grouping. (Note that if processed_groupClause
3849  * is empty, grouping_is_sortable() is trivially true, and all the
3850  * pathkeys_contained_in() tests will succeed too, so that we'll
3851  * consider every surviving input path.)
3852  *
3853  * If we have grouping sets, we might be able to sort some but not all
3854  * of them; in this case, we need can_sort to be true as long as we
3855  * must consider any sorted-input plan.
3856  */
3857  if ((gd && gd->rollups != NIL)
3858  || grouping_is_sortable(root->processed_groupClause))
3859  flags |= GROUPING_CAN_USE_SORT;
3860 
3861  /*
3862  * Determine whether we should consider hash-based implementations of
3863  * grouping.
3864  *
3865  * Hashed aggregation only applies if we're grouping. If we have
3866  * grouping sets, some groups might be hashable but others not; in
3867  * this case we set can_hash true as long as there is nothing globally
3868  * preventing us from hashing (and we should therefore consider plans
3869  * with hashes).
3870  *
3871  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3872  * BY aggregates. (Doing so would imply storing *all* the input
3873  * values in the hash table, and/or running many sorts in parallel,
3874  * either of which seems like a certain loser.) We similarly don't
3875  * support ordered-set aggregates in hashed aggregation, but that case
3876  * is also included in the numOrderedAggs count.
3877  *
3878  * Note: grouping_is_hashable() is much more expensive to check than
3879  * the other gating conditions, so we want to do it last.
3880  */
3881  if ((parse->groupClause != NIL &&
3882  root->numOrderedAggs == 0 &&
3883  (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3884  flags |= GROUPING_CAN_USE_HASH;
3885 
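 /*
  * Illustrative example: in
  *
  *     SELECT a, count(DISTINCT b) FROM tab GROUP BY a;
  *
  * the DISTINCT aggregate makes numOrderedAggs nonzero, so the test above
  * leaves GROUPING_CAN_USE_HASH unset and only sort-based grouping is
  * considered.
  */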
3886  /*
3887  * Determine whether partial aggregation is possible.
3888  */
3889  if (can_partial_agg(root))
3890  flags |= GROUPING_CAN_PARTIAL_AGG;
3891 
3892  extra.flags = flags;
3893  extra.target_parallel_safe = target_parallel_safe;
3894  extra.havingQual = parse->havingQual;
3895  extra.targetList = parse->targetList;
3896  extra.partial_costs_set = false;
3897 
3898  /*
3899  * Determine whether partitionwise aggregation is in theory possible.
3900  * It can be disabled by the user, and for now, we don't try to
3901  * support grouping sets. create_ordinary_grouping_paths() will check
3902  * additional conditions, such as whether input_rel is partitioned.
3903  */
3904  if (enable_partitionwise_aggregate && !parse->groupingSets)
3905  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3906  else
3907  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3908 
3909  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3910  &agg_costs, gd, &extra,
3911  &partially_grouped_rel);
3912  }
3913 
3914  set_cheapest(grouped_rel);
3915  return grouped_rel;
3916 }
3917 
3918 /*
3919  * make_grouping_rel
3920  *
3921  * Create a new grouping rel and set basic properties.
3922  *
3923  * input_rel represents the underlying scan/join relation.
3924  * target is the output expected from the grouping relation.
3925  */
3926 static RelOptInfo *
3927 make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3928  PathTarget *target, bool target_parallel_safe,
3929  Node *havingQual)
3930 {
3931  RelOptInfo *grouped_rel;
3932 
3933  if (IS_OTHER_REL(input_rel))
3934  {
3935  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3936  input_rel->relids);
3937  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3938  }
3939  else
3940  {
3941  /*
3942  * By tradition, the relids set for the main grouping relation is
3943  * NULL. (This could be changed, but might require adjustments
3944  * elsewhere.)
3945  */
3946  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3947  }
3948 
3949  /* Set target. */
3950  grouped_rel->reltarget = target;
3951 
3952  /*
3953  * If the input relation is not parallel-safe, then the grouped relation
3954  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3955  * target list and HAVING quals are parallel-safe.
3956  */
3957  if (input_rel->consider_parallel && target_parallel_safe &&
3958  is_parallel_safe(root, (Node *) havingQual))
3959  grouped_rel->consider_parallel = true;
3960 
3961  /*
3962  * If the input rel belongs to a single FDW, so does the grouped rel.
3963  */
3964  grouped_rel->serverid = input_rel->serverid;
3965  grouped_rel->userid = input_rel->userid;
3966  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3967  grouped_rel->fdwroutine = input_rel->fdwroutine;
3968 
3969  return grouped_rel;
3970 }
3971 
3972 /*
3973  * is_degenerate_grouping
3974  *
3975  * A degenerate grouping is one in which the query has a HAVING qual and/or
3976  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3977  * grouping sets are all empty).
3978  */
3979 static bool
3980 is_degenerate_grouping(PlannerInfo *root)
3981 {
3982  Query *parse = root->parse;
3983 
3984  return (root->hasHavingQual || parse->groupingSets) &&
3985  !parse->hasAggs && parse->groupClause == NIL;
3986 }
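/*
 * Illustrative example: a query such as
 *
 *     SELECT 1 FROM tab HAVING random() < 0.5;
 *
 * has a HAVING qual but no aggregates and no GROUP BY, so it is degenerate:
 * it emits either zero rows or one row regardless of the contents of tab.
 */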
3987 
3988 /*
3989  * create_degenerate_grouping_paths
3990  *
3991  * When the grouping is degenerate (see is_degenerate_grouping), we are
3992  * supposed to emit either zero or one row for each grouping set depending on
3993  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3994  * either HAVING or the targetlist, so we actually do not need the FROM table
3995  * at all! We can just throw away the plan-so-far and generate a Result node.
3996  * This is a sufficiently unusual corner case that it's not worth contorting
3997  * the structure of this module to avoid having to generate the earlier paths
3998  * in the first place.
3999  */
4000 static void
4001 create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
4002  RelOptInfo *grouped_rel)
4003 {
4004  Query *parse = root->parse;
4005  int nrows;
4006  Path *path;
4007 
4008  nrows = list_length(parse->groupingSets);
4009  if (nrows > 1)
4010  {
4011  /*
4012  * Doesn't seem worthwhile writing code to cons up a generate_series
4013  * or a values scan to emit multiple rows. Instead just make N clones
4014  * and append them. (With a volatile HAVING clause, this means you
4015  * might get between 0 and N output rows. Offhand I think that's
4016  * desired.)
4017  */
4018  List *paths = NIL;
4019 
4020  while (--nrows >= 0)
4021  {
4022  path = (Path *)
4023  create_group_result_path(root, grouped_rel,
4024  grouped_rel->reltarget,
4025  (List *) parse->havingQual);
4026  paths = lappend(paths, path);
4027  }
4028  path = (Path *)
4029  create_append_path(root,
4030  grouped_rel,
4031  paths,
4032  NIL,
4033  NIL,
4034  NULL,
4035  0,
4036  false,
4037  -1);
4038  }
4039  else
4040  {
4041  /* No grouping sets, or just one, so one output row */
4042  path = (Path *)
4043  create_group_result_path(root, grouped_rel,
4044  grouped_rel->reltarget,
4045  (List *) parse->havingQual);
4046  }
4047 
4048  add_path(grouped_rel, path);
4049 }
4050 
4051 /*
4052  * create_ordinary_grouping_paths
4053  *
4054  * Create grouping paths for the ordinary (that is, non-degenerate) case.
4055  *
4056  * We need to consider sorted and hashed aggregation in the same function,
4057  * because otherwise (1) it would be harder to throw an appropriate error
4058  * message if neither way works, and (2) we should not allow hashtable size
4059  * considerations to dissuade us from using hashing if sorting is not possible.
4060  *
4061  * *partially_grouped_rel_p will be set to the partially grouped rel which this
4062  * function creates, or to NULL if it doesn't create one.
4063  */
4064 static void
4065 create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
4066  RelOptInfo *grouped_rel,
4067  const AggClauseCosts *agg_costs,
4068  grouping_sets_data *gd,
4069  GroupPathExtraData *extra,
4070  RelOptInfo **partially_grouped_rel_p)
4071 {
4072  Path *cheapest_path = input_rel->cheapest_total_path;
4073  RelOptInfo *partially_grouped_rel = NULL;
4074  double dNumGroups;
4075  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
4076 
4077  /*
4078  * If this is the topmost grouping relation or if the parent relation is
4079  * doing some form of partitionwise aggregation, then we may be able to do
4080  * it at this level also. However, if the input relation is not
4081  * partitioned, partitionwise aggregate is impossible.
4082  */
4083  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
4084  IS_PARTITIONED_REL(input_rel))
4085  {
4086  /*
4087  * If this is the topmost relation or if the parent relation is doing
4088  * full partitionwise aggregation, then we can do full partitionwise
4089  * aggregation provided that the GROUP BY clause contains all of the
4090  * partitioning columns at this level. Otherwise, we can do at most
4091  * partial partitionwise aggregation. But if partial aggregation is
4092  * not supported in general then we can't use it for partitionwise
4093  * aggregation either.
4094  *
4095  * Check parse->groupClause not processed_groupClause, because it's
4096  * okay if some of the partitioning columns were proved redundant.
4097  */
4098  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
4099  group_by_has_partkey(input_rel, extra->targetList,
4100  root->parse->groupClause))
4101  patype = PARTITIONWISE_AGGREGATE_FULL;
4102  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4103  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
4104  else
4105  patype = PARTITIONWISE_AGGREGATE_NONE;
4106  }
4107 
4108  /*
4109  * Before generating paths for grouped_rel, we first generate any possible
4110  * partially grouped paths; that way, later code can easily consider both
4111  * parallel and non-parallel approaches to grouping.
4112  */
4113  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4114  {
4115  bool force_rel_creation;
4116 
4117  /*
4118  * If we're doing partitionwise aggregation at this level, force
4119  * creation of a partially_grouped_rel so we can add partitionwise
4120  * paths to it.
4121  */
4122  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
4123 
4124  partially_grouped_rel =
4125  create_partial_grouping_paths(root,
4126  grouped_rel,
4127  input_rel,
4128  gd,
4129  extra,
4130  force_rel_creation);
4131  }
4132 
4133  /* Set out parameter. */
4134  *partially_grouped_rel_p = partially_grouped_rel;
4135 
4136  /* Apply partitionwise aggregation technique, if possible. */
4137  if (patype != PARTITIONWISE_AGGREGATE_NONE)
4138  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
4139  partially_grouped_rel, agg_costs,
4140  gd, patype, extra);
4141 
4142  /* If we are doing partial aggregation only, return. */
4143  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
4144  {
4145  Assert(partially_grouped_rel);
4146 
4147  if (partially_grouped_rel->pathlist)
4148  set_cheapest(partially_grouped_rel);
4149 
4150  return;
4151  }
4152 
4153  /* Gather any partially grouped partial paths. */
4154  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4155  {
4156  gather_grouping_paths(root, partially_grouped_rel);
4157  set_cheapest(partially_grouped_rel);
4158  }
4159 
4160  /*
4161  * Estimate number of groups.
4162  */
4163  dNumGroups = get_number_of_groups(root,
4164  cheapest_path->rows,
4165  gd,
4166  extra->targetList);
4167 
4168  /* Build final grouping paths */
4169  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4170  partially_grouped_rel, agg_costs, gd,
4171  dNumGroups, extra);
4172 
4173  /* Give a helpful error if we failed to find any implementation */
4174  if (grouped_rel->pathlist == NIL)
4175  ereport(ERROR,
4176  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4177  errmsg("could not implement GROUP BY"),
4178  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4179 
4180  /*
4181  * If there is an FDW that's responsible for all baserels of the query,
4182  * let it consider adding ForeignPaths.
4183  */
4184  if (grouped_rel->fdwroutine &&
4185  grouped_rel->fdwroutine->GetForeignUpperPaths)
4186  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4187  input_rel, grouped_rel,
4188  extra);
4189 
4190  /* Let extensions possibly add some more paths */
4191  if (create_upper_paths_hook)
4192  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4193  input_rel, grouped_rel,
4194  extra);
4195 }
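/*
 * Illustrative example: when partial aggregation and parallelism are both
 * possible, the paths built here typically produce a plan of the shape
 *
 *     Finalize HashAggregate
 *       -> Gather
 *            -> Partial HashAggregate
 *                 -> Parallel Seq Scan on tab
 *
 * with the partially grouped rel supplying the Partial HashAggregate paths.
 */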
4196 
4197 /*
4198  * For a given input path, consider the possible ways of doing grouping sets on
4199  * it, by combinations of hashing and sorting. This can be called multiple
4200  * times, so it's important that it not scribble on input. No result is
4201  * returned, but any generated paths are added to grouped_rel.
4202  */
4203 static void
4204 consider_groupingsets_paths(PlannerInfo *root,
4205  RelOptInfo *grouped_rel,
4206  Path *path,
4207  bool is_sorted,
4208  bool can_hash,
4209  grouping_sets_data *gd,
4210  const AggClauseCosts *agg_costs,
4211  double dNumGroups)
4212 {
4213  Query *parse = root->parse;
4214  Size hash_mem_limit = get_hash_memory_limit();
4215 
4216  /*
4217  * If we're not being offered sorted input, then only consider plans that
4218  * can be done entirely by hashing.
4219  *
4220  * We can hash everything if it looks like it'll fit in hash_mem. But if
4221  * the input is actually sorted despite not being advertised as such, we
4222  * prefer to make use of that in order to use less memory.
4223  *
4224  * If none of the grouping sets are sortable, then ignore the hash_mem
4225  * limit and generate a path anyway, since otherwise we'll just fail.
4226  */
4227  if (!is_sorted)
4228  {
4229  List *new_rollups = NIL;
4230  RollupData *unhashed_rollup = NULL;
4231  List *sets_data;
4232  List *empty_sets_data = NIL;
4233  List *empty_sets = NIL;
4234  ListCell *lc;
4235  ListCell *l_start = list_head(gd->rollups);
4236  AggStrategy strat = AGG_HASHED;
4237  double hashsize;
4238  double exclude_groups = 0.0;
4239 
4240  Assert(can_hash);
4241 
4242  /*
4243  * If the input is coincidentally sorted usefully (which can happen
4244  * even if is_sorted is false, since that only means that our caller
4245  * has set up the sorting for us), then save some hashtable space by
4246  * making use of that. But we need to watch out for degenerate cases:
4247  *
4248  * 1) If there are any empty grouping sets, then group_pathkeys might
4249  * be NIL if all non-empty grouping sets are unsortable. In this case,
4250  * there will be a rollup containing only empty groups, and the
4251  * pathkeys_contained_in test is vacuously true; this is ok.
4252  *
4253  * XXX: the above relies on the fact that group_pathkeys is generated
4254  * from the first rollup. If we add the ability to consider multiple
4255  * sort orders for grouping input, this assumption might fail.
4256  *
4257  * 2) If there are no empty sets and only unsortable sets, then the
4258  * rollups list will be empty (and thus l_start == NULL), and
4259  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4260  * pathkeys_contained_in test doesn't cause us to crash.
4261  */
4262  if (l_start != NULL &&
4263  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4264  {
4265  unhashed_rollup = lfirst_node(RollupData, l_start);
4266  exclude_groups = unhashed_rollup->numGroups;
4267  l_start = lnext(gd->rollups, l_start);
4268  }
4269 
4270  hashsize = estimate_hashagg_tablesize(root,
4271  path,
4272  agg_costs,
4273  dNumGroups - exclude_groups);
4274 
4275  /*
4276  * gd->rollups is empty if we have only unsortable columns to work
4277  * with. Override hash_mem in that case; otherwise, we'll rely on the
4278  * sorted-input case to generate usable mixed paths.
4279  */
4280  if (hashsize > hash_mem_limit && gd->rollups)
4281  return; /* nope, won't fit */
4282 
4283  /*
4284  * We need to burst the existing rollups list into individual grouping
4285  * sets and recompute a groupClause for each set.
4286  */
4287  sets_data = list_copy(gd->unsortable_sets);
4288 
4289  for_each_cell(lc, gd->rollups, l_start)
4290  {
4291  RollupData *rollup = lfirst_node(RollupData, lc);
4292 
4293  /*
4294  * If we find an unhashable rollup that's not been skipped by the
4295  * "actually sorted" check above, we can't cope; we'd need sorted
4296  * input (with a different sort order) but we can't get that here.
4297  * So bail out; we'll get a valid path from the is_sorted case
4298  * instead.
4299  *
4300  * The mere presence of empty grouping sets doesn't make a rollup
4301  * unhashable (see preprocess_grouping_sets); we handle those
4302  * specially below.
4303  */
4304  if (!rollup->hashable)
4305  return;
4306 
4307  sets_data = list_concat(sets_data, rollup->gsets_data);
4308  }
4309  foreach(lc, sets_data)
4310  {
4311  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4312  List *gset = gs->set;
4313  RollupData *rollup;
4314 
4315  if (gset == NIL)
4316  {
4317  /* Empty grouping sets can't be hashed. */
4318  empty_sets_data = lappend(empty_sets_data, gs);
4319  empty_sets = lappend(empty_sets, NIL);
4320  }
4321  else
4322  {
4323  rollup = makeNode(RollupData);
4324 
4325  rollup->groupClause = preprocess_groupclause(root, gset);
4326  rollup->gsets_data = list_make1(gs);
4327  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4328  rollup->gsets_data,
4329  gd->tleref_to_colnum_map);
4330  rollup->numGroups = gs->numGroups;
4331  rollup->hashable = true;
4332  rollup->is_hashed = true;
4333  new_rollups = lappend(new_rollups, rollup);
4334  }
4335  }
4336 
4337  /*
4338  * If we didn't find anything nonempty to hash, then bail. We'll
4339  * generate a path from the is_sorted case.
4340  */
4341  if (new_rollups == NIL)
4342  return;
4343 
4344  /*
4345  * If there were empty grouping sets they should have been in the
4346  * first rollup.
4347  */
4348  Assert(!unhashed_rollup || !empty_sets);
4349 
4350  if (unhashed_rollup)
4351  {
4352  new_rollups = lappend(new_rollups, unhashed_rollup);
4353  strat = AGG_MIXED;
4354  }
4355  else if (empty_sets)
4356  {
4357  RollupData *rollup = makeNode(RollupData);
4358 
4359  rollup->groupClause = NIL;
4360  rollup->gsets_data = empty_sets_data;
4361  rollup->gsets = empty_sets;
4362  rollup->numGroups = list_length(empty_sets);
4363  rollup->hashable = false;
4364  rollup->is_hashed = false;
4365  new_rollups = lappend(new_rollups, rollup);
4366  strat = AGG_MIXED;
4367  }
4368 
4369  add_path(grouped_rel, (Path *)
4370  create_groupingsets_path(root,
4371  grouped_rel,
4372  path,
4373  (List *) parse->havingQual,
4374  strat,
4375  new_rollups,
4376  agg_costs));
4377  return;
4378  }
4379 
4380  /*
4381  * If we have sorted input but nothing we can do with it, bail.
4382  */
4383  if (gd->rollups == NIL)
4384  return;
4385 
4386  /*
4387  * Given sorted input, we try to make two paths: one sorted and one mixed
4388  * sort/hash. (We need to try both because hashagg might be disabled, or
4389  * some columns might not be sortable.)
4390  *
4391  * can_hash is passed in as false if some obstacle elsewhere (such as
4392  * ordered aggs) means that we shouldn't consider hashing at all.
4393  */
4394  if (can_hash && gd->any_hashable)
4395  {
4396  List *rollups = NIL;
4397  List *hash_sets = list_copy(gd->unsortable_sets);
4398  double availspace = hash_mem_limit;
4399  ListCell *lc;
4400 
4401  /*
4402  * Account first for space needed for groups we can't sort at all.
4403  */
4404  availspace -= estimate_hashagg_tablesize(root,
4405  path,
4406  agg_costs,
4407  gd->dNumHashGroups);
4408 
4409  if (availspace > 0 && list_length(gd->rollups) > 1)
4410  {
4411  double scale;
4412  int num_rollups = list_length(gd->rollups);
4413  int k_capacity;
4414  int *k_weights = palloc(num_rollups * sizeof(int));
4415  Bitmapset *hash_items = NULL;
4416  int i;
4417 
4418  /*
4419  * We treat this as a knapsack problem: the knapsack capacity
4420  * represents hash_mem, the item weights are the estimated memory
4421  * usage of the hashtables needed to implement a single rollup,
4422  * and we really ought to use the cost saving as the item value;
4423  * however, currently the costs assigned to sort nodes don't
4424  * reflect the comparison costs well, and so we treat all items as
4425  * of equal value (each rollup we hash instead saves us one sort).
4426  *
4427  * To use the discrete knapsack, we need to scale the values to a
4428  * reasonably small bounded range. We choose to allow a 5% error
4429  * margin; we have no more than 4096 rollups in the worst possible
4430  * case, which with a 5% error margin will require a bit over 42MB
4431  * of workspace. (Anyone wanting to plan queries that complex had
4432  * better have the memory for it. In more reasonable cases, with
4433  * no more than a couple of dozen rollups, the memory usage will
4434  * be negligible.)
4435  *
4436  * k_capacity is naturally bounded, but we clamp the values for
4437  * scale and weight (below) to avoid overflows or underflows (or
4438  * uselessly trying to use a scale factor less than 1 byte).
4439  */
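 /*
  * Illustrative example: with availspace = 4MB and 4 candidate rollups,
  * scale = Max(4194304 / 80.0, 1.0), about 52429, so k_capacity is about 80
  * "units", and a rollup whose hashtable is estimated at 1MB weighs about
  * 20 units; the knapsack workspace stays tiny while respecting the 5%
  * error margin described above.
  */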
4440  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4441  k_capacity = (int) floor(availspace / scale);
4442 
4443  /*
4444  * We leave the first rollup out of consideration since it's the
4445  * one that matches the input sort order. We assign indexes "i"
4446  * to only those entries considered for hashing; the second loop,
4447  * below, must use the same condition.
4448  */
4449  i = 0;
4450  for_each_from(lc, gd->rollups, 1)
4451  {
4452  RollupData *rollup = lfirst_node(RollupData, lc);
4453 
4454  if (rollup->hashable)
4455  {
4456  double sz = estimate_hashagg_tablesize(root,
4457  path,
4458  agg_costs,
4459  rollup->numGroups);
4460 
4461  /*
4462  * If sz is enormous, but hash_mem (and hence scale) is
4463  * small, avoid integer overflow here.
4464  */
4465  k_weights[i] = (int) Min(floor(sz / scale),
4466  k_capacity + 1.0);
4467  ++i;
4468  }
4469  }
4470 
4471  /*
4472  * Apply knapsack algorithm; compute the set of items which
4473  * maximizes the value stored (in this case the number of sorts
4474  * saved) while keeping the total size (approximately) within
4475  * capacity.
4476  */
4477  if (i > 0)
4478  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4479 
4480  if (!bms_is_empty(hash_items))
4481  {
4482  rollups = list_make1(linitial(gd->rollups));
4483 
4484  i = 0;
4485  for_each_from(lc, gd->rollups, 1)
4486  {
4487  RollupData *rollup = lfirst_node(RollupData, lc);
4488 
4489  if (rollup->hashable)
4490  {
4491  if (bms_is_member(i, hash_items))
4492  hash_sets = list_concat(hash_sets,
4493  rollup->gsets_data);
4494  else
4495  rollups = lappend(rollups, rollup);
4496  ++i;
4497  }
4498  else
4499  rollups = lappend(rollups, rollup);
4500  }
4501  }
4502  }
4503 
4504  if (!rollups && hash_sets)
4505  rollups = list_copy(gd->rollups);
4506 
4507  foreach(lc, hash_sets)
4508  {
4509  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4510  RollupData *rollup = makeNode(RollupData);
4511 
4512  Assert(gs->set != NIL);
4513 
4514  rollup->groupClause = preprocess_groupclause(root, gs->set);
4515  rollup->gsets_data = list_make1(gs);
4516  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4517  rollup->gsets_data,
4518  gd->tleref_to_colnum_map);
4519  rollup->numGroups = gs->numGroups;
4520  rollup->hashable = true;
4521  rollup->is_hashed = true;
4522  rollups = lcons(rollup, rollups);
4523  }
4524 
4525  if (rollups)
4526  {
4527  add_path(grouped_rel, (Path *)
4528  create_groupingsets_path(root,
4529  grouped_rel,
4530  path,
4531  (List *) parse->havingQual,
4532  AGG_MIXED,
4533  rollups,
4534  agg_costs));
4535  }
4536  }
4537 
4538  /*
4539  * Now try the simple sorted case.
4540  */
4541  if (!gd->unsortable_sets)
4542  add_path(grouped_rel, (Path *)
4543  create_groupingsets_path(root,
4544  grouped_rel,
4545  path,
4546  (List *) parse->havingQual,
4547  AGG_SORTED,
4548  gd->rollups,
4549  agg_costs));
4550 }
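/*
 * Illustrative example: for
 *
 *     ... GROUP BY GROUPING SETS ((a, b), (a), (c))
 *
 * the sets (a, b) and (a) form a rollup that can reuse the input sort order,
 * while (c) can be hashed; an AGG_MIXED path built above then computes the
 * sorted rollup and the hashed set in a single Agg node over one input scan.
 */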
4551 
4552 /*
4553  * create_window_paths
4554  *
4555  * Build a new upperrel containing Paths for window-function evaluation.
4556  *
4557  * input_rel: contains the source-data Paths
4558  * input_target: result of make_window_input_target
4559  * output_target: what the topmost WindowAggPath should return
4560  * wflists: result of find_window_functions
4561  * activeWindows: result of select_active_windows
4562  *
4563  * Note: all Paths in input_rel are expected to return input_target.
4564  */
4565 static RelOptInfo *
4566 create_window_paths(PlannerInfo *root,
4567  RelOptInfo *input_rel,
4568  PathTarget *input_target,
4569  PathTarget *output_target,
4570  bool output_target_parallel_safe,
4571  WindowFuncLists *wflists,
4572  List *activeWindows)
4573 {
4574  RelOptInfo *window_rel;
4575  ListCell *lc;
4576 
4577  /* For now, do all work in the (WINDOW, NULL) upperrel */
4578  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4579 
4580  /*
4581  * If the input relation is not parallel-safe, then the window relation
4582  * can't be parallel-safe, either. Otherwise, we need to examine the
4583  * target list and active windows for non-parallel-safe constructs.
4584  */
4585  if (input_rel->consider_parallel && output_target_parallel_safe &&
4586  is_parallel_safe(root, (Node *) activeWindows))
4587  window_rel->consider_parallel = true;
4588 
4589  /*
4590  * If the input rel belongs to a single FDW, so does the window rel.
4591  */
4592  window_rel->serverid = input_rel->serverid;
4593  window_rel->userid = input_rel->userid;
4594  window_rel->useridiscurrent = input_rel->useridiscurrent;
4595  window_rel->fdwroutine = input_rel->fdwroutine;
4596 
4597  /*
4598  * Consider computing window functions starting from the existing
4599  * cheapest-total path (which will likely require a sort) as well as any
4600  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4601  */
4602  foreach(lc, input_rel->pathlist)
4603  {
4604  Path *path = (Path *) lfirst(lc);
4605  int presorted_keys;
4606 
4607  if (path == input_rel->cheapest_total_path ||
4608  pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4609  &presorted_keys) ||
4610  presorted_keys > 0)
4611  create_one_window_path(root,
4612  window_rel,
4613  path,
4614  input_target,
4615  output_target,
4616  wflists,
4617  activeWindows);
4618  }
4619 
4620  /*
4621  * If there is an FDW that's responsible for all baserels of the query,
4622  * let it consider adding ForeignPaths.
4623  */
4624  if (window_rel->fdwroutine &&
4625  window_rel->fdwroutine->GetForeignUpperPaths)
4626  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4627  input_rel, window_rel,
4628  NULL);
4629 
4630  /* Let extensions possibly add some more paths */
4631  if (create_upper_paths_hook)
4632  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4633  input_rel, window_rel, NULL);
4634 
4635  /* Now choose the best path(s) */
4636  set_cheapest(window_rel);
4637 
4638  return window_rel;
4639 }
4640 
4641 /*
4642  * Stack window-function implementation steps atop the given Path, and
4643  * add the result to window_rel.
4644  *
4645  * window_rel: upperrel to contain result
4646  * path: input Path to use (must return input_target)
4647  * input_target: result of make_window_input_target
4648  * output_target: what the topmost WindowAggPath should return
4649  * wflists: result of find_window_functions
4650  * activeWindows: result of select_active_windows
4651  */
4652 static void
4653 create_one_window_path(PlannerInfo *root,
4654  RelOptInfo *window_rel,
4655  Path *path,
4656  PathTarget *input_target,
4657  PathTarget *output_target,
4658  WindowFuncLists *wflists,
4659  List *activeWindows)
4660 {
4661  PathTarget *window_target;
4662  ListCell *l;
4663  List *topqual = NIL;
4664 
4665  /*
4666  * Since each window clause could require a different sort order, we stack
4667  * up a WindowAgg node for each clause, with sort steps between them as
4668  * needed. (We assume that select_active_windows chose a good order for
4669  * executing the clauses in.)
4670  *
4671  * input_target should contain all Vars and Aggs needed for the result.
4672  * (In some cases we wouldn't need to propagate all of these all the way
4673  * to the top, since they might only be needed as inputs to WindowFuncs.
4674  * It's probably not worth trying to optimize that though.) It must also
4675  * contain all window partitioning and sorting expressions, to ensure
4676  * they're computed only once at the bottom of the stack (that's critical
4677  * for volatile functions). As we climb up the stack, we'll add outputs
4678  * for the WindowFuncs computed at each level.
4679  */
4680  window_target = input_target;
4681 
4682  foreach(l, activeWindows)
4683  {
4684  WindowClause *wc = lfirst_node(WindowClause, l);
4685  List *window_pathkeys;
4686  List *runcondition = NIL;
4687  int presorted_keys;
4688  bool is_sorted;
4689  bool topwindow;
4690  ListCell *lc2;
4691 
4692  window_pathkeys = make_pathkeys_for_window(root,
4693  wc,
4694  root->processed_tlist);
4695 
4696  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4697  path->pathkeys,
4698  &presorted_keys);
4699 
4700  /* Sort if necessary */
4701  if (!is_sorted)
4702  {
4703  /*
4704  * No presorted keys or incremental sort disabled, just perform a
4705  * complete sort.
4706  */
4707  if (presorted_keys == 0 || !enable_incremental_sort)
4708  path = (Path *) create_sort_path(root, window_rel,
4709  path,
4710  window_pathkeys,
4711  -1.0);
4712  else
4713  {
4714  /*
4715  * Since we have presorted keys and incremental sort is
4716  * enabled, just use incremental sort.
4717  */
4718  path = (Path *) create_incremental_sort_path(root,
4719  window_rel,
4720  path,
4721  window_pathkeys,
4722  presorted_keys,
4723  -1.0);
4724  }
4725  }
4726 
4727  if (lnext(activeWindows, l))
4728  {
4729  /*
4730  * Add the current WindowFuncs to the output target for this
4731  * intermediate WindowAggPath. We must copy window_target to
4732  * avoid changing the previous path's target.
4733  *
4734  * Note: a WindowFunc adds nothing to the target's eval costs; but
4735  * we do need to account for the increase in tlist width.
4736  */
4737  int64 tuple_width = window_target->width;
4738 
4739  window_target = copy_pathtarget(window_target);
4740  foreach(lc2, wflists->windowFuncs[wc->winref])
4741  {
4742  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4743 
4744  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4745  tuple_width += get_typavgwidth(wfunc->wintype, -1);
4746  }
4747  window_target->width = clamp_width_est(tuple_width);
4748  }
4749  else
4750  {
4751  /* Install the goal target in the topmost WindowAgg */
4752  window_target = output_target;
4753  }
4754 
4755  /* mark the final item in the list as the top-level window */
4756  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4757 
4758  /*
4759  * Collect the WindowFuncRunConditions from each WindowFunc and
4760  * convert them into OpExprs
4761  */
4762  foreach(lc2, wflists->windowFuncs[wc->winref])
4763  {
4764  ListCell *lc3;
4765  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4766 
4767  foreach(lc3, wfunc->runCondition)
4768  {
4769  WindowFuncRunCondition *wfuncrc =
4770  lfirst_node(WindowFuncRunCondition, lc3);
4771  Expr *opexpr;
4772  Expr *leftop;
4773  Expr *rightop;
4774 
4775  if (wfuncrc->wfunc_left)
4776  {
4777  leftop = (Expr *) copyObject(wfunc);
4778  rightop = copyObject(wfuncrc->arg);
4779  }
4780  else
4781  {
4782  leftop = copyObject(wfuncrc->arg);
4783  rightop = (Expr *) copyObject(wfunc);
4784  }
4785 
4786  opexpr = make_opclause(wfuncrc->opno,
4787  BOOLOID,
4788  false,
4789  leftop,
4790  rightop,
4791  InvalidOid,
4792  wfuncrc->inputcollid);
4793 
4794  runcondition = lappend(runcondition, opexpr);
4795 
4796  if (!topwindow)
4797  topqual = lappend(topqual, opexpr);
4798  }
4799  }
4800 
4801  path = (Path *)
4802  create_windowagg_path(root, window_rel, path, window_target,
4803  wflists->windowFuncs[wc->winref],
4804  runcondition, wc,
4805  topwindow ? topqual : NIL, topwindow);
4806  }
4807 
4808  add_path(window_rel, path);
4809 }
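/*
 * Illustrative example: for
 *
 *     SELECT sum(x) OVER (ORDER BY a),
 *            row_number() OVER (PARTITION BY b ORDER BY c)
 *     FROM tab;
 *
 * two WindowAgg nodes are stacked, with a Sort matching each window's
 * PARTITION BY/ORDER BY inserted beneath it as needed; window_target grows
 * by one column at each level so the lower node's result is carried up to
 * the top.
 */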
4810 
4811 /*
4812  * create_distinct_paths
4813  *
4814  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4815  *
4816  * input_rel: contains the source-data Paths
4817  * target: the pathtarget for the result Paths to compute
4818  *
4819  * Note: input paths should already compute the desired pathtarget, since
4820  * Sort/Unique won't project anything.
4821  */
4822 static RelOptInfo *
4823 create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4824  PathTarget *target)
4825 {
4826  RelOptInfo *distinct_rel;
4827 
4828  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4829  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4830 
4831  /*
4832  * We don't compute anything at this level, so distinct_rel will be
4833  * parallel-safe if the input rel is parallel-safe. In particular, if
4834  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4835  * output those expressions, and will not be parallel-safe unless those
4836  * expressions are parallel-safe.
4837  */
4838  distinct_rel->consider_parallel = input_rel->consider_parallel;
4839 
4840  /*
4841  * If the input rel belongs to a single FDW, so does the distinct_rel.
4842  */
4843  distinct_rel->serverid = input_rel->serverid;
4844  distinct_rel->userid = input_rel->userid;
4845  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4846  distinct_rel->fdwroutine = input_rel->fdwroutine;
4847 
4848  /* build distinct paths based on input_rel's pathlist */
4849  create_final_distinct_paths(root, input_rel, distinct_rel);
4850 
4851  /* now build distinct paths based on input_rel's partial_pathlist */
4852  create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4853 
4854  /* Give a helpful error if we failed to create any paths */
4855  if (distinct_rel->pathlist == NIL)
4856  ereport(ERROR,
4857  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4858  errmsg("could not implement DISTINCT"),
4859  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4860 
4861  /*
4862  * If there is an FDW that's responsible for all baserels of the query,
4863  * let it consider adding ForeignPaths.
4864  */
4865  if (distinct_rel->fdwroutine &&
4866  distinct_rel->fdwroutine->GetForeignUpperPaths)
4867  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4868  UPPERREL_DISTINCT,
4869  input_rel,
4870  distinct_rel,
4871  NULL);
4872 
4873  /* Let extensions possibly add some more paths */
4874  if (create_upper_paths_hook)
4875  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4876  distinct_rel, NULL);
4877 
4878  /* Now choose the best path(s) */
4879  set_cheapest(distinct_rel);
4880 
4881  return distinct_rel;
4882 }
4883 
4884 /*
4885  * create_partial_distinct_paths
4886  *
4887  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4888  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4889  * paths on top and add a final unique/aggregate path to remove any duplicates
4890  * produced by combining rows from parallel workers.
4891  */
4892 static void
4893 create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4894  RelOptInfo *final_distinct_rel,
4895  PathTarget *target)
4896 {
4897  RelOptInfo *partial_distinct_rel;
4898  Query *parse;
4899  List *distinctExprs;
4900  double numDistinctRows;
4901  Path *cheapest_partial_path;
4902  ListCell *lc;
4903 
4904  /* nothing to do when there are no partial paths in the input rel */
4905  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4906  return;
4907 
4908  parse = root->parse;
4909 
4910  /* can't do parallel DISTINCT ON */
4911  if (parse->hasDistinctOn)
4912  return;
4913 
4914  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4915  NULL);
4916  partial_distinct_rel->reltarget = target;
4917  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4918 
4919  /*
4920  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4921  */
4922  partial_distinct_rel->serverid = input_rel->serverid;
4923  partial_distinct_rel->userid = input_rel->userid;
4924  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4925  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4926 
4927  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4928 
4929  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4930  parse->targetList);
4931 
4932  /* estimate how many distinct rows we'll get from each worker */
4933  numDistinctRows = estimate_num_groups(root, distinctExprs,
4934  cheapest_partial_path->rows,
4935  NULL, NULL);
4936 
4937  /*
4938  * Try sorting the cheapest path, incrementally sorting any paths with
4939  * presorted keys, and putting unique paths atop those.
4940  */
4941  if (grouping_is_sortable(root->processed_distinctClause))
4942  {
4943  foreach(lc, input_rel->partial_pathlist)
4944  {
4945  Path *input_path = (Path *) lfirst(lc);
4946  Path *sorted_path;
4947  bool is_sorted;
4948  int presorted_keys;
4949 
4950  is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
4951  input_path->pathkeys,
4952  &presorted_keys);
4953 
4954  if (is_sorted)
4955  sorted_path = input_path;
4956  else
4957  {
4958  /*
4959  * Try at least sorting the cheapest path and also try
4960  * incrementally sorting any path which is partially sorted
4961  * already (no need to deal with paths which have presorted
4962  * keys when incremental sort is disabled unless it's the
4963  * cheapest partial path).
4964  */
4965  if (input_path != cheapest_partial_path &&
4966  (presorted_keys == 0 || !enable_incremental_sort))
4967  continue;
4968 
4969  /*
4970  * We've no need to consider both a sort and incremental sort.
4971  * We'll just do a sort if there are no presorted keys and an
4972  * incremental sort when there are presorted keys.
4973  */
4974  if (presorted_keys == 0 || !enable_incremental_sort)
4975  sorted_path = (Path *) create_sort_path(root,
4976  partial_distinct_rel,
4977  input_path,
4978  root->distinct_pathkeys,
4979  -1.0);
4980  else
4981  sorted_path = (Path *) create_incremental_sort_path(root,
4982  partial_distinct_rel,
4983  input_path,
4984  root->distinct_pathkeys,
4985  presorted_keys,
4986  -1.0);
4987  }
4988 
4989  /*
4990  * An empty distinct_pathkeys means all tuples have the same value
4991  * for the DISTINCT clause. See create_final_distinct_paths()
4992  */
4993  if (root->distinct_pathkeys == NIL)
4994  {
4995  Node *limitCount;
4996 
4997  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4998  sizeof(int64),
4999  Int64GetDatum(1), false,
5000  FLOAT8PASSBYVAL);
5001 
5002  /*
5003  * Apply a LimitPath onto the partial path to restrict the
5004  * tuples from each worker to 1. create_final_distinct_paths
5005  * will need to apply an additional LimitPath to restrict this
5006  * to a single row after the Gather node. If the query
5007  * already has a LIMIT clause, then we could end up with three
5008  * Limit nodes in the final plan. Consolidating the top two
5009  * of these could be done, but does not seem worth troubling
5010  * over.
5011  */
5012  add_partial_path(partial_distinct_rel, (Path *)
5013  create_limit_path(root, partial_distinct_rel,
5014  sorted_path,
5015  NULL,
5016  limitCount,
5017  LIMIT_OPTION_COUNT,
5018  0, 1));
5019  }
5020  else
5021  {
5022  add_partial_path(partial_distinct_rel, (Path *)
5023  create_upper_unique_path(root, partial_distinct_rel,
5024  sorted_path,
5025  list_length(root->distinct_pathkeys),
5026  numDistinctRows));
5027  }
5028  }
5029  }
5030 
5031  /*
5032  * Now try hash aggregate paths, if enabled and hashing is possible. Since
5033  * we're not on the hook to ensure we do our best to create at least one
5034  * path here, we treat enable_hashagg as a hard off-switch rather than the
5035  * slightly softer variant in create_final_distinct_paths.
5036  */
5037  if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
5038  {
5039  add_partial_path(partial_distinct_rel, (Path *)
5040  create_agg_path(root,
5041  partial_distinct_rel,
5042  cheapest_partial_path,
5043  cheapest_partial_path->pathtarget,
5044  AGG_HASHED,
5045  AGGSPLIT_SIMPLE,
5046  root->processed_distinctClause,
5047  NIL,
5048  NULL,
5049  numDistinctRows));
5050  }
5051 
5052  /*
5053  * If there is an FDW that's responsible for all baserels of the query,
5054  * let it consider adding ForeignPaths.
5055  */
5056  if (partial_distinct_rel->fdwroutine &&
5057  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
5058  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
5059  UPPERREL_PARTIAL_DISTINCT,
5060  input_rel,
5061  partial_distinct_rel,
5062  NULL);
5063 
5064  /* Let extensions possibly add some more partial paths */
5065  if (create_upper_paths_hook)
5066  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
5067  input_rel, partial_distinct_rel, NULL);
5068 
5069  if (partial_distinct_rel->partial_pathlist != NIL)
5070  {
5071  generate_useful_gather_paths(root, partial_distinct_rel, true);
5072  set_cheapest(partial_distinct_rel);
5073 
5074  /*
5075  * Finally, create paths to distinctify the final result. This step
5076  * is needed to remove any duplicates due to combining rows from
5077  * parallel workers.
5078  */
5079  create_final_distinct_paths(root, partial_distinct_rel,
5080  final_distinct_rel);
5081  }
5082 }
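/*
 * Illustrative example: a parallel DISTINCT plan built via this rel
 * typically has the shape
 *
 *     Unique
 *       -> Gather Merge
 *            -> Unique
 *                 -> Sort
 *                      -> Parallel Seq Scan on tab
 *
 * where each worker removes its local duplicates first and the final Unique
 * removes duplicates that survive combining the workers' streams.
 */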
5083 
5084 /*
5085  * create_final_distinct_paths
5086  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
5087  *
5088  * input_rel: contains the source-data paths
5089  * distinct_rel: destination relation for storing created paths
5090  */
5091 static RelOptInfo *
5092 create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
5093  RelOptInfo *distinct_rel)
5094 {
5095  Query *parse = root->parse;
5096  Path *cheapest_input_path = input_rel->cheapest_total_path;
5097  double numDistinctRows;
5098  bool allow_hash;
5099 
5100  /* Estimate number of distinct rows there will be */
5101  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
5102  root->hasHavingQual)
5103  {
5104  /*
5105  * If there was grouping or aggregation, use the number of input rows
5106  * as the estimated number of DISTINCT rows (ie, assume the input is
5107  * already mostly unique).
5108  */
5109  numDistinctRows = cheapest_input_path->rows;
5110  }
5111  else
5112  {
5113  /*
5114  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
5115  */
5116  List *distinctExprs;
5117 
5118  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
5119  parse->targetList);
5120  numDistinctRows = estimate_num_groups(root, distinctExprs,
5121  cheapest_input_path->rows,
5122  NULL, NULL);
5123  }
5124 
5125  /*
5126  * Consider sort-based implementations of DISTINCT, if possible.
5127  */
5128  if (grouping_is_sortable(root->processed_distinctClause))
5129  {
5130  /*
5131  * Firstly, if we have any adequately-presorted paths, just stick a
5132  * Unique node on those. We also consider doing an explicit sort of
5133  * the cheapest input path and Unique'ing that. If any paths have
5134  * presorted keys then we'll create an incremental sort atop of those
5135  * before adding a unique node on the top.
5136  *
5137  * When we have DISTINCT ON, we must sort by the more rigorous of
5138  * DISTINCT and ORDER BY, else it won't have the desired behavior.
5139  * Also, if we do have to do an explicit sort, we might as well use
5140  * the more rigorous ordering to avoid a second sort later. (Note
5141  * that the parser will have ensured that one clause is a prefix of
5142  * the other.)
5143  */
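 /*
  * Illustrative example: for
  *
  *     SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b;
  *
  * distinct_pathkeys is (a) but sort_pathkeys is (a, b), so we sort by the
  * more rigorous (a, b); Unique then keeps the first row of each group of
  * equal "a" values, which is the one with the smallest "b".
  */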
5144  List *needed_pathkeys;
5145  ListCell *lc;
5146  double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
5147 
5148  if (parse->hasDistinctOn &&
5149  list_length(root->distinct_pathkeys) <
5150  list_length(root->sort_pathkeys))
5151  needed_pathkeys = root->sort_pathkeys;
5152  else
5153  needed_pathkeys = root->distinct_pathkeys;
5154 
5155  foreach(lc, input_rel->pathlist)
5156  {
5157  Path *input_path = (Path *) lfirst(lc);
5158  Path *sorted_path;
5159  bool is_sorted;
5160  int presorted_keys;
5161 
5162  is_sorted = pathkeys_count_contained_in(needed_pathkeys,
5163  input_path->pathkeys,
5164  &presorted_keys);
5165 
5166  if (is_sorted)
5167  sorted_path = input_path;
5168  else
5169  {
5170  /*
5171  * Try at least sorting the cheapest path and also try
5172  * incrementally sorting any path which is partially sorted
5173  * already (no need to deal with paths which have presorted
5174  * keys when incremental sort is disabled unless it's the
5175  * cheapest input path).
5176  */
5177  if (input_path != cheapest_input_path &&
5178  (presorted_keys == 0 || !enable_incremental_sort))
5179  continue;
5180 
5181  /*
5182  * We've no need to consider both a sort and incremental sort.
5183  * We'll just do a sort if there are no presorted keys and an
5184  * incremental sort when there are presorted keys.
5185  */
5186  if (presorted_keys == 0 || !enable_incremental_sort)
5187  sorted_path = (Path *) create_sort_path(root,
5188  distinct_rel,
5189  input_path,
5190  needed_pathkeys,
5191  limittuples);
5192  else
5193  sorted_path = (Path *) create_incremental_sort_path(root,
5194  distinct_rel,
5195  input_path,
5196  needed_pathkeys,
5197  presorted_keys,
5198  limittuples);
5199  }
5200 
5201  /*
5202  * distinct_pathkeys may have become empty if all of the pathkeys
5203  * were determined to be redundant. If all of the pathkeys are
5204  * redundant then each DISTINCT target must only allow a single
5205  * value, therefore all resulting tuples must be identical (or at
5206  * least indistinguishable by an equality check). We can uniquify
5207  * these tuples simply by taking the first tuple. All we do
5208  * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
5209  * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
5210  * list, so we must still only do this with paths which are
5211  * correctly sorted by sort_pathkeys.
5212  */
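 /*
  * Hypothetical example of that case: in
  *		SELECT DISTINCT x FROM tab WHERE x = 1;
  * the only DISTINCT key is equated to a constant, so distinct_pathkeys
  * reduces to NIL and any suitably sorted path can simply be capped with
  * a "LIMIT 1" instead of a Unique node.
  */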
5213  if (root->distinct_pathkeys == NIL)
5214  {
5215  Node *limitCount;
5216 
5217  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5218  sizeof(int64),
5219  Int64GetDatum(1), false,
5220  FLOAT8PASSBYVAL);
5221 
5222  /*
5223  * If the query already has a LIMIT clause, then we could end
5224  * up with a duplicate LimitPath in the final plan. That does
5225  * not seem worth troubling over too much.
5226  */
5227  add_path(distinct_rel, (Path *)
5228  create_limit_path(root, distinct_rel, sorted_path,
5229  NULL, limitCount,
5230  LIMIT_OPTION_COUNT, 0, 1));
5231  }
5232  else
5233  {
5234  add_path(distinct_rel, (Path *)
5235  create_upper_unique_path(root, distinct_rel,
5236  sorted_path,
5237  list_length(root->distinct_pathkeys),
5238  numDistinctRows));
5239  }
5240  }
5241  }
5242 
5243  /*
5244  * Consider hash-based implementations of DISTINCT, if possible.
5245  *
5246  * If we were not able to make any other types of path, we *must* hash or
5247  * die trying. If we do have other choices, there are two things that
5248  * should prevent selection of hashing: if the query uses DISTINCT ON
5249  * (because it won't really have the expected behavior if we hash), or if
5250  * enable_hashagg is off.
5251  *
5252  * Note: grouping_is_hashable() is much more expensive to check than the
5253  * other gating conditions, so we want to do it last.
5254  */
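 /*
  * A hypothetical case for the DISTINCT ON restriction above:
  *		SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b;
  * is specified to return the first row of each "a" group under the
  * ORDER BY, which a hashed implementation cannot guarantee; so hashing
  * is used for such queries only when no sorted alternative could be
  * built at all.
  */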
5255  if (distinct_rel->pathlist == NIL)
5256  allow_hash = true; /* we have no alternatives */
5257  else if (parse->hasDistinctOn || !enable_hashagg)
5258  allow_hash = false; /* policy-based decision not to hash */
5259  else
5260  allow_hash = true; /* default */
5261 
5262  if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5263  {
5264  /* Generate hashed aggregate path --- no sort needed */
5265  add_path(distinct_rel, (Path *)
5266  create_agg_path(root,
5267  distinct_rel,
5268  cheapest_input_path,
5269  cheapest_input_path->pathtarget,
5270  AGG_HASHED,
5271  AGGSPLIT_SIMPLE,
5272  root->processed_distinctClause,
5273  NIL,
5274  NULL,
5275  numDistinctRows));
5276  }
5277 
5278  return distinct_rel;
5279 }
5280 
5281 /*
5282  * create_ordered_paths
5283  *
5284  * Build a new upperrel containing Paths for ORDER BY evaluation.
5285  *
5286  * All paths in the result must satisfy the ORDER BY ordering.
5287  * The only new paths we need consider are an explicit full sort
5288  * and incremental sort on the cheapest-total existing path.
5289  *
5290  * input_rel: contains the source-data Paths
5291  * target: the output tlist the result Paths must emit
5292  * limit_tuples: estimated bound on the number of output tuples,
5293  * or -1 if no LIMIT or couldn't estimate
5294  *
5295  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5296  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5297  */
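/*
 * Illustrative (hypothetical) example: with an index providing order on
 * (a) and a query requesting ORDER BY a, b, the loop below can either
 * fully sort the cheapest path on (a, b) or, when
 * enable_incremental_sort is on, build an Incremental Sort on top of the
 * index path that only sorts within groups of equal "a".
 */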
5298 static RelOptInfo *
5299 create_ordered_paths(PlannerInfo *root,
5300  RelOptInfo *input_rel,
5301  PathTarget *target,
5302  bool target_parallel_safe,
5303  double limit_tuples)
5304 {
5305  Path *cheapest_input_path = input_rel->cheapest_total_path;
5306  RelOptInfo *ordered_rel;
5307  ListCell *lc;
5308 
5309  /* For now, do all work in the (ORDERED, NULL) upperrel */
5310  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5311 
5312  /*
5313  * If the input relation is not parallel-safe, then the ordered relation
5314  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5315  * target list is parallel-safe.
5316  */
5317  if (input_rel->consider_parallel && target_parallel_safe)
5318  ordered_rel->consider_parallel = true;
5319 
5320  /*
5321  * If the input rel belongs to a single FDW, so does the ordered_rel.
5322  */
5323  ordered_rel->serverid = input_rel->serverid;
5324  ordered_rel->userid = input_rel->userid;
5325  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5326  ordered_rel->fdwroutine = input_rel->fdwroutine;
5327 
5328  foreach(lc, input_rel->pathlist)
5329  {
5330  Path *input_path = (Path *) lfirst(lc);
5331  Path *sorted_path;
5332  bool is_sorted;
5333  int presorted_keys;
5334 
5335  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5336  input_path->pathkeys, &presorted_keys);
5337 
5338  if (is_sorted)
5339  sorted_path = input_path;
5340  else
5341  {
5342  /*
5343  * Try at least sorting the cheapest path and also try
5344  * incrementally sorting any path which is partially sorted
5345  * already (no need to deal with paths which have presorted keys
5346  * when incremental sort is disabled unless it's the cheapest
5347  * input path).
5348  */
5349  if (input_path != cheapest_input_path &&
5350  (presorted_keys == 0 || !enable_incremental_sort))
5351  continue;
5352 
5353  /*
5354  * We've no need to consider both a sort and incremental sort.
5355  * We'll just do a sort if there are no presorted keys and an
5356  * incremental sort when there are presorted keys.
5357  */
5358  if (presorted_keys == 0 || !enable_incremental_sort)
5359  sorted_path = (Path *) create_sort_path(root,
5360  ordered_rel,
5361  input_path,
5362  root->sort_pathkeys,
5363  limit_tuples);
5364  else
5365  sorted_path = (Path *) create_incremental_sort_path(root,
5366  ordered_rel,
5367  input_path,
5368  root->sort_pathkeys,
5369  presorted_keys,
5370  limit_tuples);
5371  }
5372 
5373  /*
5374  * If the pathtarget of the result path has different expressions from
5375  * the target to be applied, a projection step is needed.
5376  */
5377  if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5378  sorted_path = apply_projection_to_path(root, ordered_rel,
5379  sorted_path, target);
5380 
5381  add_path(ordered_rel, sorted_path);
5382  }
5383 
5384  /*
5385  * generate_gather_paths() will have already generated a simple Gather
5386  * path for the best parallel path, if any, and the loop above will have
5387  * considered sorting it. Similarly, generate_gather_paths() will also
5388  * have generated order-preserving Gather Merge plans which can be used
5389  * without sorting if they happen to match the sort_pathkeys, and the loop
5390  * above will have handled those as well. However, there's one more
5391  * possibility: it may make sense to sort the cheapest partial path or
5392  * incrementally sort any partial path that is partially sorted according
5393  * to the required output order and then use Gather Merge.
5394  */
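 /*
  * The plan shape for that last possibility is, schematically (a sketch,
  * not the output of any particular query):
  *
  *		Gather Merge
  *		  ->  Sort (or Incremental Sort)
  *				->  <cheapest partial path, e.g. Parallel Seq Scan>
  *
  * i.e. each worker sorts its share of the rows and the leader merges
  * the already-ordered streams.
  */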
5395  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5396  input_rel->partial_pathlist != NIL)
5397  {
5398  Path *cheapest_partial_path;
5399 
5400  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5401 
5402  foreach(lc, input_rel->partial_pathlist)
5403  {
5404  Path *input_path = (Path *) lfirst(lc);
5405  Path *sorted_path;
5406  bool is_sorted;
5407  int presorted_keys;
5408  double total_groups;
5409 
5410  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5411  input_path->pathkeys,
5412  &presorted_keys);
5413 
5414  if (is_sorted)
5415  continue;
5416 
5417  /*
5418  * Try at least sorting the cheapest path and also try
5419  * incrementally sorting any path which is partially sorted
5420  * already (no need to deal with paths which have presorted keys
5421  * when incremental sort is disabled unless it's the cheapest
5422  * partial path).
5423  */
5424  if (input_path != cheapest_partial_path &&
5425  (presorted_keys == 0 || !enable_incremental_sort))
5426  continue;
5427 
5428  /*
5429  * We've no need to consider both a sort and incremental sort.
5430  * We'll just do a sort if there are no presorted keys and an
5431  * incremental sort when there are presorted keys.
5432  */
5433  if (presorted_keys == 0 || !enable_incremental_sort)
5434  sorted_path = (Path *) create_sort_path(root,
5435  ordered_rel,
5436  input_path,
5437  root->sort_pathkeys,
5438  limit_tuples);
5439  else
5440  sorted_path = (Path *) create_incremental_sort_path(root,
5441  ordered_rel,
5442  input_path,
5443  root->sort_pathkeys,
5444  presorted_keys,
5445  limit_tuples);
5446  total_groups = compute_gather_rows(sorted_path);
5447  sorted_path = (Path *)
5448  create_gather_merge_path(root, ordered_rel,
5449  sorted_path,
5450  sorted_path->pathtarget,
5451  root->sort_pathkeys, NULL,
5452  &total_groups);
5453 
5454  /*
5455  * If the pathtarget of the result path has different expressions
5456  * from the target to be applied, a projection step is needed.
5457  */
5458  if (!equal(sorted_path->pathtarget->exprs, target->exprs))
5459  sorted_path = apply_projection_to_path(root, ordered_rel,
5460  sorted_path, target);
5461 
5462  add_path(ordered_rel, sorted_path);
5463  }
5464  }
5465 
5466  /*
5467  * If there is an FDW that's responsible for all baserels of the query,
5468  * let it consider adding ForeignPaths.
5469  */
5470  if (ordered_rel->fdwroutine &&
5471  ordered_rel->fdwroutine->GetForeignUpperPaths)
5472  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5473  input_rel, ordered_rel,
5474  NULL);
5475 
5476  /* Let extensions possibly add some more paths */
5477  if (create_upper_paths_hook)
5478  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5479  input_rel, ordered_rel, NULL);
5480 
5481  /*
5482  * No need to bother with set_cheapest here; grouping_planner does not
5483  * need us to do it.
5484  */
5485  Assert(ordered_rel->pathlist != NIL);
5486 
5487  return ordered_rel;
5488 }
5489 
5490 
5491 /*
5492  * make_group_input_target
5493  * Generate appropriate PathTarget for initial input to grouping nodes.
5494  *
5495  * If there is grouping or aggregation, the scan/join subplan cannot emit
5496  * the query's final targetlist; for example, it certainly can't emit any
5497  * aggregate function calls. This routine generates the correct target
5498  * for the scan/join subplan.
5499  *
5500  * The query target list passed from the parser already contains entries
5501  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5502  * for variables used only in HAVING clauses; so we need to add those
5503  * variables to the subplan target list. Also, we flatten all expressions
5504  * except GROUP BY items into their component variables; other expressions
5505  * will be computed by the upper plan nodes rather than by the subplan.
5506  * For example, given a query like
5507  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5508  * we want to pass this targetlist to the subplan:
5509  * a+b,c,d
5510  * where the a+b target will be used by the Sort/Group steps, and the
5511  * other targets will be used for computing the final results.
5512  *
5513  * 'final_target' is the query's final target list (in PathTarget form)
5514  *
5515  * The result is the PathTarget to be computed by the Paths returned from
5516  * query_planner().
5517  */
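/*
 * As a further hypothetical example covering HAVING: for
 *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b HAVING MIN(e) > 0;
 * the subplan target built below would be
 *		a+b,c,d,e
 * since "e" is referenced only inside the HAVING clause but must still be
 * supplied by the scan/join subplan.
 */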
5518 static PathTarget *
5519 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5520 {
5521  Query *parse = root->parse;
5522  PathTarget *input_target;
5523  List *non_group_cols;
5524  List *non_group_vars;
5525  int i;
5526  ListCell *lc;
5527 
5528  /*
5529  * We must build a target containing all grouping columns, plus any other
5530  * Vars mentioned in the query's targetlist and HAVING qual.
5531  */
5532  input_target = create_empty_pathtarget();
5533  non_group_cols = NIL;
5534 
5535  i = 0;
5536  foreach(lc, final_target->exprs)
5537  {
5538  Expr *expr = (Expr *) lfirst(lc);
5539  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5540 
5541  if (sgref && root->processed_groupClause &&
5542  get_sortgroupref_clause_noerr(sgref,
5543  root->processed_groupClause) != NULL)
5544  {
5545  /*
5546  * It's a grouping column, so add it to the input target as-is.
5547  *
5548  * Note that the target is logically below the grouping step. So
5549  * with grouping sets we need to remove the grouping step's RT index,
5550  * if any, from the target expression.
5551  */
5552  if (parse->hasGroupRTE && parse->groupingSets != NIL)
5553  {
5554  Assert(root->group_rtindex > 0);
5555  expr = (Expr *)
5556  remove_nulling_relids((Node *) expr,
5557  bms_make_singleton(root->group_rtindex),
5558  NULL);
5559  }
5560  add_column_to_pathtarget(input_target, expr, sgref);
5561  }
5562  else
5563  {
5564  /*
5565  * Non-grouping column, so just remember the expression for later
5566  * call to pull_var_clause.
5567  */
5568  non_group_cols = lappend(non_group_cols, expr);
5569  }
5570 
5571  i++;
5572  }
5573 
5574  /*
5575  * If there's a HAVING clause, we'll need the Vars it uses, too.
5576  */
5577  if (parse->havingQual)
5578  non_group_cols = lappend(non_group_cols, parse->havingQual);
5579 
5580  /*
5581  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5582  * add them to the input target if not already present. (A Var used
5583  * directly as a GROUP BY item will be present already.) Note this
5584  * includes Vars used in resjunk items, so we are covering the needs of
5585  * ORDER BY and window specifications. Vars used within Aggrefs and
5586  * WindowFuncs will be pulled out here, too.
5587  *
5588  * Note that the target is logically below the grouping step. So with
5589  * grouping sets we need to remove the grouping step's RT index, if any,
5590  * from the non-group Vars.
5591  */
5592  non_group_vars = pull_var_clause((Node *) non_group_cols,
5593  PVC_RECURSE_AGGREGATES |
5594  PVC_RECURSE_WINDOWFUNCS |
5595  PVC_INCLUDE_PLACEHOLDERS);
5596  if (parse->hasGroupRTE && parse->groupingSets != NIL)
5597  {
5598  Assert(root->group_rtindex > 0);
5599  non_group_vars = (List *)
5600  remove_nulling_relids((Node *) non_group_vars,
5601  bms_make_singleton(root->group_rtindex),
5602  NULL);
5603  }
5604  add_new_columns_to_pathtarget(input_target, non_group_vars);
5605 
5606  /* clean up cruft */
5607  list_free(non_group_vars);
5608  list_free(non_group_cols);
5609 
5610  /* XXX this causes some redundant cost calculation ... */
5611  return set_pathtarget_cost_width(root, input_target);
5612 }
5613 
5614 /*
5615  * make_partial_grouping_target
5616  * Generate appropriate PathTarget for output of partial aggregate
5617  * (or partial grouping, if there are no aggregates) nodes.
5618  *
5619  * A partial aggregation node needs to emit all the same aggregates that
5620  * a regular aggregation node would, plus any aggregates used in HAVING;
5621  * except that the Aggref nodes should be marked as partial aggregates.
5622  *
5623  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5624  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5625  * these would be Vars that are grouped by or used in grouping expressions.)
5626  *
5627  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5628  * havingQual represents the HAVING clause.
5629  */
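/*
 * Hypothetical example: for
 *		SELECT a, SUM(b) FROM tab GROUP BY a HAVING AVG(c) > 0;
 * the partial target built below contains "a" plus partial-mode Aggrefs
 * for SUM(b) and AVG(c), so the final aggregation step can combine the
 * workers' transition states and then evaluate the HAVING clause.
 */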
5630 static PathTarget *
5631 make_partial_grouping_target(PlannerInfo *root,
5632  PathTarget *grouping_target,
5633  Node *havingQual)
5634 {
5635  PathTarget *partial_target;
5636  List *non_group_cols;
5637  List *non_group_exprs;
5638  int i;
5639  ListCell *lc;
5640 
5641  partial_target = create_empty_pathtarget();
5642  non_group_cols = NIL;
5643 
5644  i = 0;
5645  foreach(lc, grouping_target->exprs)
5646  {
5647  Expr *expr = (Expr *) lfirst(lc);
5648  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5649 
5650  if (sgref && root->processed_groupClause &&
5651  get_sortgroupref_clause_noerr(sgref,
5652  root->processed_groupClause) != NULL)
5653  {
5654  /*
5655  * It's a grouping column, so add it to the partial_target as-is.
5656  * (This allows the upper agg step to repeat the grouping calcs.)
5657  */
5658  add_column_to_pathtarget(partial_target, expr, sgref);
5659  }
5660  else
5661  {
5662  /*
5663  * Non-grouping column, so just remember the expression for later
5664  * call to pull_var_clause.
5665  */
5666  non_group_cols = lappend(non_group_cols, expr);
5667  }
5668 
5669  i++;
5670  }
5671 
5672  /*
5673  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5674  */
5675  if (havingQual)
5676  non_group_cols = lappend(non_group_cols, havingQual);
5677 
5678  /*
5679  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5680  * non-group cols (plus HAVING), and add them to the partial_target if not
5681  * already present. (An expression used directly as a GROUP BY item will
5682  * be present already.) Note this includes Vars used in resjunk items, so
5683  * we are covering the needs of ORDER BY and window specifications.
5684  */
5685  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5686  PVC_INCLUDE_AGGREGATES |
5687  PVC_RECURSE_WINDOWFUNCS |
5688  PVC_INCLUDE_PLACEHOLDERS);
5689 
5690  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5691 
5692  /*
5693  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5694  * are at the top level of the target list, so we can just scan the list
5695  * rather than recursing through the expression trees.
5696  */
5697  foreach(lc, partial_target->exprs)
5698  {
5699  Aggref *aggref = (Aggref *) lfirst(lc);
5700 
5701  if (IsA(aggref, Aggref))
5702  {
5703  Aggref *newaggref;
5704 
5705  /*
5706  * We shouldn't need to copy the substructure of the Aggref node,
5707  * but flat-copy the node itself to avoid damaging other trees.
5708  */
5709  newaggref = makeNode(Aggref);
5710  memcpy(newaggref, aggref, sizeof(Aggref));
5711 
5712  /* For now, assume serialization is required */
5713  mark_partial_aggref(newaggref,