planner.c
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_aggregate.h"
28 #include "catalog/pg_constraint.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_proc.h"
31 #include "catalog/pg_type.h"
32 #include "executor/executor.h"
33 #include "executor/nodeAgg.h"
34 #include "foreign/fdwapi.h"
35 #include "jit/jit.h"
36 #include "lib/bipartite_match.h"
37 #include "lib/knapsack.h"
38 #include "miscadmin.h"
39 #include "nodes/makefuncs.h"
40 #include "nodes/nodeFuncs.h"
41 #ifdef OPTIMIZER_DEBUG
42 #include "nodes/print.h"
43 #endif
44 #include "optimizer/appendinfo.h"
45 #include "optimizer/clauses.h"
46 #include "optimizer/cost.h"
47 #include "optimizer/inherit.h"
48 #include "optimizer/optimizer.h"
49 #include "optimizer/paramassign.h"
50 #include "optimizer/pathnode.h"
51 #include "optimizer/paths.h"
52 #include "optimizer/plancat.h"
53 #include "optimizer/planmain.h"
54 #include "optimizer/planner.h"
55 #include "optimizer/prep.h"
56 #include "optimizer/subselect.h"
57 #include "optimizer/tlist.h"
58 #include "parser/analyze.h"
59 #include "parser/parse_agg.h"
60 #include "parser/parsetree.h"
61 #include "partitioning/partdesc.h"
62 #include "rewrite/rewriteManip.h"
63 #include "storage/dsm_impl.h"
64 #include "utils/lsyscache.h"
65 #include "utils/rel.h"
66 #include "utils/selfuncs.h"
67 #include "utils/syscache.h"
68 
69 /* GUC parameters */
70 double		cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
71 int			force_parallel_mode = FORCE_PARALLEL_OFF;
72 bool		parallel_leader_participation = true;
73 
74 /* Hook for plugins to get control in planner() */
75 planner_hook_type planner_hook = NULL;
76 
77 /* Hook for plugins to get control when grouping_planner() plans upper rels */
78 create_upper_paths_hook_type create_upper_paths_hook = NULL;
79 
80 
81 /* Expression kind codes for preprocess_expression */
82 #define EXPRKIND_QUAL 0
83 #define EXPRKIND_TARGET 1
84 #define EXPRKIND_RTFUNC 2
85 #define EXPRKIND_RTFUNC_LATERAL 3
86 #define EXPRKIND_VALUES 4
87 #define EXPRKIND_VALUES_LATERAL 5
88 #define EXPRKIND_LIMIT 6
89 #define EXPRKIND_APPINFO 7
90 #define EXPRKIND_PHV 8
91 #define EXPRKIND_TABLESAMPLE 9
92 #define EXPRKIND_ARBITER_ELEM 10
93 #define EXPRKIND_TABLEFUNC 11
94 #define EXPRKIND_TABLEFUNC_LATERAL 12
95 
96 /* Passthrough data for standard_qp_callback */
97 typedef struct
98 {
99  List *activeWindows; /* active windows, if any */
100 	List	   *groupClause;	/* overrides parse->groupClause */
101 } standard_qp_extra;
102 
103 /*
104  * Data specific to grouping sets
105  */
106 
107 typedef struct
108 {
109 	List	   *rollups;
110 	List	   *hash_sets_idx;
111 	double		dNumHashGroups;
112 	bool		any_hashable;
113 	Bitmapset  *unsortable_refs;
114 	Bitmapset  *unhashable_refs;
115 	List	   *unsortable_sets;
116 	int		   *tleref_to_colnum_map;
117 } grouping_sets_data;
118 
119 /*
120  * Temporary structure for use during WindowClause reordering in order to be
121  * able to sort WindowClauses on partitioning/ordering prefix.
122  */
123 typedef struct
124 {
125 	WindowClause *wc;			/* The WindowClause being processed */
126 	List	   *uniqueOrder;	/* A List of unique ordering/partitioning
127 								 * clauses per Window */
128 } WindowClauseSortData;
129 
130 /* Local functions */
131 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
132 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
133 static void grouping_planner(PlannerInfo *root, double tuple_fraction);
134 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
135 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
136 				int *tleref_to_colnum_map);
137 static void preprocess_rowmarks(PlannerInfo *root);
138 static double preprocess_limit(PlannerInfo *root,
139 				double tuple_fraction,
140 				int64 *offset_est, int64 *count_est);
141 static void remove_useless_groupby_columns(PlannerInfo *root);
142 static List *preprocess_groupclause(PlannerInfo *root, List *force);
143 static List *extract_rollup_sets(List *groupingSets);
144 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
145 static void standard_qp_callback(PlannerInfo *root, void *extra);
146 static double get_number_of_groups(PlannerInfo *root,
147 				double path_rows,
148 				grouping_sets_data *gd,
149 				List *target_list);
150 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
151 				RelOptInfo *input_rel,
152 				PathTarget *target,
153 				bool target_parallel_safe,
154 				grouping_sets_data *gd);
155 static bool is_degenerate_grouping(PlannerInfo *root);
156 static void create_degenerate_grouping_paths(PlannerInfo *root,
157 				RelOptInfo *input_rel,
158 				RelOptInfo *grouped_rel);
159 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
160 				PathTarget *target, bool target_parallel_safe,
161 				Node *havingQual);
162 static void create_ordinary_grouping_paths(PlannerInfo *root,
163 				RelOptInfo *input_rel,
164 				RelOptInfo *grouped_rel,
165 				const AggClauseCosts *agg_costs,
166 				grouping_sets_data *gd,
167 				GroupPathExtraData *extra,
168 				RelOptInfo **partially_grouped_rel_p);
169 static void consider_groupingsets_paths(PlannerInfo *root,
170 				RelOptInfo *grouped_rel,
171 				Path *path,
172 				bool is_sorted,
173 				bool can_hash,
174 				grouping_sets_data *gd,
175 				const AggClauseCosts *agg_costs,
176 				double dNumGroups);
177 static RelOptInfo *create_window_paths(PlannerInfo *root,
178 				RelOptInfo *input_rel,
179 				PathTarget *input_target,
180 				PathTarget *output_target,
181 				bool output_target_parallel_safe,
182 				WindowFuncLists *wflists,
183 				List *activeWindows);
184 static void create_one_window_path(PlannerInfo *root,
185 				RelOptInfo *window_rel,
186 				Path *path,
187 				PathTarget *input_target,
188 				PathTarget *output_target,
189 				WindowFuncLists *wflists,
190 				List *activeWindows);
191 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
192 				RelOptInfo *input_rel);
193 static void create_partial_distinct_paths(PlannerInfo *root,
194 				RelOptInfo *input_rel,
195 				RelOptInfo *final_distinct_rel);
196 static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
197 				RelOptInfo *input_rel,
198 				RelOptInfo *distinct_rel);
199 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
200 				RelOptInfo *input_rel,
201 				PathTarget *target,
202 				bool target_parallel_safe,
203 				double limit_tuples);
204 static PathTarget *make_group_input_target(PlannerInfo *root,
205 				PathTarget *final_target);
206 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
207 				PathTarget *grouping_target,
208 				Node *havingQual);
209 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
210 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
211 static PathTarget *make_window_input_target(PlannerInfo *root,
212 				PathTarget *final_target,
213 				List *activeWindows);
214 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
215 				List *tlist);
216 static PathTarget *make_sort_input_target(PlannerInfo *root,
217 				PathTarget *final_target,
218 				bool *have_postponed_srfs);
219 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
220 				List *targets, List *targets_contain_srfs);
221 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
222 				RelOptInfo *grouped_rel,
223 				RelOptInfo *partially_grouped_rel,
224 				const AggClauseCosts *agg_costs,
225 				grouping_sets_data *gd,
226 				double dNumGroups,
227 				GroupPathExtraData *extra);
228 static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
229 				RelOptInfo *grouped_rel,
230 				RelOptInfo *input_rel,
231 				grouping_sets_data *gd,
232 				GroupPathExtraData *extra,
233 				bool force_rel_creation);
234 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
235 static bool can_partial_agg(PlannerInfo *root);
236 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
237 				RelOptInfo *rel,
238 				List *scanjoin_targets,
239 				List *scanjoin_targets_contain_srfs,
240 				bool scanjoin_target_parallel_safe,
241 				bool tlist_same_exprs);
242 static void create_partitionwise_grouping_paths(PlannerInfo *root,
243 				RelOptInfo *input_rel,
244 				RelOptInfo *grouped_rel,
245 				RelOptInfo *partially_grouped_rel,
246 				const AggClauseCosts *agg_costs,
247 				grouping_sets_data *gd,
248 				PartitionwiseAggregateType patype,
249 				GroupPathExtraData *extra);
250 static bool group_by_has_partkey(RelOptInfo *input_rel,
251 				List *targetList,
252 				List *groupClause);
253 static int	common_prefix_cmp(const void *a, const void *b);
254 
255 
256 /*****************************************************************************
257  *
258  * Query optimizer entry point
259  *
260  * To support loadable plugins that monitor or modify planner behavior,
261  * we provide a hook variable that lets a plugin get control before and
262  * after the standard planning process. The plugin would normally call
263  * standard_planner().
264  *
265  * Note to plugin authors: standard_planner() scribbles on its Query input,
266  * so you'd better copy that data structure if you want to plan more than once.
267  *
268  *****************************************************************************/
269 PlannedStmt *
270 planner(Query *parse, const char *query_string, int cursorOptions,
271  ParamListInfo boundParams)
272 {
273  PlannedStmt *result;
274 
275  if (planner_hook)
276  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
277  else
278  result = standard_planner(parse, query_string, cursorOptions, boundParams);
279  return result;
280 }
281 
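For orientation, the following is a minimal, hypothetical sketch (not part of planner.c) of how a loadable module might install planner_hook from its _PG_init(), chaining to any previously installed hook or falling back to standard_planner(); the function name my_planner is an illustrative assumption.

#include "postgres.h"
#include "fmgr.h"
#include "nodes/params.h"
#include "optimizer/planner.h"

PG_MODULE_MAGIC;

static planner_hook_type prev_planner_hook = NULL;

/* Hypothetical hook: inspect the Query, then delegate planning */
static PlannedStmt *
my_planner(Query *parse, const char *query_string,
		   int cursorOptions, ParamListInfo boundParams)
{
	/* a real plugin would examine or adjust parse here */
	if (prev_planner_hook)
		return prev_planner_hook(parse, query_string,
								 cursorOptions, boundParams);
	return standard_planner(parse, query_string,
							cursorOptions, boundParams);
}

void
_PG_init(void)
{
	/* save any hook installed before us, then install ours */
	prev_planner_hook = planner_hook;
	planner_hook = my_planner;
}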
282 PlannedStmt *
283 standard_planner(Query *parse, const char *query_string, int cursorOptions,
284  ParamListInfo boundParams)
285 {
286  PlannedStmt *result;
287  PlannerGlobal *glob;
288  double tuple_fraction;
289  PlannerInfo *root;
290  RelOptInfo *final_rel;
291  Path *best_path;
292  Plan *top_plan;
293  ListCell *lp,
294  *lr;
295 
296  /*
297  * Set up global state for this planner invocation. This data is needed
298  * across all levels of sub-Query that might exist in the given command,
299  * so we keep it in a separate struct that's linked to by each per-Query
300  * PlannerInfo.
301  */
302  glob = makeNode(PlannerGlobal);
303 
304  glob->boundParams = boundParams;
305  glob->subplans = NIL;
306  glob->subroots = NIL;
307  glob->rewindPlanIDs = NULL;
308  glob->finalrtable = NIL;
309  glob->finalrowmarks = NIL;
310  glob->resultRelations = NIL;
311  glob->appendRelations = NIL;
312  glob->relationOids = NIL;
313  glob->invalItems = NIL;
314  glob->paramExecTypes = NIL;
315  glob->lastPHId = 0;
316  glob->lastRowMarkId = 0;
317  glob->lastPlanNodeId = 0;
318  glob->transientPlan = false;
319  glob->dependsOnRole = false;
320 
321  /*
322  * Assess whether it's feasible to use parallel mode for this query. We
323  * can't do this in a standalone backend, or if the command will try to
324  * modify any data, or if this is a cursor operation, or if GUCs are set
325  * to values that don't permit parallelism, or if parallel-unsafe
326  * functions are present in the query tree.
327  *
328  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
329  * MATERIALIZED VIEW to use parallel plans, but as of now, only the leader
330  * backend writes into a completely new table. In the future, we can
331  * extend it to allow workers to write into the table. However, to allow
332  * parallel updates and deletes, we have to solve other problems,
333  * especially around combo CIDs.)
334  *
335  * For now, we don't try to use parallel mode if we're running inside a
336  * parallel worker. We might eventually be able to relax this
337  * restriction, but for now it seems best not to have parallel workers
338  * trying to create their own parallel workers.
339  */
340 	if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
341 		IsUnderPostmaster &&
342 		parse->commandType == CMD_SELECT &&
343 		!parse->hasModifyingCTE &&
344 		max_parallel_workers_per_gather > 0 &&
345 		!IsParallelWorker())
346 	{
347 		/* all the cheap tests pass, so scan the query tree */
348 		glob->maxParallelHazard = max_parallel_hazard(parse);
349 		glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
350  }
351  else
352  {
353  /* skip the query tree scan, just assume it's unsafe */
354  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
355  glob->parallelModeOK = false;
356  }
357 
358  /*
359  * glob->parallelModeNeeded is normally set to false here and changed to
360  * true during plan creation if a Gather or Gather Merge plan is actually
361  * created (cf. create_gather_plan, create_gather_merge_plan).
362  *
363  * However, if force_parallel_mode = on or force_parallel_mode = regress,
364  * then we impose parallel mode whenever it's safe to do so, even if the
365  * final plan doesn't use parallelism. It's not safe to do so if the
366  * query contains anything parallel-unsafe; parallelModeOK will be false
367  * in that case. Note that parallelModeOK can't change after this point.
368  * Otherwise, everything in the query is either parallel-safe or
369  * parallel-restricted, and in either case it should be OK to impose
370  * parallel-mode restrictions. If that ends up breaking something, then
371  * either some function the user included in the query is incorrectly
372  * labeled as parallel-safe or parallel-restricted when in reality it's
373  * parallel-unsafe, or else the query planner itself has a bug.
374  */
375 	glob->parallelModeNeeded = glob->parallelModeOK &&
376 		(force_parallel_mode != FORCE_PARALLEL_OFF);
377 
378  /* Determine what fraction of the plan is likely to be scanned */
379  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
380  {
381  /*
382  * We have no real idea how many tuples the user will ultimately FETCH
383  * from a cursor, but it is often the case that he doesn't want 'em
384  * all, or would prefer a fast-start plan anyway so that he can
385  * process some of the tuples sooner. Use a GUC parameter to decide
386  * what fraction to optimize for.
387  */
388  tuple_fraction = cursor_tuple_fraction;
389 
390  /*
391  * We document cursor_tuple_fraction as simply being a fraction, which
392  * means the edge cases 0 and 1 have to be treated specially here. We
393  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
394  */
395  if (tuple_fraction >= 1.0)
396  tuple_fraction = 0.0;
397  else if (tuple_fraction <= 0.0)
398  tuple_fraction = 1e-10;
399  }
400  else
401  {
402  /* Default assumption is we need all the tuples */
403  tuple_fraction = 0.0;
404  }
405 
406  /* primary planning entry point (may recurse for subqueries) */
407  root = subquery_planner(glob, parse, NULL,
408  false, tuple_fraction);
409 
410  /* Select best Path and turn it into a Plan */
411  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
412  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
413 
414  top_plan = create_plan(root, best_path);
415 
416  /*
417  * If creating a plan for a scrollable cursor, make sure it can run
418  * backwards on demand. Add a Material node at the top at need.
419  */
420  if (cursorOptions & CURSOR_OPT_SCROLL)
421  {
422  if (!ExecSupportsBackwardScan(top_plan))
423  top_plan = materialize_finished_plan(top_plan);
424  }
425 
426  /*
427  * Optionally add a Gather node for testing purposes, provided this is
428  * actually a safe thing to do.
429  */
430 	if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
431 	{
432  Gather *gather = makeNode(Gather);
433 
434  /*
435  * If there are any initPlans attached to the formerly-top plan node,
436  * move them up to the Gather node; same as we do for Material node in
437  * materialize_finished_plan.
438  */
439  gather->plan.initPlan = top_plan->initPlan;
440  top_plan->initPlan = NIL;
441 
442  gather->plan.targetlist = top_plan->targetlist;
443  gather->plan.qual = NIL;
444  gather->plan.lefttree = top_plan;
445  gather->plan.righttree = NULL;
446  gather->num_workers = 1;
447  gather->single_copy = true;
448 		gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
449 
450  /*
451  * Since this Gather has no parallel-aware descendants to signal to,
452  * we don't need a rescan Param.
453  */
454  gather->rescan_param = -1;
455 
456  /*
457  * Ideally we'd use cost_gather here, but setting up dummy path data
458  * to satisfy it doesn't seem much cleaner than knowing what it does.
459  */
460 		gather->plan.startup_cost = top_plan->startup_cost +
461 			parallel_setup_cost;
462 		gather->plan.total_cost = top_plan->total_cost +
463 			parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
464  gather->plan.plan_rows = top_plan->plan_rows;
465  gather->plan.plan_width = top_plan->plan_width;
466  gather->plan.parallel_aware = false;
467  gather->plan.parallel_safe = false;
468 
469  /* use parallel mode for parallel plans. */
470  root->glob->parallelModeNeeded = true;
471 
472  top_plan = &gather->plan;
473  }
474 
475  /*
476  * If any Params were generated, run through the plan tree and compute
477  * each plan node's extParam/allParam sets. Ideally we'd merge this into
478  * set_plan_references' tree traversal, but for now it has to be separate
479  * because we need to visit subplans before not after main plan.
480  */
481  if (glob->paramExecTypes != NIL)
482  {
483  Assert(list_length(glob->subplans) == list_length(glob->subroots));
484  forboth(lp, glob->subplans, lr, glob->subroots)
485  {
486  Plan *subplan = (Plan *) lfirst(lp);
487  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
488 
489  SS_finalize_plan(subroot, subplan);
490  }
491  SS_finalize_plan(root, top_plan);
492  }
493 
494  /* final cleanup of the plan */
495  Assert(glob->finalrtable == NIL);
496  Assert(glob->finalrowmarks == NIL);
497  Assert(glob->resultRelations == NIL);
498  Assert(glob->appendRelations == NIL);
499  top_plan = set_plan_references(root, top_plan);
500  /* ... and the subplans (both regular subplans and initplans) */
501  Assert(list_length(glob->subplans) == list_length(glob->subroots));
502  forboth(lp, glob->subplans, lr, glob->subroots)
503  {
504  Plan *subplan = (Plan *) lfirst(lp);
505  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
506 
507  lfirst(lp) = set_plan_references(subroot, subplan);
508  }
509 
510  /* build the PlannedStmt result */
511  result = makeNode(PlannedStmt);
512 
513  result->commandType = parse->commandType;
514  result->queryId = parse->queryId;
515  result->hasReturning = (parse->returningList != NIL);
516  result->hasModifyingCTE = parse->hasModifyingCTE;
517  result->canSetTag = parse->canSetTag;
518  result->transientPlan = glob->transientPlan;
519  result->dependsOnRole = glob->dependsOnRole;
520  result->parallelModeNeeded = glob->parallelModeNeeded;
521  result->planTree = top_plan;
522  result->partPruneInfos = glob->partPruneInfos;
523  result->rtable = glob->finalrtable;
524  result->resultRelations = glob->resultRelations;
525  result->appendRelations = glob->appendRelations;
526  result->subplans = glob->subplans;
527  result->rewindPlanIDs = glob->rewindPlanIDs;
528  result->rowMarks = glob->finalrowmarks;
529  result->relationOids = glob->relationOids;
530  result->invalItems = glob->invalItems;
531  result->paramExecTypes = glob->paramExecTypes;
532  /* utilityStmt should be null, but we might as well copy it */
533  result->utilityStmt = parse->utilityStmt;
534  result->stmt_location = parse->stmt_location;
535  result->stmt_len = parse->stmt_len;
536 
537  result->jitFlags = PGJIT_NONE;
538  if (jit_enabled && jit_above_cost >= 0 &&
539  top_plan->total_cost > jit_above_cost)
540  {
541  result->jitFlags |= PGJIT_PERFORM;
542 
543  /*
544  * Decide how much effort should be put into generating better code.
545  */
546 		if (jit_optimize_above_cost >= 0 &&
547 			top_plan->total_cost > jit_optimize_above_cost)
548 			result->jitFlags |= PGJIT_OPT3;
549  if (jit_inline_above_cost >= 0 &&
550  top_plan->total_cost > jit_inline_above_cost)
551  result->jitFlags |= PGJIT_INLINE;
552 
553  /*
554  * Decide which operations should be JITed.
555  */
556  if (jit_expressions)
557  result->jitFlags |= PGJIT_EXPR;
558 		if (jit_tuple_deforming)
559 			result->jitFlags |= PGJIT_DEFORM;
560  }
561 
562  if (glob->partition_directory != NULL)
563  DestroyPartitionDirectory(glob->partition_directory);
564 
565  return result;
566 }
567 
568 
569 /*--------------------
570  * subquery_planner
571  * Invokes the planner on a subquery. We recurse to here for each
572  * sub-SELECT found in the query tree.
573  *
574  * glob is the global state for the current planner run.
575  * parse is the querytree produced by the parser & rewriter.
576  * parent_root is the immediate parent Query's info (NULL at the top level).
577  * hasRecursion is true if this is a recursive WITH query.
578  * tuple_fraction is the fraction of tuples we expect will be retrieved.
579  * tuple_fraction is interpreted as explained for grouping_planner, below.
580  *
581  * Basically, this routine does the stuff that should only be done once
582  * per Query object. It then calls grouping_planner. At one time,
583  * grouping_planner could be invoked recursively on the same Query object;
584  * that's not currently true, but we keep the separation between the two
585  * routines anyway, in case we need it again someday.
586  *
587  * subquery_planner will be called recursively to handle sub-Query nodes
588  * found within the query's expressions and rangetable.
589  *
590  * Returns the PlannerInfo struct ("root") that contains all data generated
591  * while planning the subquery. In particular, the Path(s) attached to
592  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
593  * cheapest way(s) to implement the query. The top level will select the
594  * best Path and pass it through createplan.c to produce a finished Plan.
595  *--------------------
596  */
597 PlannerInfo *
598 subquery_planner(PlannerGlobal *glob, Query *parse,
599 				 PlannerInfo *parent_root,
600  bool hasRecursion, double tuple_fraction)
601 {
602  PlannerInfo *root;
603  List *newWithCheckOptions;
604  List *newHaving;
605  bool hasOuterJoins;
606  bool hasResultRTEs;
607  RelOptInfo *final_rel;
608  ListCell *l;
609 
610  /* Create a PlannerInfo data structure for this subquery */
611  root = makeNode(PlannerInfo);
612  root->parse = parse;
613  root->glob = glob;
614  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
615  root->parent_root = parent_root;
616  root->plan_params = NIL;
617  root->outer_params = NULL;
618  root->planner_cxt = CurrentMemoryContext;
619  root->init_plans = NIL;
620  root->cte_plan_ids = NIL;
621  root->multiexpr_params = NIL;
622  root->eq_classes = NIL;
623  root->ec_merging_done = false;
624  root->all_result_relids =
625  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
626  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
627  root->append_rel_list = NIL;
628  root->row_identity_vars = NIL;
629  root->rowMarks = NIL;
630  memset(root->upper_rels, 0, sizeof(root->upper_rels));
631  memset(root->upper_targets, 0, sizeof(root->upper_targets));
632  root->processed_tlist = NIL;
633  root->update_colnos = NIL;
634  root->grouping_map = NULL;
635  root->minmax_aggs = NIL;
636  root->qual_security_level = 0;
637  root->hasPseudoConstantQuals = false;
638  root->hasAlternativeSubPlans = false;
639  root->placeholdersFrozen = false;
640  root->hasRecursion = hasRecursion;
641 	if (hasRecursion)
642 		root->wt_param_id = assign_special_exec_param(root);
643 	else
644  root->wt_param_id = -1;
645  root->non_recursive_path = NULL;
646  root->partColsUpdated = false;
647 
648  /*
649  * If there is a WITH list, process each WITH query and either convert it
650  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
651  */
652  if (parse->cteList)
653  SS_process_ctes(root);
654 
655  /*
656  * If it's a MERGE command, transform the joinlist as appropriate.
657  */
658 	transform_MERGE_to_join(parse);
659 
660  /*
661  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
662  * that we don't need so many special cases to deal with that situation.
663  */
664 	replace_empty_jointree(parse);
665 
666  /*
667  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
668  * to transform them into joins. Note that this step does not descend
669  * into subqueries; if we pull up any subqueries below, their SubLinks are
670  * processed just before pulling them up.
671  */
672  if (parse->hasSubLinks)
673  pull_up_sublinks(root);
674 
675  /*
676  * Scan the rangetable for function RTEs, do const-simplification on them,
677  * and then inline them if possible (producing subqueries that might get
678  * pulled up next). Recursion issues here are handled in the same way as
679  * for SubLinks.
680  */
681 	preprocess_function_rtes(root);
682 
683  /*
684  * Check to see if any subqueries in the jointree can be merged into this
685  * query.
686  */
687  pull_up_subqueries(root);
688 
689  /*
690  * If this is a simple UNION ALL query, flatten it into an appendrel. We
691  * do this now because it requires applying pull_up_subqueries to the leaf
692  * queries of the UNION ALL, which weren't touched above because they
693  * weren't referenced by the jointree (they will be after we do this).
694  */
695  if (parse->setOperations)
696 		flatten_simple_union_all(root);
697 
698  /*
699  * Survey the rangetable to see what kinds of entries are present. We can
700  * skip some later processing if relevant SQL features are not used; for
701  * example if there are no JOIN RTEs we can avoid the expense of doing
702  * flatten_join_alias_vars(). This must be done after we have finished
703  * adding rangetable entries, of course. (Note: actually, processing of
704  * inherited or partitioned rels can cause RTEs for their child tables to
705  * get added later; but those must all be RTE_RELATION entries, so they
706  * don't invalidate the conclusions drawn here.)
707  */
708  root->hasJoinRTEs = false;
709  root->hasLateralRTEs = false;
710  hasOuterJoins = false;
711  hasResultRTEs = false;
712  foreach(l, parse->rtable)
713  {
714 		RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
715 
716  switch (rte->rtekind)
717  {
718  case RTE_RELATION:
719  if (rte->inh)
720  {
721  /*
722  * Check to see if the relation actually has any children;
723  * if not, clear the inh flag so we can treat it as a
724  * plain base relation.
725  *
726  * Note: this could give a false-positive result, if the
727  * rel once had children but no longer does. We used to
728  * be able to clear rte->inh later on when we discovered
729  * that, but no more; we have to handle such cases as
730  * full-fledged inheritance.
731  */
732  rte->inh = has_subclass(rte->relid);
733  }
734  break;
735  case RTE_JOIN:
736  root->hasJoinRTEs = true;
737  if (IS_OUTER_JOIN(rte->jointype))
738  hasOuterJoins = true;
739  break;
740  case RTE_RESULT:
741  hasResultRTEs = true;
742  break;
743  default:
744  /* No work here for other RTE types */
745  break;
746  }
747 
748  if (rte->lateral)
749  root->hasLateralRTEs = true;
750 
751  /*
752  * We can also determine the maximum security level required for any
753  * securityQuals now. Addition of inheritance-child RTEs won't affect
754  * this, because child tables don't have their own securityQuals; see
755  * expand_single_inheritance_child().
756  */
757  if (rte->securityQuals)
758 			root->qual_security_level = Max(root->qual_security_level,
759 											list_length(rte->securityQuals));
760  }
761 
762  /*
763  * If we have now verified that the query target relation is
764  * non-inheriting, mark it as a leaf target.
765  */
766  if (parse->resultRelation)
767  {
768  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
769 
770  if (!rte->inh)
771  root->leaf_result_relids =
772  bms_make_singleton(parse->resultRelation);
773  }
774 
775  /*
776  * Preprocess RowMark information. We need to do this after subquery
777  * pullup, so that all base relations are present.
778  */
779  preprocess_rowmarks(root);
780 
781  /*
782  * Set hasHavingQual to remember if HAVING clause is present. Needed
783  * because preprocess_expression will reduce a constant-true condition to
784  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
785  */
786  root->hasHavingQual = (parse->havingQual != NULL);
787 
788  /*
789  * Do expression preprocessing on targetlist and quals, as well as other
790  * random expressions in the querytree. Note that we do not need to
791  * handle sort/group expressions explicitly, because they are actually
792  * part of the targetlist.
793  */
794  parse->targetList = (List *)
795  preprocess_expression(root, (Node *) parse->targetList,
796 							  EXPRKIND_TARGET);
797 
798  /* Constant-folding might have removed all set-returning functions */
799  if (parse->hasTargetSRFs)
800  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
801 
802  newWithCheckOptions = NIL;
803  foreach(l, parse->withCheckOptions)
804  {
805 		WithCheckOption *wco = lfirst_node(WithCheckOption, l);
806 
807  wco->qual = preprocess_expression(root, wco->qual,
808  EXPRKIND_QUAL);
809  if (wco->qual != NULL)
810  newWithCheckOptions = lappend(newWithCheckOptions, wco);
811  }
812  parse->withCheckOptions = newWithCheckOptions;
813 
814  parse->returningList = (List *)
815  preprocess_expression(root, (Node *) parse->returningList,
816 							  EXPRKIND_TARGET);
817 
818  preprocess_qual_conditions(root, (Node *) parse->jointree);
819 
820  parse->havingQual = preprocess_expression(root, parse->havingQual,
821  EXPRKIND_QUAL);
822 
823  foreach(l, parse->windowClause)
824  {
825 		WindowClause *wc = lfirst_node(WindowClause, l);
826 
827 		/* partitionClause/orderClause are sort/group expressions */
828 		wc->startOffset = preprocess_expression(root, wc->startOffset,
829 												EXPRKIND_LIMIT);
830 		wc->endOffset = preprocess_expression(root, wc->endOffset,
831 											  EXPRKIND_LIMIT);
832 	}
833 
834 	parse->limitOffset = preprocess_expression(root, parse->limitOffset,
835 											   EXPRKIND_LIMIT);
836 	parse->limitCount = preprocess_expression(root, parse->limitCount,
837 											  EXPRKIND_LIMIT);
838 
839  if (parse->onConflict)
840  {
841 		parse->onConflict->arbiterElems = (List *)
842 			preprocess_expression(root,
843 								  (Node *) parse->onConflict->arbiterElems,
844 								  EXPRKIND_ARBITER_ELEM);
845 		parse->onConflict->arbiterWhere =
846 			preprocess_expression(root,
847 								  parse->onConflict->arbiterWhere,
848 								  EXPRKIND_QUAL);
849 		parse->onConflict->onConflictSet = (List *)
850 			preprocess_expression(root,
851 								  (Node *) parse->onConflict->onConflictSet,
852 								  EXPRKIND_TARGET);
853 		parse->onConflict->onConflictWhere =
854 			preprocess_expression(root,
855 								  parse->onConflict->onConflictWhere,
856 								  EXPRKIND_QUAL);
857  /* exclRelTlist contains only Vars, so no preprocessing needed */
858  }
859 
860  foreach(l, parse->mergeActionList)
861  {
862 		MergeAction *action = (MergeAction *) lfirst(l);
863 
864 		action->targetList = (List *)
865 			preprocess_expression(root,
866 								  (Node *) action->targetList,
867 								  EXPRKIND_TARGET);
868 		action->qual =
869 			preprocess_expression(root,
870 								  (Node *) action->qual,
871 								  EXPRKIND_QUAL);
872  }
873 
874  root->append_rel_list = (List *)
875 		preprocess_expression(root, (Node *) root->append_rel_list,
876 							  EXPRKIND_APPINFO);
877 
878  /* Also need to preprocess expressions within RTEs */
879  foreach(l, parse->rtable)
880  {
881 		RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
882 		int			kind;
883  ListCell *lcsq;
884 
885  if (rte->rtekind == RTE_RELATION)
886  {
887  if (rte->tablesample)
888  rte->tablesample = (TableSampleClause *)
889 					preprocess_expression(root,
890 										  (Node *) rte->tablesample,
891 										  EXPRKIND_TABLESAMPLE);
892 		}
893  else if (rte->rtekind == RTE_SUBQUERY)
894  {
895  /*
896  * We don't want to do all preprocessing yet on the subquery's
897  * expressions, since that will happen when we plan it. But if it
898  * contains any join aliases of our level, those have to get
899  * expanded now, because planning of the subquery won't do it.
900  * That's only possible if the subquery is LATERAL.
901  */
902  if (rte->lateral && root->hasJoinRTEs)
903  rte->subquery = (Query *)
904 					flatten_join_alias_vars(root->parse,
905 											(Node *) rte->subquery);
906  }
907  else if (rte->rtekind == RTE_FUNCTION)
908  {
909  /* Preprocess the function expression(s) fully */
910 			kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
911 			rte->functions = (List *)
912  preprocess_expression(root, (Node *) rte->functions, kind);
913  }
914  else if (rte->rtekind == RTE_TABLEFUNC)
915  {
916  /* Preprocess the function expression(s) fully */
917 			kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
918 			rte->tablefunc = (TableFunc *)
919  preprocess_expression(root, (Node *) rte->tablefunc, kind);
920  }
921  else if (rte->rtekind == RTE_VALUES)
922  {
923  /* Preprocess the values lists fully */
924 			kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
925 			rte->values_lists = (List *)
926  preprocess_expression(root, (Node *) rte->values_lists, kind);
927  }
928 
929  /*
930  * Process each element of the securityQuals list as if it were a
931  * separate qual expression (as indeed it is). We need to do it this
932  * way to get proper canonicalization of AND/OR structure. Note that
933  * this converts each element into an implicit-AND sublist.
934  */
935  foreach(lcsq, rte->securityQuals)
936  {
937  lfirst(lcsq) = preprocess_expression(root,
938  (Node *) lfirst(lcsq),
939  EXPRKIND_QUAL);
940  }
941  }
942 
943  /*
944  * Now that we are done preprocessing expressions, and in particular done
945  * flattening join alias variables, get rid of the joinaliasvars lists.
946  * They no longer match what expressions in the rest of the tree look
947  * like, because we have not preprocessed expressions in those lists (and
948  * do not want to; for example, expanding a SubLink there would result in
949  * a useless unreferenced subplan). Leaving them in place simply creates
950  * a hazard for later scans of the tree. We could try to prevent that by
951  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
952  * but that doesn't sound very reliable.
953  */
954  if (root->hasJoinRTEs)
955  {
956  foreach(l, parse->rtable)
957  {
958 			RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
959 
960  rte->joinaliasvars = NIL;
961  }
962  }
963 
964  /*
965  * In some cases we may want to transfer a HAVING clause into WHERE. We
966  * cannot do so if the HAVING clause contains aggregates (obviously) or
967  * volatile functions (since a HAVING clause is supposed to be executed
968  * only once per group). We also can't do this if there are any nonempty
969  * grouping sets; moving such a clause into WHERE would potentially change
970  * the results, if any referenced column isn't present in all the grouping
971  * sets. (If there are only empty grouping sets, then the HAVING clause
972  * must be degenerate as discussed below.)
973  *
974  * Also, it may be that the clause is so expensive to execute that we're
975  * better off doing it only once per group, despite the loss of
976  * selectivity. This is hard to estimate short of doing the entire
977  * planning process twice, so we use a heuristic: clauses containing
978  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
979  * clause into WHERE, in hopes of eliminating tuples before aggregation
980  * instead of after.
981  *
982  * If the query has explicit grouping then we can simply move such a
983  * clause into WHERE; any group that fails the clause will not be in the
984  * output because none of its tuples will reach the grouping or
985  * aggregation stage. Otherwise we must have a degenerate (variable-free)
986  * HAVING clause, which we put in WHERE so that query_planner() can use it
987  * in a gating Result node, but also keep in HAVING to ensure that we
988  * don't emit a bogus aggregated row. (This could be done better, but it
989  * seems not worth optimizing.)
990  *
991  * Note that both havingQual and parse->jointree->quals are in
992  * implicitly-ANDed-list form at this point, even though they are declared
993  * as Node *.
994  */
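	/*
	 * Illustrative example of the rules above: given
	 *     SELECT a, sum(b) FROM t GROUP BY a HAVING a > 0 AND sum(b) > 10
	 * the condition "a > 0" contains no aggregates, volatile functions, or
	 * subplans and the query has a plain GROUP BY, so it can be moved into
	 * WHERE; "sum(b) > 10" references an aggregate and must stay in HAVING.
	 */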
995  newHaving = NIL;
996  foreach(l, (List *) parse->havingQual)
997  {
998  Node *havingclause = (Node *) lfirst(l);
999 
1000  if ((parse->groupClause && parse->groupingSets) ||
1001  contain_agg_clause(havingclause) ||
1002  contain_volatile_functions(havingclause) ||
1003  contain_subplans(havingclause))
1004  {
1005  /* keep it in HAVING */
1006  newHaving = lappend(newHaving, havingclause);
1007  }
1008  else if (parse->groupClause && !parse->groupingSets)
1009  {
1010  /* move it to WHERE */
1011  parse->jointree->quals = (Node *)
1012  lappend((List *) parse->jointree->quals, havingclause);
1013  }
1014  else
1015  {
1016  /* put a copy in WHERE, keep it in HAVING */
1017  parse->jointree->quals = (Node *)
1018  lappend((List *) parse->jointree->quals,
1019  copyObject(havingclause));
1020  newHaving = lappend(newHaving, havingclause);
1021  }
1022  }
1023  parse->havingQual = (Node *) newHaving;
1024 
1025  /* Remove any redundant GROUP BY columns */
1026 	remove_useless_groupby_columns(root);
1027 
1028  /*
1029  * If we have any outer joins, try to reduce them to plain inner joins.
1030  * This step is most easily done after we've done expression
1031  * preprocessing.
1032  */
1033  if (hasOuterJoins)
1034  reduce_outer_joins(root);
1035 
1036  /*
1037  * If we have any RTE_RESULT relations, see if they can be deleted from
1038  * the jointree. This step is most effectively done after we've done
1039  * expression preprocessing and outer join reduction.
1040  */
1041  if (hasResultRTEs)
1042 		remove_useless_result_rtes(root);
1043 
1044  /*
1045  * Do the main planning.
1046  */
1047  grouping_planner(root, tuple_fraction);
1048 
1049  /*
1050  * Capture the set of outer-level param IDs we have access to, for use in
1051  * extParam/allParam calculations later.
1052  */
1053 	SS_identify_outer_params(root);
1054 
1055  /*
1056  * If any initPlans were created in this query level, adjust the surviving
1057  * Paths' costs and parallel-safety flags to account for them. The
1058  * initPlans won't actually get attached to the plan tree till
1059  * create_plan() runs, but we must include their effects now.
1060  */
1061  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1062  SS_charge_for_initplans(root, final_rel);
1063 
1064  /*
1065  * Make sure we've identified the cheapest Path for the final rel. (By
1066  * doing this here not in grouping_planner, we include initPlan costs in
1067  * the decision, though it's unlikely that will change anything.)
1068  */
1069  set_cheapest(final_rel);
1070 
1071  return root;
1072 }
1073 
1074 /*
1075  * preprocess_expression
1076  * Do subquery_planner's preprocessing work for an expression,
1077  * which can be a targetlist, a WHERE clause (including JOIN/ON
1078  * conditions), a HAVING clause, or a few other things.
1079  */
1080 static Node *
1081 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1082 {
1083  /*
1084  * Fall out quickly if expression is empty. This occurs often enough to
1085  * be worth checking. Note that null->null is the correct conversion for
1086  * implicit-AND result format, too.
1087  */
1088  if (expr == NULL)
1089  return NULL;
1090 
1091  /*
1092  * If the query has any join RTEs, replace join alias variables with
1093  * base-relation variables. We must do this first, since any expressions
1094  * we may extract from the joinaliasvars lists have not been preprocessed.
1095  * For example, if we did this after sublink processing, sublinks expanded
1096  * out from join aliases would not get processed. But we can skip this in
1097  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1098  * they can't contain any Vars of the current query level.
1099  */
1100  if (root->hasJoinRTEs &&
1101  !(kind == EXPRKIND_RTFUNC ||
1102  kind == EXPRKIND_VALUES ||
1103  kind == EXPRKIND_TABLESAMPLE ||
1104  kind == EXPRKIND_TABLEFUNC))
1105  expr = flatten_join_alias_vars(root->parse, expr);
1106 
1107  /*
1108  * Simplify constant expressions. For function RTEs, this was already
1109  * done by preprocess_function_rtes. (But note we must do it again for
1110  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1111  * un-simplified subexpressions inserted by flattening of subqueries or
1112  * join alias variables.)
1113  *
1114  * Note: an essential effect of this is to convert named-argument function
1115  * calls to positional notation and insert the current actual values of
1116  * any default arguments for functions. To ensure that happens, we *must*
1117  * process all expressions here. Previous PG versions sometimes skipped
1118  * const-simplification if it didn't seem worth the trouble, but we can't
1119  * do that anymore.
1120  *
1121  * Note: this also flattens nested AND and OR expressions into N-argument
1122  * form. All processing of a qual expression after this point must be
1123  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1124  * with AND directly under AND, nor OR directly under OR.
1125  */
1126  if (kind != EXPRKIND_RTFUNC)
1127  expr = eval_const_expressions(root, expr);
1128 
1129  /*
1130  * If it's a qual or havingQual, canonicalize it.
1131  */
1132  if (kind == EXPRKIND_QUAL)
1133  {
1134  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1135 
1136 #ifdef OPTIMIZER_DEBUG
1137  printf("After canonicalize_qual()\n");
1138  pprint(expr);
1139 #endif
1140  }
1141 
1142  /*
1143  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1144  * hashfuncid of any that might execute more quickly by using hash lookups
1145  * instead of a linear search.
1146  */
1147  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1148  {
1149 		convert_saop_to_hashed_saop(expr);
1150 	}
1151 
1152  /* Expand SubLinks to SubPlans */
1153  if (root->parse->hasSubLinks)
1154  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1155 
1156  /*
1157  * XXX do not insert anything here unless you have grokked the comments in
1158  * SS_replace_correlation_vars ...
1159  */
1160 
1161  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1162  if (root->query_level > 1)
1163  expr = SS_replace_correlation_vars(root, expr);
1164 
1165  /*
1166  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1167  * don't want to do this before eval_const_expressions, since the latter
1168  * would be unable to simplify a top-level AND correctly. Also,
1169  * SS_process_sublinks expects explicit-AND format.)
1170  */
1171  if (kind == EXPRKIND_QUAL)
1172  expr = (Node *) make_ands_implicit((Expr *) expr);
1173 
1174  return expr;
1175 }
1176 
1177 /*
1178  * preprocess_qual_conditions
1179  * Recursively scan the query's jointree and do subquery_planner's
1180  * preprocessing work on each qual condition found therein.
1181  */
1182 static void
1183 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1184 {
1185  if (jtnode == NULL)
1186  return;
1187  if (IsA(jtnode, RangeTblRef))
1188  {
1189  /* nothing to do here */
1190  }
1191  else if (IsA(jtnode, FromExpr))
1192  {
1193  FromExpr *f = (FromExpr *) jtnode;
1194  ListCell *l;
1195 
1196  foreach(l, f->fromlist)
1197 			preprocess_qual_conditions(root, (Node *) lfirst(l));
1198 
1199 		f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1200 	}
1201  else if (IsA(jtnode, JoinExpr))
1202  {
1203  JoinExpr *j = (JoinExpr *) jtnode;
1204 
1205  preprocess_qual_conditions(root, j->larg);
1206  preprocess_qual_conditions(root, j->rarg);
1207 
1208  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1209  }
1210  else
1211  elog(ERROR, "unrecognized node type: %d",
1212  (int) nodeTag(jtnode));
1213 }
1214 
1215 /*
1216  * preprocess_phv_expression
1217  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1218  *
1219  * If a LATERAL subquery references an output of another subquery, and that
1220  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1221  * join, then we'll push the PlaceHolderVar expression down into the subquery
1222  * and later pull it back up during find_lateral_references, which runs after
1223  * subquery_planner has preprocessed all the expressions that were in the
1224  * current query level to start with. So we need to preprocess it then.
1225  */
1226 Expr *
1227 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1228 {
1229  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1230 }
1231 
1232 /*--------------------
1233  * grouping_planner
1234  * Perform planning steps related to grouping, aggregation, etc.
1235  *
1236  * This function adds all required top-level processing to the scan/join
1237  * Path(s) produced by query_planner.
1238  *
1239  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1240  * tuple_fraction is interpreted as follows:
1241  * 0: expect all tuples to be retrieved (normal case)
1242  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1243  * from the plan to be retrieved
1244  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1245  * expected to be retrieved (ie, a LIMIT specification)
1246  *
1247  * Returns nothing; the useful output is in the Paths we attach to the
1248  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1249  * root->processed_tlist contains the final processed targetlist.
1250  *
1251  * Note that we have not done set_cheapest() on the final rel; it's convenient
1252  * to leave this to the caller.
1253  *--------------------
1254  */
1255 static void
1256 grouping_planner(PlannerInfo *root, double tuple_fraction)
1257 {
1258  Query *parse = root->parse;
1259  int64 offset_est = 0;
1260  int64 count_est = 0;
1261  double limit_tuples = -1.0;
1262  bool have_postponed_srfs = false;
1263  PathTarget *final_target;
1264  List *final_targets;
1265  List *final_targets_contain_srfs;
1266  bool final_target_parallel_safe;
1267  RelOptInfo *current_rel;
1268  RelOptInfo *final_rel;
1269  FinalPathExtraData extra;
1270  ListCell *lc;
1271 
1272  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1273  if (parse->limitCount || parse->limitOffset)
1274  {
1275  tuple_fraction = preprocess_limit(root, tuple_fraction,
1276  &offset_est, &count_est);
1277 
1278  /*
1279  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1280  * estimate the effects of using a bounded sort.
1281  */
1282  if (count_est > 0 && offset_est >= 0)
1283  limit_tuples = (double) count_est + (double) offset_est;
1284  }
1285 
1286  /* Make tuple_fraction accessible to lower-level routines */
1287  root->tuple_fraction = tuple_fraction;
1288 
1289  if (parse->setOperations)
1290  {
1291  /*
1292  * If there's a top-level ORDER BY, assume we have to fetch all the
1293  * tuples. This might be too simplistic given all the hackery below
1294  * to possibly avoid the sort; but the odds of accurate estimates here
1295  * are pretty low anyway. XXX try to get rid of this in favor of
1296  * letting plan_set_operations generate both fast-start and
1297  * cheapest-total paths.
1298  */
1299  if (parse->sortClause)
1300  root->tuple_fraction = 0.0;
1301 
1302  /*
1303  * Construct Paths for set operations. The results will not need any
1304  * work except perhaps a top-level sort and/or LIMIT. Note that any
1305  * special work for recursive unions is the responsibility of
1306  * plan_set_operations.
1307  */
1308  current_rel = plan_set_operations(root);
1309 
1310  /*
1311  * We should not need to call preprocess_targetlist, since we must be
1312  * in a SELECT query node. Instead, use the processed_tlist returned
1313  * by plan_set_operations (since this tells whether it returned any
1314  * resjunk columns!), and transfer any sort key information from the
1315  * original tlist.
1316  */
1317  Assert(parse->commandType == CMD_SELECT);
1318 
1319  /* for safety, copy processed_tlist instead of modifying in-place */
1320  root->processed_tlist =
1321 		postprocess_setop_tlist(copyObject(current_rel->reltarget->exprs),
1322 								parse->targetList);
1323 
1324  /* Also extract the PathTarget form of the setop result tlist */
1325  final_target = current_rel->cheapest_total_path->pathtarget;
1326 
1327  /* And check whether it's parallel safe */
1328  final_target_parallel_safe =
1329  is_parallel_safe(root, (Node *) final_target->exprs);
1330 
1331  /* The setop result tlist couldn't contain any SRFs */
1332  Assert(!parse->hasTargetSRFs);
1333  final_targets = final_targets_contain_srfs = NIL;
1334 
1335  /*
1336  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1337  * checked already, but let's make sure).
1338  */
1339  if (parse->rowMarks)
1340  ereport(ERROR,
1341  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1342  /*------
1343  translator: %s is a SQL row locking clause such as FOR UPDATE */
1344  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1345 					   LCS_asString(linitial_node(RowMarkClause,
1346 												  parse->rowMarks)->strength))));
1347 
1348  /*
1349  * Calculate pathkeys that represent result ordering requirements
1350  */
1351  Assert(parse->distinctClause == NIL);
1352 		root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1353 															parse->sortClause,
1354  root->processed_tlist);
1355  }
1356  else
1357  {
1358  /* No set operations, do regular planning */
1359  PathTarget *sort_input_target;
1360  List *sort_input_targets;
1361  List *sort_input_targets_contain_srfs;
1362  bool sort_input_target_parallel_safe;
1363  PathTarget *grouping_target;
1364  List *grouping_targets;
1365  List *grouping_targets_contain_srfs;
1366  bool grouping_target_parallel_safe;
1367  PathTarget *scanjoin_target;
1368  List *scanjoin_targets;
1369  List *scanjoin_targets_contain_srfs;
1370  bool scanjoin_target_parallel_safe;
1371  bool scanjoin_target_same_exprs;
1372  bool have_grouping;
1373  WindowFuncLists *wflists = NULL;
1374  List *activeWindows = NIL;
1375  grouping_sets_data *gset_data = NULL;
1376  standard_qp_extra qp_extra;
1377 
1378  /* A recursive query should always have setOperations */
1379  Assert(!root->hasRecursion);
1380 
1381  /* Preprocess grouping sets and GROUP BY clause, if any */
1382  if (parse->groupingSets)
1383  {
1384  gset_data = preprocess_grouping_sets(root);
1385  }
1386  else
1387  {
1388  /* Preprocess regular GROUP BY clause, if any */
1389  if (parse->groupClause)
1390  parse->groupClause = preprocess_groupclause(root, NIL);
1391  }
1392 
1393  /*
1394  * Preprocess targetlist. Note that much of the remaining planning
1395  * work will be done with the PathTarget representation of tlists, but
1396  * we must also maintain the full representation of the final tlist so
1397  * that we can transfer its decoration (resnames etc) to the topmost
1398  * tlist of the finished Plan. This is kept in processed_tlist.
1399  */
1400  preprocess_targetlist(root);
1401 
1402  /*
1403  * Mark all the aggregates with resolved aggtranstypes, and detect
1404  * aggregates that are duplicates or can share transition state. We
1405  * must do this before slicing and dicing the tlist into various
1406  * pathtargets, else some copies of the Aggref nodes might escape
1407  * being marked.
1408  */
1409  if (parse->hasAggs)
1410  {
1411  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1412  preprocess_aggrefs(root, (Node *) parse->havingQual);
1413  }
1414 
1415  /*
1416  * Locate any window functions in the tlist. (We don't need to look
1417  * anywhere else, since expressions used in ORDER BY will be in there
1418  * too.) Note that they could all have been eliminated by constant
1419  * folding, in which case we don't need to do any more work.
1420  */
1421  if (parse->hasWindowFuncs)
1422  {
1423  wflists = find_window_functions((Node *) root->processed_tlist,
1424  list_length(parse->windowClause));
1425  if (wflists->numWindowFuncs > 0)
1426  activeWindows = select_active_windows(root, wflists);
1427  else
1428  parse->hasWindowFuncs = false;
1429  }
1430 
1431  /*
1432  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1433  * adding logic between here and the query_planner() call. Anything
1434  * that is needed in MIN/MAX-optimizable cases will have to be
1435  * duplicated in planagg.c.
1436  */
1437  if (parse->hasAggs)
1438 		preprocess_minmax_aggregates(root);
1439 
1440  /*
1441  * Figure out whether there's a hard limit on the number of rows that
1442  * query_planner's result subplan needs to return. Even if we know a
1443  * hard limit overall, it doesn't apply if the query has any
1444  * grouping/aggregation operations, or SRFs in the tlist.
1445  */
1446  if (parse->groupClause ||
1447  parse->groupingSets ||
1448  parse->distinctClause ||
1449  parse->hasAggs ||
1450  parse->hasWindowFuncs ||
1451  parse->hasTargetSRFs ||
1452  root->hasHavingQual)
1453  root->limit_tuples = -1.0;
1454  else
1455  root->limit_tuples = limit_tuples;
1456 
1457  /* Set up data needed by standard_qp_callback */
1458  qp_extra.activeWindows = activeWindows;
1459  qp_extra.groupClause = (gset_data
1460  ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
1461  : parse->groupClause);
1462 
1463  /*
1464  * Generate the best unsorted and presorted paths for the scan/join
1465  * portion of this Query, ie the processing represented by the
1466  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1467  * We also generate (in standard_qp_callback) pathkey representations
1468  * of the query's sort clause, distinct clause, etc.
1469  */
1470  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1471 
1472  /*
1473  * Convert the query's result tlist into PathTarget format.
1474  *
1475  * Note: this cannot be done before query_planner() has performed
1476  * appendrel expansion, because that might add resjunk entries to
1477  * root->processed_tlist. Waiting till afterwards is also helpful
1478  * because the target width estimates can use per-Var width numbers
1479  * that were obtained within query_planner().
1480  */
1481  final_target = create_pathtarget(root, root->processed_tlist);
1482  final_target_parallel_safe =
1483  is_parallel_safe(root, (Node *) final_target->exprs);
1484 
1485  /*
1486  * If ORDER BY was given, consider whether we should use a post-sort
1487  * projection, and compute the adjusted target for preceding steps if
1488  * so.
1489  */
1490  if (parse->sortClause)
1491  {
1492  sort_input_target = make_sort_input_target(root,
1493  final_target,
1494  &have_postponed_srfs);
1495  sort_input_target_parallel_safe =
1496  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1497  }
1498  else
1499  {
1500  sort_input_target = final_target;
1501  sort_input_target_parallel_safe = final_target_parallel_safe;
1502  }
1503 
1504  /*
1505  * If we have window functions to deal with, the output from any
1506  * grouping step needs to be what the window functions want;
1507  * otherwise, it should be sort_input_target.
1508  */
1509  if (activeWindows)
1510  {
1511  grouping_target = make_window_input_target(root,
1512  final_target,
1513  activeWindows);
1514  grouping_target_parallel_safe =
1515  is_parallel_safe(root, (Node *) grouping_target->exprs);
1516  }
1517  else
1518  {
1519  grouping_target = sort_input_target;
1520  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1521  }
1522 
1523  /*
1524  * If we have grouping or aggregation to do, the topmost scan/join
1525  * plan node must emit what the grouping step wants; otherwise, it
1526  * should emit grouping_target.
1527  */
1528  have_grouping = (parse->groupClause || parse->groupingSets ||
1529  parse->hasAggs || root->hasHavingQual);
1530  if (have_grouping)
1531  {
1532  scanjoin_target = make_group_input_target(root, final_target);
1533  scanjoin_target_parallel_safe =
1534  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1535  }
1536  else
1537  {
1538  scanjoin_target = grouping_target;
1539  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1540  }
1541 
1542  /*
1543  * If there are any SRFs in the targetlist, we must separate each of
1544  * these PathTargets into SRF-computing and SRF-free targets. Replace
1545  * each of the named targets with a SRF-free version, and remember the
1546  * list of additional projection steps we need to add afterwards.
1547  */
1548  if (parse->hasTargetSRFs)
1549  {
1550  /* final_target doesn't recompute any SRFs in sort_input_target */
1551  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1552  &final_targets,
1553  &final_targets_contain_srfs);
1554  final_target = linitial_node(PathTarget, final_targets);
1555  Assert(!linitial_int(final_targets_contain_srfs));
1556  /* likewise for sort_input_target vs. grouping_target */
1557  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1558  &sort_input_targets,
1559  &sort_input_targets_contain_srfs);
1560  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1561  Assert(!linitial_int(sort_input_targets_contain_srfs));
1562  /* likewise for grouping_target vs. scanjoin_target */
1563  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1564  &grouping_targets,
1565  &grouping_targets_contain_srfs);
1566  grouping_target = linitial_node(PathTarget, grouping_targets);
1567  Assert(!linitial_int(grouping_targets_contain_srfs));
1568  /* scanjoin_target will not have any SRFs precomputed for it */
1569  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1570  &scanjoin_targets,
1571  &scanjoin_targets_contain_srfs);
1572  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1573  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1574  }
1575  else
1576  {
1577  /* initialize lists; for most of these, dummy values are OK */
1578  final_targets = final_targets_contain_srfs = NIL;
1579  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1580  grouping_targets = grouping_targets_contain_srfs = NIL;
1581  scanjoin_targets = list_make1(scanjoin_target);
1582  scanjoin_targets_contain_srfs = NIL;
1583  }
1584 
1585  /* Apply scan/join target. */
1586  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1587  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1588  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1589  scanjoin_targets_contain_srfs,
1590  scanjoin_target_parallel_safe,
1591  scanjoin_target_same_exprs);
1592 
1593  /*
1594  * Save the various upper-rel PathTargets we just computed into
1595  * root->upper_targets[]. The core code doesn't use this, but it
1596  * provides a convenient place for extensions to get at the info. For
1597  * consistency, we save all the intermediate targets, even though some
1598  * of the corresponding upperrels might not be needed for this query.
1599  */
1600  root->upper_targets[UPPERREL_FINAL] = final_target;
1601  root->upper_targets[UPPERREL_ORDERED] = final_target;
1602  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1603  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1604  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1605  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1606 
1607  /*
1608  * If we have grouping and/or aggregation, consider ways to implement
1609  * that. We build a new upperrel representing the output of this
1610  * phase.
1611  */
1612  if (have_grouping)
1613  {
1614  current_rel = create_grouping_paths(root,
1615  current_rel,
1616  grouping_target,
1617  grouping_target_parallel_safe,
1618  gset_data);
1619  /* Fix things up if grouping_target contains SRFs */
1620  if (parse->hasTargetSRFs)
1621  adjust_paths_for_srfs(root, current_rel,
1622  grouping_targets,
1623  grouping_targets_contain_srfs);
1624  }
1625 
1626  /*
1627  * If we have window functions, consider ways to implement those. We
1628  * build a new upperrel representing the output of this phase.
1629  */
1630  if (activeWindows)
1631  {
1632  current_rel = create_window_paths(root,
1633  current_rel,
1634  grouping_target,
1635  sort_input_target,
1636  sort_input_target_parallel_safe,
1637  wflists,
1638  activeWindows);
1639  /* Fix things up if sort_input_target contains SRFs */
1640  if (parse->hasTargetSRFs)
1641  adjust_paths_for_srfs(root, current_rel,
1642  sort_input_targets,
1643  sort_input_targets_contain_srfs);
1644  }
1645 
1646  /*
1647  * If there is a DISTINCT clause, consider ways to implement that. We
1648  * build a new upperrel representing the output of this phase.
1649  */
1650  if (parse->distinctClause)
1651  {
1652  current_rel = create_distinct_paths(root,
1653  current_rel);
1654  }
1655  } /* end of if (setOperations) */
1656 
1657  /*
1658  * If ORDER BY was given, consider ways to implement that, and generate a
1659  * new upperrel containing only paths that emit the correct ordering and
1660  * project the correct final_target. We can apply the original
1661  * limit_tuples limit in sort costing here, but only if there are no
1662  * postponed SRFs.
1663  */
1664  if (parse->sortClause)
1665  {
1666  current_rel = create_ordered_paths(root,
1667  current_rel,
1668  final_target,
1669  final_target_parallel_safe,
1670  have_postponed_srfs ? -1.0 :
1671  limit_tuples);
1672  /* Fix things up if final_target contains SRFs */
1673  if (parse->hasTargetSRFs)
1674  adjust_paths_for_srfs(root, current_rel,
1675  final_targets,
1676  final_targets_contain_srfs);
1677  }
1678 
1679  /*
1680  * Now we are prepared to build the final-output upperrel.
1681  */
1682  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1683 
1684  /*
1685  * If the input rel is marked consider_parallel and there's nothing that's
1686  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1687  * consider_parallel as well. Note that if the query has rowMarks or is
1688  * not a SELECT, consider_parallel will be false for every relation in the
1689  * query.
1690  */
1691  if (current_rel->consider_parallel &&
1692  is_parallel_safe(root, parse->limitOffset) &&
1693  is_parallel_safe(root, parse->limitCount))
1694  final_rel->consider_parallel = true;
1695 
1696  /*
1697  * If the current_rel belongs to a single FDW, so does the final_rel.
1698  */
1699  final_rel->serverid = current_rel->serverid;
1700  final_rel->userid = current_rel->userid;
1701  final_rel->useridiscurrent = current_rel->useridiscurrent;
1702  final_rel->fdwroutine = current_rel->fdwroutine;
1703 
1704  /*
1705  * Generate paths for the final_rel. Insert all surviving paths, with
1706  * LockRows, Limit, and/or ModifyTable steps added if needed.
1707  */
1708  foreach(lc, current_rel->pathlist)
1709  {
1710  Path *path = (Path *) lfirst(lc);
1711 
1712  /*
1713  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1714  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1715  * here. If there are only non-locking rowmarks, they should be
1716  * handled by the ModifyTable node instead. However, root->rowMarks
1717  * is what goes into the LockRows node.)
1718  */
1719  if (parse->rowMarks)
1720  {
1721  path = (Path *) create_lockrows_path(root, final_rel, path,
1722  root->rowMarks,
1723  assign_special_exec_param(root));
1724  }
1725 
1726  /*
1727  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1728  */
1729  if (limit_needed(parse))
1730  {
1731  path = (Path *) create_limit_path(root, final_rel, path,
1732  parse->limitOffset,
1733  parse->limitCount,
1734  parse->limitOption,
1735  offset_est, count_est);
1736  }
1737 
1738  /*
1739  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1740  */
1741  if (parse->commandType != CMD_SELECT)
1742  {
1743  Index rootRelation;
1744  List *resultRelations = NIL;
1745  List *updateColnosLists = NIL;
1746  List *withCheckOptionLists = NIL;
1747  List *returningLists = NIL;
1748  List *mergeActionLists = NIL;
1749  List *rowMarks;
1750 
1751  if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1752  {
1753  /* Inherited UPDATE/DELETE/MERGE */
1754  RelOptInfo *top_result_rel = find_base_rel(root,
1755  parse->resultRelation);
1756  int resultRelation = -1;
1757 
1758  /* Add only leaf children to ModifyTable. */
1759  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1760  resultRelation)) >= 0)
1761  {
1762  RelOptInfo *this_result_rel = find_base_rel(root,
1763  resultRelation);
1764 
1765  /*
1766  * Also exclude any leaf rels that have turned dummy since
1767  * being added to the list, for example, by being excluded
1768  * by constraint exclusion.
1769  */
1770  if (IS_DUMMY_REL(this_result_rel))
1771  continue;
1772 
1773  /* Build per-target-rel lists needed by ModifyTable */
1774  resultRelations = lappend_int(resultRelations,
1775  resultRelation);
1776  if (parse->commandType == CMD_UPDATE)
1777  {
1778  List *update_colnos = root->update_colnos;
1779 
1780  if (this_result_rel != top_result_rel)
1781  update_colnos =
1782  adjust_inherited_attnums_multilevel(root,
1783  update_colnos,
1784  this_result_rel->relid,
1785  top_result_rel->relid);
1786  updateColnosLists = lappend(updateColnosLists,
1787  update_colnos);
1788  }
1789  if (parse->withCheckOptions)
1790  {
1791  List *withCheckOptions = parse->withCheckOptions;
1792 
1793  if (this_result_rel != top_result_rel)
1794  withCheckOptions = (List *)
1795  adjust_appendrel_attrs_multilevel(root,
1796  (Node *) withCheckOptions,
1797  this_result_rel,
1798  top_result_rel);
1799  withCheckOptionLists = lappend(withCheckOptionLists,
1800  withCheckOptions);
1801  }
1802  if (parse->returningList)
1803  {
1804  List *returningList = parse->returningList;
1805 
1806  if (this_result_rel != top_result_rel)
1807  returningList = (List *)
1808  adjust_appendrel_attrs_multilevel(root,
1809  (Node *) returningList,
1810  this_result_rel,
1811  top_result_rel);
1812  returningLists = lappend(returningLists,
1813  returningList);
1814  }
1815  if (parse->mergeActionList)
1816  {
1817  ListCell *l;
1818  List *mergeActionList = NIL;
1819 
1820  /*
1821  * Copy MergeActions and translate stuff that
1822  * references attribute numbers.
1823  */
1824  foreach(l, parse->mergeActionList)
1825  {
1826  MergeAction *action = lfirst(l),
1827  *leaf_action = copyObject(action);
1828 
1829  leaf_action->qual =
1830  adjust_appendrel_attrs_multilevel(root,
1831  (Node *) action->qual,
1832  this_result_rel,
1833  top_result_rel);
1834  leaf_action->targetList = (List *)
1835  adjust_appendrel_attrs_multilevel(root,
1836  (Node *) action->targetList,
1837  this_result_rel,
1838  top_result_rel);
1839  if (leaf_action->commandType == CMD_UPDATE)
1840  leaf_action->updateColnos =
1841  adjust_inherited_attnums_multilevel(root,
1842  action->updateColnos,
1843  this_result_rel->relid,
1844  top_result_rel->relid);
1845  mergeActionList = lappend(mergeActionList,
1846  leaf_action);
1847  }
1848 
1849  mergeActionLists = lappend(mergeActionLists,
1850  mergeActionList);
1851  }
1852  }
1853 
1854  if (resultRelations == NIL)
1855  {
1856  /*
1857  * We managed to exclude every child rel, so generate a
1858  * dummy one-relation plan using info for the top target
1859  * rel (even though that may not be a leaf target).
1860  * Although it's clear that no data will be updated or
1861  * deleted, we still need to have a ModifyTable node so
1862  * that any statement triggers will be executed. (This
1863  * could be cleaner if we fixed nodeModifyTable.c to allow
1864  * zero target relations, but that probably wouldn't be a
1865  * net win.)
1866  */
1867  resultRelations = list_make1_int(parse->resultRelation);
1868  if (parse->commandType == CMD_UPDATE)
1869  updateColnosLists = list_make1(root->update_colnos);
1870  if (parse->withCheckOptions)
1871  withCheckOptionLists = list_make1(parse->withCheckOptions);
1872  if (parse->returningList)
1873  returningLists = list_make1(parse->returningList);
1874  if (parse->mergeActionList)
1875  mergeActionLists = list_make1(parse->mergeActionList);
1876  }
1877  }
1878  else
1879  {
1880  /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
1881  resultRelations = list_make1_int(parse->resultRelation);
1882  if (parse->commandType == CMD_UPDATE)
1883  updateColnosLists = list_make1(root->update_colnos);
1884  if (parse->withCheckOptions)
1885  withCheckOptionLists = list_make1(parse->withCheckOptions);
1886  if (parse->returningList)
1887  returningLists = list_make1(parse->returningList);
1888  if (parse->mergeActionList)
1889  mergeActionLists = list_make1(parse->mergeActionList);
1890  }
1891 
1892  /*
1893  * If target is a partition root table, we need to mark the
1894  * ModifyTable node appropriately for that.
1895  */
1896  if (rt_fetch(parse->resultRelation, parse->rtable)->relkind ==
1897  RELKIND_PARTITIONED_TABLE)
1898  rootRelation = parse->resultRelation;
1899  else
1900  rootRelation = 0;
1901 
1902  /*
1903  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1904  * will have dealt with fetching non-locked marked rows, else we
1905  * need to have ModifyTable do that.
1906  */
1907  if (parse->rowMarks)
1908  rowMarks = NIL;
1909  else
1910  rowMarks = root->rowMarks;
1911 
1912  path = (Path *)
1913  create_modifytable_path(root, final_rel,
1914  path,
1915  parse->commandType,
1916  parse->canSetTag,
1917  parse->resultRelation,
1918  rootRelation,
1919  root->partColsUpdated,
1920  resultRelations,
1921  updateColnosLists,
1922  withCheckOptionLists,
1923  returningLists,
1924  rowMarks,
1925  parse->onConflict,
1926  mergeActionLists,
1927  assign_special_exec_param(root));
1928  }
1929 
1930  /* And shove it into final_rel */
1931  add_path(final_rel, path);
1932  }
1933 
1934  /*
1935  * Generate partial paths for final_rel, too, if outer query levels might
1936  * be able to make use of them.
1937  */
1938  if (final_rel->consider_parallel && root->query_level > 1 &&
1939  !limit_needed(parse))
1940  {
1941  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
1942  foreach(lc, current_rel->partial_pathlist)
1943  {
1944  Path *partial_path = (Path *) lfirst(lc);
1945 
1946  add_partial_path(final_rel, partial_path);
1947  }
1948  }
1949 
1950  extra.limit_needed = limit_needed(parse);
1951  extra.limit_tuples = limit_tuples;
1952  extra.count_est = count_est;
1953  extra.offset_est = offset_est;
1954 
1955  /*
1956  * If there is an FDW that's responsible for all baserels of the query,
1957  * let it consider adding ForeignPaths.
1958  */
1959  if (final_rel->fdwroutine &&
1960  final_rel->fdwroutine->GetForeignUpperPaths)
1961  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
1962  current_rel, final_rel,
1963  &extra);
1964 
1965  /* Let extensions possibly add some more paths */
1966  if (create_upper_paths_hook)
1967  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
1968  current_rel, final_rel, &extra);
1969 
1970  /* Note: currently, we leave it to callers to do set_cheapest() */
1971 }
1972 
1973 /*
1974  * Do preprocessing for groupingSets clause and related data. This handles the
1975  * preliminary steps of expanding the grouping sets, organizing them into lists
1976  * of rollups, and preparing annotations which will later be filled in with
1977  * size estimates.
1978  */
1979 static grouping_sets_data *
1980 preprocess_grouping_sets(PlannerInfo *root)
1981 {
1982  Query *parse = root->parse;
1983  List *sets;
1984  int maxref = 0;
1985  ListCell *lc_set;
1986  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
1987 
1988  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
1989 
1990  gd->any_hashable = false;
1991  gd->unhashable_refs = NULL;
1992  gd->unsortable_refs = NULL;
1993  gd->unsortable_sets = NIL;
1994 
1995  if (parse->groupClause)
1996  {
1997  ListCell *lc;
1998 
1999  foreach(lc, parse->groupClause)
2000  {
2001  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2002  Index ref = gc->tleSortGroupRef;
2003 
2004  if (ref > maxref)
2005  maxref = ref;
2006 
2007  if (!gc->hashable)
2008  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2009 
2010  if (!OidIsValid(gc->sortop))
2011  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2012  }
2013  }
2014 
2015  /* Allocate workspace array for remapping */
2016  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2017 
2018  /*
2019  * If we have any unsortable sets, we must extract them before trying to
2020  * prepare rollups. Unsortable sets don't go through
2021  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2022  * here.
2023  */
2024  if (!bms_is_empty(gd->unsortable_refs))
2025  {
2026  List *sortable_sets = NIL;
2027  ListCell *lc;
2028 
2029  foreach(lc, parse->groupingSets)
2030  {
2031  List *gset = (List *) lfirst(lc);
2032 
2033  if (bms_overlap_list(gd->unsortable_refs, gset))
2034  {
2035  GroupingSetData *gs = makeNode(GroupingSetData);
2036 
2037  gs->set = gset;
2038  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2039 
2040  /*
2041  * We must enforce here that an unsortable set is hashable;
2042  * later code assumes this. Parse analysis only checks that
2043  * every individual column is either hashable or sortable.
2044  *
2045  * Note that passing this test doesn't guarantee we can
2046  * generate a plan; there might be other showstoppers.
2047  */
2048  if (bms_overlap_list(gd->unhashable_refs, gset))
2049  ereport(ERROR,
2050  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2051  errmsg("could not implement GROUP BY"),
2052  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2053  }
2054  else
2055  sortable_sets = lappend(sortable_sets, gset);
2056  }
2057 
2058  if (sortable_sets)
2059  sets = extract_rollup_sets(sortable_sets);
2060  else
2061  sets = NIL;
2062  }
2063  else
2064  sets = extract_rollup_sets(parse->groupingSets);
2065 
2066  foreach(lc_set, sets)
2067  {
2068  List *current_sets = (List *) lfirst(lc_set);
2069  RollupData *rollup = makeNode(RollupData);
2070  GroupingSetData *gs;
2071 
2072  /*
2073  * Reorder the current list of grouping sets into correct prefix
2074  * order. If only one aggregation pass is needed, try to make the
2075  * list match the ORDER BY clause; if more than one pass is needed, we
2076  * don't bother with that.
2077  *
2078  * Note that this reorders the sets from smallest-member-first to
2079  * largest-member-first, and applies the GroupingSetData annotations,
2080  * though the data will be filled in later.
2081  */
2082  current_sets = reorder_grouping_sets(current_sets,
2083  (list_length(sets) == 1
2084  ? parse->sortClause
2085  : NIL));
2086 
2087  /*
2088  * Get the initial (and therefore largest) grouping set.
2089  */
2090  gs = linitial_node(GroupingSetData, current_sets);
2091 
2092  /*
2093  * Order the groupClause appropriately. If the first grouping set is
2094  * empty, then the groupClause must also be empty; otherwise we have
2095  * to force the groupClause to match that grouping set's order.
2096  *
2097  * (The first grouping set can be empty even though parse->groupClause
2098  * is not empty only if all non-empty grouping sets are unsortable.
2099  * The groupClauses for hashed grouping sets are built later on.)
2100  */
2101  if (gs->set)
2102  rollup->groupClause = preprocess_groupclause(root, gs->set);
2103  else
2104  rollup->groupClause = NIL;
2105 
2106  /*
2107  * Is it hashable? We pretend empty sets are hashable even though we
2108  * actually force them not to be hashed later. But don't bother if
2109  * there's nothing but empty sets (since in that case we can't hash
2110  * anything).
2111  */
2112  if (gs->set &&
2113  !bms_overlap_list(gd->unhashable_refs, gs->set))
2114  {
2115  rollup->hashable = true;
2116  gd->any_hashable = true;
2117  }
2118 
2119  /*
2120  * Now that we've pinned down an order for the groupClause for this
2121  * list of grouping sets, we need to remap the entries in the grouping
2122  * sets from sortgrouprefs to plain indices (0-based) into the
2123  * groupClause for this collection of grouping sets. We keep the
2124  * original form for later use, though.
2125  */
2126  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2127  current_sets,
2128  gd->tleref_to_colnum_map);
2129  rollup->gsets_data = current_sets;
2130 
2131  gd->rollups = lappend(gd->rollups, rollup);
2132  }
2133 
2134  if (gd->unsortable_sets)
2135  {
2136  /*
2137  * We have not yet pinned down a groupclause for this, but we will
2138  * need index-based lists for estimation purposes. Construct
2139  * hash_sets_idx based on the entire original groupclause for now.
2140  */
2141  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2142  gd->unsortable_sets,
2143  gd->tleref_to_colnum_map);
2144  gd->any_hashable = true;
2145  }
2146 
2147  return gd;
2148 }
2149 
2150 /*
2151  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2152  * (without annotation) mapped to indexes into the given groupclause.
2153  */
2154 static List *
2155 remap_to_groupclause_idx(List *groupClause,
2156  List *gsets,
2157  int *tleref_to_colnum_map)
2158 {
2159  int ref = 0;
2160  List *result = NIL;
2161  ListCell *lc;
2162 
2163  foreach(lc, groupClause)
2164  {
2165  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2166 
2167  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2168  }
2169 
2170  foreach(lc, gsets)
2171  {
2172  List *set = NIL;
2173  ListCell *lc2;
2174  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2175 
2176  foreach(lc2, gs->set)
2177  {
2178  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2179  }
2180 
2181  result = lappend(result, set);
2182  }
2183 
2184  return result;
2185 }
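/*
 * Illustrative standalone sketch (not part of planner.c, and it uses no
 * PostgreSQL APIs): remap_to_groupclause_idx() above converts tleSortGroupRef
 * numbers into 0-based positions within a groupClause.  The toy program below
 * performs the same remapping with plain arrays; the ref values are invented
 * for the example.
 */
#include <stdio.h>

int main(void)
{
    /* groupClause listed in order; each entry is its tleSortGroupRef */
    int group_refs[] = {7, 3, 5};
    int nclauses = 3;
    int tleref_to_colnum_map[8];    /* sized for the largest ref + 1 */

    /* build the ref -> column-index map, exactly like the first loop above */
    for (int i = 0; i < nclauses; i++)
        tleref_to_colnum_map[group_refs[i]] = i;

    /* a grouping set expressed as sortgrouprefs: (5, 7) */
    int gset[] = {5, 7};

    /* remapped, it becomes 0-based indexes into the groupClause: (2, 0) */
    for (int i = 0; i < 2; i++)
        printf("ref %d -> column %d\n", gset[i], tleref_to_colnum_map[gset[i]]);

    return 0;
}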
2186 
2187 
2188 /*
2189  * preprocess_rowmarks - set up PlanRowMarks if needed
2190  */
2191 static void
2192 preprocess_rowmarks(PlannerInfo *root)
2193 {
2194  Query *parse = root->parse;
2195  Bitmapset *rels;
2196  List *prowmarks;
2197  ListCell *l;
2198  int i;
2199 
2200  if (parse->rowMarks)
2201  {
2202  /*
2203  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2204  * grouping, since grouping renders a reference to individual tuple
2205  * CTIDs invalid. This is also checked at parse time, but that's
2206  * insufficient because of rule substitution, query pullup, etc.
2207  */
2208  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2209  parse->rowMarks)->strength);
2210  }
2211  else
2212  {
2213  /*
2214  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2215  * UPDATE/SHARE.
2216  */
2217  if (parse->commandType != CMD_UPDATE &&
2218  parse->commandType != CMD_DELETE)
2219  return;
2220  }
2221 
2222  /*
2223  * We need to have rowmarks for all base relations except the target. We
2224  * make a bitmapset of all base rels and then remove the items we don't
2225  * need or have FOR [KEY] UPDATE/SHARE marks for.
2226  */
2227  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2228  if (parse->resultRelation)
2229  rels = bms_del_member(rels, parse->resultRelation);
2230 
2231  /*
2232  * Convert RowMarkClauses to PlanRowMark representation.
2233  */
2234  prowmarks = NIL;
2235  foreach(l, parse->rowMarks)
2236  {
2237  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2238  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2239  PlanRowMark *newrc;
2240 
2241  /*
2242  * Currently, it is syntactically impossible to have FOR UPDATE et al
2243  * applied to an update/delete target rel. If that ever becomes
2244  * possible, we should drop the target from the PlanRowMark list.
2245  */
2246  Assert(rc->rti != parse->resultRelation);
2247 
2248  /*
2249  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2250  * can't support true locking. Subqueries that got flattened into the
2251  * main query should be ignored completely. Any that didn't will get
2252  * ROW_MARK_COPY items in the next loop.
2253  */
2254  if (rte->rtekind != RTE_RELATION)
2255  continue;
2256 
2257  rels = bms_del_member(rels, rc->rti);
2258 
2259  newrc = makeNode(PlanRowMark);
2260  newrc->rti = newrc->prti = rc->rti;
2261  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2262  newrc->markType = select_rowmark_type(rte, rc->strength);
2263  newrc->allMarkTypes = (1 << newrc->markType);
2264  newrc->strength = rc->strength;
2265  newrc->waitPolicy = rc->waitPolicy;
2266  newrc->isParent = false;
2267 
2268  prowmarks = lappend(prowmarks, newrc);
2269  }
2270 
2271  /*
2272  * Now, add rowmarks for any non-target, non-locked base relations.
2273  */
2274  i = 0;
2275  foreach(l, parse->rtable)
2276  {
2277  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2278  PlanRowMark *newrc;
2279 
2280  i++;
2281  if (!bms_is_member(i, rels))
2282  continue;
2283 
2284  newrc = makeNode(PlanRowMark);
2285  newrc->rti = newrc->prti = i;
2286  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2287  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2288  newrc->allMarkTypes = (1 << newrc->markType);
2289  newrc->strength = LCS_NONE;
2290  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2291  newrc->isParent = false;
2292 
2293  prowmarks = lappend(prowmarks, newrc);
2294  }
2295 
2296  root->rowMarks = prowmarks;
2297 }
2298 
2299 /*
2300  * Select RowMarkType to use for a given table
2301  */
2302 RowMarkType
2303 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2304 {
2305  if (rte->rtekind != RTE_RELATION)
2306  {
2307  /* If it's not a table at all, use ROW_MARK_COPY */
2308  return ROW_MARK_COPY;
2309  }
2310  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2311  {
2312  /* Let the FDW select the rowmark type, if it wants to */
2313  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2314 
2315  if (fdwroutine->GetForeignRowMarkType != NULL)
2316  return fdwroutine->GetForeignRowMarkType(rte, strength);
2317  /* Otherwise, use ROW_MARK_COPY by default */
2318  return ROW_MARK_COPY;
2319  }
2320  else
2321  {
2322  /* Regular table, apply the appropriate lock type */
2323  switch (strength)
2324  {
2325  case LCS_NONE:
2326 
2327  /*
2328  * We don't need a tuple lock, only the ability to re-fetch
2329  * the row.
2330  */
2331  return ROW_MARK_REFERENCE;
2332  break;
2333  case LCS_FORKEYSHARE:
2334  return ROW_MARK_KEYSHARE;
2335  break;
2336  case LCS_FORSHARE:
2337  return ROW_MARK_SHARE;
2338  break;
2339  case LCS_FORNOKEYUPDATE:
2340  return ROW_MARK_NOKEYEXCLUSIVE;
2341  break;
2342  case LCS_FORUPDATE:
2343  return ROW_MARK_EXCLUSIVE;
2344  break;
2345  }
2346  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2347  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2348  }
2349 }
2350 
2351 /*
2352  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2353  *
2354  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2355  * results back in *count_est and *offset_est. These variables are set to
2356  * 0 if the corresponding clause is not present, and -1 if it's present
2357  * but we couldn't estimate the value for it. (The "0" convention is OK
2358  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2359  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2360  * usual practice of never estimating less than one row.) These values will
2361  * be passed to create_limit_path, which see if you change this code.
2362  *
2363  * The return value is the suitably adjusted tuple_fraction to use for
2364  * planning the query. This adjustment is not overridable, since it reflects
2365  * plan actions that grouping_planner() will certainly take, not assumptions
2366  * about context.
2367  */
2368 static double
2369 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2370  int64 *offset_est, int64 *count_est)
2371 {
2372  Query *parse = root->parse;
2373  Node *est;
2374  double limit_fraction;
2375 
2376  /* Should not be called unless LIMIT or OFFSET */
2377  Assert(parse->limitCount || parse->limitOffset);
2378 
2379  /*
2380  * Try to obtain the clause values. We use estimate_expression_value
2381  * primarily because it can sometimes do something useful with Params.
2382  */
2383  if (parse->limitCount)
2384  {
2385  est = estimate_expression_value(root, parse->limitCount);
2386  if (est && IsA(est, Const))
2387  {
2388  if (((Const *) est)->constisnull)
2389  {
2390  /* NULL indicates LIMIT ALL, ie, no limit */
2391  *count_est = 0; /* treat as not present */
2392  }
2393  else
2394  {
2395  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2396  if (*count_est <= 0)
2397  *count_est = 1; /* force to at least 1 */
2398  }
2399  }
2400  else
2401  *count_est = -1; /* can't estimate */
2402  }
2403  else
2404  *count_est = 0; /* not present */
2405 
2406  if (parse->limitOffset)
2407  {
2408  est = estimate_expression_value(root, parse->limitOffset);
2409  if (est && IsA(est, Const))
2410  {
2411  if (((Const *) est)->constisnull)
2412  {
2413  /* Treat NULL as no offset; the executor will too */
2414  *offset_est = 0; /* treat as not present */
2415  }
2416  else
2417  {
2418  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2419  if (*offset_est < 0)
2420  *offset_est = 0; /* treat as not present */
2421  }
2422  }
2423  else
2424  *offset_est = -1; /* can't estimate */
2425  }
2426  else
2427  *offset_est = 0; /* not present */
2428 
2429  if (*count_est != 0)
2430  {
2431  /*
2432  * A LIMIT clause limits the absolute number of tuples returned.
2433  * However, if it's not a constant LIMIT then we have to guess; for
2434  * lack of a better idea, assume 10% of the plan's result is wanted.
2435  */
2436  if (*count_est < 0 || *offset_est < 0)
2437  {
2438  /* LIMIT or OFFSET is an expression ... punt ... */
2439  limit_fraction = 0.10;
2440  }
2441  else
2442  {
2443  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2444  limit_fraction = (double) *count_est + (double) *offset_est;
2445  }
2446 
2447  /*
2448  * If we have absolute limits from both caller and LIMIT, use the
2449  * smaller value; likewise if they are both fractional. If one is
2450  * fractional and the other absolute, we can't easily determine which
2451  * is smaller, but we use the heuristic that the absolute will usually
2452  * be smaller.
2453  */
2454  if (tuple_fraction >= 1.0)
2455  {
2456  if (limit_fraction >= 1.0)
2457  {
2458  /* both absolute */
2459  tuple_fraction = Min(tuple_fraction, limit_fraction);
2460  }
2461  else
2462  {
2463  /* caller absolute, limit fractional; use caller's value */
2464  }
2465  }
2466  else if (tuple_fraction > 0.0)
2467  {
2468  if (limit_fraction >= 1.0)
2469  {
2470  /* caller fractional, limit absolute; use limit */
2471  tuple_fraction = limit_fraction;
2472  }
2473  else
2474  {
2475  /* both fractional */
2476  tuple_fraction = Min(tuple_fraction, limit_fraction);
2477  }
2478  }
2479  else
2480  {
2481  /* no info from caller, just use limit */
2482  tuple_fraction = limit_fraction;
2483  }
2484  }
2485  else if (*offset_est != 0 && tuple_fraction > 0.0)
2486  {
2487  /*
2488  * We have an OFFSET but no LIMIT. This acts entirely differently
2489  * from the LIMIT case: here, we need to increase rather than decrease
2490  * the caller's tuple_fraction, because the OFFSET acts to cause more
2491  * tuples to be fetched instead of fewer. This only matters if we got
2492  * a tuple_fraction > 0, however.
2493  *
2494  * As above, use 10% if OFFSET is present but unestimatable.
2495  */
2496  if (*offset_est < 0)
2497  limit_fraction = 0.10;
2498  else
2499  limit_fraction = (double) *offset_est;
2500 
2501  /*
2502  * If we have absolute counts from both caller and OFFSET, add them
2503  * together; likewise if they are both fractional. If one is
2504  * fractional and the other absolute, we want to take the larger, and
2505  * we heuristically assume that's the fractional one.
2506  */
2507  if (tuple_fraction >= 1.0)
2508  {
2509  if (limit_fraction >= 1.0)
2510  {
2511  /* both absolute, so add them together */
2512  tuple_fraction += limit_fraction;
2513  }
2514  else
2515  {
2516  /* caller absolute, limit fractional; use limit */
2517  tuple_fraction = limit_fraction;
2518  }
2519  }
2520  else
2521  {
2522  if (limit_fraction >= 1.0)
2523  {
2524  /* caller fractional, limit absolute; use caller's value */
2525  }
2526  else
2527  {
2528  /* both fractional, so add them together */
2529  tuple_fraction += limit_fraction;
2530  if (tuple_fraction >= 1.0)
2531  tuple_fraction = 0.0; /* assume fetch all */
2532  }
2533  }
2534  }
2535 
2536  return tuple_fraction;
2537 }
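/*
 * Illustrative standalone sketch (not part of planner.c): the arithmetic of
 * preprocess_limit() above, reduced to the LIMIT branch.  Given the caller's
 * tuple_fraction and the estimated LIMIT/OFFSET values (-1 meaning
 * "unestimatable", per the convention documented above), compute the adjusted
 * fraction.  The numbers are invented for the example, and this helper is
 * meant to be called only when a LIMIT clause is actually present.
 */
#include <stdio.h>

static double adjust_for_limit(double tuple_fraction,
                               long count_est, long offset_est)
{
    double limit_fraction;

    if (count_est < 0 || offset_est < 0)
        limit_fraction = 0.10;  /* non-constant clause: assume 10% wanted */
    else
        limit_fraction = (double) count_est + (double) offset_est;

    if (tuple_fraction >= 1.0)
    {
        if (limit_fraction >= 1.0)     /* both absolute: take the smaller */
            return (tuple_fraction < limit_fraction) ? tuple_fraction : limit_fraction;
        return tuple_fraction;         /* caller absolute, limit fractional */
    }
    else if (tuple_fraction > 0.0)
    {
        if (limit_fraction >= 1.0)
            return limit_fraction;     /* caller fractional, limit absolute */
        /* both fractional: take the smaller */
        return (tuple_fraction < limit_fraction) ? tuple_fraction : limit_fraction;
    }
    return limit_fraction;             /* no info from caller */
}

int main(void)
{
    /* e.g. LIMIT 20 OFFSET 5 with no caller preference -> fetch ~25 rows */
    printf("%g\n", adjust_for_limit(0.0, 20, 5));
    /* e.g. LIMIT with a parameter (unestimatable) -> assume 10% of the output */
    printf("%g\n", adjust_for_limit(0.0, -1, 0));
    return 0;
}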
2538 
2539 /*
2540  * limit_needed - do we actually need a Limit plan node?
2541  *
2542  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2543  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2544  * locution for an optimization fence. (Because other places in the planner
2545  * merely check whether parse->limitOffset isn't NULL, it will still work as
2546  * an optimization fence --- we're just suppressing unnecessary run-time
2547  * overhead.)
2548  *
2549  * This might look like it could be merged into preprocess_limit, but there's
2550  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2551  * in preprocess_limit it's good enough to consider estimated values.
2552  */
2553 bool
2554 limit_needed(Query *parse)
2555 {
2556  Node *node;
2557 
2558  node = parse->limitCount;
2559  if (node)
2560  {
2561  if (IsA(node, Const))
2562  {
2563  /* NULL indicates LIMIT ALL, ie, no limit */
2564  if (!((Const *) node)->constisnull)
2565  return true; /* LIMIT with a constant value */
2566  }
2567  else
2568  return true; /* non-constant LIMIT */
2569  }
2570 
2571  node = parse->limitOffset;
2572  if (node)
2573  {
2574  if (IsA(node, Const))
2575  {
2576  /* Treat NULL as no offset; the executor would too */
2577  if (!((Const *) node)->constisnull)
2578  {
2579  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2580 
2581  if (offset != 0)
2582  return true; /* OFFSET with a nonzero value */
2583  }
2584  }
2585  else
2586  return true; /* non-constant OFFSET */
2587  }
2588 
2589  return false; /* don't need a Limit plan node */
2590 }
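/*
 * Illustrative standalone sketch (not part of planner.c): the decision made
 * by limit_needed() above, modelled with two toy "clause" structs.  A query
 * such as "SELECT ... OFFSET 0" (a common optimization-fence idiom) needs no
 * Limit node, while any non-constant or nonzero clause does.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
    bool present;       /* clause appears in the query */
    bool is_const;      /* clause is a simple constant */
    bool is_null;       /* constant is NULL (LIMIT ALL / no OFFSET) */
    long value;         /* constant value, when non-null */
} ToyClause;

static bool toy_limit_needed(ToyClause limit, ToyClause offset)
{
    if (limit.present)
    {
        if (!limit.is_const)
            return true;            /* non-constant LIMIT */
        if (!limit.is_null)
            return true;            /* LIMIT with a constant value */
    }
    if (offset.present)
    {
        if (!offset.is_const)
            return true;            /* non-constant OFFSET */
        if (!offset.is_null && offset.value != 0)
            return true;            /* OFFSET with a nonzero value */
    }
    return false;
}

int main(void)
{
    ToyClause none = {false, false, false, 0};
    ToyClause offset_zero = {true, true, false, 0};
    ToyClause limit_ten = {true, true, false, 10};

    printf("OFFSET 0 alone: %d\n", toy_limit_needed(none, offset_zero));  /* 0 */
    printf("LIMIT 10:       %d\n", toy_limit_needed(limit_ten, none));    /* 1 */
    return 0;
}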
2591 
2592 
2593 /*
2594  * remove_useless_groupby_columns
2595  * Remove any columns in the GROUP BY clause that are redundant due to
2596  * being functionally dependent on other GROUP BY columns.
2597  *
2598  * Since some other DBMSes do not allow references to ungrouped columns, it's
2599  * not unusual to find all columns listed in GROUP BY even though listing the
2600  * primary-key columns would be sufficient. Deleting such excess columns
2601  * avoids redundant sorting work, so it's worth doing.
2602  *
2603  * Relcache invalidations will ensure that cached plans become invalidated
2604  * when the underlying index of the pkey constraint is dropped.
2605  *
2606  * Currently, we only make use of pkey constraints for this; however, we may
2607  * wish to take this further in the future and also use unique constraints
2608  * which have NOT NULL columns. In that case, plan invalidation will still
2609  * work since relations will receive a relcache invalidation when a NOT NULL
2610  * constraint is dropped.
2611  */
2612 static void
2613 remove_useless_groupby_columns(PlannerInfo *root)
2614 {
2615  Query *parse = root->parse;
2616  Bitmapset **groupbyattnos;
2617  Bitmapset **surplusvars;
2618  ListCell *lc;
2619  int relid;
2620 
2621  /* No chance to do anything if there are less than two GROUP BY items */
2622  if (list_length(parse->groupClause) < 2)
2623  return;
2624 
2625  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2626  if (parse->groupingSets)
2627  return;
2628 
2629  /*
2630  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2631  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2632  * that are GROUP BY items.
2633  */
2634  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2635  (list_length(parse->rtable) + 1));
2636  foreach(lc, parse->groupClause)
2637  {
2638  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2639  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2640  Var *var = (Var *) tle->expr;
2641 
2642  /*
2643  * Ignore non-Vars and Vars from other query levels.
2644  *
2645  * XXX in principle, stable expressions containing Vars could also be
2646  * removed, if all the Vars are functionally dependent on other GROUP
2647  * BY items. But it's not clear that such cases occur often enough to
2648  * be worth troubling over.
2649  */
2650  if (!IsA(var, Var) ||
2651  var->varlevelsup > 0)
2652  continue;
2653 
2654  /* OK, remember we have this Var */
2655  relid = var->varno;
2656  Assert(relid <= list_length(parse->rtable));
2657  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2658  var->varattno - FirstLowInvalidHeapAttributeNumber);
2659  }
2660 
2661  /*
2662  * Consider each relation and see if it is possible to remove some of its
2663  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2664  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2665  * of the column attnos of RTE k that are removable GROUP BY items.
2666  */
2667  surplusvars = NULL; /* don't allocate array unless required */
2668  relid = 0;
2669  foreach(lc, parse->rtable)
2670  {
2671  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2672  Bitmapset *relattnos;
2673  Bitmapset *pkattnos;
2674  Oid constraintOid;
2675 
2676  relid++;
2677 
2678  /* Only plain relations could have primary-key constraints */
2679  if (rte->rtekind != RTE_RELATION)
2680  continue;
2681 
2682  /*
2683  * We must skip inheritance parent tables as some of the child rels
2684  * may cause duplicate rows. This cannot happen with partitioned
2685  * tables, however.
2686  */
2687  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2688  continue;
2689 
2690  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2691  relattnos = groupbyattnos[relid];
2692  if (bms_membership(relattnos) != BMS_MULTIPLE)
2693  continue;
2694 
2695  /*
2696  * Can't remove any columns for this rel if there is no suitable
2697  * (i.e., nondeferrable) primary key constraint.
2698  */
2699  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2700  if (pkattnos == NULL)
2701  continue;
2702 
2703  /*
2704  * If the primary key is a proper subset of relattnos then we have
2705  * some items in the GROUP BY that can be removed.
2706  */
2707  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2708  {
2709  /*
2710  * To easily remember whether we've found anything to do, we don't
2711  * allocate the surplusvars[] array until we find something.
2712  */
2713  if (surplusvars == NULL)
2714  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2715  (list_length(parse->rtable) + 1));
2716 
2717  /* Remember the attnos of the removable columns */
2718  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2719  }
2720  }
2721 
2722  /*
2723  * If we found any surplus Vars, build a new GROUP BY clause without them.
2724  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2725  * markings, but that's harmless.)
2726  */
2727  if (surplusvars != NULL)
2728  {
2729  List *new_groupby = NIL;
2730 
2731  foreach(lc, parse->groupClause)
2732  {
2733  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2734  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2735  Var *var = (Var *) tle->expr;
2736 
2737  /*
2738  * New list must include non-Vars, outer Vars, and anything not
2739  * marked as surplus.
2740  */
2741  if (!IsA(var, Var) ||
2742  var->varlevelsup > 0 ||
2743  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2744  surplusvars[var->varno]))
2745  new_groupby = lappend(new_groupby, sgc);
2746  }
2747 
2748  parse->groupClause = new_groupby;
2749  }
2750 }
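/*
 * Illustrative standalone sketch (not part of planner.c): the core test used
 * above, with column-number bitmapsets reduced to plain unsigned bitmasks.
 * If the primary-key columns are a proper subset of a relation's GROUP BY
 * columns, the remaining columns are functionally dependent and can be
 * dropped from GROUP BY.  The column numbers are invented for the example.
 */
#include <stdio.h>

int main(void)
{
    /* GROUP BY id, name, created_at  -> columns 1, 2, 3 */
    unsigned groupby = (1u << 1) | (1u << 2) | (1u << 3);
    /* PRIMARY KEY (id)               -> column 1 */
    unsigned pkey = (1u << 1);

    /* proper subset: every pkey column is grouped, and something else is too */
    if ((pkey & ~groupby) == 0 && pkey != groupby)
    {
        unsigned surplus = groupby & ~pkey;     /* columns 2 and 3 */

        for (int col = 0; col < 32; col++)
            if (surplus & (1u << col))
                printf("column %d is redundant in GROUP BY\n", col);
    }
    return 0;
}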
2751 
2752 /*
2753  * preprocess_groupclause - do preparatory work on GROUP BY clause
2754  *
2755  * The idea here is to adjust the ordering of the GROUP BY elements
2756  * (which in itself is semantically insignificant) to match ORDER BY,
2757  * thereby allowing a single sort operation to both implement the ORDER BY
2758  * requirement and set up for a Unique step that implements GROUP BY.
2759  *
2760  * In principle it might be interesting to consider other orderings of the
2761  * GROUP BY elements, which could match the sort ordering of other
2762  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2763  * bother with that, though. Hashed grouping will frequently win anyway.
2764  *
2765  * Note: we need no comparable processing of the distinctClause because
2766  * the parser already enforced that that matches ORDER BY.
2767  *
2768  * For grouping sets, the order of items is instead forced to agree with that
2769  * of the grouping set (and items not in the grouping set are skipped). The
2770  * work of sorting the order of grouping set elements to match the ORDER BY if
2771  * possible is done elsewhere.
2772  */
2773 static List *
2774 preprocess_groupclause(PlannerInfo *root, List *force)
2775 {
2776  Query *parse = root->parse;
2777  List *new_groupclause = NIL;
2778  bool partial_match;
2779  ListCell *sl;
2780  ListCell *gl;
2781 
2782  /* For grouping sets, we need to force the ordering */
2783  if (force)
2784  {
2785  foreach(sl, force)
2786  {
2787  Index ref = lfirst_int(sl);
2788  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2789 
2790  new_groupclause = lappend(new_groupclause, cl);
2791  }
2792 
2793  return new_groupclause;
2794  }
2795 
2796  /* If no ORDER BY, nothing useful to do here */
2797  if (parse->sortClause == NIL)
2798  return parse->groupClause;
2799 
2800  /*
2801  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2802  * items, but only as far as we can make a matching prefix.
2803  *
2804  * This code assumes that the sortClause contains no duplicate items.
2805  */
2806  foreach(sl, parse->sortClause)
2807  {
2808  SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2809 
2810  foreach(gl, parse->groupClause)
2811  {
2812  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2813 
2814  if (equal(gc, sc))
2815  {
2816  new_groupclause = lappend(new_groupclause, gc);
2817  break;
2818  }
2819  }
2820  if (gl == NULL)
2821  break; /* no match, so stop scanning */
2822  }
2823 
2824  /* Did we match all of the ORDER BY list, or just some of it? */
2825  partial_match = (sl != NULL);
2826 
2827  /* If no match at all, no point in reordering GROUP BY */
2828  if (new_groupclause == NIL)
2829  return parse->groupClause;
2830 
2831  /*
2832  * Add any remaining GROUP BY items to the new list, but only if we were
2833  * able to make a complete match. In other words, we only rearrange the
2834  * GROUP BY list if the result is that one list is a prefix of the other
2835  * --- otherwise there's no possibility of a common sort. Also, give up
2836  * if there are any non-sortable GROUP BY items, since then there's no
2837  * hope anyway.
2838  */
2839  foreach(gl, parse->groupClause)
2840  {
2841  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2842 
2843  if (list_member_ptr(new_groupclause, gc))
2844  continue; /* it matched an ORDER BY item */
2845  if (partial_match)
2846  return parse->groupClause; /* give up, no common sort possible */
2847  if (!OidIsValid(gc->sortop))
2848  return parse->groupClause; /* give up, GROUP BY can't be sorted */
2849  new_groupclause = lappend(new_groupclause, gc);
2850  }
2851 
2852  /* Success --- install the rearranged GROUP BY list */
2853  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2854  return new_groupclause;
2855 }
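/*
 * Illustrative standalone sketch (not part of planner.c): the reordering idea
 * of preprocess_groupclause() above, with clauses reduced to small ints.
 * GROUP BY {b, a} combined with ORDER BY a, b, c is rewritten as {a, b}, so a
 * single sort can serve both.  The item numbers are invented, and the sketch
 * covers only the prefix-matching step, not the later bookkeeping.
 */
#include <stdbool.h>
#include <stdio.h>

#define A 1
#define B 2
#define C 3

int main(void)
{
    int orderby[] = {A, B, C};
    int groupby[] = {B, A};
    int norder = 3, ngroup = 2;
    int new_group[8];
    int n_new = 0;

    /* take ORDER BY items, in order, as long as each appears in GROUP BY */
    for (int i = 0; i < norder; i++)
    {
        bool found = false;

        for (int j = 0; j < ngroup; j++)
            if (groupby[j] == orderby[i])
                found = true;
        if (!found)
            break;              /* stop at the first non-matching item */
        new_group[n_new++] = orderby[i];
    }

    /* only worthwhile if every GROUP BY item was matched (a full prefix) */
    if (n_new == ngroup)
        for (int i = 0; i < n_new; i++)
            printf("GROUP BY position %d: item %d\n", i, new_group[i]);
    else
        printf("keep the original GROUP BY order\n");
    return 0;
}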
2856 
2857 /*
2858  * Extract lists of grouping sets that can be implemented using a single
2859  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2860  *
2861  * Input must be sorted with smallest sets first. Result has each sublist
2862  * sorted with smallest sets first.
2863  *
2864  * We want to produce the absolute minimum possible number of lists here to
2865  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2866  * of finding the minimal partition of a partially-ordered set into chains
2867  * (which is what we need, taking the list of grouping sets as a poset ordered
2868  * by set inclusion) can be mapped to the problem of finding the maximum
2869  * cardinality matching on a bipartite graph, which is solvable in polynomial
2870  * time with a worst case of no worse than O(n^2.5) and usually much
2871  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2872  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2873  * half a second on my modest system even with optimization off and assertions
2874  * on.)
2875  */
2876 static List *
2877 extract_rollup_sets(List *groupingSets)
2878 {
2879  int num_sets_raw = list_length(groupingSets);
2880  int num_empty = 0;
2881  int num_sets = 0; /* distinct sets */
2882  int num_chains = 0;
2883  List *result = NIL;
2884  List **results;
2885  List **orig_sets;
2886  Bitmapset **set_masks;
2887  int *chains;
2888  short **adjacency;
2889  short *adjacency_buf;
2890  BipartiteMatchState *state;
2891  int i;
2892  int j;
2893  int j_size;
2894  ListCell *lc1 = list_head(groupingSets);
2895  ListCell *lc;
2896 
2897  /*
2898  * Start by stripping out empty sets. The algorithm doesn't require this,
2899  * but the planner currently needs all empty sets to be returned in the
2900  * first list, so we strip them here and add them back after.
2901  */
2902  while (lc1 && lfirst(lc1) == NIL)
2903  {
2904  ++num_empty;
2905  lc1 = lnext(groupingSets, lc1);
2906  }
2907 
2908  /* bail out now if it turns out that all we had were empty sets. */
2909  if (!lc1)
2910  return list_make1(groupingSets);
2911 
2912  /*----------
2913  * We don't strictly need to remove duplicate sets here, but if we don't,
2914  * they tend to become scattered through the result, which is a bit
2915  * confusing (and irritating if we ever decide to optimize them out).
2916  * So we remove them here and add them back after.
2917  *
2918  * For each non-duplicate set, we fill in the following:
2919  *
2920  * orig_sets[i] = list of the original set lists
2921  * set_masks[i] = bitmapset for testing inclusion
2922  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2923  *
2924  * chains[i] will be the result group this set is assigned to.
2925  *
2926  * We index all of these from 1 rather than 0 because it is convenient
2927  * to leave 0 free for the NIL node in the graph algorithm.
2928  *----------
2929  */
2930  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2931  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2932  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2933  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2934 
2935  j_size = 0;
2936  j = 0;
2937  i = 1;
2938 
2939  for_each_cell(lc, groupingSets, lc1)
2940  {
2941  List *candidate = (List *) lfirst(lc);
2942  Bitmapset *candidate_set = NULL;
2943  ListCell *lc2;
2944  int dup_of = 0;
2945 
2946  foreach(lc2, candidate)
2947  {
2948  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2949  }
2950 
2951  /* we can only be a dup if we're the same length as a previous set */
2952  if (j_size == list_length(candidate))
2953  {
2954  int k;
2955 
2956  for (k = j; k < i; ++k)
2957  {
2958  if (bms_equal(set_masks[k], candidate_set))
2959  {
2960  dup_of = k;
2961  break;
2962  }
2963  }
2964  }
2965  else if (j_size < list_length(candidate))
2966  {
2967  j_size = list_length(candidate);
2968  j = i;
2969  }
2970 
2971  if (dup_of > 0)
2972  {
2973  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2974  bms_free(candidate_set);
2975  }
2976  else
2977  {
2978  int k;
2979  int n_adj = 0;
2980 
2981  orig_sets[i] = list_make1(candidate);
2982  set_masks[i] = candidate_set;
2983 
2984  /* fill in adjacency list; no need to compare equal-size sets */
2985 
2986  for (k = j - 1; k > 0; --k)
2987  {
2988  if (bms_is_subset(set_masks[k], candidate_set))
2989  adjacency_buf[++n_adj] = k;
2990  }
2991 
2992  if (n_adj > 0)
2993  {
2994  adjacency_buf[0] = n_adj;
2995  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2996  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2997  }
2998  else
2999  adjacency[i] = NULL;
3000 
3001  ++i;
3002  }
3003  }
3004 
3005  num_sets = i - 1;
3006 
3007  /*
3008  * Apply the graph matching algorithm to do the work.
3009  */
3010  state = BipartiteMatch(num_sets, num_sets, adjacency);
3011 
3012  /*
3013  * Now, the state->pair* fields have the info we need to assign sets to
3014  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3015  * pair_vu[v] = u (both will be true, but we check both so that we can do
3016  * it in one pass)
3017  */
3018  chains = palloc0((num_sets + 1) * sizeof(int));
3019 
3020  for (i = 1; i <= num_sets; ++i)
3021  {
3022  int u = state->pair_vu[i];
3023  int v = state->pair_uv[i];
3024 
3025  if (u > 0 && u < i)
3026  chains[i] = chains[u];
3027  else if (v > 0 && v < i)
3028  chains[i] = chains[v];
3029  else
3030  chains[i] = ++num_chains;
3031  }
3032 
3033  /* build result lists. */
3034  results = palloc0((num_chains + 1) * sizeof(List *));
3035 
3036  for (i = 1; i <= num_sets; ++i)
3037  {
3038  int c = chains[i];
3039 
3040  Assert(c > 0);
3041 
3042  results[c] = list_concat(results[c], orig_sets[i]);
3043  }
3044 
3045  /* push any empty sets back on the first list. */
3046  while (num_empty-- > 0)
3047  results[1] = lcons(NIL, results[1]);
3048 
3049  /* make result list */
3050  for (i = 1; i <= num_chains; ++i)
3051  result = lappend(result, results[i]);
3052 
3053  /*
3054  * Free all the things.
3055  *
3056  * (This is over-fussy for small sets but for large sets we could have
3057  * tied up a nontrivial amount of memory.)
3058  */
3059  BipartiteMatchFree(state);
3060  pfree(results);
3061  pfree(chains);
3062  for (i = 1; i <= num_sets; ++i)
3063  if (adjacency[i])
3064  pfree(adjacency[i]);
3065  pfree(adjacency);
3066  pfree(adjacency_buf);
3067  pfree(orig_sets);
3068  for (i = 1; i <= num_sets; ++i)
3069  bms_free(set_masks[i]);
3070  pfree(set_masks);
3071 
3072  return result;
3073 }
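/*
 * Illustrative standalone sketch (not part of planner.c): the minimum-chain
 * idea behind extract_rollup_sets() above.  Grouping sets are bitmasks of
 * column refs; an edge u -> v means u is a proper subset of v, so u and v can
 * share one rollup pass.  The minimum number of passes equals
 * (number of sets) - (maximum bipartite matching).  This toy uses a simple
 * augmenting-path matching rather than the Hopcroft-Karp implementation in
 * lib/bipartite_match.c; the sets below are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NSETS 4

/* {a}, {a,b}, {a,b,c}, {c} with a=bit0, b=bit1, c=bit2 */
static const unsigned sets[NSETS] = {0x1, 0x3, 0x7, 0x4};

static int match_to[NSETS];     /* right vertex -> matched left vertex */

static bool is_proper_subset(unsigned u, unsigned v)
{
    return (u & ~v) == 0 && u != v;
}

static bool try_augment(int u, bool *visited)
{
    for (int v = 0; v < NSETS; v++)
    {
        if (!is_proper_subset(sets[u], sets[v]) || visited[v])
            continue;
        visited[v] = true;
        if (match_to[v] < 0 || try_augment(match_to[v], visited))
        {
            match_to[v] = u;
            return true;
        }
    }
    return false;
}

int main(void)
{
    int matching = 0;

    memset(match_to, -1, sizeof(match_to));
    for (int u = 0; u < NSETS; u++)
    {
        bool visited[NSETS] = {false};

        if (try_augment(u, visited))
            matching++;
    }
    /* {a} < {a,b} < {a,b,c} forms one chain, {c} another: two passes */
    printf("minimum rollup passes: %d\n", NSETS - matching);
    return 0;
}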
3074 
3075 /*
3076  * Reorder the elements of a list of grouping sets such that they have correct
3077  * prefix relationships. Also inserts the GroupingSetData annotations.
3078  *
3079  * The input must be ordered with smallest sets first; the result is returned
3080  * with largest sets first. Note that the result shares no list substructure
3081  * with the input, so it's safe for the caller to modify it later.
3082  *
3083  * If we're passed in a sortclause, we follow its order of columns to the
3084  * extent possible, to minimize the chance that we add unnecessary sorts.
3085  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3086  * gets implemented in one pass.)
3087  */
3088 static List *
3089 reorder_grouping_sets(List *groupingSets, List *sortclause)
3090 {
3091  ListCell *lc;
3092  List *previous = NIL;
3093  List *result = NIL;
3094 
3095  foreach(lc, groupingSets)
3096  {
3097  List *candidate = (List *) lfirst(lc);
3098  List *new_elems = list_difference_int(candidate, previous);
3099  GroupingSetData *gs = makeNode(GroupingSetData);
3100 
3101  while (list_length(sortclause) > list_length(previous) &&
3102  new_elems != NIL)
3103  {
3104  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3105  int ref = sc->tleSortGroupRef;
3106 
3107  if (list_member_int(new_elems, ref))
3108  {
3109  previous = lappend_int(previous, ref);
3110  new_elems = list_delete_int(new_elems, ref);
3111  }
3112  else
3113  {
3114  /* diverged from the sortclause; give up on it */
3115  sortclause = NIL;
3116  break;
3117  }
3118  }
3119 
3120  previous = list_concat(previous, new_elems);
3121 
3122  gs->set = list_copy(previous);
3123  result = lcons(gs, result);
3124  }
3125 
3126  list_free(previous);
3127 
3128  return result;
3129 }
3130 
3131 /*
3132  * make_pathkeys_for_groupagg
3133  * Determine the pathkeys for the GROUP BY clause and/or any ordered
3134  * aggregates. We expect at least one of these here.
3135  *
3136  * Building the pathkeys for the GROUP BY is simple. Most of the complexity
3137  * involved here comes from calculating the best pathkeys for ordered
3138  * aggregates. We define "best" as the pathkeys that suit the largest number of
3139  * aggregate functions. We find these by looking at the first ORDER BY /
3140  * DISTINCT aggregate and take the pathkeys for that before searching for
3141  * other aggregates that require the same or a more strict variation of the
3142  * same pathkeys. We then repeat that process for any remaining aggregates
3143  * with different pathkeys and if we find another set of pathkeys that suits a
3144  * larger number of aggregates then we return those pathkeys instead.
3145  *
3146  * *number_groupby_pathkeys gets set to the number of elements in the returned
3147  * list that belong to the GROUP BY clause. Any elements above this number
3148  * must belong to ORDER BY / DISTINCT aggregates.
3149  *
3150  * When the best pathkeys are found we also mark each Aggref that can use
3151  * those pathkeys as aggpresorted = true.
3152  */
3153 static List *
3154 make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
3155  int *number_groupby_pathkeys)
3156 {
3157  List *grouppathkeys = NIL;
3158  List *bestpathkeys;
3159  Bitmapset *bestaggs;
3160  Bitmapset *unprocessed_aggs;
3161  ListCell *lc;
3162  int i;
3163 
3164  Assert(groupClause != NIL || root->numOrderedAggs > 0);
3165 
3166  if (groupClause != NIL)
3167  {
3168  /* no pathkeys possible if there's an unsortable GROUP BY */
3169  if (!grouping_is_sortable(groupClause))
3170  {
3171  *number_groupby_pathkeys = 0;
3172  return NIL;
3173  }
3174 
3175  grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
3176  tlist);
3177  *number_groupby_pathkeys = list_length(grouppathkeys);
3178  }
3179  else
3180  *number_groupby_pathkeys = 0;
3181 
3182  /*
3183  * We can't add pathkeys for ordered aggregates if there are any grouping
3184  * sets. All handling specific to ordered aggregates must be done by the
3185  * executor in that case.
3186  */
3187  if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
3188  return grouppathkeys;
3189 
3190  /*
3191  * Make a first pass over all AggInfos to collect a Bitmapset containing
3192  * the indexes of all AggInfos to be processed below.
3193  */
3194  unprocessed_aggs = NULL;
3195  foreach(lc, root->agginfos)
3196  {
3197  AggInfo *agginfo = lfirst_node(AggInfo, lc);
3198  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3199 
3200  if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3201  continue;
3202 
3203  /* only add aggregates with a DISTINCT or ORDER BY */
3204  if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3205  unprocessed_aggs = bms_add_member(unprocessed_aggs,
3206  foreach_current_index(lc));
3207  }
3208 
3209  /*
3210  * Now process all the unprocessed_aggs to find the best set of pathkeys
3211  * for the given set of aggregates.
3212  *
3213  * On the first outer loop here 'bestaggs' will be empty. We'll populate
3214  * this during the first loop using the pathkeys for the very first
3215  * AggInfo then taking any stronger pathkeys from any other AggInfos with
3216  * a more strict set of compatible pathkeys. Once the outer loop is
3217  * complete, we mark off all the aggregates with compatible pathkeys then
3218  * remove those from the unprocessed_aggs and repeat the process to try to
3219  * find another set of pathkeys that are suitable for a larger number of
3220  * aggregates. The outer loop will stop when there are not enough
3221  * unprocessed aggregates for it to be possible to find a set of pathkeys
3222  * to suit a larger number of aggregates.
3223  */
3224  bestpathkeys = NIL;
3225  bestaggs = NULL;
3226  while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3227  {
3228  Bitmapset *aggindexes = NULL;
3229  List *currpathkeys = NIL;
3230 
3231  i = -1;
3232  while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3233  {
3234  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3235  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3236  List *sortlist;
3237 
3238  if (aggref->aggdistinct != NIL)
3239  sortlist = aggref->aggdistinct;
3240  else
3241  sortlist = aggref->aggorder;
3242 
3243  /*
3244  * When not set yet, take the pathkeys from the first unprocessed
3245  * aggregate.
3246  */
3247  if (currpathkeys == NIL)
3248  {
3249  currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3250  aggref->args);
3251 
3252  /* include the GROUP BY pathkeys, if they exist */
3253  if (grouppathkeys != NIL)
3254  currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3255  currpathkeys);
3256 
3257  /* record that we found pathkeys for this aggregate */
3258  aggindexes = bms_add_member(aggindexes, i);
3259  }
3260  else
3261  {
3262  List *pathkeys;
3263 
3264  /* now look for a stronger set of matching pathkeys */
3265  pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3266  aggref->args);
3267 
3268  /* include the GROUP BY pathkeys, if they exist */
3269  if (grouppathkeys != NIL)
3270  pathkeys = append_pathkeys(list_copy(grouppathkeys),
3271  pathkeys);
3272 
3273  /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3274  switch (compare_pathkeys(currpathkeys, pathkeys))
3275  {
3276  case PATHKEYS_BETTER2:
3277  /* 'pathkeys' are stronger, use these ones instead */
3278  currpathkeys = pathkeys;
3279  /* FALLTHROUGH */
3280 
3281  case PATHKEYS_BETTER1:
3282  /* 'pathkeys' are less strict */
3283  /* FALLTHROUGH */
3284 
3285  case PATHKEYS_EQUAL:
3286  /* mark this aggregate as covered by 'currpathkeys' */
3287  aggindexes = bms_add_member(aggindexes, i);
3288  break;
3289 
3290  case PATHKEYS_DIFFERENT:
3291  break;
3292  }
3293  }
3294  }
3295 
3296  /* remove the aggregates that we've just processed */
3297  unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3298 
3299  /*
3300  * If this pass included more aggregates than the previous best then
3301  * use these ones as the best set.
3302  */
3303  if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3304  {
3305  bestaggs = aggindexes;
3306  bestpathkeys = currpathkeys;
3307  }
3308  }
3309 
3310  /*
3311  * Now that we've found the best set of aggregates we can set the
3312  * presorted flag to indicate to the executor that it needn't bother
3313  * performing a sort for these Aggrefs. We're able to do this now as
3314  * there's no chance of a Hash Aggregate plan as create_grouping_paths
3315  * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3316  * of ordered aggregates.
3317  */
3318  i = -1;
3319  while ((i = bms_next_member(bestaggs, i)) >= 0)
3320  {
3321  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3322 
3323  foreach(lc, agginfo->aggrefs)
3324  {
3325  Aggref *aggref = lfirst_node(Aggref, lc);
3326 
3327  aggref->aggpresorted = true;
3328  }
3329  }
3330 
3331  /*
3332  * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
3333  * aggregates, then return bestpathkeys, otherwise return the
3334  * grouppathkeys.
3335  */
3336  if (bestpathkeys != NIL)
3337  return bestpathkeys;
3338 
3339  return grouppathkeys;
3340 }
3341 
3342 /*
3343  * Compute query_pathkeys and other pathkeys during plan generation
3344  */
3345 static void
3346 standard_qp_callback(PlannerInfo *root, void *extra)
3347 {
3348  Query *parse = root->parse;
3349  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3350  List *tlist = root->processed_tlist;
3351  List *activeWindows = qp_extra->activeWindows;
3352 
3353  /*
3354  * Calculate pathkeys that represent grouping/ordering and/or ordered
3355  * aggregate requirements.
3356  */
3357  if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
3358  root->group_pathkeys = make_pathkeys_for_groupagg(root,
3359  qp_extra->groupClause,
3360  tlist,
3361  &root->num_groupby_pathkeys);
3362  else
3363  {
3364  root->group_pathkeys = NIL;
3365  root->num_groupby_pathkeys = 0;
3366  }
3367 
3368  /* We consider only the first (bottom) window in pathkeys logic */
3369  if (activeWindows != NIL)
3370  {
3371  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3372 
3373  root->window_pathkeys = make_pathkeys_for_window(root,
3374  wc,
3375  tlist);
3376  }
3377  else
3378  root->window_pathkeys = NIL;
3379 
3380  if (parse->distinctClause &&
3381  grouping_is_sortable(parse->distinctClause))
3382  root->distinct_pathkeys =
3383  make_pathkeys_for_sortclauses(root,
3384  parse->distinctClause,
3385  tlist);
3386  else
3387  root->distinct_pathkeys = NIL;
3388 
3389  root->sort_pathkeys =
3390  make_pathkeys_for_sortclauses(root,
3391  parse->sortClause,
3392  tlist);
3393 
3394  /*
3395  * Figure out whether we want a sorted result from query_planner.
3396  *
3397  * If we have a sortable GROUP BY clause, then we want a result sorted
3398  * properly for grouping. Otherwise, if we have window functions to
3399  * evaluate, we try to sort for the first window. Otherwise, if there's a
3400  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3401  * we try to produce output that's sufficiently well sorted for the
3402  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3403  * by the ORDER BY clause.
3404  *
3405  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3406  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3407  * that might just leave us failing to exploit an available sort order at
3408  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3409  * much easier, since we know that the parser ensured that one is a
3410  * superset of the other.
3411  */
3412  if (root->group_pathkeys)
3413  root->query_pathkeys = root->group_pathkeys;
3414  else if (root->window_pathkeys)
3415  root->query_pathkeys = root->window_pathkeys;
3416  else if (list_length(root->distinct_pathkeys) >
3417  list_length(root->sort_pathkeys))
3418  root->query_pathkeys = root->distinct_pathkeys;
3419  else if (root->sort_pathkeys)
3420  root->query_pathkeys = root->sort_pathkeys;
3421  else
3422  root->query_pathkeys = NIL;
3423 }
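/*
 * Illustrative standalone sketch (not part of planner.c): the priority rule
 * at the end of standard_qp_callback() above, with the pathkey lists reduced
 * to their lengths.  Grouping order wins, then the first window's order, then
 * DISTINCT when it is more rigorous than ORDER BY, then ORDER BY itself.
 */
#include <stdio.h>

static const char *pick_query_order(int n_group, int n_window,
                                    int n_distinct, int n_sort)
{
    if (n_group > 0)
        return "GROUP BY order";
    if (n_window > 0)
        return "first window's order";
    if (n_distinct > n_sort)
        return "DISTINCT order";
    if (n_sort > 0)
        return "ORDER BY order";
    return "no particular order";
}

int main(void)
{
    /* e.g. SELECT DISTINCT a, b ... ORDER BY a: DISTINCT is more rigorous */
    printf("%s\n", pick_query_order(0, 0, 2, 1));
    /* e.g. a windowed query with no GROUP BY */
    printf("%s\n", pick_query_order(0, 3, 0, 1));
    return 0;
}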
3424 
3425 /*
3426  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3427  *
3428  * path_rows: number of output rows from scan/join step
3429  * gd: grouping sets data including list of grouping sets and their clauses
3430  * target_list: target list containing group clause references
3431  *
3432  * If doing grouping sets, we also annotate the gsets data with the estimates
3433  * for each set and each individual rollup list, with a view to later
3434  * determining whether some combination of them could be hashed instead.
3435  */
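 /*
  * For example, for GROUP BY GROUPING SETS ((a), (a, b), ()) we estimate the
  * number of (a) groups, the number of (a, b) groups, and one group for the
  * empty set, and return their sum; the per-set estimates are also recorded
  * back into gd for the later sort-versus-hash decisions.
  */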
3436 static double
3437 get_number_of_groups(PlannerInfo *root,
3438  double path_rows,
3439  grouping_sets_data *gd,
3440  List *target_list)
3441 {
3442  Query *parse = root->parse;
3443  double dNumGroups;
3444 
3445  if (parse->groupClause)
3446  {
3447  List *groupExprs;
3448 
3449  if (parse->groupingSets)
3450  {
3451  /* Add up the estimates for each grouping set */
3452  ListCell *lc;
3453 
3454  Assert(gd); /* keep Coverity happy */
3455 
3456  dNumGroups = 0;
3457 
3458  foreach(lc, gd->rollups)
3459  {
3460  RollupData *rollup = lfirst_node(RollupData, lc);
3461  ListCell *lc2;
3462  ListCell *lc3;
3463 
3464  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3465  target_list);
3466 
3467  rollup->numGroups = 0.0;
3468 
3469  forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3470  {
3471  List *gset = (List *) lfirst(lc2);
3472  GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3473  double numGroups = estimate_num_groups(root,
3474  groupExprs,
3475  path_rows,
3476  &gset,
3477  NULL);
3478 
3479  gs->numGroups = numGroups;
3480  rollup->numGroups += numGroups;
3481  }
3482 
3483  dNumGroups += rollup->numGroups;
3484  }
3485 
3486  if (gd->hash_sets_idx)
3487  {
3488  ListCell *lc2;
3489 
3490  gd->dNumHashGroups = 0;
3491 
3492  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3493  target_list);
3494 
3495  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3496  {
3497  List *gset = (List *) lfirst(lc);
3498  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3499  double numGroups = estimate_num_groups(root,
3500  groupExprs,
3501  path_rows,
3502  &gset,
3503  NULL);
3504 
3505  gs->numGroups = numGroups;
3506  gd->dNumHashGroups += numGroups;
3507  }
3508 
3509  dNumGroups += gd->dNumHashGroups;
3510  }
3511  }
3512  else
3513  {
3514  /* Plain GROUP BY */
3515  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3516  target_list);
3517 
3518  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3519  NULL, NULL);
3520  }
3521  }
3522  else if (parse->groupingSets)
3523  {
3524  /* Empty grouping sets ... one result row for each one */
3525  dNumGroups = list_length(parse->groupingSets);
3526  }
3527  else if (parse->hasAggs || root->hasHavingQual)
3528  {
3529  /* Plain aggregation, one result row */
3530  dNumGroups = 1;
3531  }
3532  else
3533  {
3534  /* Not grouping */
3535  dNumGroups = 1;
3536  }
3537 
3538  return dNumGroups;
3539 }
3540 
3541 /*
3542  * create_grouping_paths
3543  *
3544  * Build a new upperrel containing Paths for grouping and/or aggregation.
3545  * Along the way, we also build an upperrel for Paths which are partially
3546  * grouped and/or aggregated. A partially grouped and/or aggregated path
3547  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3548  * the only partially grouped paths we build are also partial paths; that
3549  * is, they need a Gather and then a FinalizeAggregate.
3550  *
3551  * input_rel: contains the source-data Paths
3552  * target: the pathtarget for the result Paths to compute
3553  * gd: grouping sets data including list of grouping sets and their clauses
3554  *
3555  * Note: all Paths in input_rel are expected to return the target computed
3556  * by make_group_input_target.
3557  */
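 /*
  * In plan form, the partially grouped paths described above typically end
  * up looking roughly like
  *		Finalize HashAggregate
  *		  -> Gather
  *			   -> Partial HashAggregate
  *					-> Parallel Seq Scan
  * (or the GroupAggregate/Sort equivalents), while fully grouped paths place
  * a single Aggregate node directly atop the input path.
  */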
3558 static RelOptInfo *
3559 create_grouping_paths(PlannerInfo *root,
3560  RelOptInfo *input_rel,
3561  PathTarget *target,
3562  bool target_parallel_safe,
3563  grouping_sets_data *gd)
3564 {
3565  Query *parse = root->parse;
3566  RelOptInfo *grouped_rel;
3567  RelOptInfo *partially_grouped_rel;
3568  AggClauseCosts agg_costs;
3569 
3570  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3571  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3572 
3573  /*
3574  * Create grouping relation to hold fully aggregated grouping and/or
3575  * aggregation paths.
3576  */
3577  grouped_rel = make_grouping_rel(root, input_rel, target,
3578  target_parallel_safe, parse->havingQual);
3579 
3580  /*
3581  * Create either paths for a degenerate grouping or paths for ordinary
3582  * grouping, as appropriate.
3583  */
3584  if (is_degenerate_grouping(root))
3585  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3586  else
3587  {
3588  int flags = 0;
3589  GroupPathExtraData extra;
3590 
3591  /*
3592  * Determine whether it's possible to perform sort-based
3593  * implementations of grouping. (Note that if groupClause is empty,
3594  * grouping_is_sortable() is trivially true, and all the
3595  * pathkeys_contained_in() tests will succeed too, so that we'll
3596  * consider every surviving input path.)
3597  *
3598  * If we have grouping sets, we might be able to sort some but not all
3599  * of them; in this case, we need can_sort to be true as long as we
3600  * must consider any sorted-input plan.
3601  */
3602  if ((gd && gd->rollups != NIL)
3603  || grouping_is_sortable(parse->groupClause))
3604  flags |= GROUPING_CAN_USE_SORT;
3605 
3606  /*
3607  * Determine whether we should consider hash-based implementations of
3608  * grouping.
3609  *
3610  * Hashed aggregation only applies if we're grouping. If we have
3611  * grouping sets, some groups might be hashable but others not; in
3612  * this case we set can_hash true as long as there is nothing globally
3613  * preventing us from hashing (and we should therefore consider plans
3614  * with hashes).
3615  *
3616  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3617  * BY aggregates. (Doing so would imply storing *all* the input
3618  * values in the hash table, and/or running many sorts in parallel,
3619  * either of which seems like a certain loser.) We similarly don't
3620  * support ordered-set aggregates in hashed aggregation, but that case
3621  * is also included in the numOrderedAggs count.
3622  *
3623  * Note: grouping_is_hashable() is much more expensive to check than
3624  * the other gating conditions, so we want to do it last.
3625  */
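 /*
  * For example, "SELECT a, count(DISTINCT b) FROM tab GROUP BY a" has
  * numOrderedAggs > 0, so GROUPING_CAN_USE_HASH is not set and only
  * sort-based grouping is considered for that query.
  */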
3626  if ((parse->groupClause != NIL &&
3627  root->numOrderedAggs == 0 &&
3628  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
3629  flags |= GROUPING_CAN_USE_HASH;
3630 
3631  /*
3632  * Determine whether partial aggregation is possible.
3633  */
3634  if (can_partial_agg(root))
3635  flags |= GROUPING_CAN_PARTIAL_AGG;
3636 
3637  extra.flags = flags;
3638  extra.target_parallel_safe = target_parallel_safe;
3639  extra.havingQual = parse->havingQual;
3640  extra.targetList = parse->targetList;
3641  extra.partial_costs_set = false;
3642 
3643  /*
3644  * Determine whether partitionwise aggregation is in theory possible.
3645  * It can be disabled by the user, and for now, we don't try to
3646  * support grouping sets. create_ordinary_grouping_paths() will check
3647  * additional conditions, such as whether input_rel is partitioned.
3648  */
3649  if (enable_partitionwise_aggregate && !parse->groupingSets)
3650  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3651  else
3652  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3653 
3654  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3655  &agg_costs, gd, &extra,
3656  &partially_grouped_rel);
3657  }
3658 
3659  set_cheapest(grouped_rel);
3660  return grouped_rel;
3661 }
3662 
3663 /*
3664  * make_grouping_rel
3665  *
3666  * Create a new grouping rel and set basic properties.
3667  *
3668  * input_rel represents the underlying scan/join relation.
3669  * target is the output expected from the grouping relation.
3670  */
3671 static RelOptInfo *
3672 make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3673  PathTarget *target, bool target_parallel_safe,
3674  Node *havingQual)
3675 {
3676  RelOptInfo *grouped_rel;
3677 
3678  if (IS_OTHER_REL(input_rel))
3679  {
3680  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3681  input_rel->relids);
3682  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3683  }
3684  else
3685  {
3686  /*
3687  * By tradition, the relids set for the main grouping relation is
3688  * NULL. (This could be changed, but might require adjustments
3689  * elsewhere.)
3690  */
3691  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3692  }
3693 
3694  /* Set target. */
3695  grouped_rel->reltarget = target;
3696 
3697  /*
3698  * If the input relation is not parallel-safe, then the grouped relation
3699  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3700  * target list and HAVING quals are parallel-safe.
3701  */
3702  if (input_rel->consider_parallel && target_parallel_safe &&
3703  is_parallel_safe(root, (Node *) havingQual))
3704  grouped_rel->consider_parallel = true;
3705 
3706  /*
3707  * If the input rel belongs to a single FDW, so does the grouped rel.
3708  */
3709  grouped_rel->serverid = input_rel->serverid;
3710  grouped_rel->userid = input_rel->userid;
3711  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3712  grouped_rel->fdwroutine = input_rel->fdwroutine;
3713 
3714  return grouped_rel;
3715 }
3716 
3717 /*
3718  * is_degenerate_grouping
3719  *
3720  * A degenerate grouping is one in which the query has a HAVING qual and/or
3721  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3722  * grouping sets are all empty).
3723  */
3724 static bool
3725 is_degenerate_grouping(PlannerInfo *root)
3726 {
3727  Query *parse = root->parse;
3728 
3729  return (root->hasHavingQual || parse->groupingSets) &&
3730  !parse->hasAggs && parse->groupClause == NIL;
3731 }
3732 
3733 /*
3734  * create_degenerate_grouping_paths
3735  *
3736  * When the grouping is degenerate (see is_degenerate_grouping), we are
3737  * supposed to emit either zero or one row for each grouping set depending on
3738  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3739  * either HAVING or the targetlist, so we actually do not need the FROM table
3740  * at all! We can just throw away the plan-so-far and generate a Result node.
3741  * This is a sufficiently unusual corner case that it's not worth contorting
3742  * the structure of this module to avoid having to generate the earlier paths
3743  * in the first place.
3744  */
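 /*
  * A minimal example of such a query is
  *		SELECT 1 FROM tab HAVING 1 = 0;
  * which must return zero rows (or one row, had the HAVING been true) no
  * matter what tab contains, so a simple Result node suffices.
  */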
3745 static void
3746 create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3747  RelOptInfo *grouped_rel)
3748 {
3749  Query *parse = root->parse;
3750  int nrows;
3751  Path *path;
3752 
3753  nrows = list_length(parse->groupingSets);
3754  if (nrows > 1)
3755  {
3756  /*
3757  * Doesn't seem worthwhile writing code to cons up a generate_series
3758  * or a values scan to emit multiple rows. Instead just make N clones
3759  * and append them. (With a volatile HAVING clause, this means you
3760  * might get between 0 and N output rows. Offhand I think that's
3761  * desired.)
3762  */
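 /*
  * For example, "SELECT 1 FROM tab GROUP BY GROUPING SETS ((), ())" takes
  * this branch: two Result paths are appended, yielding one output row per
  * (empty) grouping set.
  */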
3763  List *paths = NIL;
3764 
3765  while (--nrows >= 0)
3766  {
3767  path = (Path *)
3768  create_group_result_path(root, grouped_rel,
3769  grouped_rel->reltarget,
3770  (List *) parse->havingQual);
3771  paths = lappend(paths, path);
3772  }
3773  path = (Path *)
3774  create_append_path(root,
3775  grouped_rel,
3776  paths,
3777  NIL,
3778  NIL,
3779  NULL,
3780  0,
3781  false,
3782  -1);
3783  }
3784  else
3785  {
3786  /* No grouping sets, or just one, so one output row */
3787  path = (Path *)
3788  create_group_result_path(root, grouped_rel,
3789  grouped_rel->reltarget,
3790  (List *) parse->havingQual);
3791  }
3792 
3793  add_path(grouped_rel, path);
3794 }
3795 
3796 /*
3797  * create_ordinary_grouping_paths
3798  *
3799  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3800  *
3801  * We need to consider sorted and hashed aggregation in the same function,
3802  * because otherwise (1) it would be harder to throw an appropriate error
3803  * message if neither way works, and (2) we should not allow hashtable size
3804  * considerations to dissuade us from using hashing if sorting is not possible.
3805  *
3806  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3807  * function creates, or to NULL if it doesn't create one.
3808  */
3809 static void
3810 create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3811  RelOptInfo *grouped_rel,
3812  const AggClauseCosts *agg_costs,
3813  grouping_sets_data *gd,
3814  GroupPathExtraData *extra,
3815  RelOptInfo **partially_grouped_rel_p)
3816 {
3817  Path *cheapest_path = input_rel->cheapest_total_path;
3818  RelOptInfo *partially_grouped_rel = NULL;
3819  double dNumGroups;
3820  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3821 
3822  /*
3823  * If this is the topmost grouping relation or if the parent relation is
3824  * doing some form of partitionwise aggregation, then we may be able to do
3825  * it at this level also. However, if the input relation is not
3826  * partitioned, partitionwise aggregate is impossible.
3827  */
3828  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3829  IS_PARTITIONED_REL(input_rel))
3830  {
3831  /*
3832  * If this is the topmost relation or if the parent relation is doing
3833  * full partitionwise aggregation, then we can do full partitionwise
3834  * aggregation provided that the GROUP BY clause contains all of the
3835  * partitioning columns at this level. Otherwise, we can do at most
3836  * partial partitionwise aggregation. But if partial aggregation is
3837  * not supported in general then we can't use it for partitionwise
3838  * aggregation either.
3839  */
3840  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3841  group_by_has_partkey(input_rel, extra->targetList,
3842  root->parse->groupClause))
3843  patype = PARTITIONWISE_AGGREGATE_FULL;
3844  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3845  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3846  else
3847  patype = PARTITIONWISE_AGGREGATE_NONE;
3848  }
3849 
3850  /*
3851  * Before generating paths for grouped_rel, we first generate any possible
3852  * partially grouped paths; that way, later code can easily consider both
3853  * parallel and non-parallel approaches to grouping.
3854  */
3855  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3856  {
3857  bool force_rel_creation;
3858 
3859  /*
3860  * If we're doing partitionwise aggregation at this level, force
3861  * creation of a partially_grouped_rel so we can add partitionwise
3862  * paths to it.
3863  */
3864  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3865 
3866  partially_grouped_rel =
3867  create_partial_grouping_paths(root,
3868  grouped_rel,
3869  input_rel,
3870  gd,
3871  extra,
3872  force_rel_creation);
3873  }
3874 
3875  /* Set out parameter. */
3876  *partially_grouped_rel_p = partially_grouped_rel;
3877 
3878  /* Apply partitionwise aggregation technique, if possible. */
3879  if (patype != PARTITIONWISE_AGGREGATE_NONE)
3880  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3881  partially_grouped_rel, agg_costs,
3882  gd, patype, extra);
3883 
3884  /* If we are doing partial aggregation only, return. */
3885  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3886  {
3887  Assert(partially_grouped_rel);
3888 
3889  if (partially_grouped_rel->pathlist)
3890  set_cheapest(partially_grouped_rel);
3891 
3892  return;
3893  }
3894 
3895  /* Gather any partially grouped partial paths. */
3896  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
3897  {
3898  gather_grouping_paths(root, partially_grouped_rel);
3899  set_cheapest(partially_grouped_rel);
3900  }
3901 
3902  /*
3903  * Estimate number of groups.
3904  */
3905  dNumGroups = get_number_of_groups(root,
3906  cheapest_path->rows,
3907  gd,
3908  extra->targetList);
3909 
3910  /* Build final grouping paths */
3911  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
3912  partially_grouped_rel, agg_costs, gd,
3913  dNumGroups, extra);
3914 
3915  /* Give a helpful error if we failed to find any implementation */
3916  if (grouped_rel->pathlist == NIL)
3917  ereport(ERROR,
3918  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3919  errmsg("could not implement GROUP BY"),
3920  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
3921 
3922  /*
3923  * If there is an FDW that's responsible for all baserels of the query,
3924  * let it consider adding ForeignPaths.
3925  */
3926  if (grouped_rel->fdwroutine &&
3927  grouped_rel->fdwroutine->GetForeignUpperPaths)
3928  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
3929  input_rel, grouped_rel,
3930  extra);
3931 
3932  /* Let extensions possibly add some more paths */
3933  if (create_upper_paths_hook)
3934  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
3935  input_rel, grouped_rel,
3936  extra);
3937 }
3938 
3939 /*
3940  * For a given input path, consider the possible ways of doing grouping sets on
3941  * it, by combinations of hashing and sorting. This can be called multiple
3942  * times, so it's important that it not scribble on input. No result is
3943  * returned, but any generated paths are added to grouped_rel.
3944  */
3945 static void
3946 consider_groupingsets_paths(PlannerInfo *root,
3947  RelOptInfo *grouped_rel,
3948  Path *path,
3949  bool is_sorted,
3950  bool can_hash,
3951  grouping_sets_data *gd,
3952  const AggClauseCosts *agg_costs,
3953  double dNumGroups)
3954 {
3955  Query *parse = root->parse;
3956  Size hash_mem_limit = get_hash_memory_limit();
3957 
3958  /*
3959  * If we're not being offered sorted input, then only consider plans that
3960  * can be done entirely by hashing.
3961  *
3962  * We can hash everything if it looks like it'll fit in hash_mem. But if
3963  * the input is actually sorted despite not being advertised as such, we
3964  * prefer to make use of that in order to use less memory.
3965  *
3966  * If none of the grouping sets are sortable, then ignore the hash_mem
3967  * limit and generate a path anyway, since otherwise we'll just fail.
3968  */
3969  if (!is_sorted)
3970  {
3971  List *new_rollups = NIL;
3972  RollupData *unhashed_rollup = NULL;
3973  List *sets_data;
3974  List *empty_sets_data = NIL;
3975  List *empty_sets = NIL;
3976  ListCell *lc;
3977  ListCell *l_start = list_head(gd->rollups);
3978  AggStrategy strat = AGG_HASHED;
3979  double hashsize;
3980  double exclude_groups = 0.0;
3981 
3982  Assert(can_hash);
3983 
3984  /*
3985  * If the input is coincidentally sorted usefully (which can happen
3986  * even if is_sorted is false, since that only means that our caller
3987  * has set up the sorting for us), then save some hashtable space by
3988  * making use of that. But we need to watch out for degenerate cases:
3989  *
3990  * 1) If there are any empty grouping sets, then group_pathkeys might
3991  * be NIL if all non-empty grouping sets are unsortable. In this case,
3992  * there will be a rollup containing only empty groups, and the
3993  * pathkeys_contained_in test is vacuously true; this is ok.
3994  *
3995  * XXX: the above relies on the fact that group_pathkeys is generated
3996  * from the first rollup. If we add the ability to consider multiple
3997  * sort orders for grouping input, this assumption might fail.
3998  *
3999  * 2) If there are no empty sets and only unsortable sets, then the
4000  * rollups list will be empty (and thus l_start == NULL), and
4001  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4002  * pathkeys_contained_in test doesn't cause us to crash.
4003  */
4004  if (l_start != NULL &&
4005  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4006  {
4007  unhashed_rollup = lfirst_node(RollupData, l_start);
4008  exclude_groups = unhashed_rollup->numGroups;
4009  l_start = lnext(gd->rollups, l_start);
4010  }
4011 
4012  hashsize = estimate_hashagg_tablesize(root,
4013  path,
4014  agg_costs,
4015  dNumGroups - exclude_groups);
4016 
4017  /*
4018  * gd->rollups is empty if we have only unsortable columns to work
4019  * with. Override hash_mem in that case; otherwise, we'll rely on the
4020  * sorted-input case to generate usable mixed paths.
4021  */
4022  if (hashsize > hash_mem_limit && gd->rollups)
4023  return; /* nope, won't fit */
4024 
4025  /*
4026  * We need to burst the existing rollups list into individual grouping
4027  * sets and recompute a groupClause for each set.
4028  */
4029  sets_data = list_copy(gd->unsortable_sets);
4030 
4031  for_each_cell(lc, gd->rollups, l_start)
4032  {
4033  RollupData *rollup = lfirst_node(RollupData, lc);
4034 
4035  /*
4036  * If we find an unhashable rollup that's not been skipped by the
4037  * "actually sorted" check above, we can't cope; we'd need sorted
4038  * input (with a different sort order) but we can't get that here.
4039  * So bail out; we'll get a valid path from the is_sorted case
4040  * instead.
4041  *
4042  * The mere presence of empty grouping sets doesn't make a rollup
4043  * unhashable (see preprocess_grouping_sets); we handle those
4044  * specially below.
4045  */
4046  if (!rollup->hashable)
4047  return;
4048 
4049  sets_data = list_concat(sets_data, rollup->gsets_data);
4050  }
4051  foreach(lc, sets_data)
4052  {
4053  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4054  List *gset = gs->set;
4055  RollupData *rollup;
4056 
4057  if (gset == NIL)
4058  {
4059  /* Empty grouping sets can't be hashed. */
4060  empty_sets_data = lappend(empty_sets_data, gs);
4061  empty_sets = lappend(empty_sets, NIL);
4062  }
4063  else
4064  {
4065  rollup = makeNode(RollupData);
4066 
4067  rollup->groupClause = preprocess_groupclause(root, gset);
4068  rollup->gsets_data = list_make1(gs);
4069  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4070  rollup->gsets_data,
4071  gd->tleref_to_colnum_map);
4072  rollup->numGroups = gs->numGroups;
4073  rollup->hashable = true;
4074  rollup->is_hashed = true;
4075  new_rollups = lappend(new_rollups, rollup);
4076  }
4077  }
4078 
4079  /*
4080  * If we didn't find anything nonempty to hash, then bail. We'll
4081  * generate a path from the is_sorted case.
4082  */
4083  if (new_rollups == NIL)
4084  return;
4085 
4086  /*
4087  * If there were empty grouping sets they should have been in the
4088  * first rollup.
4089  */
4090  Assert(!unhashed_rollup || !empty_sets);
4091 
4092  if (unhashed_rollup)
4093  {
4094  new_rollups = lappend(new_rollups, unhashed_rollup);
4095  strat = AGG_MIXED;
4096  }
4097  else if (empty_sets)
4098  {
4099  RollupData *rollup = makeNode(RollupData);
4100 
4101  rollup->groupClause = NIL;
4102  rollup->gsets_data = empty_sets_data;
4103  rollup->gsets = empty_sets;
4104  rollup->numGroups = list_length(empty_sets);
4105  rollup->hashable = false;
4106  rollup->is_hashed = false;
4107  new_rollups = lappend(new_rollups, rollup);
4108  strat = AGG_MIXED;
4109  }
4110 
4111  add_path(grouped_rel, (Path *)
4112  create_groupingsets_path(root,
4113  grouped_rel,
4114  path,
4115  (List *) parse->havingQual,
4116  strat,
4117  new_rollups,
4118  agg_costs));
4119  return;
4120  }
4121 
4122  /*
4123  * If we have sorted input but nothing we can do with it, bail.
4124  */
4125  if (gd->rollups == NIL)
4126  return;
4127 
4128  /*
4129  * Given sorted input, we try and make two paths: one sorted and one mixed
4130  * sort/hash. (We need to try both because hashagg might be disabled, or
4131  * some columns might not be sortable.)
4132  *
4133  * can_hash is passed in as false if some obstacle elsewhere (such as
4134  * ordered aggs) means that we shouldn't consider hashing at all.
4135  */
4136  if (can_hash && gd->any_hashable)
4137  {
4138  List *rollups = NIL;
4139  List *hash_sets = list_copy(gd->unsortable_sets);
4140  double availspace = hash_mem_limit;
4141  ListCell *lc;
4142 
4143  /*
4144  * Account first for space needed for groups we can't sort at all.
4145  */
4146  availspace -= estimate_hashagg_tablesize(root,
4147  path,
4148  agg_costs,
4149  gd->dNumHashGroups);
4150 
4151  if (availspace > 0 && list_length(gd->rollups) > 1)
4152  {
4153  double scale;
4154  int num_rollups = list_length(gd->rollups);
4155  int k_capacity;
4156  int *k_weights = palloc(num_rollups * sizeof(int));
4157  Bitmapset *hash_items = NULL;
4158  int i;
4159 
4160  /*
4161  * We treat this as a knapsack problem: the knapsack capacity
4162  * represents hash_mem, the item weights are the estimated memory
4163  * usage of the hashtables needed to implement a single rollup,
4164  * and we really ought to use the cost saving as the item value;
4165  * however, currently the costs assigned to sort nodes don't
4166  * reflect the comparison costs well, and so we treat all items as
4167  * of equal value (each rollup we hash instead saves us one sort).
4168  *
4169  * To use the discrete knapsack, we need to scale the values to a
4170  * reasonably small bounded range. We choose to allow a 5% error
4171  * margin; we have no more than 4096 rollups in the worst possible
4172  * case, which with a 5% error margin will require a bit over 42MB
4173  * of workspace. (Anyone wanting to plan queries that complex had
4174  * better have the memory for it. In more reasonable cases, with
4175  * no more than a couple of dozen rollups, the memory usage will
4176  * be negligible.)
4177  *
4178  * k_capacity is naturally bounded, but we clamp the values for
4179  * scale and weight (below) to avoid overflows or underflows (or
4180  * uselessly trying to use a scale factor less than 1 byte).
4181  */
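 /*
  * As a rough worked example (numbers are illustrative only): with
  * availspace = 4MB and num_rollups = 3, scale becomes
  * Max(4194304 / 60.0, 1.0) ~= 69905 bytes and k_capacity is 60, i.e. the
  * knapsack is solved over about 20 * num_rollups discrete size units.
  */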
4182  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4183  k_capacity = (int) floor(availspace / scale);
4184 
4185  /*
4186  * We leave the first rollup out of consideration since it's the
4187  * one that matches the input sort order. We assign indexes "i"
4188  * to only those entries considered for hashing; the second loop,
4189  * below, must use the same condition.
4190  */
4191  i = 0;
4192  for_each_from(lc, gd->rollups, 1)
4193  {
4194  RollupData *rollup = lfirst_node(RollupData, lc);
4195 
4196  if (rollup->hashable)
4197  {
4198  double sz = estimate_hashagg_tablesize(root,
4199  path,
4200  agg_costs,
4201  rollup->numGroups);
4202 
4203  /*
4204  * If sz is enormous, but hash_mem (and hence scale) is
4205  * small, avoid integer overflow here.
4206  */
4207  k_weights[i] = (int) Min(floor(sz / scale),
4208  k_capacity + 1.0);
4209  ++i;
4210  }
4211  }
4212 
4213  /*
4214  * Apply knapsack algorithm; compute the set of items which
4215  * maximizes the value stored (in this case the number of sorts
4216  * saved) while keeping the total size (approximately) within
4217  * capacity.
4218  */
4219  if (i > 0)
4220  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4221 
4222  if (!bms_is_empty(hash_items))
4223  {
4224  rollups = list_make1(linitial(gd->rollups));
4225 
4226  i = 0;
4227  for_each_from(lc, gd->rollups, 1)
4228  {
4229  RollupData *rollup = lfirst_node(RollupData, lc);
4230 
4231  if (rollup->hashable)
4232  {
4233  if (bms_is_member(i, hash_items))
4234  hash_sets = list_concat(hash_sets,
4235  rollup->gsets_data);
4236  else
4237  rollups = lappend(rollups, rollup);
4238  ++i;
4239  }
4240  else
4241  rollups = lappend(rollups, rollup);
4242  }
4243  }
4244  }
4245 
4246  if (!rollups && hash_sets)
4247  rollups = list_copy(gd->rollups);
4248 
4249  foreach(lc, hash_sets)
4250  {
4251  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4252  RollupData *rollup = makeNode(RollupData);
4253 
4254  Assert(gs->set != NIL);
4255 
4256  rollup->groupClause = preprocess_groupclause(root, gs->set);
4257  rollup->gsets_data = list_make1(gs);
4258  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4259  rollup->gsets_data,
4260  gd->tleref_to_colnum_map);
4261  rollup->numGroups = gs->numGroups;
4262  rollup->hashable = true;
4263  rollup->is_hashed = true;
4264  rollups = lcons(rollup, rollups);
4265  }
4266 
4267  if (rollups)
4268  {
4269  add_path(grouped_rel, (Path *)
4270  create_groupingsets_path(root,
4271  grouped_rel,
4272  path,
4273  (List *) parse->havingQual,
4274  AGG_MIXED,
4275  rollups,
4276  agg_costs));
4277  }
4278  }
4279 
4280  /*
4281  * Now try the simple sorted case.
4282  */
4283  if (!gd->unsortable_sets)
4284  add_path(grouped_rel, (Path *)
4285  create_groupingsets_path(root,
4286  grouped_rel,
4287  path,
4288  (List *) parse->havingQual,
4289  AGG_SORTED,
4290  gd->rollups,
4291  agg_costs));
4292 }
4293 
4294 /*
4295  * create_window_paths
4296  *
4297  * Build a new upperrel containing Paths for window-function evaluation.
4298  *
4299  * input_rel: contains the source-data Paths
4300  * input_target: result of make_window_input_target
4301  * output_target: what the topmost WindowAggPath should return
4302  * wflists: result of find_window_functions
4303  * activeWindows: result of select_active_windows
4304  *
4305  * Note: all Paths in input_rel are expected to return input_target.
4306  */
4307 static RelOptInfo *
4308 create_window_paths(PlannerInfo *root,
4309  RelOptInfo *input_rel,
4310  PathTarget *input_target,
4311  PathTarget *output_target,
4312  bool output_target_parallel_safe,
4313  WindowFuncLists *wflists,
4314  List *activeWindows)
4315 {
4316  RelOptInfo *window_rel;
4317  ListCell *lc;
4318 
4319  /* For now, do all work in the (WINDOW, NULL) upperrel */
4320  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4321 
4322  /*
4323  * If the input relation is not parallel-safe, then the window relation
4324  * can't be parallel-safe, either. Otherwise, we need to examine the
4325  * target list and active windows for non-parallel-safe constructs.
4326  */
4327  if (input_rel->consider_parallel && output_target_parallel_safe &&
4328  is_parallel_safe(root, (Node *) activeWindows))
4329  window_rel->consider_parallel = true;
4330 
4331  /*
4332  * If the input rel belongs to a single FDW, so does the window rel.
4333  */
4334  window_rel->serverid = input_rel->serverid;
4335  window_rel->userid = input_rel->userid;
4336  window_rel->useridiscurrent = input_rel->useridiscurrent;
4337  window_rel->fdwroutine = input_rel->fdwroutine;
4338 
4339  /*
4340  * Consider computing window functions starting from the existing
4341  * cheapest-total path (which will likely require a sort) as well as any
4342  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4343  */
4344  foreach(lc, input_rel->pathlist)
4345  {
4346  Path *path = (Path *) lfirst(lc);
4347  int presorted_keys;
4348 
4349  if (path == input_rel->cheapest_total_path ||
4350  pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4351  &presorted_keys) ||
4352  presorted_keys > 0)
4353  create_one_window_path(root,
4354  window_rel,
4355  path,
4356  input_target,
4357  output_target,
4358  wflists,
4359  activeWindows);
4360  }
4361 
4362  /*
4363  * If there is an FDW that's responsible for all baserels of the query,
4364  * let it consider adding ForeignPaths.
4365  */
4366  if (window_rel->fdwroutine &&
4367  window_rel->fdwroutine->GetForeignUpperPaths)
4368  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4369  input_rel, window_rel,
4370  NULL);
4371 
4372  /* Let extensions possibly add some more paths */
4373  if (create_upper_paths_hook)
4374  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4375  input_rel, window_rel, NULL);
4376 
4377  /* Now choose the best path(s) */
4378  set_cheapest(window_rel);
4379 
4380  return window_rel;
4381 }
4382 
4383 /*
4384  * Stack window-function implementation steps atop the given Path, and
4385  * add the result to window_rel.
4386  *
4387  * window_rel: upperrel to contain result
4388  * path: input Path to use (must return input_target)
4389  * input_target: result of make_window_input_target
4390  * output_target: what the topmost WindowAggPath should return
4391  * wflists: result of find_window_functions
4392  * activeWindows: result of select_active_windows
4393  */
4394 static void
4395 create_one_window_path(PlannerInfo *root,
4396  RelOptInfo *window_rel,
4397  Path *path,
4398  PathTarget *input_target,
4399  PathTarget *output_target,
4400  WindowFuncLists *wflists,
4401  List *activeWindows)
4402 {
4403  PathTarget *window_target;
4404  ListCell *l;
4405  List *topqual = NIL;
4406 
4407  /*
4408  * Since each window clause could require a different sort order, we stack
4409  * up a WindowAgg node for each clause, with sort steps between them as
4410  * needed. (We assume that select_active_windows chose a good order for
4411  * executing the clauses in.)
4412  *
4413  * input_target should contain all Vars and Aggs needed for the result.
4414  * (In some cases we wouldn't need to propagate all of these all the way
4415  * to the top, since they might only be needed as inputs to WindowFuncs.
4416  * It's probably not worth trying to optimize that though.) It must also
4417  * contain all window partitioning and sorting expressions, to ensure
4418  * they're computed only once at the bottom of the stack (that's critical
4419  * for volatile functions). As we climb up the stack, we'll add outputs
4420  * for the WindowFuncs computed at each level.
4421  */
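 /*
  * For instance, with two active windows
  *		w1 AS (PARTITION BY a ORDER BY b) and w2 AS (PARTITION BY a)
  * we stack one WindowAgg per clause; the window sorted on (a, b) can be
  * evaluated first so that the WindowAgg stacked above it, needing only (a),
  * requires no additional sort.
  */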
4422  window_target = input_target;
4423 
4424  foreach(l, activeWindows)
4425  {
4426  WindowClause *wc = lfirst_node(WindowClause, l);
4427  List *window_pathkeys;
4428  int presorted_keys;
4429  bool is_sorted;
4430  bool topwindow;
4431 
4432  window_pathkeys = make_pathkeys_for_window(root,
4433  wc,
4434  root->processed_tlist);
4435 
4436  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4437  path->pathkeys,
4438  &presorted_keys);
4439 
4440  /* Sort if necessary */
4441  if (!is_sorted)
4442  {
4443  /*
4444  * No presorted keys or incremental sort disabled, just perform a
4445  * complete sort.
4446  */
4447  if (presorted_keys == 0 || !enable_incremental_sort)
4448  path = (Path *) create_sort_path(root, window_rel,
4449  path,
4450  window_pathkeys,
4451  -1.0);
4452  else
4453  {
4454  /*
4455  * Since we have presorted keys and incremental sort is
4456  * enabled, just use incremental sort.
4457  */
4458  path = (Path *) create_incremental_sort_path(root,
4459  window_rel,
4460  path,
4461  window_pathkeys,
4462  presorted_keys,
4463  -1.0);
4464  }
4465  }
4466 
4467  if (lnext(activeWindows, l))
4468  {
4469  /*
4470  * Add the current WindowFuncs to the output target for this
4471  * intermediate WindowAggPath. We must copy window_target to
4472  * avoid changing the previous path's target.
4473  *
4474  * Note: a WindowFunc adds nothing to the target's eval costs; but
4475  * we do need to account for the increase in tlist width.
4476  */
4477  ListCell *lc2;
4478 
4479  window_target = copy_pathtarget(window_target);
4480  foreach(lc2, wflists->windowFuncs[wc->winref])
4481  {
4482  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4483 
4484  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4485  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4486  }
4487  }
4488  else
4489  {
4490  /* Install the goal target in the topmost WindowAgg */
4491  window_target = output_target;
4492  }
4493 
4494  /* mark the final item in the list as the top-level window */
4495  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4496 
4497  /*
4498  * Accumulate all of the runConditions from each intermediate
4499  * WindowClause. The top-level WindowAgg must pass these as a qual so
4500  * that it filters out unwanted tuples correctly.
4501  */
4502  if (!topwindow)
4503  topqual = list_concat(topqual, wc->runCondition);
4504 
4505  path = (Path *)
4506  create_windowagg_path(root, window_rel, path, window_target,
4507  wflists->windowFuncs[wc->winref],
4508  wc, topwindow ? topqual : NIL, topwindow);
4509  }
4510 
4511  add_path(window_rel, path);
4512 }
4513 
4514 /*
4515  * create_distinct_paths
4516  *
4517  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4518  *
4519  * input_rel: contains the source-data Paths
4520  *
4521  * Note: input paths should already compute the desired pathtarget, since
4522  * Sort/Unique won't project anything.
4523  */
4524 static RelOptInfo *
4525 create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel)
4526 {
4527  RelOptInfo *distinct_rel;
4528 
4529  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4530  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4531 
4532  /*
4533  * We don't compute anything at this level, so distinct_rel will be
4534  * parallel-safe if the input rel is parallel-safe. In particular, if
4535  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4536  * output those expressions, and will not be parallel-safe unless those
4537  * expressions are parallel-safe.
4538  */
4539  distinct_rel->consider_parallel = input_rel->consider_parallel;
4540 
4541  /*
4542  * If the input rel belongs to a single FDW, so does the distinct_rel.
4543  */
4544  distinct_rel->serverid = input_rel->serverid;
4545  distinct_rel->userid = input_rel->userid;
4546  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4547  distinct_rel->fdwroutine = input_rel->fdwroutine;
4548 
4549  /* build distinct paths based on input_rel's pathlist */
4550  create_final_distinct_paths(root, input_rel, distinct_rel);
4551 
4552  /* now build distinct paths based on input_rel's partial_pathlist */
4553  create_partial_distinct_paths(root, input_rel, distinct_rel);
4554 
4555  /* Give a helpful error if we failed to create any paths */
4556  if (distinct_rel->pathlist == NIL)
4557  ereport(ERROR,
4558  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4559  errmsg("could not implement DISTINCT"),
4560  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4561 
4562  /*
4563  * If there is an FDW that's responsible for all baserels of the query,
4564  * let it consider adding ForeignPaths.
4565  */
4566  if (distinct_rel->fdwroutine &&
4567  distinct_rel->fdwroutine->GetForeignUpperPaths)
4568  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4569  UPPERREL_DISTINCT,
4570  input_rel,
4571  distinct_rel,
4572  NULL);
4573 
4574  /* Let extensions possibly add some more paths */
4575  if (create_upper_paths_hook)
4576  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4577  distinct_rel, NULL);
4578 
4579  /* Now choose the best path(s) */
4580  set_cheapest(distinct_rel);
4581 
4582  return distinct_rel;
4583 }
4584 
4585 /*
4586  * create_partial_distinct_paths
4587  *
4588  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4589  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4590  * paths on top and add a final unique/aggregate path to remove any duplicate
4591  * produced from combining rows from parallel workers.
4592  */
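 /*
  * The resulting plans typically look something like
  *		Unique
  *		  -> Gather Merge
  *			   -> Unique
  *					-> Sort
  *						 -> Parallel Seq Scan
  * with hashed-aggregate variants when hashing is enabled and applicable.
  */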
4593 static void
4594 create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4595  RelOptInfo *final_distinct_rel)
4596 {
4597  RelOptInfo *partial_distinct_rel;
4598  Query *parse;
4599  List *distinctExprs;
4600  double numDistinctRows;
4601  Path *cheapest_partial_path;
4602  ListCell *lc;
4603 
4604  /* nothing to do when there are no partial paths in the input rel */
4605  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4606  return;
4607 
4608  parse = root->parse;
4609 
4610  /* can't do parallel DISTINCT ON */
4611  if (parse->hasDistinctOn)
4612  return;
4613 
4614  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4615  NULL);
4616  partial_distinct_rel->reltarget = root->upper_targets[UPPERREL_PARTIAL_DISTINCT];
4617  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4618 
4619  /*
4620  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4621  */
4622  partial_distinct_rel->serverid = input_rel->serverid;
4623  partial_distinct_rel->userid = input_rel->userid;
4624  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4625  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4626 
4627  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4628 
4629  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4630  parse->targetList);
4631 
4632  /* estimate how many distinct rows we'll get from each worker */
4633  numDistinctRows = estimate_num_groups(root, distinctExprs,
4634  cheapest_partial_path->rows,
4635  NULL, NULL);
4636 
4637  /* first try adding unique paths atop of sorted paths */
4638  if (grouping_is_sortable(parse->distinctClause))
4639  {
4640  foreach(lc, input_rel->partial_pathlist)
4641  {
4642  Path *path = (Path *) lfirst(lc);
4643 
4644  if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys))
4645  {
4646  add_partial_path(partial_distinct_rel, (Path *)
4647  create_upper_unique_path(root,
4648  partial_distinct_rel,
4649  path,
4650  list_length(root->distinct_pathkeys),
4651  numDistinctRows));
4652  }
4653  }
4654  }
4655 
4656  /*
4657  * Now try hash aggregate paths, if enabled and hashing is possible. Since
4658  * we're not on the hook to ensure we do our best to create at least one
4659  * path here, we treat enable_hashagg as a hard off-switch rather than the
4660  * slightly softer variant in create_final_distinct_paths.
4661  */
4662  if (enable_hashagg && grouping_is_hashable(parse->distinctClause))
4663  {
4664  add_partial_path(partial_distinct_rel, (Path *)
4665  create_agg_path(root,
4666  partial_distinct_rel,
4667  cheapest_partial_path,
4668  cheapest_partial_path->pathtarget,
4669  AGG_HASHED,
4670  AGGSPLIT_SIMPLE,
4671  parse->distinctClause,
4672  NIL,
4673  NULL,
4674  numDistinctRows));
4675  }
4676 
4677  /*
4678  * If there is an FDW that's responsible for all baserels of the query,
4679  * let it consider adding ForeignPaths.
4680  */
4681  if (partial_distinct_rel->fdwroutine &&
4682  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4683  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4684  UPPERREL_PARTIAL_DISTINCT,
4685  input_rel,
4686  partial_distinct_rel,
4687  NULL);
4688 
4689  /* Let extensions possibly add some more partial paths */
4690  if (create_upper_paths_hook)
4691  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4692  input_rel, partial_distinct_rel, NULL);
4693 
4694  if (partial_distinct_rel->partial_pathlist != NIL)
4695  {
4696  generate_gather_paths(root, partial_distinct_rel, true);
4697  set_cheapest(partial_distinct_rel);
4698 
4699  /*
4700  * Finally, create paths to distinctify the final result. This step
4701  * is needed to remove any duplicates due to combining rows from
4702  * parallel workers.
4703  */
4704  create_final_distinct_paths(root, partial_distinct_rel,
4705  final_distinct_rel);
4706  }
4707 }
4708 
4709 /*
4710  * create_final_distinct_paths
4711  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4712  *
4713  * input_rel: contains the source-data paths
4714  * distinct_rel: destination relation for storing created paths
4715  */
4716 static RelOptInfo *
4717 create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4718  RelOptInfo *distinct_rel)
4719 {
4720  Query *parse = root->parse;
4721  Path *cheapest_input_path = input_rel->cheapest_total_path;
4722  double numDistinctRows;
4723  bool allow_hash;
4724 
4725  /* Estimate number of distinct rows there will be */
4726  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4727  root->hasHavingQual)
4728  {
4729  /*
4730  * If there was grouping or aggregation, use the number of input rows
4731  * as the estimated number of DISTINCT rows (ie, assume the input is
4732  * already mostly unique).
4733  */
4734  numDistinctRows = cheapest_input_path->rows;
4735  }
4736  else
4737  {
4738  /*
4739  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4740  */
4741  List *distinctExprs;
4742 
4743  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4744  parse->targetList);
4745  numDistinctRows = estimate_num_groups(root, distinctExprs,
4746  cheapest_input_path->rows,
4747  NULL, NULL);
4748  }
4749 
4750  /*
4751  * Consider sort-based implementations of DISTINCT, if possible.
4752  */
4753  if (grouping_is_sortable(parse->distinctClause))
4754  {
4755  /*
4756  * First, if we have any adequately-presorted paths, just stick a
4757  * Unique node on those. Then consider doing an explicit sort of the
4758  * cheapest input path and Unique'ing that.
4759  *
4760  * When we have DISTINCT ON, we must sort by the more rigorous of
4761  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4762  * Also, if we do have to do an explicit sort, we might as well use
4763  * the more rigorous ordering to avoid a second sort later. (Note
4764  * that the parser will have ensured that one clause is a prefix of
4765  * the other.)
4766  */
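 /*
  * For example, "SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b" must
  * sort by (a, b), not just (a); otherwise the row chosen for each value of
  * a would be indeterminate.
  */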
4767  List *needed_pathkeys;
4768  Path *path;
4769  ListCell *lc;
4770 
4771  if (parse->hasDistinctOn &&
4772  list_length(root->distinct_pathkeys) <
4773  list_length(root->sort_pathkeys))
4774  needed_pathkeys = root->sort_pathkeys;
4775  else
4776  needed_pathkeys = root->distinct_pathkeys;
4777 
4778  foreach(lc, input_rel->pathlist)
4779  {
4780  path = (Path *) lfirst(lc);
4781 
4782  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4783  {
4784  /*
4785  * distinct_pathkeys may have become empty if all of the
4786  * pathkeys were determined to be redundant. If all of the
4787  * pathkeys are redundant then each DISTINCT target must only
4788  * allow a single value, therefore all resulting tuples must
4789  * be identical (or at least indistinguishable by an equality
4790  * check). We can uniquify these tuples simply by just taking
4791  * the first tuple. All we do here is add a path to do "LIMIT
4792  * 1" atop of 'path'. When doing a DISTINCT ON we may still
4793  * have a non-NIL sort_pathkeys list, so we must still only do
4794  * this with paths which are correctly sorted by
4795  * sort_pathkeys.
4796  */
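 /*
  * For instance, in "SELECT DISTINCT a FROM tab WHERE a = 1" the pathkey
  * for a is known redundant, distinct_pathkeys is NIL, and a LIMIT 1 over
  * any path returns the single correct row.
  */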
4797  if (root->distinct_pathkeys == NIL)
4798  {
4799  Node *limitCount;
4800 
4801  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4802  sizeof(int64),
4803  Int64GetDatum(1), false,
4804  FLOAT8PASSBYVAL);
4805 
4806  /*
4807  * If the query already has a LIMIT clause, then we could
4808  * end up with a duplicate LimitPath in the final plan.
4809  * That does not seem worth troubling over too much.
4810  */
4811  add_path(distinct_rel, (Path *)
4812  create_limit_path(root, distinct_rel, path, NULL,
4813  limitCount, LIMIT_OPTION_COUNT,
4814  0, 1));
4815  }
4816  else
4817  {
4818  add_path(distinct_rel, (Path *)
4819  create_upper_unique_path(root, distinct_rel,
4820  path,
4821  list_length(root->distinct_pathkeys),
4822  numDistinctRows));
4823  }
4824  }
4825  }
4826 
4827  /* For explicit-sort case, always use the more rigorous clause */
4828  if (list_length(root->distinct_pathkeys) <
4829  list_length(root->sort_pathkeys))
4830  {
4831  needed_pathkeys = root->sort_pathkeys;
4832  /* Assert checks that parser didn't mess up... */
4833  Assert(pathkeys_contained_in(root->distinct_pathkeys,
4834  needed_pathkeys));
4835  }
4836  else
4837  needed_pathkeys = root->distinct_pathkeys;
4838 
4839  path = cheapest_input_path;
4840  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4841  path = (Path *) create_sort_path(root, distinct_rel,
4842  path,
4843  needed_pathkeys,
4844  root->distinct_pathkeys == NIL ?
4845  1.0 : -1.0);
4846 
4847  /*
4848  * As above, use a LimitPath instead of a UniquePath when all of the
4849  * distinct_pathkeys are redundant and we're only going to get a
4850  * series of tuples all with the same values anyway.
4851  */
4852  if (root->distinct_pathkeys == NIL)
4853  {
4854  Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4855  sizeof(int64),
4856  Int64GetDatum(1), false,
4857  FLOAT8PASSBYVAL);
4858 
4859  add_path(distinct_rel, (Path *)
4860  create_limit_path(root, distinct_rel, path, NULL,
4861  limitCount, LIMIT_OPTION_COUNT, 0, 1));
4862  }
4863  else
4864  {
4865  add_path(distinct_rel, (Path *)
4866  create_upper_unique_path(root, distinct_rel,
4867  path,
4868  list_length(root->distinct_pathkeys),
4869  numDistinctRows));
4870  }
4871  }
4872 
4873  /*
4874  * Consider hash-based implementations of DISTINCT, if possible.
4875  *
4876  * If we were not able to make any other types of path, we *must* hash or
4877  * die trying. If we do have other choices, there are two things that
4878  * should prevent selection of hashing: if the query uses DISTINCT ON
4879  * (because it won't really have the expected behavior if we hash), or if
4880  * enable_hashagg is off.
4881  *
4882  * Note: grouping_is_hashable() is much more expensive to check than the
4883  * other gating conditions, so we want to do it last.
4884  */
4885  if (distinct_rel->pathlist == NIL)
4886  allow_hash = true; /* we have no alternatives */
4887  else if (parse->hasDistinctOn || !enable_hashagg)
4888  allow_hash = false; /* policy-based decision not to hash */
4889  else
4890  allow_hash = true; /* default */
4891 
4892  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4893  {
4894  /* Generate hashed aggregate path --- no sort needed */
4895  add_path(distinct_rel, (Path *)
4896  create_agg_path(root,
4897  distinct_rel,
4898  cheapest_input_path,
4899  cheapest_input_path->pathtarget,
4900  AGG_HASHED,
4901  AGGSPLIT_SIMPLE,
4902  parse->distinctClause,
4903  NIL,
4904  NULL,
4905  numDistinctRows));
4906  }
4907 
4908  return distinct_rel;
4909 }
4910 
4911 /*
4912  * create_ordered_paths
4913  *
4914  * Build a new upperrel containing Paths for ORDER BY evaluation.
4915  *
4916  * All paths in the result must satisfy the ORDER BY ordering.
4917  * The only new paths we need consider are an explicit full sort
4918  * and incremental sort on the cheapest-total existing path.
4919  *
4920  * input_rel: contains the source-data Paths
4921  * target: the output tlist the result Paths must emit
4922  * limit_tuples: estimated bound on the number of output tuples,
4923  * or -1 if no LIMIT or couldn't estimate
4924  *
4925  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
4926  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
4927  */
4928 static RelOptInfo *
4929 create_ordered_paths(PlannerInfo *root,
4930  RelOptInfo *input_rel,
4931  PathTarget *target,
4932  bool target_parallel_safe,
4933  double limit_tuples)
4934 {
4935  Path *cheapest_input_path = input_rel->cheapest_total_path;
4936  RelOptInfo *ordered_rel;
4937  ListCell *lc;
4938 
4939  /* For now, do all work in the (ORDERED, NULL) upperrel */
4940  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4941 
4942  /*
4943  * If the input relation is not parallel-safe, then the ordered relation
4944  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4945  * target list is parallel-safe.
4946  */
4947  if (input_rel->consider_parallel && target_parallel_safe)
4948  ordered_rel->consider_parallel = true;
4949 
4950  /*
4951  * If the input rel belongs to a single FDW, so does the ordered_rel.
4952  */
4953  ordered_rel->serverid = input_rel->serverid;
4954  ordered_rel->userid = input_rel->userid;
4955  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4956  ordered_rel->fdwroutine = input_rel->fdwroutine;
4957 
4958  foreach(lc, input_rel->pathlist)
4959  {
4960  Path *input_path = (Path *) lfirst(lc);
4961  Path *sorted_path = input_path;
4962  bool is_sorted;
4963  int presorted_keys;
4964 
4965  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
4966  input_path->pathkeys, &presorted_keys);
4967 
4968  if (is_sorted)
4969  {
4970  /* Use the input path as is, but add a projection step if needed */
4971  if (sorted_path->pathtarget != target)
4972  sorted_path = apply_projection_to_path(root, ordered_rel,
4973  sorted_path, target);
4974 
4975  add_path(ordered_rel, sorted_path);
4976  }
4977  else
4978  {
4979  /*
4980  * Try adding an explicit sort, but only to the cheapest total
4981  * path since a full sort should generally add the same cost to
4982  * all paths.
4983  */
4984  if (input_path == cheapest_input_path)
4985  {
4986  /*
4987  * Sort the cheapest input path. An explicit sort here can
4988  * take advantage of LIMIT.
4989  */
4990  sorted_path = (Path *) create_sort_path(root,
4991  ordered_rel,
4992  input_path,
4993  root->sort_pathkeys,
4994  limit_tuples);
4995  /* Add projection step if needed */
4996  if (sorted_path->pathtarget != target)
4997  sorted_path = apply_projection_to_path(root, ordered_rel,
4998  sorted_path, target);
4999 
5000  add_path(ordered_rel, sorted_path);
5001  }
5002 
5003  /*
5004  * If incremental sort is enabled, then try it as well. Unlike
5005  * with regular sorts, we can't just look at the cheapest path,
5006  * because the cost of incremental sort depends on how well
5007  * presorted the path is. Additionally incremental sort may enable
5008  * a cheaper startup path to win out despite higher total cost.
5009  */
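 /*
  * For example, an input path already sorted by (a) can satisfy
  * ORDER BY a, b with an incremental sort on b alone, which may be much
  * cheaper than fully sorting the cheapest path.
  */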
5010  if (!enable_incremental_sort)
5011  continue;
5012 
5013  /* Likewise, if the path can't be used for incremental sort. */
5014  if (!presorted_keys)
5015  continue;
5016 
5017  /* Also consider incremental sort. */
5018  sorted_path = (Path *) create_incremental_sort_path(root,
5019  ordered_rel,
5020  input_path,
5021  root->sort_pathkeys,
5022  presorted_keys,
5023  limit_tuples);
5024 
5025  /* Add projection step if needed */
5026  if (sorted_path->pathtarget != target)
5027  sorted_path = apply_projection_to_path(root, ordered_rel,
5028  sorted_path, target);
5029 
5030  add_path(ordered_rel, sorted_path);
5031  }
5032  }
5033 
5034  /*
5035  * generate_gather_paths() will have already generated a simple Gather
5036  * path for the best parallel path, if any, and the loop above will have
5037  * considered sorting it. Similarly, generate_gather_paths() will also
5038  * have generated order-preserving Gather Merge plans which can be used
5039  * without sorting if they happen to match the sort_pathkeys, and the loop
5040  * above will have handled those as well. However, there's one more
5041  * possibility: it may make sense to sort the cheapest partial path
5042  * according to the required output order and then use Gather Merge.
5043  */
5044  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5045  input_rel->partial_pathlist != NIL)
5046  {
5047  Path *cheapest_partial_path;
5048 
5049  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5050 
5051  /*
5052  * If cheapest partial path doesn't need a sort, this is redundant
5053  * with what's already been tried.
5054  */
5055  if (!pathkeys_contained_in(root->sort_pathkeys,
5056  cheapest_partial_path->pathkeys))
5057  {
5058  Path *path;
5059  double total_groups;
5060 
5061  path = (Path *) create_sort_path(root,
5062  ordered_rel,
5063  cheapest_partial_path,
5064  root->sort_pathkeys,
5065  limit_tuples);
5066 
5067  total_groups = cheapest_partial_path->rows *
5068  cheapest_partial_path->parallel_workers;
5069  path = (Path *)
5070  create_gather_merge_path(root, ordered_rel,
5071  path,
5072  path->pathtarget,
5073  root->sort_pathkeys, NULL,
5074  &total_groups);
5075 
5076  /* Add projection step if needed */
5077  if (path->pathtarget != target)
5078  path = apply_projection_to_path(root, ordered_rel,
5079  path, target);
5080 
5081  add_path(ordered_rel, path);
5082  }
5083 
5084  /*
5085  * Consider incremental sort with a gather merge on partial paths.
5086  *
5087  * We can also skip the entire loop when we only have a single-item
5088  * sort_pathkeys because then we can't possibly have a presorted
5089  * prefix of the list without having the list be fully sorted.
5090  */
5091  if (list_length(root->sort_pathkeys) > 1)
5092  {
5093  foreach(lc, input_rel->partial_pathlist)
5094  {
5095  Path *input_path = (Path *) lfirst(lc);
5096  Path *sorted_path;
5097  bool is_sorted;
5098  int presorted_keys;
5099  double total_groups;
5100 
5101  /*
5102  * We don't care if this is the cheapest partial path - we
5103  * can't simply skip it, because it may be partially sorted in
5104  * which case we want to consider adding incremental sort
5105  * (instead of full sort, which is what happens above).
5106  */
5107 
5108  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5109  input_path->pathkeys,
5110  &presorted_keys);
5111 
5112  /* No point in adding incremental sort on fully sorted paths. */
5113  if (is_sorted)
5114  continue;
5115 
5116  if (presorted_keys == 0)
5117  continue;
5118 
5119  /* Since we have presorted keys, consider incremental sort. */
5120  sorted_path = (Path *) create_incremental_sort_path(root,
5121  ordered_rel,
5122  input_path,
5123  root->sort_pathkeys,
5124  presorted_keys,
5125  limit_tuples);
5126  total_groups = input_path->rows *
5127  input_path->parallel_workers;
5128  sorted_path = (Path *)
5129  create_gather_merge_path(root, ordered_rel,
5130  sorted_path,
5131  sorted_path->pathtarget,
5132  root->sort_pathkeys, NULL,
5133  &total_groups);
5134 
5135  /* Add projection step if needed */
5136  if (sorted_path->pathtarget != target)
5137  sorted_path = apply_projection_to_path(root, ordered_rel,
5138  sorted_path, target);
5139 
5140  add_path(ordered_rel, sorted_path);
5141  }
5142  }
5143  }
5144 
5145  /*
5146  * If there is an FDW that's responsible for all baserels of the query,
5147  * let it consider adding ForeignPaths.
5148  */
5149  if (ordered_rel->fdwroutine &&
5150  ordered_rel->fdwroutine->GetForeignUpperPaths)
5151  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5152  input_rel, ordered_rel,
5153  NULL);
5154 
5155  /* Let extensions possibly add some more paths */
5156  if (create_upper_paths_hook)
5157  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5158  input_rel, ordered_rel, NULL);
5159 
5160  /*
5161  * No need to bother with set_cheapest here; grouping_planner does not
5162  * need us to do it.
5163  */
5164  Assert(ordered_rel->pathlist != NIL);
5165 
5166  return ordered_rel;
5167 }
5168 
5169 
5170 /*
5171  * make_group_input_target
5172  * Generate appropriate PathTarget for initial input to grouping nodes.
5173  *
5174  * If there is grouping or aggregation, the scan/join subplan cannot emit
5175  * the query's final targetlist; for example, it certainly can't emit any
5176  * aggregate function calls. This routine generates the correct target
5177  * for the scan/join subplan.
5178  *
5179  * The query target list passed from the parser already contains entries
5180  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5181  * for variables used only in HAVING clauses; so we need to add those
5182  * variables to the subplan target list. Also, we flatten all expressions
5183  * except GROUP BY items into their component variables; other expressions
5184  * will be computed by the upper plan nodes rather than by the subplan.
5185  * For example, given a query like
5186  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5187  * we want to pass this targetlist to the subplan:
5188  * a+b,c,d
5189  * where the a+b target will be used by the Sort/Group steps, and the
5190  * other targets will be used for computing the final results.
5191  *
5192  * 'final_target' is the query's final target list (in PathTarget form)
5193  *
5194  * The result is the PathTarget to be computed by the Paths returned from
5195  * query_planner().
5196  */
5197 static PathTarget *
5198 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5199 {
5200  Query *parse = root->parse;
5201  PathTarget *input_target;
5202  List *non_group_cols;
5203  List *non_group_vars;
5204  int i;
5205  ListCell *lc;
5206 
5207  /*
5208  * We must build a target containing all grouping columns, plus any other
5209  * Vars mentioned in the query's targetlist and HAVING qual.
5210  */
5211  input_target = create_empty_pathtarget();
5212  non_group_cols = NIL;
5213 
5214  i = 0;
5215  foreach(lc, final_target->exprs)
5216  {
5217  Expr *expr = (Expr *) lfirst(lc);
5218  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5219 
5220  if (sgref && parse->groupClause &&
5221  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5222  {
5223  /*
5224  * It's a grouping column, so add it to the input target as-is.
5225  */
5226  add_column_to_pathtarget(input_target, expr, sgref);
5227  }
5228  else
5229  {
5230  /*
5231  * Non-grouping column, so just remember the expression for later
5232  * call to pull_var_clause.
5233  */
5234  non_group_cols = lappend(non_group_cols, expr);
5235  }
5236 
5237  i++;
5238  }
5239 
5240  /*
5241  * If there's a HAVING clause, we'll need the Vars it uses, too.
5242  */
5243  if (parse->havingQual)
5244  non_group_cols = lappend(non_group_cols, parse->havingQual);
5245 
5246  /*
5247  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5248  * add them to the input target if not already present. (A Var used
5249  * directly as a GROUP BY item will be present already.) Note this
5250  * includes Vars used in resjunk items, so we are covering the needs of
5251  * ORDER BY and window specifications. Vars used within Aggrefs and
5252  * WindowFuncs will be pulled out here, too.
5253  */
5254  non_group_vars = pull_var_clause((Node *) non_group_cols,
5255  PVC_RECURSE_AGGREGATES |
5256  PVC_RECURSE_WINDOWFUNCS |
5257  PVC_INCLUDE_PLACEHOLDERS);
5258  add_new_columns_to_pathtarget(input_target, non_group_vars);
5259 
5260  /* clean up cruft */
5261  list_free(non_group_vars);
5262  list_free(non_group_cols);
5263 
5264  /* XXX this causes some redundant cost calculation ... */
5265  return set_pathtarget_cost_width(root, input_target);
5266 }
5267 
5268 /*
5269  * make_partial_grouping_target
5270  * Generate appropriate PathTarget for output of partial aggregate
5271  * (or partial grouping, if there are no aggregates) nodes.
5272  *
5273  * A partial aggregation node needs to emit all the same aggregates that
5274  * a regular aggregation node would, plus any aggregates used in HAVING;
5275  * except that the Aggref nodes should be marked as partial aggregates.
5276  *
5277  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5278  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5279  * these would be Vars that are grouped by or used in grouping expressions.)
5280  *
5281  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5282  * havingQual represents the HAVING clause.
5283  */
5284 static PathTarget *
5285 make_partial_grouping_target(PlannerInfo *root,
5286  PathTarget *grouping_target,
5287  Node *havingQual)
5288 {
5289  Query *parse = root->parse;
5290  PathTarget *partial_target;
5291  List *non_group_cols;
5292  List *non_group_exprs;
5293  int i;
5294  ListCell *lc;
5295 
5296  partial_target = create_empty_pathtarget();
5297  non_group_cols = NIL;
5298 
5299  i = 0;
5300  foreach(lc, grouping_target->exprs)
5301  {
5302  Expr *expr = (Expr *) lfirst(lc);
5303  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5304 
5305  if (sgref && parse->groupClause &&
5306  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5307  {
5308  /*
5309  * It's a grouping column, so add it to the partial_target as-is.
5310  * (This allows the upper agg step to repeat the grouping calcs.)
5311  */
5312  add_column_to_pathtarget(partial_target, expr, sgref);
5313  }
5314  else
5315  {
5316  /*
5317  * Non-grouping column, so just remember the expression for later
5318  * call to pull_var_clause.
5319  */
5320  non_group_cols = lappend(non_group_cols, expr);
5321  }
5322 
5323  i++;
5324  }
5325 
5326  /*
5327  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5328  */
5329  if (havingQual)
5330  non_group_cols = lappend(non_group_cols, havingQual);
5331 
5332  /*
5333  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5334  * non-group cols (plus HAVING), and add them to the partial_target if not
5335  * already present. (An expression used directly as a GROUP BY item will
5336  * be present already.) Note this includes Vars used in resjunk items, so
5337  * we are covering the needs of ORDER BY and window specifications.
5338  */
5339  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5340  PVC_INCLUDE_AGGREGATES |
5341  PVC_RECURSE_WINDOWFUNCS |
5342  PVC_INCLUDE_PLACEHOLDERS);
5343 
5344  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5345 
5346  /*
5347  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5348  * are at the top level of the target list, so we can just scan the list
5349  * rather than recursing through the expression trees.
5350  */
5351  foreach(lc, partial_target->exprs)
5352  {
5353  Aggref *aggref = (Aggref *) lfirst(lc);
5354 
5355  if (IsA(aggref, Aggref))
5356  {
5357  Aggref *newaggref;
5358 
5359  /*
5360  * We shouldn't need to copy the substructure of the Aggref node,
5361  * but flat-copy the node itself to avoid damaging other trees.
5362  */
5363  newaggref = makeNode(Aggref);
5364  memcpy(newaggref, aggref, sizeof(Aggref));
5365 
5366  /* For now, assume serialization is required */
5367  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5368 
5369  lfirst(lc) = newaggref;
5370  }
5371  }
5372 
5373  /* clean up cruft */
5374  list_free(non_group_exprs);
5375  list_free(non_group_cols);
5376 
5377  /* XXX this causes some redundant cost calculation ... */
5378  return set_pathtarget_cost_width(root, partial_target);
5379 }
5380 
5381 /*
5382  * mark_partial_aggref
5383  * Adjust an Aggref to make it represent a partial-aggregation step.
5384  *
5385  * The Aggref node is modified in-place; caller must do any copying required.
5386  */
5387 void
5388 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5389 {
5390  /* aggtranstype should be computed by this point */
5391  Assert(OidIsValid(agg->aggtranstype));
5392  /* ... but aggsplit should still be as the parser left it */
5393  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5394 
5395  /* Mark the Aggref with the intended partial-aggregation mode */
5396  agg->aggsplit = aggsplit;
5397 
5398  /*
5399  * Adjust result type if needed. Normally, a partial aggregate returns
5400  * the aggregate's transition type; but if that's INTERNAL and we're
5401  * serializing, it returns BYTEA instead.
5402  */
5403  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5404  {
5405  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5406  agg->aggtype = BYTEAOID;
5407  else
5408  agg->aggtype = agg->aggtranstype;
5409  }
5410 }
5411 
5412 /*
5413  * postprocess_setop_tlist
5414  * Fix up targetlist returned by plan_set_operations().
5415  *
5416  * We need to transpose sort key info from the orig_tlist into new_tlist.
5417  * NOTE: this would not be good enough if we supported resjunk sort keys
5418  * for results of set operations --- then, we'd need to project a whole
5419  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5420  * find any resjunk columns in orig_tlist.
5421  */
5422 static List *
5423 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5424 {
5425  ListCell *l;
5426  ListCell *orig_tlist_item = list_head(orig_tlist);
5427 
5428  foreach(l, new_tlist)
5429  {
5430  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5431  TargetEntry *orig_tle;
5432 
5433  /* ignore resjunk columns in setop result */
5434  if (new_tle->resjunk)
5435  continue;
5436 
5437  Assert(orig_tlist_item != NULL);
5438  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5439  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5440  if (orig_tle->resjunk) /* should not happen */
5441  elog(ERROR, "resjunk output columns are not implemented");
5442  Assert(new_tle->resno == orig_tle->resno);
5443  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5444  }
5445  if (orig_tlist_item != NULL)
5446  elog(ERROR, "resjunk output columns are not implemented");
5447  return new_tlist;
5448 }
5449 
5450 /*
5451  * select_active_windows
5452  * Create a list of the "active" window clauses (ie, those referenced
5453  * by non-deleted WindowFuncs) in the order they are to be executed.
5454  */
5455 static List *
5456 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5457 {
5458  List *windowClause = root->parse->windowClause;
5459  List *result = NIL;
5460  ListCell *lc;
5461  int nActive = 0;
5462  WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5463  * list_length(windowClause));
5464 
5465  /* First, construct an array of the active windows */
5466  foreach(lc, windowClause)
5467  {
5468  WindowClause *wc = lfirst_node(WindowClause, lc);
5469 
5470  /* It's only active if wflists shows some related WindowFuncs */
5471  Assert(wc->winref <= wflists->maxWinRef);
5472  if (wflists->windowFuncs[wc->winref] == NIL)
5473  continue;
5474 
5475  actives[nActive].wc = wc; /* original clause */
5476 
5477  /*
5478  * For sorting, we want the list of partition keys followed by the
5479  * list of sort keys. But pathkeys construction will remove duplicates
5480  * between the two, so we can as well (even though we can't detect all
5481  * of the duplicates, since some may come from ECs - that might mean
5482  * we miss optimization chances here). We must, however, ensure that
5483  * the order of entries is preserved with respect to the ones we do
5484  * keep.
5485  *
5486  * partitionClause and orderClause had their own duplicates removed in
5487  * parse analysis, so we're only concerned here with removing
5488  * orderClause entries that also appear in partitionClause.
5489  */
5490  actives[nActive].uniqueOrder =
5491  list_concat_copy(wc->partitionClause,
5492  wc->orderClause);
5493  nActive++;
5494  }
5495 
5496  /*
5497  * Sort active windows by their partitioning/ordering clauses, ignoring
5498  * any framing clauses, so that the windows that need the same sorting are
5499  * adjacent in the list. When we come to generate paths, this will avoid
5500  * inserting additional Sort nodes.
5501  *
5502  * This is how we implement a specific requirement from the SQL standard,
5503  * which says that when two or more windows are order-equivalent (i.e.
5504  * have matching partition and order clauses, even if their names or
5505  * framing clauses differ), then all peer rows must be presented in the
5506  * same order in all of them. If we allowed multiple sort nodes for such
5507  * cases, we'd risk having the peer rows end up in different orders in
5508  * equivalent windows due to sort instability. (See General Rule 4 of
5509  * <window clause> in SQL2008 - SQL2016.)
5510  *
5511  * Additionally, if the entire list of clauses of one window is a prefix
5512  * of another, put first the window with stronger sorting requirements.
5513  * This way we will first sort for stronger window, and won't have to sort
5514  * again for the weaker one.
5515  */
5516  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5517 
5518  /* build ordered list of the original WindowClause nodes */
5519  for (int i = 0; i < nActive; i++)
5520  result = lappend(result, actives[i].wc);
5521 
5522  pfree(actives);
5523 
5524  return result;
5525 }
5526 
5527 /*
5528  * common_prefix_cmp
5529  * QSort comparison function for WindowClauseSortData
5530  *
5531  * Sort the windows by the required sorting clauses. First, compare the sort
5532  * clauses themselves. Second, if one window's clauses are a prefix of another
5533  * one's clauses, put the window with more sort clauses first.
5534  */
5535 static int
5536 common_prefix_cmp(const void *a, const void *b)
5537 {
5538  const WindowClauseSortData *wcsa = a;
5539  const WindowClauseSortData *wcsb = b;
5540  ListCell *item_a;
5541  ListCell *item_b;
5542 
5543  forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5544  {
5545  SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5546  SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5547 
5548  if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5549  return -1;
5550  else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5551  return 1;
5552  else if (sca->sortop > scb->sortop)
5553  return -1;
5554  else if (sca->sortop < scb->sortop)
5555  return 1;
5556  else if (sca->nulls_first && !scb->nulls_first)
5557  return -1;
5558  else if (!sca->nulls_first && scb->nulls_first)
5559  return 1;
5560  /* no need to compare eqop, since it is fully determined by sortop */
5561  }
5562 
5563  if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5564  return -1;
5565  else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5566  return 1;
5567 
5568  return 0;
5569 }
5570 
5571 /*
5572  * make_window_input_target
5573  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5574  *
5575  * When the query has window functions, this function computes the desired
5576  * target to be computed by the node just below the first WindowAgg.
5577  * This tlist must contain all values needed to evaluate the window functions,
5578  * compute the final target list, and perform any required final sort step.
5579  * If multiple WindowAggs are needed, each intermediate one adds its window
5580  * function results onto this base tlist; only the topmost WindowAgg computes
5581  * the actual desired target list.
5582  *
5583  * This function is much like make_group_input_target, though not quite enough
5584  * like it to share code. As in that function, we flatten most expressions
5585  * into their component variables. But we do not want to flatten window
5586  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5587  * evaluations of them, which would be bad (possibly even resulting in
5588  * inconsistent answers, if they contain volatile functions).
5589  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5590  * make_group_input_target, because we may no longer have access to the
5591  * individual Vars in them.
5592  *
5593  * Another key difference from make_group_input_target is that we don't
5594  * flatten Aggref expressions, since those are to be computed below the
5595  * window functions and just referenced like Vars above that.
5596  *
5597  * 'final_target' is the query's final target list (in PathTarget form)
5598  * 'activeWindows' is the list of active windows previously identified by
5599  * select_active_windows.
5600  *
5601  * The result is the PathTarget to be computed by the plan node immediately
5602  * below the first WindowAgg node.
5603  */
5604 static PathTarget *
5605 make_window_input_target(PlannerInfo *root,
5606  PathTarget *final_target,
5607  List *activeWindows)
5608 {
5609  Query *parse = root->parse;
5610  PathTarget *input_target;
5611  Bitmapset *sgrefs;
5612  List *flattenable_cols;
5613  List *flattenable_vars;
5614  int i;
5615  ListCell *lc;
5616 
5617  Assert(parse->hasWindowFuncs);
5618 
5619  /*
5620  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5621  * into a bitmapset for convenient reference below.
5622  */
5623  sgrefs = NULL;
5624  foreach(lc, activeWindows)
5625  {
5626  WindowClause *wc = lfirst_node(WindowClause, lc);
5627  ListCell *lc2;
5628 
5629  foreach(lc2, wc->partitionClause)
5630  {
5631  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5632 
5633  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5634  }
5635  foreach(lc2, wc->orderClause)
5636  {
5637  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5638 
5639  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5640  }
5641  }
5642 
5643  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5644  foreach(lc, parse->groupClause)
5645  {
5646  SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
5647 
5648  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5649  }
5650 
5651  /*
5652  * Construct a target containing all the non-flattenable targetlist items,
5653  * and save aside the others for a moment.
5654  */
5655  input_target = create_empty_pathtarget();
5656  flattenable_cols = NIL;
5657 
5658  i = 0;
5659  foreach(lc, final_target->exprs)
5660  {
5661  Expr *expr = (Expr *) lfirst(lc);
5662  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5663 
5664  /*
5665  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5666  * that such items can't contain window functions, so it's okay to
5667  * compute them below the WindowAgg nodes.)
5668  */
5669  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5670  {
5671  /*
5672  * Don't want to deconstruct this value, so add it to the input
5673  * target as-is.
5674  */
5675  add_column_to_pathtarget(input_target, expr, sgref);
5676  }
5677  else
5678  {
5679  /*
5680  * Column is to be flattened, so just remember the expression for
5681  * later call to pull_var_clause.
5682  */
5683  flattenable_cols = lappend(flattenable_cols, expr);
5684  }
5685 
5686  i++;
5687  }
5688 
5689  /*
5690  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5691  * add them to the input target if not already present. (Some might be
5692  * there already because they're used directly as window/group clauses.)
5693  *
5694  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5695  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5696  * at higher levels. On the other hand, we should recurse into
5697  * WindowFuncs to make sure their input expressions are available.
5698  */
5699  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5700  PVC_INCLUDE_AGGREGATES |
5701  PVC_RECURSE_WINDOWFUNCS |
5702  PVC_INCLUDE_PLACEHOLDERS);
5703  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5704 
5705  /* clean up cruft */
5706  list_free(flattenable_vars);
5707  list_free(flattenable_cols);
5708 
5709  /* XXX this causes some redundant cost calculation ... */
5710  return set_pathtarget_cost_width(root, input_target);
5711 }
5712 
5713 /*
5714  * make_pathkeys_for_window
5715  * Create a pathkeys list describing the required input ordering
5716  * for the given WindowClause.
5717  *
5718  * The required ordering is first the PARTITION keys, then the ORDER keys.
5719  * In the future we might try to implement windowing using hashing, in which
5720  * case the ordering could be relaxed, but for now we always sort.
5721  */
5722 static List *
5723 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
5724  List *tlist)
5725 {
5726  List *window_pathkeys;
5727  List *window_sortclauses;
5728 
5729  /* Throw error if can't sort */
5730  if (!grouping_is_sortable(wc->partitionClause))
5731  ereport(ERROR,
5732  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5733  errmsg("could not implement window PARTITION BY"),
5734  errdetail("Window partitioning columns must be of sortable datatypes.")));
5735  if (!grouping_is_sortable(wc->orderClause))
5736  ereport(ERROR,
5737  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5738  errmsg("could not implement window ORDER BY"),
5739  errdetail("Window ordering columns must be of sortable datatypes.")));
5740 
5741  /* Okay, make the combined pathkeys */
5742  window_sortclauses = list_concat_copy(wc->partitionClause, wc->orderClause);
5743  window_pathkeys = make_pathkeys_for_sortclauses(root,
5744  window_sortclauses,
5745  tlist);
5746  list_free(window_sortclauses);
5747  return window_pathkeys;
5748 }
5749 
5750 /*
5751  * make_sort_input_target
5752  * Generate appropriate PathTarget for initial input to Sort step.
5753  *
5754  * If the query has ORDER BY, this function chooses the target to be computed
5755  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5756  * project) steps. This might or might not be identical to the query's final
5757  * output target.
5758  *
5759  * The main argument for keeping the sort-input tlist the same as the final
5760  * is that we avoid a separate projection node (which will be needed if
5761  * they're different, because Sort can't project). However, there are also
5762  * advantages to postponing tlist evaluation till after the Sort: it ensures
5763  * a consistent order of evaluation for any volatile functions in the tlist,
5764  * and if there's also a LIMIT, we can stop the query without ever computing
5765  * tlist functions for later rows, which is beneficial for both volatile and
5766  * expensive functions.
5767  *
5768  * Our current policy is to postpone volatile expressions till after the sort
5769  * unconditionally (assuming that that's possible, ie they are in plain tlist
5770  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5771  * postpone set-returning expressions, because running them beforehand would
5772  * bloat the sort dataset, and because it might cause unexpected output order
5773  * if the sort isn't stable. However there's a constraint on that: all SRFs
5774  * in the tlist should be evaluated at the same plan step, so that they can
5775  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5776  * mustn't postpone any SRFs. (Note that in principle that policy should
5777  * probably get applied to the group/window input targetlists too, but we
5778  * have not done that historically.) Lastly, expensive expressions are
5779  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5780  * partial evaluation of the query is possible (if neither is true, we expect
5781  * to have to evaluate the expressions for every row anyway), or if there are
5782  * any volatile or set-returning expressions (since once we've put in a
5783  * projection at all, it won't cost any more to postpone more stuff).
5784  *
5785  * Another issue that could potentially be considered here is that
5786  * evaluating tlist expressions could result in data that's either wider
5787  * or narrower than the input Vars, thus changing the volume of data that
5788  * has to go through the Sort. However, we usually have only a very bad
5789  * idea of the output width of any expression more complex than a Var,
5790  * so for now it seems too risky to try to optimize on that basis.
5791  *
5792  * Note that if we do produce a modified sort-input target, and then the
5793  * query ends up not using an explicit Sort, no particular harm is done:
5794  * we'll initially use the modified target for the preceding path nodes,
5795  * but then change them to the final target with apply_projection_to_path.
5796  * Moreover, in such a case the guarantees about evaluation order of
5797  * volatile functions still hold, since the rows are sorted already.
5798  *
5799  * This function has some things in common with make_group_input_target and
5800  * make_window_input_target, though the detailed rules for what to do are
5801  * different. We never flatten/postpone any grouping or ordering columns;
5802  * those are needed before the sort. If we do flatten a particular
5803  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5804  * computed earlier.
5805  *
5806  * 'final_target' is the query's final target list (in PathTarget form)
5807  * 'have_postponed_srfs' is an output argument, see below
5808  *
5809  * The result is the PathTarget to be computed by the plan node immediately
5810  * below the Sort step (and the Distinct step, if any). This will be
5811  * exactly final_target if we decide a projection step wouldn't be helpful.
5812  *
5813  * In addition, *have_postponed_srfs is set to true if we choose to postpone
5814  * any set-returning functions to after the Sort.
5815  */
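/*
 * Illustrative case (hypothetical): in
 *     SELECT x, expensive_fn(x) FROM t ORDER BY x LIMIT 10;
 * expensive_fn(x) is not a sort column, so it is dropped from the
 * sort-input target and only x is kept; expensive_fn is then evaluated by
 * a projection above the Sort, i.e. only for the ten rows actually
 * returned. Were expensive_fn volatile, it would be postponed even
 * without the LIMIT, to guarantee a consistent evaluation order.
 */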
5816 static PathTarget *
5817 make_sort_input_target(PlannerInfo *root,
5818  PathTarget *final_target,
5819  bool *have_postponed_srfs)
5820 {
5821  Query *parse = root->parse;
5822  PathTarget *input_target;
5823  int ncols;
5824  bool *col_is_srf;
5825  bool *postpone_col;
5826  bool have_srf;
5827  bool have_volatile;
5828  bool have_expensive;
5829  bool have_srf_sortcols;