PostgreSQL Source Code  git master
planner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_aggregate.h"
28 #include "catalog/pg_constraint.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_proc.h"
31 #include "catalog/pg_type.h"
32 #include "executor/executor.h"
33 #include "executor/nodeAgg.h"
34 #include "foreign/fdwapi.h"
35 #include "jit/jit.h"
36 #include "lib/bipartite_match.h"
37 #include "lib/knapsack.h"
38 #include "miscadmin.h"
39 #include "nodes/makefuncs.h"
40 #include "nodes/nodeFuncs.h"
41 #ifdef OPTIMIZER_DEBUG
42 #include "nodes/print.h"
43 #endif
44 #include "nodes/supportnodes.h"
45 #include "optimizer/appendinfo.h"
46 #include "optimizer/clauses.h"
47 #include "optimizer/cost.h"
48 #include "optimizer/inherit.h"
49 #include "optimizer/optimizer.h"
50 #include "optimizer/paramassign.h"
51 #include "optimizer/pathnode.h"
52 #include "optimizer/paths.h"
53 #include "optimizer/plancat.h"
54 #include "optimizer/planmain.h"
55 #include "optimizer/planner.h"
56 #include "optimizer/prep.h"
57 #include "optimizer/subselect.h"
58 #include "optimizer/tlist.h"
59 #include "parser/analyze.h"
60 #include "parser/parse_agg.h"
61 #include "parser/parse_relation.h"
62 #include "parser/parsetree.h"
63 #include "partitioning/partdesc.h"
64 #include "rewrite/rewriteManip.h"
65 #include "storage/dsm_impl.h"
66 #include "utils/lsyscache.h"
67 #include "utils/rel.h"
68 #include "utils/selfuncs.h"
69 #include "utils/syscache.h"
70 
71 /* GUC parameters */
75 
76 /* Hook for plugins to get control in planner() */
78 
79 /* Hook for plugins to get control when grouping_planner() plans upper rels */
81 
82 
83 /* Expression kind codes for preprocess_expression */
84 #define EXPRKIND_QUAL 0
85 #define EXPRKIND_TARGET 1
86 #define EXPRKIND_RTFUNC 2
87 #define EXPRKIND_RTFUNC_LATERAL 3
88 #define EXPRKIND_VALUES 4
89 #define EXPRKIND_VALUES_LATERAL 5
90 #define EXPRKIND_LIMIT 6
91 #define EXPRKIND_APPINFO 7
92 #define EXPRKIND_PHV 8
93 #define EXPRKIND_TABLESAMPLE 9
94 #define EXPRKIND_ARBITER_ELEM 10
95 #define EXPRKIND_TABLEFUNC 11
96 #define EXPRKIND_TABLEFUNC_LATERAL 12
97 
98 /*
99  * Data specific to grouping sets
100  */
101 typedef struct
102 {
112 
113 /*
114  * Temporary structure for use during WindowClause reordering in order to be
115  * able to sort WindowClauses on partitioning/ordering prefix.
116  */
117 typedef struct
118 {
120  List *uniqueOrder; /* A List of unique ordering/partitioning
121  * clauses per Window */
123 
124 /* Passthrough data for standard_qp_callback */
125 typedef struct
126 {
127  List *activeWindows; /* active windows, if any */
128  grouping_sets_data *gset_data; /* grouping sets data, if any */
130 
131 /* Local functions */
132 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
133 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
134 static void grouping_planner(PlannerInfo *root, double tuple_fraction);
136 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
137  int *tleref_to_colnum_map);
138 static void preprocess_rowmarks(PlannerInfo *root);
139 static double preprocess_limit(PlannerInfo *root,
140  double tuple_fraction,
141  int64 *offset_est, int64 *count_est);
143 static List *preprocess_groupclause(PlannerInfo *root, List *force);
144 static List *extract_rollup_sets(List *groupingSets);
145 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
146 static void standard_qp_callback(PlannerInfo *root, void *extra);
147 static double get_number_of_groups(PlannerInfo *root,
148  double path_rows,
149  grouping_sets_data *gd,
150  List *target_list);
152  RelOptInfo *input_rel,
153  PathTarget *target,
154  bool target_parallel_safe,
155  grouping_sets_data *gd);
156 static bool is_degenerate_grouping(PlannerInfo *root);
158  RelOptInfo *input_rel,
159  RelOptInfo *grouped_rel);
160 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
161  PathTarget *target, bool target_parallel_safe,
162  Node *havingQual);
164  RelOptInfo *input_rel,
165  RelOptInfo *grouped_rel,
166  const AggClauseCosts *agg_costs,
167  grouping_sets_data *gd,
168  GroupPathExtraData *extra,
169  RelOptInfo **partially_grouped_rel_p);
170 static void consider_groupingsets_paths(PlannerInfo *root,
171  RelOptInfo *grouped_rel,
172  Path *path,
173  bool is_sorted,
174  bool can_hash,
175  grouping_sets_data *gd,
176  const AggClauseCosts *agg_costs,
177  double dNumGroups);
179  RelOptInfo *input_rel,
180  PathTarget *input_target,
181  PathTarget *output_target,
182  bool output_target_parallel_safe,
183  WindowFuncLists *wflists,
184  List *activeWindows);
185 static void create_one_window_path(PlannerInfo *root,
186  RelOptInfo *window_rel,
187  Path *path,
188  PathTarget *input_target,
189  PathTarget *output_target,
190  WindowFuncLists *wflists,
191  List *activeWindows);
193  RelOptInfo *input_rel);
195  RelOptInfo *input_rel,
196  RelOptInfo *final_distinct_rel);
198  RelOptInfo *input_rel,
199  RelOptInfo *distinct_rel);
201  RelOptInfo *input_rel,
202  PathTarget *target,
203  bool target_parallel_safe,
204  double limit_tuples);
206  PathTarget *final_target);
208  PathTarget *grouping_target,
209  Node *havingQual);
210 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
211 static void optimize_window_clauses(PlannerInfo *root,
212  WindowFuncLists *wflists);
213 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
215  PathTarget *final_target,
216  List *activeWindows);
218  List *tlist);
220  PathTarget *final_target,
221  bool *have_postponed_srfs);
222 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
223  List *targets, List *targets_contain_srfs);
224 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
225  RelOptInfo *grouped_rel,
226  RelOptInfo *partially_grouped_rel,
227  const AggClauseCosts *agg_costs,
228  grouping_sets_data *gd,
229  double dNumGroups,
230  GroupPathExtraData *extra);
232  RelOptInfo *grouped_rel,
233  RelOptInfo *input_rel,
234  grouping_sets_data *gd,
235  GroupPathExtraData *extra,
236  bool force_rel_creation);
237 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
238 static bool can_partial_agg(PlannerInfo *root);
240  RelOptInfo *rel,
241  List *scanjoin_targets,
242  List *scanjoin_targets_contain_srfs,
243  bool scanjoin_target_parallel_safe,
244  bool tlist_same_exprs);
246  RelOptInfo *input_rel,
247  RelOptInfo *grouped_rel,
248  RelOptInfo *partially_grouped_rel,
249  const AggClauseCosts *agg_costs,
250  grouping_sets_data *gd,
252  GroupPathExtraData *extra);
253 static bool group_by_has_partkey(RelOptInfo *input_rel,
254  List *targetList,
255  List *groupClause);
256 static int common_prefix_cmp(const void *a, const void *b);
257 
258 
259 /*****************************************************************************
260  *
261  * Query optimizer entry point
262  *
263  * To support loadable plugins that monitor or modify planner behavior,
264  * we provide a hook variable that lets a plugin get control before and
265  * after the standard planning process. The plugin would normally call
266  * standard_planner().
267  *
268  * Note to plugin authors: standard_planner() scribbles on its Query input,
269  * so you'd better copy that data structure if you want to plan more than once.
270  *
271  *****************************************************************************/
272 PlannedStmt *
273 planner(Query *parse, const char *query_string, int cursorOptions,
274  ParamListInfo boundParams)
275 {
276  PlannedStmt *result;
277 
278  if (planner_hook)
279  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
280  else
281  result = standard_planner(parse, query_string, cursorOptions, boundParams);
282  return result;
283 }
284 
285 PlannedStmt *
286 standard_planner(Query *parse, const char *query_string, int cursorOptions,
287  ParamListInfo boundParams)
288 {
289  PlannedStmt *result;
290  PlannerGlobal *glob;
291  double tuple_fraction;
292  PlannerInfo *root;
293  RelOptInfo *final_rel;
294  Path *best_path;
295  Plan *top_plan;
296  ListCell *lp,
297  *lr;
298 
299  /*
300  * Set up global state for this planner invocation. This data is needed
301  * across all levels of sub-Query that might exist in the given command,
302  * so we keep it in a separate struct that's linked to by each per-Query
303  * PlannerInfo.
304  */
305  glob = makeNode(PlannerGlobal);
306 
307  glob->boundParams = boundParams;
308  glob->subplans = NIL;
309  glob->subroots = NIL;
310  glob->rewindPlanIDs = NULL;
311  glob->finalrtable = NIL;
312  glob->finalrteperminfos = NIL;
313  glob->finalrowmarks = NIL;
314  glob->resultRelations = NIL;
315  glob->appendRelations = NIL;
316  glob->relationOids = NIL;
317  glob->invalItems = NIL;
318  glob->paramExecTypes = NIL;
319  glob->lastPHId = 0;
320  glob->lastRowMarkId = 0;
321  glob->lastPlanNodeId = 0;
322  glob->transientPlan = false;
323  glob->dependsOnRole = false;
324 
325  /*
326  * Assess whether it's feasible to use parallel mode for this query. We
327  * can't do this in a standalone backend, or if the command will try to
328  * modify any data, or if this is a cursor operation, or if GUCs are set
329  * to values that don't permit parallelism, or if parallel-unsafe
330  * functions are present in the query tree.
331  *
332  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
333  * MATERIALIZED VIEW to use parallel plans, but as of now, only the leader
334  * backend writes into a completely new table. In the future, we can
335  * extend it to allow workers to write into the table. However, to allow
336  * parallel updates and deletes, we have to solve other problems,
337  * especially around combo CIDs.)
338  *
339  * For now, we don't try to use parallel mode if we're running inside a
340  * parallel worker. We might eventually be able to relax this
341  * restriction, but for now it seems best not to have parallel workers
342  * trying to create their own parallel workers.
343  */
344  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
346  parse->commandType == CMD_SELECT &&
347  !parse->hasModifyingCTE &&
349  !IsParallelWorker())
350  {
351  /* all the cheap tests pass, so scan the query tree */
353  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
354  }
355  else
356  {
357  /* skip the query tree scan, just assume it's unsafe */
358  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
359  glob->parallelModeOK = false;
360  }
361 
362  /*
363  * glob->parallelModeNeeded is normally set to false here and changed to
364  * true during plan creation if a Gather or Gather Merge plan is actually
365  * created (cf. create_gather_plan, create_gather_merge_plan).
366  *
367  * However, if debug_parallel_query = on or debug_parallel_query =
368  * regress, then we impose parallel mode whenever it's safe to do so, even
369  * if the final plan doesn't use parallelism. It's not safe to do so if
370  * the query contains anything parallel-unsafe; parallelModeOK will be
371  * false in that case. Note that parallelModeOK can't change after this
372  * point. Otherwise, everything in the query is either parallel-safe or
373  * parallel-restricted, and in either case it should be OK to impose
374  * parallel-mode restrictions. If that ends up breaking something, then
375  * either some function the user included in the query is incorrectly
376  * labeled as parallel-safe or parallel-restricted when in reality it's
377  * parallel-unsafe, or else the query planner itself has a bug.
378  */
379  glob->parallelModeNeeded = glob->parallelModeOK &&
381 
382  /* Determine what fraction of the plan is likely to be scanned */
383  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
384  {
385  /*
386  * We have no real idea how many tuples the user will ultimately FETCH
387  * from a cursor, but it is often the case that he doesn't want 'em
388  * all, or would prefer a fast-start plan anyway so that he can
389  * process some of the tuples sooner. Use a GUC parameter to decide
390  * what fraction to optimize for.
391  */
392  tuple_fraction = cursor_tuple_fraction;
393 
394  /*
395  * We document cursor_tuple_fraction as simply being a fraction, which
396  * means the edge cases 0 and 1 have to be treated specially here. We
397  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
398  */
399  if (tuple_fraction >= 1.0)
400  tuple_fraction = 0.0;
401  else if (tuple_fraction <= 0.0)
402  tuple_fraction = 1e-10;
403  }
404  else
405  {
406  /* Default assumption is we need all the tuples */
407  tuple_fraction = 0.0;
408  }
409 
410  /* primary planning entry point (may recurse for subqueries) */
411  root = subquery_planner(glob, parse, NULL,
412  false, tuple_fraction);
413 
414  /* Select best Path and turn it into a Plan */
415  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
416  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
417 
418  top_plan = create_plan(root, best_path);
419 
420  /*
421  * If creating a plan for a scrollable cursor, make sure it can run
422  * backwards on demand. Add a Material node at the top at need.
423  */
424  if (cursorOptions & CURSOR_OPT_SCROLL)
425  {
426  if (!ExecSupportsBackwardScan(top_plan))
427  top_plan = materialize_finished_plan(top_plan);
428  }
429 
430  /*
431  * Optionally add a Gather node for testing purposes, provided this is
432  * actually a safe thing to do.
433  */
435  {
436  Gather *gather = makeNode(Gather);
437 
438  /*
439  * If there are any initPlans attached to the formerly-top plan node,
440  * move them up to the Gather node; same as we do for Material node in
441  * materialize_finished_plan.
442  */
443  gather->plan.initPlan = top_plan->initPlan;
444  top_plan->initPlan = NIL;
445 
446  gather->plan.targetlist = top_plan->targetlist;
447  gather->plan.qual = NIL;
448  gather->plan.lefttree = top_plan;
449  gather->plan.righttree = NULL;
450  gather->num_workers = 1;
451  gather->single_copy = true;
453 
454  /*
455  * Since this Gather has no parallel-aware descendants to signal to,
456  * we don't need a rescan Param.
457  */
458  gather->rescan_param = -1;
459 
460  /*
461  * Ideally we'd use cost_gather here, but setting up dummy path data
462  * to satisfy it doesn't seem much cleaner than knowing what it does.
463  */
464  gather->plan.startup_cost = top_plan->startup_cost +
466  gather->plan.total_cost = top_plan->total_cost +
468  gather->plan.plan_rows = top_plan->plan_rows;
469  gather->plan.plan_width = top_plan->plan_width;
470  gather->plan.parallel_aware = false;
471  gather->plan.parallel_safe = false;
472 
473  /* use parallel mode for parallel plans. */
474  root->glob->parallelModeNeeded = true;
475 
476  top_plan = &gather->plan;
477  }
478 
479  /*
480  * If any Params were generated, run through the plan tree and compute
481  * each plan node's extParam/allParam sets. Ideally we'd merge this into
482  * set_plan_references' tree traversal, but for now it has to be separate
483  * because we need to visit subplans before not after main plan.
484  */
485  if (glob->paramExecTypes != NIL)
486  {
487  Assert(list_length(glob->subplans) == list_length(glob->subroots));
488  forboth(lp, glob->subplans, lr, glob->subroots)
489  {
490  Plan *subplan = (Plan *) lfirst(lp);
491  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
492 
493  SS_finalize_plan(subroot, subplan);
494  }
495  SS_finalize_plan(root, top_plan);
496  }
497 
498  /* final cleanup of the plan */
499  Assert(glob->finalrtable == NIL);
500  Assert(glob->finalrteperminfos == NIL);
501  Assert(glob->finalrowmarks == NIL);
502  Assert(glob->resultRelations == NIL);
503  Assert(glob->appendRelations == NIL);
504  top_plan = set_plan_references(root, top_plan);
505  /* ... and the subplans (both regular subplans and initplans) */
506  Assert(list_length(glob->subplans) == list_length(glob->subroots));
507  forboth(lp, glob->subplans, lr, glob->subroots)
508  {
509  Plan *subplan = (Plan *) lfirst(lp);
510  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
511 
512  lfirst(lp) = set_plan_references(subroot, subplan);
513  }
514 
515  /* build the PlannedStmt result */
516  result = makeNode(PlannedStmt);
517 
518  result->commandType = parse->commandType;
519  result->queryId = parse->queryId;
520  result->hasReturning = (parse->returningList != NIL);
521  result->hasModifyingCTE = parse->hasModifyingCTE;
522  result->canSetTag = parse->canSetTag;
523  result->transientPlan = glob->transientPlan;
524  result->dependsOnRole = glob->dependsOnRole;
525  result->parallelModeNeeded = glob->parallelModeNeeded;
526  result->planTree = top_plan;
527  result->partPruneInfos = glob->partPruneInfos;
528  result->rtable = glob->finalrtable;
529  result->permInfos = glob->finalrteperminfos;
530  result->resultRelations = glob->resultRelations;
531  result->appendRelations = glob->appendRelations;
532  result->subplans = glob->subplans;
533  result->rewindPlanIDs = glob->rewindPlanIDs;
534  result->rowMarks = glob->finalrowmarks;
535  result->relationOids = glob->relationOids;
536  result->invalItems = glob->invalItems;
537  result->paramExecTypes = glob->paramExecTypes;
538  /* utilityStmt should be null, but we might as well copy it */
539  result->utilityStmt = parse->utilityStmt;
540  result->stmt_location = parse->stmt_location;
541  result->stmt_len = parse->stmt_len;
542 
543  result->jitFlags = PGJIT_NONE;
544  if (jit_enabled && jit_above_cost >= 0 &&
545  top_plan->total_cost > jit_above_cost)
546  {
547  result->jitFlags |= PGJIT_PERFORM;
548 
549  /*
550  * Decide how much effort should be put into generating better code.
551  */
552  if (jit_optimize_above_cost >= 0 &&
554  result->jitFlags |= PGJIT_OPT3;
555  if (jit_inline_above_cost >= 0 &&
556  top_plan->total_cost > jit_inline_above_cost)
557  result->jitFlags |= PGJIT_INLINE;
558 
559  /*
560  * Decide which operations should be JITed.
561  */
562  if (jit_expressions)
563  result->jitFlags |= PGJIT_EXPR;
565  result->jitFlags |= PGJIT_DEFORM;
566  }
567 
568  if (glob->partition_directory != NULL)
569  DestroyPartitionDirectory(glob->partition_directory);
570 
571  return result;
572 }
573 
574 
575 /*--------------------
576  * subquery_planner
577  * Invokes the planner on a subquery. We recurse to here for each
578  * sub-SELECT found in the query tree.
579  *
580  * glob is the global state for the current planner run.
581  * parse is the querytree produced by the parser & rewriter.
582  * parent_root is the immediate parent Query's info (NULL at the top level).
583  * hasRecursion is true if this is a recursive WITH query.
584  * tuple_fraction is the fraction of tuples we expect will be retrieved.
585  * tuple_fraction is interpreted as explained for grouping_planner, below.
586  *
587  * Basically, this routine does the stuff that should only be done once
588  * per Query object. It then calls grouping_planner. At one time,
589  * grouping_planner could be invoked recursively on the same Query object;
590  * that's not currently true, but we keep the separation between the two
591  * routines anyway, in case we need it again someday.
592  *
593  * subquery_planner will be called recursively to handle sub-Query nodes
594  * found within the query's expressions and rangetable.
595  *
596  * Returns the PlannerInfo struct ("root") that contains all data generated
597  * while planning the subquery. In particular, the Path(s) attached to
598  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
599  * cheapest way(s) to implement the query. The top level will select the
600  * best Path and pass it through createplan.c to produce a finished Plan.
601  *--------------------
602  */
603 PlannerInfo *
605  PlannerInfo *parent_root,
606  bool hasRecursion, double tuple_fraction)
607 {
608  PlannerInfo *root;
609  List *newWithCheckOptions;
610  List *newHaving;
611  bool hasOuterJoins;
612  bool hasResultRTEs;
613  RelOptInfo *final_rel;
614  ListCell *l;
615 
616  /* Create a PlannerInfo data structure for this subquery */
617  root = makeNode(PlannerInfo);
618  root->parse = parse;
619  root->glob = glob;
620  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
621  root->parent_root = parent_root;
622  root->plan_params = NIL;
623  root->outer_params = NULL;
624  root->planner_cxt = CurrentMemoryContext;
625  root->init_plans = NIL;
626  root->cte_plan_ids = NIL;
627  root->multiexpr_params = NIL;
628  root->join_domains = NIL;
629  root->eq_classes = NIL;
630  root->ec_merging_done = false;
631  root->last_rinfo_serial = 0;
632  root->all_result_relids =
633  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
634  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
635  root->append_rel_list = NIL;
636  root->row_identity_vars = NIL;
637  root->rowMarks = NIL;
638  memset(root->upper_rels, 0, sizeof(root->upper_rels));
639  memset(root->upper_targets, 0, sizeof(root->upper_targets));
640  root->processed_groupClause = NIL;
642  root->processed_tlist = NIL;
643  root->update_colnos = NIL;
644  root->grouping_map = NULL;
645  root->minmax_aggs = NIL;
646  root->qual_security_level = 0;
647  root->hasPseudoConstantQuals = false;
648  root->hasAlternativeSubPlans = false;
649  root->placeholdersFrozen = false;
650  root->hasRecursion = hasRecursion;
651  if (hasRecursion)
653  else
654  root->wt_param_id = -1;
655  root->non_recursive_path = NULL;
656  root->partColsUpdated = false;
657 
658  /*
659  * Create the top-level join domain. This won't have valid contents until
660  * deconstruct_jointree fills it in, but the node needs to exist before
661  * that so we can build EquivalenceClasses referencing it.
662  */
664 
665  /*
666  * If there is a WITH list, process each WITH query and either convert it
667  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
668  */
669  if (parse->cteList)
670  SS_process_ctes(root);
671 
672  /*
673  * If it's a MERGE command, transform the joinlist as appropriate.
674  */
676 
677  /*
678  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
679  * that we don't need so many special cases to deal with that situation.
680  */
682 
683  /*
684  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
685  * to transform them into joins. Note that this step does not descend
686  * into subqueries; if we pull up any subqueries below, their SubLinks are
687  * processed just before pulling them up.
688  */
689  if (parse->hasSubLinks)
690  pull_up_sublinks(root);
691 
692  /*
693  * Scan the rangetable for function RTEs, do const-simplification on them,
694  * and then inline them if possible (producing subqueries that might get
695  * pulled up next). Recursion issues here are handled in the same way as
696  * for SubLinks.
697  */
699 
700  /*
701  * Check to see if any subqueries in the jointree can be merged into this
702  * query.
703  */
704  pull_up_subqueries(root);
705 
706  /*
707  * If this is a simple UNION ALL query, flatten it into an appendrel. We
708  * do this now because it requires applying pull_up_subqueries to the leaf
709  * queries of the UNION ALL, which weren't touched above because they
710  * weren't referenced by the jointree (they will be after we do this).
711  */
712  if (parse->setOperations)
714 
715  /*
716  * Survey the rangetable to see what kinds of entries are present. We can
717  * skip some later processing if relevant SQL features are not used; for
718  * example if there are no JOIN RTEs we can avoid the expense of doing
719  * flatten_join_alias_vars(). This must be done after we have finished
720  * adding rangetable entries, of course. (Note: actually, processing of
721  * inherited or partitioned rels can cause RTEs for their child tables to
722  * get added later; but those must all be RTE_RELATION entries, so they
723  * don't invalidate the conclusions drawn here.)
724  */
725  root->hasJoinRTEs = false;
726  root->hasLateralRTEs = false;
727  hasOuterJoins = false;
728  hasResultRTEs = false;
729  foreach(l, parse->rtable)
730  {
732 
733  switch (rte->rtekind)
734  {
735  case RTE_RELATION:
736  if (rte->inh)
737  {
738  /*
739  * Check to see if the relation actually has any children;
740  * if not, clear the inh flag so we can treat it as a
741  * plain base relation.
742  *
743  * Note: this could give a false-positive result, if the
744  * rel once had children but no longer does. We used to
745  * be able to clear rte->inh later on when we discovered
746  * that, but no more; we have to handle such cases as
747  * full-fledged inheritance.
748  */
749  rte->inh = has_subclass(rte->relid);
750  }
751  break;
752  case RTE_JOIN:
753  root->hasJoinRTEs = true;
754  if (IS_OUTER_JOIN(rte->jointype))
755  hasOuterJoins = true;
756  break;
757  case RTE_RESULT:
758  hasResultRTEs = true;
759  break;
760  default:
761  /* No work here for other RTE types */
762  break;
763  }
764 
765  if (rte->lateral)
766  root->hasLateralRTEs = true;
767 
768  /*
769  * We can also determine the maximum security level required for any
770  * securityQuals now. Addition of inheritance-child RTEs won't affect
771  * this, because child tables don't have their own securityQuals; see
772  * expand_single_inheritance_child().
773  */
774  if (rte->securityQuals)
776  list_length(rte->securityQuals));
777  }
778 
779  /*
780  * If we have now verified that the query target relation is
781  * non-inheriting, mark it as a leaf target.
782  */
783  if (parse->resultRelation)
784  {
785  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
786 
787  if (!rte->inh)
788  root->leaf_result_relids =
789  bms_make_singleton(parse->resultRelation);
790  }
791 
792  /*
793  * Preprocess RowMark information. We need to do this after subquery
794  * pullup, so that all base relations are present.
795  */
796  preprocess_rowmarks(root);
797 
798  /*
799  * Set hasHavingQual to remember if HAVING clause is present. Needed
800  * because preprocess_expression will reduce a constant-true condition to
801  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
802  */
803  root->hasHavingQual = (parse->havingQual != NULL);
804 
805  /*
806  * Do expression preprocessing on targetlist and quals, as well as other
807  * random expressions in the querytree. Note that we do not need to
808  * handle sort/group expressions explicitly, because they are actually
809  * part of the targetlist.
810  */
811  parse->targetList = (List *)
812  preprocess_expression(root, (Node *) parse->targetList,
814 
815  /* Constant-folding might have removed all set-returning functions */
816  if (parse->hasTargetSRFs)
817  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
818 
819  newWithCheckOptions = NIL;
820  foreach(l, parse->withCheckOptions)
821  {
823 
824  wco->qual = preprocess_expression(root, wco->qual,
825  EXPRKIND_QUAL);
826  if (wco->qual != NULL)
827  newWithCheckOptions = lappend(newWithCheckOptions, wco);
828  }
829  parse->withCheckOptions = newWithCheckOptions;
830 
831  parse->returningList = (List *)
832  preprocess_expression(root, (Node *) parse->returningList,
834 
835  preprocess_qual_conditions(root, (Node *) parse->jointree);
836 
837  parse->havingQual = preprocess_expression(root, parse->havingQual,
838  EXPRKIND_QUAL);
839 
840  foreach(l, parse->windowClause)
841  {
843 
844  /* partitionClause/orderClause are sort/group expressions */
847  wc->endOffset = preprocess_expression(root, wc->endOffset,
849  wc->runCondition = (List *) preprocess_expression(root,
850  (Node *) wc->runCondition,
852  }
853 
854  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
856  parse->limitCount = preprocess_expression(root, parse->limitCount,
858 
859  if (parse->onConflict)
860  {
861  parse->onConflict->arbiterElems = (List *)
863  (Node *) parse->onConflict->arbiterElems,
865  parse->onConflict->arbiterWhere =
867  parse->onConflict->arbiterWhere,
868  EXPRKIND_QUAL);
869  parse->onConflict->onConflictSet = (List *)
871  (Node *) parse->onConflict->onConflictSet,
873  parse->onConflict->onConflictWhere =
875  parse->onConflict->onConflictWhere,
876  EXPRKIND_QUAL);
877  /* exclRelTlist contains only Vars, so no preprocessing needed */
878  }
879 
880  foreach(l, parse->mergeActionList)
881  {
883 
884  action->targetList = (List *)
886  (Node *) action->targetList,
888  action->qual =
890  (Node *) action->qual,
891  EXPRKIND_QUAL);
892  }
893 
894  root->append_rel_list = (List *)
897 
898  /* Also need to preprocess expressions within RTEs */
899  foreach(l, parse->rtable)
900  {
902  int kind;
903  ListCell *lcsq;
904 
905  if (rte->rtekind == RTE_RELATION)
906  {
907  if (rte->tablesample)
908  rte->tablesample = (TableSampleClause *)
910  (Node *) rte->tablesample,
912  }
913  else if (rte->rtekind == RTE_SUBQUERY)
914  {
915  /*
916  * We don't want to do all preprocessing yet on the subquery's
917  * expressions, since that will happen when we plan it. But if it
918  * contains any join aliases of our level, those have to get
919  * expanded now, because planning of the subquery won't do it.
920  * That's only possible if the subquery is LATERAL.
921  */
922  if (rte->lateral && root->hasJoinRTEs)
923  rte->subquery = (Query *)
924  flatten_join_alias_vars(root, root->parse,
925  (Node *) rte->subquery);
926  }
927  else if (rte->rtekind == RTE_FUNCTION)
928  {
929  /* Preprocess the function expression(s) fully */
931  rte->functions = (List *)
932  preprocess_expression(root, (Node *) rte->functions, kind);
933  }
934  else if (rte->rtekind == RTE_TABLEFUNC)
935  {
936  /* Preprocess the function expression(s) fully */
938  rte->tablefunc = (TableFunc *)
939  preprocess_expression(root, (Node *) rte->tablefunc, kind);
940  }
941  else if (rte->rtekind == RTE_VALUES)
942  {
943  /* Preprocess the values lists fully */
945  rte->values_lists = (List *)
946  preprocess_expression(root, (Node *) rte->values_lists, kind);
947  }
948 
949  /*
950  * Process each element of the securityQuals list as if it were a
951  * separate qual expression (as indeed it is). We need to do it this
952  * way to get proper canonicalization of AND/OR structure. Note that
953  * this converts each element into an implicit-AND sublist.
954  */
955  foreach(lcsq, rte->securityQuals)
956  {
957  lfirst(lcsq) = preprocess_expression(root,
958  (Node *) lfirst(lcsq),
959  EXPRKIND_QUAL);
960  }
961  }
962 
963  /*
964  * Now that we are done preprocessing expressions, and in particular done
965  * flattening join alias variables, get rid of the joinaliasvars lists.
966  * They no longer match what expressions in the rest of the tree look
967  * like, because we have not preprocessed expressions in those lists (and
968  * do not want to; for example, expanding a SubLink there would result in
969  * a useless unreferenced subplan). Leaving them in place simply creates
970  * a hazard for later scans of the tree. We could try to prevent that by
971  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
972  * but that doesn't sound very reliable.
973  */
974  if (root->hasJoinRTEs)
975  {
976  foreach(l, parse->rtable)
977  {
979 
980  rte->joinaliasvars = NIL;
981  }
982  }
983 
984  /*
985  * In some cases we may want to transfer a HAVING clause into WHERE. We
986  * cannot do so if the HAVING clause contains aggregates (obviously) or
987  * volatile functions (since a HAVING clause is supposed to be executed
988  * only once per group). We also can't do this if there are any nonempty
989  * grouping sets; moving such a clause into WHERE would potentially change
990  * the results, if any referenced column isn't present in all the grouping
991  * sets. (If there are only empty grouping sets, then the HAVING clause
992  * must be degenerate as discussed below.)
993  *
994  * Also, it may be that the clause is so expensive to execute that we're
995  * better off doing it only once per group, despite the loss of
996  * selectivity. This is hard to estimate short of doing the entire
997  * planning process twice, so we use a heuristic: clauses containing
998  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
999  * clause into WHERE, in hopes of eliminating tuples before aggregation
1000  * instead of after.
1001  *
1002  * If the query has explicit grouping then we can simply move such a
1003  * clause into WHERE; any group that fails the clause will not be in the
1004  * output because none of its tuples will reach the grouping or
1005  * aggregation stage. Otherwise we must have a degenerate (variable-free)
1006  * HAVING clause, which we put in WHERE so that query_planner() can use it
1007  * in a gating Result node, but also keep in HAVING to ensure that we
1008  * don't emit a bogus aggregated row. (This could be done better, but it
1009  * seems not worth optimizing.)
1010  *
1011  * Note that both havingQual and parse->jointree->quals are in
1012  * implicitly-ANDed-list form at this point, even though they are declared
1013  * as Node *.
1014  */
1015  newHaving = NIL;
1016  foreach(l, (List *) parse->havingQual)
1017  {
1018  Node *havingclause = (Node *) lfirst(l);
1019 
1020  if ((parse->groupClause && parse->groupingSets) ||
1021  contain_agg_clause(havingclause) ||
1022  contain_volatile_functions(havingclause) ||
1023  contain_subplans(havingclause))
1024  {
1025  /* keep it in HAVING */
1026  newHaving = lappend(newHaving, havingclause);
1027  }
1028  else if (parse->groupClause && !parse->groupingSets)
1029  {
1030  /* move it to WHERE */
1031  parse->jointree->quals = (Node *)
1032  lappend((List *) parse->jointree->quals, havingclause);
1033  }
1034  else
1035  {
1036  /* put a copy in WHERE, keep it in HAVING */
1037  parse->jointree->quals = (Node *)
1038  lappend((List *) parse->jointree->quals,
1039  copyObject(havingclause));
1040  newHaving = lappend(newHaving, havingclause);
1041  }
1042  }
1043  parse->havingQual = (Node *) newHaving;
1044 
1045  /*
1046  * If we have any outer joins, try to reduce them to plain inner joins.
1047  * This step is most easily done after we've done expression
1048  * preprocessing.
1049  */
1050  if (hasOuterJoins)
1051  reduce_outer_joins(root);
1052 
1053  /*
1054  * If we have any RTE_RESULT relations, see if they can be deleted from
1055  * the jointree. We also rely on this processing to flatten single-child
1056  * FromExprs underneath outer joins. This step is most effectively done
1057  * after we've done expression preprocessing and outer join reduction.
1058  */
1059  if (hasResultRTEs || hasOuterJoins)
1061 
1062  /*
1063  * Do the main planning.
1064  */
1065  grouping_planner(root, tuple_fraction);
1066 
1067  /*
1068  * Capture the set of outer-level param IDs we have access to, for use in
1069  * extParam/allParam calculations later.
1070  */
1072 
1073  /*
1074  * If any initPlans were created in this query level, adjust the surviving
1075  * Paths' costs and parallel-safety flags to account for them. The
1076  * initPlans won't actually get attached to the plan tree till
1077  * create_plan() runs, but we must include their effects now.
1078  */
1079  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1080  SS_charge_for_initplans(root, final_rel);
1081 
1082  /*
1083  * Make sure we've identified the cheapest Path for the final rel. (By
1084  * doing this here not in grouping_planner, we include initPlan costs in
1085  * the decision, though it's unlikely that will change anything.)
1086  */
1087  set_cheapest(final_rel);
1088 
1089  return root;
1090 }
1091 
1092 /*
1093  * preprocess_expression
1094  * Do subquery_planner's preprocessing work for an expression,
1095  * which can be a targetlist, a WHERE clause (including JOIN/ON
1096  * conditions), a HAVING clause, or a few other things.
1097  */
1098 static Node *
1099 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1100 {
1101  /*
1102  * Fall out quickly if expression is empty. This occurs often enough to
1103  * be worth checking. Note that null->null is the correct conversion for
1104  * implicit-AND result format, too.
1105  */
1106  if (expr == NULL)
1107  return NULL;
1108 
1109  /*
1110  * If the query has any join RTEs, replace join alias variables with
1111  * base-relation variables. We must do this first, since any expressions
1112  * we may extract from the joinaliasvars lists have not been preprocessed.
1113  * For example, if we did this after sublink processing, sublinks expanded
1114  * out from join aliases would not get processed. But we can skip this in
1115  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1116  * they can't contain any Vars of the current query level.
1117  */
1118  if (root->hasJoinRTEs &&
1119  !(kind == EXPRKIND_RTFUNC ||
1120  kind == EXPRKIND_VALUES ||
1121  kind == EXPRKIND_TABLESAMPLE ||
1122  kind == EXPRKIND_TABLEFUNC))
1123  expr = flatten_join_alias_vars(root, root->parse, expr);
1124 
1125  /*
1126  * Simplify constant expressions. For function RTEs, this was already
1127  * done by preprocess_function_rtes. (But note we must do it again for
1128  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1129  * un-simplified subexpressions inserted by flattening of subqueries or
1130  * join alias variables.)
1131  *
1132  * Note: an essential effect of this is to convert named-argument function
1133  * calls to positional notation and insert the current actual values of
1134  * any default arguments for functions. To ensure that happens, we *must*
1135  * process all expressions here. Previous PG versions sometimes skipped
1136  * const-simplification if it didn't seem worth the trouble, but we can't
1137  * do that anymore.
1138  *
1139  * Note: this also flattens nested AND and OR expressions into N-argument
1140  * form. All processing of a qual expression after this point must be
1141  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1142  * with AND directly under AND, nor OR directly under OR.
1143  */
1144  if (kind != EXPRKIND_RTFUNC)
1145  expr = eval_const_expressions(root, expr);
1146 
1147  /*
1148  * If it's a qual or havingQual, canonicalize it.
1149  */
1150  if (kind == EXPRKIND_QUAL)
1151  {
1152  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1153 
1154 #ifdef OPTIMIZER_DEBUG
1155  printf("After canonicalize_qual()\n");
1156  pprint(expr);
1157 #endif
1158  }
1159 
1160  /*
1161  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1162  * hashfuncid of any that might execute more quickly by using hash lookups
1163  * instead of a linear search.
1164  */
1165  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1166  {
1168  }
1169 
1170  /* Expand SubLinks to SubPlans */
1171  if (root->parse->hasSubLinks)
1172  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1173 
1174  /*
1175  * XXX do not insert anything here unless you have grokked the comments in
1176  * SS_replace_correlation_vars ...
1177  */
1178 
1179  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1180  if (root->query_level > 1)
1181  expr = SS_replace_correlation_vars(root, expr);
1182 
1183  /*
1184  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1185  * don't want to do this before eval_const_expressions, since the latter
1186  * would be unable to simplify a top-level AND correctly. Also,
1187  * SS_process_sublinks expects explicit-AND format.)
1188  */
1189  if (kind == EXPRKIND_QUAL)
1190  expr = (Node *) make_ands_implicit((Expr *) expr);
1191 
1192  return expr;
1193 }
1194 
1195 /*
1196  * preprocess_qual_conditions
1197  * Recursively scan the query's jointree and do subquery_planner's
1198  * preprocessing work on each qual condition found therein.
1199  */
1200 static void
1202 {
1203  if (jtnode == NULL)
1204  return;
1205  if (IsA(jtnode, RangeTblRef))
1206  {
1207  /* nothing to do here */
1208  }
1209  else if (IsA(jtnode, FromExpr))
1210  {
1211  FromExpr *f = (FromExpr *) jtnode;
1212  ListCell *l;
1213 
1214  foreach(l, f->fromlist)
1216 
1218  }
1219  else if (IsA(jtnode, JoinExpr))
1220  {
1221  JoinExpr *j = (JoinExpr *) jtnode;
1222 
1223  preprocess_qual_conditions(root, j->larg);
1224  preprocess_qual_conditions(root, j->rarg);
1225 
1226  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1227  }
1228  else
1229  elog(ERROR, "unrecognized node type: %d",
1230  (int) nodeTag(jtnode));
1231 }
1232 
1233 /*
1234  * preprocess_phv_expression
1235  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1236  *
1237  * If a LATERAL subquery references an output of another subquery, and that
1238  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1239  * join, then we'll push the PlaceHolderVar expression down into the subquery
1240  * and later pull it back up during find_lateral_references, which runs after
1241  * subquery_planner has preprocessed all the expressions that were in the
1242  * current query level to start with. So we need to preprocess it then.
1243  */
1244 Expr *
1246 {
1247  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1248 }
1249 
1250 /*--------------------
1251  * grouping_planner
1252  * Perform planning steps related to grouping, aggregation, etc.
1253  *
1254  * This function adds all required top-level processing to the scan/join
1255  * Path(s) produced by query_planner.
1256  *
1257  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1258  * tuple_fraction is interpreted as follows:
1259  * 0: expect all tuples to be retrieved (normal case)
1260  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1261  * from the plan to be retrieved
1262  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1263  * expected to be retrieved (ie, a LIMIT specification)
1264  *
1265  * Returns nothing; the useful output is in the Paths we attach to the
1266  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1267  * root->processed_tlist contains the final processed targetlist.
1268  *
1269  * Note that we have not done set_cheapest() on the final rel; it's convenient
1270  * to leave this to the caller.
1271  *--------------------
1272  */
1273 static void
1274 grouping_planner(PlannerInfo *root, double tuple_fraction)
1275 {
1276  Query *parse = root->parse;
1277  int64 offset_est = 0;
1278  int64 count_est = 0;
1279  double limit_tuples = -1.0;
1280  bool have_postponed_srfs = false;
1281  PathTarget *final_target;
1282  List *final_targets;
1283  List *final_targets_contain_srfs;
1284  bool final_target_parallel_safe;
1285  RelOptInfo *current_rel;
1286  RelOptInfo *final_rel;
1287  FinalPathExtraData extra;
1288  ListCell *lc;
1289 
1290  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1291  if (parse->limitCount || parse->limitOffset)
1292  {
1293  tuple_fraction = preprocess_limit(root, tuple_fraction,
1294  &offset_est, &count_est);
1295 
1296  /*
1297  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1298  * estimate the effects of using a bounded sort.
1299  */
1300  if (count_est > 0 && offset_est >= 0)
1301  limit_tuples = (double) count_est + (double) offset_est;
1302  }
1303 
1304  /* Make tuple_fraction accessible to lower-level routines */
1305  root->tuple_fraction = tuple_fraction;
1306 
1307  if (parse->setOperations)
1308  {
1309  /*
1310  * If there's a top-level ORDER BY, assume we have to fetch all the
1311  * tuples. This might be too simplistic given all the hackery below
1312  * to possibly avoid the sort; but the odds of accurate estimates here
1313  * are pretty low anyway. XXX try to get rid of this in favor of
1314  * letting plan_set_operations generate both fast-start and
1315  * cheapest-total paths.
1316  */
1317  if (parse->sortClause)
1318  root->tuple_fraction = 0.0;
1319 
1320  /*
1321  * Construct Paths for set operations. The results will not need any
1322  * work except perhaps a top-level sort and/or LIMIT. Note that any
1323  * special work for recursive unions is the responsibility of
1324  * plan_set_operations.
1325  */
1326  current_rel = plan_set_operations(root);
1327 
1328  /*
1329  * We should not need to call preprocess_targetlist, since we must be
1330  * in a SELECT query node. Instead, use the processed_tlist returned
1331  * by plan_set_operations (since this tells whether it returned any
1332  * resjunk columns!), and transfer any sort key information from the
1333  * original tlist.
1334  */
1335  Assert(parse->commandType == CMD_SELECT);
1336 
1337  /* for safety, copy processed_tlist instead of modifying in-place */
1338  root->processed_tlist =
1340  parse->targetList);
1341 
1342  /* Also extract the PathTarget form of the setop result tlist */
1343  final_target = current_rel->cheapest_total_path->pathtarget;
1344 
1345  /* And check whether it's parallel safe */
1346  final_target_parallel_safe =
1347  is_parallel_safe(root, (Node *) final_target->exprs);
1348 
1349  /* The setop result tlist couldn't contain any SRFs */
1350  Assert(!parse->hasTargetSRFs);
1351  final_targets = final_targets_contain_srfs = NIL;
1352 
1353  /*
1354  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1355  * checked already, but let's make sure).
1356  */
1357  if (parse->rowMarks)
1358  ereport(ERROR,
1359  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1360  /*------
1361  translator: %s is a SQL row locking clause such as FOR UPDATE */
1362  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1364  parse->rowMarks)->strength))));
1365 
1366  /*
1367  * Calculate pathkeys that represent result ordering requirements
1368  */
1369  Assert(parse->distinctClause == NIL);
1371  parse->sortClause,
1372  root->processed_tlist);
1373  }
1374  else
1375  {
1376  /* No set operations, do regular planning */
1377  PathTarget *sort_input_target;
1378  List *sort_input_targets;
1379  List *sort_input_targets_contain_srfs;
1380  bool sort_input_target_parallel_safe;
1381  PathTarget *grouping_target;
1382  List *grouping_targets;
1383  List *grouping_targets_contain_srfs;
1384  bool grouping_target_parallel_safe;
1385  PathTarget *scanjoin_target;
1386  List *scanjoin_targets;
1387  List *scanjoin_targets_contain_srfs;
1388  bool scanjoin_target_parallel_safe;
1389  bool scanjoin_target_same_exprs;
1390  bool have_grouping;
1391  WindowFuncLists *wflists = NULL;
1392  List *activeWindows = NIL;
1393  grouping_sets_data *gset_data = NULL;
1394  standard_qp_extra qp_extra;
1395 
1396  /* A recursive query should always have setOperations */
1397  Assert(!root->hasRecursion);
1398 
1399  /* Preprocess grouping sets and GROUP BY clause, if any */
1400  if (parse->groupingSets)
1401  {
1402  gset_data = preprocess_grouping_sets(root);
1403  }
1404  else if (parse->groupClause)
1405  {
1406  /* Preprocess regular GROUP BY clause, if any */
1408  /* Remove any redundant GROUP BY columns */
1410  }
1411 
1412  /*
1413  * Preprocess targetlist. Note that much of the remaining planning
1414  * work will be done with the PathTarget representation of tlists, but
1415  * we must also maintain the full representation of the final tlist so
1416  * that we can transfer its decoration (resnames etc) to the topmost
1417  * tlist of the finished Plan. This is kept in processed_tlist.
1418  */
1419  preprocess_targetlist(root);
1420 
1421  /*
1422  * Mark all the aggregates with resolved aggtranstypes, and detect
1423  * aggregates that are duplicates or can share transition state. We
1424  * must do this before slicing and dicing the tlist into various
1425  * pathtargets, else some copies of the Aggref nodes might escape
1426  * being marked.
1427  */
1428  if (parse->hasAggs)
1429  {
1430  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1431  preprocess_aggrefs(root, (Node *) parse->havingQual);
1432  }
1433 
1434  /*
1435  * Locate any window functions in the tlist. (We don't need to look
1436  * anywhere else, since expressions used in ORDER BY will be in there
1437  * too.) Note that they could all have been eliminated by constant
1438  * folding, in which case we don't need to do any more work.
1439  */
1440  if (parse->hasWindowFuncs)
1441  {
1442  wflists = find_window_functions((Node *) root->processed_tlist,
1443  list_length(parse->windowClause));
1444  if (wflists->numWindowFuncs > 0)
1445  {
1446  /*
1447  * See if any modifications can be made to each WindowClause
1448  * to allow the executor to execute the WindowFuncs more
1449  * quickly.
1450  */
1451  optimize_window_clauses(root, wflists);
1452 
1453  activeWindows = select_active_windows(root, wflists);
1454  }
1455  else
1456  parse->hasWindowFuncs = false;
1457  }
1458 
1459  /*
1460  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1461  * adding logic between here and the query_planner() call. Anything
1462  * that is needed in MIN/MAX-optimizable cases will have to be
1463  * duplicated in planagg.c.
1464  */
1465  if (parse->hasAggs)
1467 
1468  /*
1469  * Figure out whether there's a hard limit on the number of rows that
1470  * query_planner's result subplan needs to return. Even if we know a
1471  * hard limit overall, it doesn't apply if the query has any
1472  * grouping/aggregation operations, or SRFs in the tlist.
1473  */
1474  if (parse->groupClause ||
1475  parse->groupingSets ||
1476  parse->distinctClause ||
1477  parse->hasAggs ||
1478  parse->hasWindowFuncs ||
1479  parse->hasTargetSRFs ||
1480  root->hasHavingQual)
1481  root->limit_tuples = -1.0;
1482  else
1483  root->limit_tuples = limit_tuples;
1484 
1485  /* Set up data needed by standard_qp_callback */
1486  qp_extra.activeWindows = activeWindows;
1487  qp_extra.gset_data = gset_data;
1488 
1489  /*
1490  * Generate the best unsorted and presorted paths for the scan/join
1491  * portion of this Query, ie the processing represented by the
1492  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1493  * We also generate (in standard_qp_callback) pathkey representations
1494  * of the query's sort clause, distinct clause, etc.
1495  */
1496  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1497 
1498  /*
1499  * Convert the query's result tlist into PathTarget format.
1500  *
1501  * Note: this cannot be done before query_planner() has performed
1502  * appendrel expansion, because that might add resjunk entries to
1503  * root->processed_tlist. Waiting till afterwards is also helpful
1504  * because the target width estimates can use per-Var width numbers
1505  * that were obtained within query_planner().
1506  */
1507  final_target = create_pathtarget(root, root->processed_tlist);
1508  final_target_parallel_safe =
1509  is_parallel_safe(root, (Node *) final_target->exprs);
1510 
1511  /*
1512  * If ORDER BY was given, consider whether we should use a post-sort
1513  * projection, and compute the adjusted target for preceding steps if
1514  * so.
1515  */
1516  if (parse->sortClause)
1517  {
1518  sort_input_target = make_sort_input_target(root,
1519  final_target,
1520  &have_postponed_srfs);
1521  sort_input_target_parallel_safe =
1522  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1523  }
1524  else
1525  {
1526  sort_input_target = final_target;
1527  sort_input_target_parallel_safe = final_target_parallel_safe;
1528  }
1529 
1530  /*
1531  * If we have window functions to deal with, the output from any
1532  * grouping step needs to be what the window functions want;
1533  * otherwise, it should be sort_input_target.
1534  */
1535  if (activeWindows)
1536  {
1537  grouping_target = make_window_input_target(root,
1538  final_target,
1539  activeWindows);
1540  grouping_target_parallel_safe =
1541  is_parallel_safe(root, (Node *) grouping_target->exprs);
1542  }
1543  else
1544  {
1545  grouping_target = sort_input_target;
1546  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1547  }
1548 
1549  /*
1550  * If we have grouping or aggregation to do, the topmost scan/join
1551  * plan node must emit what the grouping step wants; otherwise, it
1552  * should emit grouping_target.
1553  */
1554  have_grouping = (parse->groupClause || parse->groupingSets ||
1555  parse->hasAggs || root->hasHavingQual);
1556  if (have_grouping)
1557  {
1558  scanjoin_target = make_group_input_target(root, final_target);
1559  scanjoin_target_parallel_safe =
1560  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1561  }
1562  else
1563  {
1564  scanjoin_target = grouping_target;
1565  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1566  }
1567 
1568  /*
1569  * If there are any SRFs in the targetlist, we must separate each of
1570  * these PathTargets into SRF-computing and SRF-free targets. Replace
1571  * each of the named targets with a SRF-free version, and remember the
1572  * list of additional projection steps we need to add afterwards.
1573  */
1574  if (parse->hasTargetSRFs)
1575  {
1576  /* final_target doesn't recompute any SRFs in sort_input_target */
1577  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1578  &final_targets,
1579  &final_targets_contain_srfs);
1580  final_target = linitial_node(PathTarget, final_targets);
1581  Assert(!linitial_int(final_targets_contain_srfs));
1582  /* likewise for sort_input_target vs. grouping_target */
1583  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1584  &sort_input_targets,
1585  &sort_input_targets_contain_srfs);
1586  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1587  Assert(!linitial_int(sort_input_targets_contain_srfs));
1588  /* likewise for grouping_target vs. scanjoin_target */
1589  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1590  &grouping_targets,
1591  &grouping_targets_contain_srfs);
1592  grouping_target = linitial_node(PathTarget, grouping_targets);
1593  Assert(!linitial_int(grouping_targets_contain_srfs));
1594  /* scanjoin_target will not have any SRFs precomputed for it */
1595  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1596  &scanjoin_targets,
1597  &scanjoin_targets_contain_srfs);
1598  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1599  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1600  }
1601  else
1602  {
1603  /* initialize lists; for most of these, dummy values are OK */
1604  final_targets = final_targets_contain_srfs = NIL;
1605  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1606  grouping_targets = grouping_targets_contain_srfs = NIL;
1607  scanjoin_targets = list_make1(scanjoin_target);
1608  scanjoin_targets_contain_srfs = NIL;
1609  }
1610 
1611  /* Apply scan/join target. */
1612  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1613  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1614  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1615  scanjoin_targets_contain_srfs,
1616  scanjoin_target_parallel_safe,
1617  scanjoin_target_same_exprs);
1618 
1619  /*
1620  * Save the various upper-rel PathTargets we just computed into
1621  * root->upper_targets[]. The core code doesn't use this, but it
1622  * provides a convenient place for extensions to get at the info. For
1623  * consistency, we save all the intermediate targets, even though some
1624  * of the corresponding upperrels might not be needed for this query.
1625  */
1626  root->upper_targets[UPPERREL_FINAL] = final_target;
1627  root->upper_targets[UPPERREL_ORDERED] = final_target;
1628  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1629  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1630  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1631  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1632 
1633  /*
1634  * If we have grouping and/or aggregation, consider ways to implement
1635  * that. We build a new upperrel representing the output of this
1636  * phase.
1637  */
1638  if (have_grouping)
1639  {
1640  current_rel = create_grouping_paths(root,
1641  current_rel,
1642  grouping_target,
1643  grouping_target_parallel_safe,
1644  gset_data);
1645  /* Fix things up if grouping_target contains SRFs */
1646  if (parse->hasTargetSRFs)
1647  adjust_paths_for_srfs(root, current_rel,
1648  grouping_targets,
1649  grouping_targets_contain_srfs);
1650  }
1651 
1652  /*
1653  * If we have window functions, consider ways to implement those. We
1654  * build a new upperrel representing the output of this phase.
1655  */
1656  if (activeWindows)
1657  {
1658  current_rel = create_window_paths(root,
1659  current_rel,
1660  grouping_target,
1661  sort_input_target,
1662  sort_input_target_parallel_safe,
1663  wflists,
1664  activeWindows);
1665  /* Fix things up if sort_input_target contains SRFs */
1666  if (parse->hasTargetSRFs)
1667  adjust_paths_for_srfs(root, current_rel,
1668  sort_input_targets,
1669  sort_input_targets_contain_srfs);
1670  }
1671 
1672  /*
1673  * If there is a DISTINCT clause, consider ways to implement that. We
1674  * build a new upperrel representing the output of this phase.
1675  */
1676  if (parse->distinctClause)
1677  {
1678  current_rel = create_distinct_paths(root,
1679  current_rel);
1680  }
1681  } /* end of if (setOperations) */
1682 
1683  /*
1684  * If ORDER BY was given, consider ways to implement that, and generate a
1685  * new upperrel containing only paths that emit the correct ordering and
1686  * project the correct final_target. We can apply the original
1687  * limit_tuples limit in sort costing here, but only if there are no
1688  * postponed SRFs.
1689  */
1690  if (parse->sortClause)
1691  {
1692  current_rel = create_ordered_paths(root,
1693  current_rel,
1694  final_target,
1695  final_target_parallel_safe,
1696  have_postponed_srfs ? -1.0 :
1697  limit_tuples);
1698  /* Fix things up if final_target contains SRFs */
1699  if (parse->hasTargetSRFs)
1700  adjust_paths_for_srfs(root, current_rel,
1701  final_targets,
1702  final_targets_contain_srfs);
1703  }
1704 
1705  /*
1706  * Now we are prepared to build the final-output upperrel.
1707  */
1708  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1709 
1710  /*
1711  * If the input rel is marked consider_parallel and there's nothing that's
1712  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1713  * consider_parallel as well. Note that if the query has rowMarks or is
1714  * not a SELECT, consider_parallel will be false for every relation in the
1715  * query.
1716  */
1717  if (current_rel->consider_parallel &&
1718  is_parallel_safe(root, parse->limitOffset) &&
1719  is_parallel_safe(root, parse->limitCount))
1720  final_rel->consider_parallel = true;
1721 
1722  /*
1723  * If the current_rel belongs to a single FDW, so does the final_rel.
1724  */
1725  final_rel->serverid = current_rel->serverid;
1726  final_rel->userid = current_rel->userid;
1727  final_rel->useridiscurrent = current_rel->useridiscurrent;
1728  final_rel->fdwroutine = current_rel->fdwroutine;
1729 
1730  /*
1731  * Generate paths for the final_rel. Insert all surviving paths, with
1732  * LockRows, Limit, and/or ModifyTable steps added if needed.
1733  */
1734  foreach(lc, current_rel->pathlist)
1735  {
1736  Path *path = (Path *) lfirst(lc);
1737 
1738  /*
1739  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1740  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1741  * here. If there are only non-locking rowmarks, they should be
1742  * handled by the ModifyTable node instead. However, root->rowMarks
1743  * is what goes into the LockRows node.)
1744  */
1745  if (parse->rowMarks)
1746  {
1747  path = (Path *) create_lockrows_path(root, final_rel, path,
1748  root->rowMarks,
1750  }
1751 
1752  /*
1753  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1754  */
1755  if (limit_needed(parse))
1756  {
1757  path = (Path *) create_limit_path(root, final_rel, path,
1758  parse->limitOffset,
1759  parse->limitCount,
1760  parse->limitOption,
1761  offset_est, count_est);
1762  }
1763 
1764  /*
1765  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1766  */
1767  if (parse->commandType != CMD_SELECT)
1768  {
1769  Index rootRelation;
1770  List *resultRelations = NIL;
1771  List *updateColnosLists = NIL;
1772  List *withCheckOptionLists = NIL;
1773  List *returningLists = NIL;
1774  List *mergeActionLists = NIL;
1775  List *rowMarks;
1776 
1778  {
1779  /* Inherited UPDATE/DELETE/MERGE */
1780  RelOptInfo *top_result_rel = find_base_rel(root,
1781  parse->resultRelation);
1782  int resultRelation = -1;
1783 
1784  /* Add only leaf children to ModifyTable. */
1785  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1786  resultRelation)) >= 0)
1787  {
1788  RelOptInfo *this_result_rel = find_base_rel(root,
1789  resultRelation);
1790 
1791  /*
1792  * Also exclude any leaf rels that have turned dummy since
1793  * being added to the list, for example, by being excluded
1794  * by constraint exclusion.
1795  */
1796  if (IS_DUMMY_REL(this_result_rel))
1797  continue;
1798 
1799  /* Build per-target-rel lists needed by ModifyTable */
1800  resultRelations = lappend_int(resultRelations,
1801  resultRelation);
1802  if (parse->commandType == CMD_UPDATE)
1803  {
1804  List *update_colnos = root->update_colnos;
1805 
1806  if (this_result_rel != top_result_rel)
1807  update_colnos =
1809  update_colnos,
1810  this_result_rel->relid,
1811  top_result_rel->relid);
1812  updateColnosLists = lappend(updateColnosLists,
1813  update_colnos);
1814  }
1815  if (parse->withCheckOptions)
1816  {
1817  List *withCheckOptions = parse->withCheckOptions;
1818 
1819  if (this_result_rel != top_result_rel)
1820  withCheckOptions = (List *)
1822  (Node *) withCheckOptions,
1823  this_result_rel,
1824  top_result_rel);
1825  withCheckOptionLists = lappend(withCheckOptionLists,
1826  withCheckOptions);
1827  }
1828  if (parse->returningList)
1829  {
1830  List *returningList = parse->returningList;
1831 
1832  if (this_result_rel != top_result_rel)
1833  returningList = (List *)
1835  (Node *) returningList,
1836  this_result_rel,
1837  top_result_rel);
1838  returningLists = lappend(returningLists,
1839  returningList);
1840  }
1841  if (parse->mergeActionList)
1842  {
1843  ListCell *l;
1844  List *mergeActionList = NIL;
1845 
1846  /*
1847  * Copy MergeActions and translate stuff that
1848  * references attribute numbers.
1849  */
1850  foreach(l, parse->mergeActionList)
1851  {
1852  MergeAction *action = lfirst(l),
1853  *leaf_action = copyObject(action);
1854 
1855  leaf_action->qual =
1857  (Node *) action->qual,
1858  this_result_rel,
1859  top_result_rel);
1860  leaf_action->targetList = (List *)
1862  (Node *) action->targetList,
1863  this_result_rel,
1864  top_result_rel);
1865  if (leaf_action->commandType == CMD_UPDATE)
1866  leaf_action->updateColnos =
1868  action->updateColnos,
1869  this_result_rel->relid,
1870  top_result_rel->relid);
1871  mergeActionList = lappend(mergeActionList,
1872  leaf_action);
1873  }
1874 
1875  mergeActionLists = lappend(mergeActionLists,
1876  mergeActionList);
1877  }
1878  }
1879 
1880  if (resultRelations == NIL)
1881  {
1882  /*
1883  * We managed to exclude every child rel, so generate a
1884  * dummy one-relation plan using info for the top target
1885  * rel (even though that may not be a leaf target).
1886  * Although it's clear that no data will be updated or
1887  * deleted, we still need to have a ModifyTable node so
1888  * that any statement triggers will be executed. (This
1889  * could be cleaner if we fixed nodeModifyTable.c to allow
1890  * zero target relations, but that probably wouldn't be a
1891  * net win.)
1892  */
1893  resultRelations = list_make1_int(parse->resultRelation);
1894  if (parse->commandType == CMD_UPDATE)
1895  updateColnosLists = list_make1(root->update_colnos);
1896  if (parse->withCheckOptions)
1897  withCheckOptionLists = list_make1(parse->withCheckOptions);
1898  if (parse->returningList)
1899  returningLists = list_make1(parse->returningList);
1900  if (parse->mergeActionList)
1901  mergeActionLists = list_make1(parse->mergeActionList);
1902  }
1903  }
1904  else
1905  {
1906  /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
1907  resultRelations = list_make1_int(parse->resultRelation);
1908  if (parse->commandType == CMD_UPDATE)
1909  updateColnosLists = list_make1(root->update_colnos);
1910  if (parse->withCheckOptions)
1911  withCheckOptionLists = list_make1(parse->withCheckOptions);
1912  if (parse->returningList)
1913  returningLists = list_make1(parse->returningList);
1914  if (parse->mergeActionList)
1915  mergeActionLists = list_make1(parse->mergeActionList);
1916  }
1917 
1918  /*
1919  * If target is a partition root table, we need to mark the
1920  * ModifyTable node appropriately for that.
1921  */
1922  if (rt_fetch(parse->resultRelation, parse->rtable)->relkind ==
1923  RELKIND_PARTITIONED_TABLE)
1924  rootRelation = parse->resultRelation;
1925  else
1926  rootRelation = 0;
1927 
1928  /*
1929  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1930  * will have dealt with fetching non-locked marked rows, else we
1931  * need to have ModifyTable do that.
1932  */
1933  if (parse->rowMarks)
1934  rowMarks = NIL;
1935  else
1936  rowMarks = root->rowMarks;
1937 
1938  path = (Path *)
1939  create_modifytable_path(root, final_rel,
1940  path,
1941  parse->commandType,
1942  parse->canSetTag,
1943  parse->resultRelation,
1944  rootRelation,
1945  root->partColsUpdated,
1946  resultRelations,
1947  updateColnosLists,
1948  withCheckOptionLists,
1949  returningLists,
1950  rowMarks,
1951  parse->onConflict,
1952  mergeActionLists,
1954  }
1955 
1956  /* And shove it into final_rel */
1957  add_path(final_rel, path);
1958  }
1959 
1960  /*
1961  * Generate partial paths for final_rel, too, if outer query levels might
1962  * be able to make use of them.
1963  */
1964  if (final_rel->consider_parallel && root->query_level > 1 &&
1965  !limit_needed(parse))
1966  {
1967  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
1968  foreach(lc, current_rel->partial_pathlist)
1969  {
1970  Path *partial_path = (Path *) lfirst(lc);
1971 
1972  add_partial_path(final_rel, partial_path);
1973  }
1974  }
1975 
1976  extra.limit_needed = limit_needed(parse);
1977  extra.limit_tuples = limit_tuples;
1978  extra.count_est = count_est;
1979  extra.offset_est = offset_est;
1980 
1981  /*
1982  * If there is an FDW that's responsible for all baserels of the query,
1983  * let it consider adding ForeignPaths.
1984  */
1985  if (final_rel->fdwroutine &&
1986  final_rel->fdwroutine->GetForeignUpperPaths)
1987  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
1988  current_rel, final_rel,
1989  &extra);
1990 
1991  /* Let extensions possibly add some more paths */
1993  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
1994  current_rel, final_rel, &extra);
1995 
1996  /* Note: currently, we leave it to callers to do set_cheapest() */
1997 }
1998 
1999 /*
2000  * Do preprocessing for groupingSets clause and related data. This handles the
2001  * preliminary steps of expanding the grouping sets, organizing them into lists
2002  * of rollups, and preparing annotations which will later be filled in with
2003  * size estimates.
2004  */
2005 static grouping_sets_data *
2007 {
2008  Query *parse = root->parse;
2009  List *sets;
2010  int maxref = 0;
2011  ListCell *lc_set;
2013 
2014  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2015 
2016  gd->any_hashable = false;
2017  gd->unhashable_refs = NULL;
2018  gd->unsortable_refs = NULL;
2019  gd->unsortable_sets = NIL;
2020 
2021  /*
2022  * We don't currently make any attempt to optimize the groupClause when
2023  * there are grouping sets, so just duplicate it in processed_groupClause.
2024  */
2025  root->processed_groupClause = parse->groupClause;
2026 
2027  if (parse->groupClause)
2028  {
2029  ListCell *lc;
2030 
2031  foreach(lc, parse->groupClause)
2032  {
2034  Index ref = gc->tleSortGroupRef;
2035 
2036  if (ref > maxref)
2037  maxref = ref;
2038 
2039  if (!gc->hashable)
2041 
2042  if (!OidIsValid(gc->sortop))
2044  }
2045  }
2046 
2047  /* Allocate workspace array for remapping */
2048  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2049 
2050  /*
2051  * If we have any unsortable sets, we must extract them before trying to
2052  * prepare rollups. Unsortable sets don't go through
2053  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2054  * here.
2055  */
2056  if (!bms_is_empty(gd->unsortable_refs))
2057  {
2058  List *sortable_sets = NIL;
2059  ListCell *lc;
2060 
2061  foreach(lc, parse->groupingSets)
2062  {
2063  List *gset = (List *) lfirst(lc);
2064 
2065  if (bms_overlap_list(gd->unsortable_refs, gset))
2066  {
2068 
2069  gs->set = gset;
2070  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2071 
2072  /*
2073  * We must enforce here that an unsortable set is hashable;
2074  * later code assumes this. Parse analysis only checks that
2075  * every individual column is either hashable or sortable.
2076  *
2077  * Note that passing this test doesn't guarantee we can
2078  * generate a plan; there might be other showstoppers.
2079  */
2080  if (bms_overlap_list(gd->unhashable_refs, gset))
2081  ereport(ERROR,
2082  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2083  errmsg("could not implement GROUP BY"),
2084  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2085  }
2086  else
2087  sortable_sets = lappend(sortable_sets, gset);
2088  }
2089 
2090  if (sortable_sets)
2091  sets = extract_rollup_sets(sortable_sets);
2092  else
2093  sets = NIL;
2094  }
2095  else
2096  sets = extract_rollup_sets(parse->groupingSets);
2097 
2098  foreach(lc_set, sets)
2099  {
2100  List *current_sets = (List *) lfirst(lc_set);
2101  RollupData *rollup = makeNode(RollupData);
2102  GroupingSetData *gs;
2103 
2104  /*
2105  * Reorder the current list of grouping sets into correct prefix
2106  * order. If only one aggregation pass is needed, try to make the
2107  * list match the ORDER BY clause; if more than one pass is needed, we
2108  * don't bother with that.
2109  *
2110  * Note that this reorders the sets from smallest-member-first to
2111  * largest-member-first, and applies the GroupingSetData annotations,
2112  * though the data will be filled in later.
2113  */
2114  current_sets = reorder_grouping_sets(current_sets,
2115  (list_length(sets) == 1
2116  ? parse->sortClause
2117  : NIL));
2118 
2119  /*
2120  * Get the initial (and therefore largest) grouping set.
2121  */
2122  gs = linitial_node(GroupingSetData, current_sets);
2123 
2124  /*
2125  * Order the groupClause appropriately. If the first grouping set is
2126  * empty, then the groupClause must also be empty; otherwise we have
2127  * to force the groupClause to match that grouping set's order.
2128  *
2129  * (The first grouping set can be empty even though parse->groupClause
2130  * is not empty only if all non-empty grouping sets are unsortable.
2131  * The groupClauses for hashed grouping sets are built later on.)
2132  */
2133  if (gs->set)
2134  rollup->groupClause = preprocess_groupclause(root, gs->set);
2135  else
2136  rollup->groupClause = NIL;
2137 
2138  /*
2139  * Is it hashable? We pretend empty sets are hashable even though we
2140  * actually force them not to be hashed later. But don't bother if
2141  * there's nothing but empty sets (since in that case we can't hash
2142  * anything).
2143  */
2144  if (gs->set &&
2146  {
2147  rollup->hashable = true;
2148  gd->any_hashable = true;
2149  }
2150 
2151  /*
2152  * Now that we've pinned down an order for the groupClause for this
2153  * list of grouping sets, we need to remap the entries in the grouping
2154  * sets from sortgrouprefs to plain indices (0-based) into the
2155  * groupClause for this collection of grouping sets. We keep the
2156  * original form for later use, though.
2157  */
2158  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2159  current_sets,
2160  gd->tleref_to_colnum_map);
2161  rollup->gsets_data = current_sets;
2162 
2163  gd->rollups = lappend(gd->rollups, rollup);
2164  }
2165 
2166  if (gd->unsortable_sets)
2167  {
2168  /*
2169  * We have not yet pinned down a groupclause for this, but we will
2170  * need index-based lists for estimation purposes. Construct
2171  * hash_sets_idx based on the entire original groupclause for now.
2172  */
2173  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2174  gd->unsortable_sets,
2175  gd->tleref_to_colnum_map);
2176  gd->any_hashable = true;
2177  }
2178 
2179  return gd;
2180 }
2181 
2182 /*
2183  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2184  * (without annotation) mapped to indexes into the given groupclause.
2185  */
2186 static List *
2188  List *gsets,
2189  int *tleref_to_colnum_map)
2190 {
2191  int ref = 0;
2192  List *result = NIL;
2193  ListCell *lc;
2194 
2195  foreach(lc, groupClause)
2196  {
2198 
2199  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2200  }
2201 
2202  foreach(lc, gsets)
2203  {
2204  List *set = NIL;
2205  ListCell *lc2;
2207 
2208  foreach(lc2, gs->set)
2209  {
2210  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2211  }
2212 
2213  result = lappend(result, set);
2214  }
2215 
2216  return result;
2217 }
2218 
2219 
2220 /*
2221  * preprocess_rowmarks - set up PlanRowMarks if needed
2222  */
2223 static void
2225 {
2226  Query *parse = root->parse;
2227  Bitmapset *rels;
2228  List *prowmarks;
2229  ListCell *l;
2230  int i;
2231 
2232  if (parse->rowMarks)
2233  {
2234  /*
2235  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2236  * grouping, since grouping renders a reference to individual tuple
2237  * CTIDs invalid. This is also checked at parse time, but that's
2238  * insufficient because of rule substitution, query pullup, etc.
2239  */
2241  parse->rowMarks)->strength);
2242  }
2243  else
2244  {
2245  /*
2246  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2247  * UPDATE/SHARE.
2248  */
2249  if (parse->commandType != CMD_UPDATE &&
2250  parse->commandType != CMD_DELETE)
2251  return;
2252  }
2253 
2254  /*
2255  * We need to have rowmarks for all base relations except the target. We
2256  * make a bitmapset of all base rels and then remove the items we don't
2257  * need or have FOR [KEY] UPDATE/SHARE marks for.
2258  */
2259  rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2260  if (parse->resultRelation)
2261  rels = bms_del_member(rels, parse->resultRelation);
2262 
2263  /*
2264  * Convert RowMarkClauses to PlanRowMark representation.
2265  */
2266  prowmarks = NIL;
2267  foreach(l, parse->rowMarks)
2268  {
2270  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2271  PlanRowMark *newrc;
2272 
2273  /*
2274  * Currently, it is syntactically impossible to have FOR UPDATE et al
2275  * applied to an update/delete target rel. If that ever becomes
2276  * possible, we should drop the target from the PlanRowMark list.
2277  */
2278  Assert(rc->rti != parse->resultRelation);
2279 
2280  /*
2281  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2282  * can't support true locking. Subqueries that got flattened into the
2283  * main query should be ignored completely. Any that didn't will get
2284  * ROW_MARK_COPY items in the next loop.
2285  */
2286  if (rte->rtekind != RTE_RELATION)
2287  continue;
2288 
2289  rels = bms_del_member(rels, rc->rti);
2290 
2291  newrc = makeNode(PlanRowMark);
2292  newrc->rti = newrc->prti = rc->rti;
2293  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2294  newrc->markType = select_rowmark_type(rte, rc->strength);
2295  newrc->allMarkTypes = (1 << newrc->markType);
2296  newrc->strength = rc->strength;
2297  newrc->waitPolicy = rc->waitPolicy;
2298  newrc->isParent = false;
2299 
2300  prowmarks = lappend(prowmarks, newrc);
2301  }
2302 
2303  /*
2304  * Now, add rowmarks for any non-target, non-locked base relations.
2305  */
2306  i = 0;
2307  foreach(l, parse->rtable)
2308  {
2310  PlanRowMark *newrc;
2311 
2312  i++;
2313  if (!bms_is_member(i, rels))
2314  continue;
2315 
2316  newrc = makeNode(PlanRowMark);
2317  newrc->rti = newrc->prti = i;
2318  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2319  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2320  newrc->allMarkTypes = (1 << newrc->markType);
2321  newrc->strength = LCS_NONE;
2322  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2323  newrc->isParent = false;
2324 
2325  prowmarks = lappend(prowmarks, newrc);
2326  }
2327 
2328  root->rowMarks = prowmarks;
2329 }
2330 
2331 /*
2332  * Select RowMarkType to use for a given table
2333  */
2336 {
2337  if (rte->rtekind != RTE_RELATION)
2338  {
2339  /* If it's not a table at all, use ROW_MARK_COPY */
2340  return ROW_MARK_COPY;
2341  }
2342  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2343  {
2344  /* Let the FDW select the rowmark type, if it wants to */
2345  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2346 
2347  if (fdwroutine->GetForeignRowMarkType != NULL)
2348  return fdwroutine->GetForeignRowMarkType(rte, strength);
2349  /* Otherwise, use ROW_MARK_COPY by default */
2350  return ROW_MARK_COPY;
2351  }
2352  else
2353  {
2354  /* Regular table, apply the appropriate lock type */
2355  switch (strength)
2356  {
2357  case LCS_NONE:
2358 
2359  /*
2360  * We don't need a tuple lock, only the ability to re-fetch
2361  * the row.
2362  */
2363  return ROW_MARK_REFERENCE;
2364  break;
2365  case LCS_FORKEYSHARE:
2366  return ROW_MARK_KEYSHARE;
2367  break;
2368  case LCS_FORSHARE:
2369  return ROW_MARK_SHARE;
2370  break;
2371  case LCS_FORNOKEYUPDATE:
2372  return ROW_MARK_NOKEYEXCLUSIVE;
2373  break;
2374  case LCS_FORUPDATE:
2375  return ROW_MARK_EXCLUSIVE;
2376  break;
2377  }
2378  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2379  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2380  }
2381 }
2382 
2383 /*
2384  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2385  *
2386  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2387  * results back in *count_est and *offset_est. These variables are set to
2388  * 0 if the corresponding clause is not present, and -1 if it's present
2389  * but we couldn't estimate the value for it. (The "0" convention is OK
2390  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2391  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2392  * usual practice of never estimating less than one row.) These values will
2393  * be passed to create_limit_path, which see if you change this code.
2394  *
2395  * The return value is the suitably adjusted tuple_fraction to use for
2396  * planning the query. This adjustment is not overridable, since it reflects
2397  * plan actions that grouping_planner() will certainly take, not assumptions
2398  * about context.
2399  */
2400 static double
2401 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2402  int64 *offset_est, int64 *count_est)
2403 {
2404  Query *parse = root->parse;
2405  Node *est;
2406  double limit_fraction;
2407 
2408  /* Should not be called unless LIMIT or OFFSET */
2409  Assert(parse->limitCount || parse->limitOffset);
2410 
2411  /*
2412  * Try to obtain the clause values. We use estimate_expression_value
2413  * primarily because it can sometimes do something useful with Params.
2414  */
2415  if (parse->limitCount)
2416  {
2417  est = estimate_expression_value(root, parse->limitCount);
2418  if (est && IsA(est, Const))
2419  {
2420  if (((Const *) est)->constisnull)
2421  {
2422  /* NULL indicates LIMIT ALL, ie, no limit */
2423  *count_est = 0; /* treat as not present */
2424  }
2425  else
2426  {
2427  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2428  if (*count_est <= 0)
2429  *count_est = 1; /* force to at least 1 */
2430  }
2431  }
2432  else
2433  *count_est = -1; /* can't estimate */
2434  }
2435  else
2436  *count_est = 0; /* not present */
2437 
2438  if (parse->limitOffset)
2439  {
2440  est = estimate_expression_value(root, parse->limitOffset);
2441  if (est && IsA(est, Const))
2442  {
2443  if (((Const *) est)->constisnull)
2444  {
2445  /* Treat NULL as no offset; the executor will too */
2446  *offset_est = 0; /* treat as not present */
2447  }
2448  else
2449  {
2450  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2451  if (*offset_est < 0)
2452  *offset_est = 0; /* treat as not present */
2453  }
2454  }
2455  else
2456  *offset_est = -1; /* can't estimate */
2457  }
2458  else
2459  *offset_est = 0; /* not present */
2460 
2461  if (*count_est != 0)
2462  {
2463  /*
2464  * A LIMIT clause limits the absolute number of tuples returned.
2465  * However, if it's not a constant LIMIT then we have to guess; for
2466  * lack of a better idea, assume 10% of the plan's result is wanted.
2467  */
2468  if (*count_est < 0 || *offset_est < 0)
2469  {
2470  /* LIMIT or OFFSET is an expression ... punt ... */
2471  limit_fraction = 0.10;
2472  }
2473  else
2474  {
2475  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2476  limit_fraction = (double) *count_est + (double) *offset_est;
2477  }
2478 
2479  /*
2480  * If we have absolute limits from both caller and LIMIT, use the
2481  * smaller value; likewise if they are both fractional. If one is
2482  * fractional and the other absolute, we can't easily determine which
2483  * is smaller, but we use the heuristic that the absolute will usually
2484  * be smaller.
2485  */
2486  if (tuple_fraction >= 1.0)
2487  {
2488  if (limit_fraction >= 1.0)
2489  {
2490  /* both absolute */
2491  tuple_fraction = Min(tuple_fraction, limit_fraction);
2492  }
2493  else
2494  {
2495  /* caller absolute, limit fractional; use caller's value */
2496  }
2497  }
2498  else if (tuple_fraction > 0.0)
2499  {
2500  if (limit_fraction >= 1.0)
2501  {
2502  /* caller fractional, limit absolute; use limit */
2503  tuple_fraction = limit_fraction;
2504  }
2505  else
2506  {
2507  /* both fractional */
2508  tuple_fraction = Min(tuple_fraction, limit_fraction);
2509  }
2510  }
2511  else
2512  {
2513  /* no info from caller, just use limit */
2514  tuple_fraction = limit_fraction;
2515  }
2516  }
2517  else if (*offset_est != 0 && tuple_fraction > 0.0)
2518  {
2519  /*
2520  * We have an OFFSET but no LIMIT. This acts entirely differently
2521  * from the LIMIT case: here, we need to increase rather than decrease
2522  * the caller's tuple_fraction, because the OFFSET acts to cause more
2523  * tuples to be fetched instead of fewer. This only matters if we got
2524  * a tuple_fraction > 0, however.
2525  *
2526  * As above, use 10% if OFFSET is present but unestimatable.
2527  */
2528  if (*offset_est < 0)
2529  limit_fraction = 0.10;
2530  else
2531  limit_fraction = (double) *offset_est;
2532 
2533  /*
2534  * If we have absolute counts from both caller and OFFSET, add them
2535  * together; likewise if they are both fractional. If one is
2536  * fractional and the other absolute, we want to take the larger, and
2537  * we heuristically assume that's the fractional one.
2538  */
2539  if (tuple_fraction >= 1.0)
2540  {
2541  if (limit_fraction >= 1.0)
2542  {
2543  /* both absolute, so add them together */
2544  tuple_fraction += limit_fraction;
2545  }
2546  else
2547  {
2548  /* caller absolute, limit fractional; use limit */
2549  tuple_fraction = limit_fraction;
2550  }
2551  }
2552  else
2553  {
2554  if (limit_fraction >= 1.0)
2555  {
2556  /* caller fractional, limit absolute; use caller's value */
2557  }
2558  else
2559  {
2560  /* both fractional, so add them together */
2561  tuple_fraction += limit_fraction;
2562  if (tuple_fraction >= 1.0)
2563  tuple_fraction = 0.0; /* assume fetch all */
2564  }
2565  }
2566  }
2567 
2568  return tuple_fraction;
2569 }
2570 
2571 /*
2572  * limit_needed - do we actually need a Limit plan node?
2573  *
2574  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2575  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2576  * locution for an optimization fence. (Because other places in the planner
2577  * merely check whether parse->limitOffset isn't NULL, it will still work as
2578  * an optimization fence --- we're just suppressing unnecessary run-time
2579  * overhead.)
2580  *
2581  * This might look like it could be merged into preprocess_limit, but there's
2582  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2583  * in preprocess_limit it's good enough to consider estimated values.
2584  */
2585 bool
2587 {
2588  Node *node;
2589 
2590  node = parse->limitCount;
2591  if (node)
2592  {
2593  if (IsA(node, Const))
2594  {
2595  /* NULL indicates LIMIT ALL, ie, no limit */
2596  if (!((Const *) node)->constisnull)
2597  return true; /* LIMIT with a constant value */
2598  }
2599  else
2600  return true; /* non-constant LIMIT */
2601  }
2602 
2603  node = parse->limitOffset;
2604  if (node)
2605  {
2606  if (IsA(node, Const))
2607  {
2608  /* Treat NULL as no offset; the executor would too */
2609  if (!((Const *) node)->constisnull)
2610  {
2611  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2612 
2613  if (offset != 0)
2614  return true; /* OFFSET with a nonzero value */
2615  }
2616  }
2617  else
2618  return true; /* non-constant OFFSET */
2619  }
2620 
2621  return false; /* don't need a Limit plan node */
2622 }
2623 
2624 
2625 /*
2626  * remove_useless_groupby_columns
2627  * Remove any columns in the GROUP BY clause that are redundant due to
2628  * being functionally dependent on other GROUP BY columns.
2629  *
2630  * Since some other DBMSes do not allow references to ungrouped columns, it's
2631  * not unusual to find all columns listed in GROUP BY even though listing the
2632  * primary-key columns would be sufficient. Deleting such excess columns
2633  * avoids redundant sorting work, so it's worth doing.
2634  *
2635  * Relcache invalidations will ensure that cached plans become invalidated
2636  * when the underlying index of the pkey constraint is dropped.
2637  *
2638  * Currently, we only make use of pkey constraints for this, however, we may
2639  * wish to take this further in the future and also use unique constraints
2640  * which have NOT NULL columns. In that case, plan invalidation will still
2641  * work since relations will receive a relcache invalidation when a NOT NULL
2642  * constraint is dropped.
2643  */
2644 static void
2646 {
2647  Query *parse = root->parse;
2648  Bitmapset **groupbyattnos;
2649  Bitmapset **surplusvars;
2650  ListCell *lc;
2651  int relid;
2652 
2653  /* No chance to do anything if there are less than two GROUP BY items */
2654  if (list_length(root->processed_groupClause) < 2)
2655  return;
2656 
2657  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2658  if (parse->groupingSets)
2659  return;
2660 
2661  /*
2662  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2663  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2664  * that are GROUP BY items.
2665  */
2666  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2667  (list_length(parse->rtable) + 1));
2668  foreach(lc, root->processed_groupClause)
2669  {
2671  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2672  Var *var = (Var *) tle->expr;
2673 
2674  /*
2675  * Ignore non-Vars and Vars from other query levels.
2676  *
2677  * XXX in principle, stable expressions containing Vars could also be
2678  * removed, if all the Vars are functionally dependent on other GROUP
2679  * BY items. But it's not clear that such cases occur often enough to
2680  * be worth troubling over.
2681  */
2682  if (!IsA(var, Var) ||
2683  var->varlevelsup > 0)
2684  continue;
2685 
2686  /* OK, remember we have this Var */
2687  relid = var->varno;
2688  Assert(relid <= list_length(parse->rtable));
2689  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2691  }
2692 
2693  /*
2694  * Consider each relation and see if it is possible to remove some of its
2695  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2696  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2697  * of the column attnos of RTE k that are removable GROUP BY items.
2698  */
2699  surplusvars = NULL; /* don't allocate array unless required */
2700  relid = 0;
2701  foreach(lc, parse->rtable)
2702  {
2704  Bitmapset *relattnos;
2705  Bitmapset *pkattnos;
2706  Oid constraintOid;
2707 
2708  relid++;
2709 
2710  /* Only plain relations could have primary-key constraints */
2711  if (rte->rtekind != RTE_RELATION)
2712  continue;
2713 
2714  /*
2715  * We must skip inheritance parent tables as some of the child rels
2716  * may cause duplicate rows. This cannot happen with partitioned
2717  * tables, however.
2718  */
2719  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2720  continue;
2721 
2722  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2723  relattnos = groupbyattnos[relid];
2724  if (bms_membership(relattnos) != BMS_MULTIPLE)
2725  continue;
2726 
2727  /*
2728  * Can't remove any columns for this rel if there is no suitable
2729  * (i.e., nondeferrable) primary key constraint.
2730  */
2731  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2732  if (pkattnos == NULL)
2733  continue;
2734 
2735  /*
2736  * If the primary key is a proper subset of relattnos then we have
2737  * some items in the GROUP BY that can be removed.
2738  */
2739  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2740  {
2741  /*
2742  * To easily remember whether we've found anything to do, we don't
2743  * allocate the surplusvars[] array until we find something.
2744  */
2745  if (surplusvars == NULL)
2746  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2747  (list_length(parse->rtable) + 1));
2748 
2749  /* Remember the attnos of the removable columns */
2750  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2751  }
2752  }
2753 
2754  /*
2755  * If we found any surplus Vars, build a new GROUP BY clause without them.
2756  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2757  * markings, but that's harmless.)
2758  */
2759  if (surplusvars != NULL)
2760  {
2761  List *new_groupby = NIL;
2762 
2763  foreach(lc, root->processed_groupClause)
2764  {
2766  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2767  Var *var = (Var *) tle->expr;
2768 
2769  /*
2770  * New list must include non-Vars, outer Vars, and anything not
2771  * marked as surplus.
2772  */
2773  if (!IsA(var, Var) ||
2774  var->varlevelsup > 0 ||
2776  surplusvars[var->varno]))
2777  new_groupby = lappend(new_groupby, sgc);
2778  }
2779 
2780  root->processed_groupClause = new_groupby;
2781  }
2782 }
2783 
2784 /*
2785  * preprocess_groupclause - do preparatory work on GROUP BY clause
2786  *
2787  * The idea here is to adjust the ordering of the GROUP BY elements
2788  * (which in itself is semantically insignificant) to match ORDER BY,
2789  * thereby allowing a single sort operation to both implement the ORDER BY
2790  * requirement and set up for a Unique step that implements GROUP BY.
2791  *
2792  * In principle it might be interesting to consider other orderings of the
2793  * GROUP BY elements, which could match the sort ordering of other
2794  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2795  * bother with that, though. Hashed grouping will frequently win anyway.
2796  *
2797  * Note: we need no comparable processing of the distinctClause because
2798  * the parser already enforced that that matches ORDER BY.
2799  *
2800  * Note: we return a fresh List, but its elements are the same
2801  * SortGroupClauses appearing in parse->groupClause. This is important
2802  * because later processing may modify the processed_groupClause list.
2803  *
2804  * For grouping sets, the order of items is instead forced to agree with that
2805  * of the grouping set (and items not in the grouping set are skipped). The
2806  * work of sorting the order of grouping set elements to match the ORDER BY if
2807  * possible is done elsewhere.
2808  */
2809 static List *
2811 {
2812  Query *parse = root->parse;
2813  List *new_groupclause = NIL;
2814  bool partial_match;
2815  ListCell *sl;
2816  ListCell *gl;
2817 
2818  /* For grouping sets, we need to force the ordering */
2819  if (force)
2820  {
2821  foreach(sl, force)
2822  {
2823  Index ref = lfirst_int(sl);
2824  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2825 
2826  new_groupclause = lappend(new_groupclause, cl);
2827  }
2828 
2829  return new_groupclause;
2830  }
2831 
2832  /* If no ORDER BY, nothing useful to do here */
2833  if (parse->sortClause == NIL)
2834  return list_copy(parse->groupClause);
2835 
2836  /*
2837  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2838  * items, but only as far as we can make a matching prefix.
2839  *
2840  * This code assumes that the sortClause contains no duplicate items.
2841  */
2842  foreach(sl, parse->sortClause)
2843  {
2845 
2846  foreach(gl, parse->groupClause)
2847  {
2849 
2850  if (equal(gc, sc))
2851  {
2852  new_groupclause = lappend(new_groupclause, gc);
2853  break;
2854  }
2855  }
2856  if (gl == NULL)
2857  break; /* no match, so stop scanning */
2858  }
2859 
2860  /* Did we match all of the ORDER BY list, or just some of it? */
2861  partial_match = (sl != NULL);
2862 
2863  /* If no match at all, no point in reordering GROUP BY */
2864  if (new_groupclause == NIL)
2865  return list_copy(parse->groupClause);
2866 
2867  /*
2868  * Add any remaining GROUP BY items to the new list, but only if we were
2869  * able to make a complete match. In other words, we only rearrange the
2870  * GROUP BY list if the result is that one list is a prefix of the other
2871  * --- otherwise there's no possibility of a common sort. Also, give up
2872  * if there are any non-sortable GROUP BY items, since then there's no
2873  * hope anyway.
2874  */
2875  foreach(gl, parse->groupClause)
2876  {
2878 
2879  if (list_member_ptr(new_groupclause, gc))
2880  continue; /* it matched an ORDER BY item */
2881  if (partial_match) /* give up, no common sort possible */
2882  return list_copy(parse->groupClause);
2883  if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2884  return list_copy(parse->groupClause);
2885  new_groupclause = lappend(new_groupclause, gc);
2886  }
2887 
2888  /* Success --- install the rearranged GROUP BY list */
2889  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2890  return new_groupclause;
2891 }
2892 
2893 /*
2894  * Extract lists of grouping sets that can be implemented using a single
2895  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2896  *
2897  * Input must be sorted with smallest sets first. Result has each sublist
2898  * sorted with smallest sets first.
2899  *
2900  * We want to produce the absolute minimum possible number of lists here to
2901  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2902  * of finding the minimal partition of a partially-ordered set into chains
2903  * (which is what we need, taking the list of grouping sets as a poset ordered
2904  * by set inclusion) can be mapped to the problem of finding the maximum
2905  * cardinality matching on a bipartite graph, which is solvable in polynomial
2906  * time with a worst case of no worse than O(n^2.5) and usually much
2907  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2908  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2909  * half a second on my modest system even with optimization off and assertions
2910  * on.)
2911  */
2912 static List *
2914 {
2915  int num_sets_raw = list_length(groupingSets);
2916  int num_empty = 0;
2917  int num_sets = 0; /* distinct sets */
2918  int num_chains = 0;
2919  List *result = NIL;
2920  List **results;
2921  List **orig_sets;
2922  Bitmapset **set_masks;
2923  int *chains;
2924  short **adjacency;
2925  short *adjacency_buf;
2927  int i;
2928  int j;
2929  int j_size;
2930  ListCell *lc1 = list_head(groupingSets);
2931  ListCell *lc;
2932 
2933  /*
2934  * Start by stripping out empty sets. The algorithm doesn't require this,
2935  * but the planner currently needs all empty sets to be returned in the
2936  * first list, so we strip them here and add them back after.
2937  */
2938  while (lc1 && lfirst(lc1) == NIL)
2939  {
2940  ++num_empty;
2941  lc1 = lnext(groupingSets, lc1);
2942  }
2943 
2944  /* bail out now if it turns out that all we had were empty sets. */
2945  if (!lc1)
2946  return list_make1(groupingSets);
2947 
2948  /*----------
2949  * We don't strictly need to remove duplicate sets here, but if we don't,
2950  * they tend to become scattered through the result, which is a bit
2951  * confusing (and irritating if we ever decide to optimize them out).
2952  * So we remove them here and add them back after.
2953  *
2954  * For each non-duplicate set, we fill in the following:
2955  *
2956  * orig_sets[i] = list of the original set lists
2957  * set_masks[i] = bitmapset for testing inclusion
2958  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2959  *
2960  * chains[i] will be the result group this set is assigned to.
2961  *
2962  * We index all of these from 1 rather than 0 because it is convenient
2963  * to leave 0 free for the NIL node in the graph algorithm.
2964  *----------
2965  */
2966  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2967  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2968  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2969  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2970 
2971  j_size = 0;
2972  j = 0;
2973  i = 1;
2974 
2975  for_each_cell(lc, groupingSets, lc1)
2976  {
2977  List *candidate = (List *) lfirst(lc);
2978  Bitmapset *candidate_set = NULL;
2979  ListCell *lc2;
2980  int dup_of = 0;
2981 
2982  foreach(lc2, candidate)
2983  {
2984  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2985  }
2986 
2987  /* we can only be a dup if we're the same length as a previous set */
2988  if (j_size == list_length(candidate))
2989  {
2990  int k;
2991 
2992  for (k = j; k < i; ++k)
2993  {
2994  if (bms_equal(set_masks[k], candidate_set))
2995  {
2996  dup_of = k;
2997  break;
2998  }
2999  }
3000  }
3001  else if (j_size < list_length(candidate))
3002  {
3003  j_size = list_length(candidate);
3004  j = i;
3005  }
3006 
3007  if (dup_of > 0)
3008  {
3009  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3010  bms_free(candidate_set);
3011  }
3012  else
3013  {
3014  int k;
3015  int n_adj = 0;
3016 
3017  orig_sets[i] = list_make1(candidate);
3018  set_masks[i] = candidate_set;
3019 
3020  /* fill in adjacency list; no need to compare equal-size sets */
3021 
3022  for (k = j - 1; k > 0; --k)
3023  {
3024  if (bms_is_subset(set_masks[k], candidate_set))
3025  adjacency_buf[++n_adj] = k;
3026  }
3027 
3028  if (n_adj > 0)
3029  {
3030  adjacency_buf[0] = n_adj;
3031  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3032  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3033  }
3034  else
3035  adjacency[i] = NULL;
3036 
3037  ++i;
3038  }
3039  }
3040 
3041  num_sets = i - 1;
3042 
3043  /*
3044  * Apply the graph matching algorithm to do the work.
3045  */
3046  state = BipartiteMatch(num_sets, num_sets, adjacency);
3047 
3048  /*
3049  * Now, the state->pair* fields have the info we need to assign sets to
3050  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3051  * pair_vu[v] = u (both will be true, but we check both so that we can do
3052  * it in one pass)
3053  */
3054  chains = palloc0((num_sets + 1) * sizeof(int));
3055 
3056  for (i = 1; i <= num_sets; ++i)
3057  {
3058  int u = state->pair_vu[i];
3059  int v = state->pair_uv[i];
3060 
3061  if (u > 0 && u < i)
3062  chains[i] = chains[u];
3063  else if (v > 0 && v < i)
3064  chains[i] = chains[v];
3065  else
3066  chains[i] = ++num_chains;
3067  }
3068 
3069  /* build result lists. */
3070  results = palloc0((num_chains + 1) * sizeof(List *));
3071 
3072  for (i = 1; i <= num_sets; ++i)
3073  {
3074  int c = chains[i];
3075 
3076  Assert(c > 0);
3077 
3078  results[c] = list_concat(results[c], orig_sets[i]);
3079  }
3080 
3081  /* push any empty sets back on the first list. */
3082  while (num_empty-- > 0)
3083  results[1] = lcons(NIL, results[1]);
3084 
3085  /* make result list */
3086  for (i = 1; i <= num_chains; ++i)
3087  result = lappend(result, results[i]);
3088 
3089  /*
3090  * Free all the things.
3091  *
3092  * (This is over-fussy for small sets but for large sets we could have
3093  * tied up a nontrivial amount of memory.)
3094  */
3096  pfree(results);
3097  pfree(chains);
3098  for (i = 1; i <= num_sets; ++i)
3099  if (adjacency[i])
3100  pfree(adjacency[i]);
3101  pfree(adjacency);
3102  pfree(adjacency_buf);
3103  pfree(orig_sets);
3104  for (i = 1; i <= num_sets; ++i)
3105  bms_free(set_masks[i]);
3106  pfree(set_masks);
3107 
3108  return result;
3109 }
3110 
3111 /*
3112  * Reorder the elements of a list of grouping sets such that they have correct
3113  * prefix relationships. Also inserts the GroupingSetData annotations.
3114  *
3115  * The input must be ordered with smallest sets first; the result is returned
3116  * with largest sets first. Note that the result shares no list substructure
3117  * with the input, so it's safe for the caller to modify it later.
3118  *
3119  * If we're passed in a sortclause, we follow its order of columns to the
3120  * extent possible, to minimize the chance that we add unnecessary sorts.
3121  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3122  * gets implemented in one pass.)
3123  */
3124 static List *
3125 reorder_grouping_sets(List *groupingSets, List *sortclause)
3126 {
3127  ListCell *lc;
3128  List *previous = NIL;
3129  List *result = NIL;
3130 
3131  foreach(lc, groupingSets)
3132  {
3133  List *candidate = (List *) lfirst(lc);
3134  List *new_elems = list_difference_int(candidate, previous);
3136 
3137  while (list_length(sortclause) > list_length(previous) &&
3138  new_elems != NIL)
3139  {
3140  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3141  int ref = sc->tleSortGroupRef;
3142 
3143  if (list_member_int(new_elems, ref))
3144  {
3145  previous = lappend_int(previous, ref);
3146  new_elems = list_delete_int(new_elems, ref);
3147  }
3148  else
3149  {
3150  /* diverged from the sortclause; give up on it */
3151  sortclause = NIL;
3152  break;
3153  }
3154  }
3155 
3156  previous = list_concat(previous, new_elems);
3157 
3158  gs->set = list_copy(previous);
3159  result = lcons(gs, result);
3160  }
3161 
3162  list_free(previous);
3163 
3164  return result;
3165 }
3166 
3167 /*
3168  * has_volatile_pathkey
3169  * Returns true if any PathKey in 'keys' has an EquivalenceClass
3170  * containing a volatile function. Otherwise returns false.
3171  */
3172 static bool
3174 {
3175  ListCell *lc;
3176 
3177  foreach(lc, keys)
3178  {
3179  PathKey *pathkey = lfirst_node(PathKey, lc);
3180 
3181  if (pathkey->pk_eclass->ec_has_volatile)
3182  return true;
3183  }
3184 
3185  return false;
3186 }
3187 
3188 /*
3189  * adjust_group_pathkeys_for_groupagg
3190  * Add pathkeys to root->group_pathkeys to reflect the best set of
3191  * pre-ordered input for ordered aggregates.
3192  *
3193  * We define "best" as the pathkeys that suit the largest number of
3194  * aggregate functions. We find these by looking at the first ORDER BY /
3195  * DISTINCT aggregate and take the pathkeys for that before searching for
3196  * other aggregates that require the same or a more strict variation of the
3197  * same pathkeys. We then repeat that process for any remaining aggregates
3198  * with different pathkeys and if we find another set of pathkeys that suits a
3199  * larger number of aggregates then we select those pathkeys instead.
3200  *
3201  * When the best pathkeys are found we also mark each Aggref that can use
3202  * those pathkeys as aggpresorted = true.
3203  *
3204  * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3205  * volatile functions, we never make use of these pathkeys. We want to ensure
3206  * that sorts using volatile functions are done independently in each Aggref
3207  * rather than once at the query level. If we were to allow this then Aggrefs
3208  * with compatible sort orders would all transition their rows in the same
3209  * order if those pathkeys were deemed to be the best pathkeys to sort on.
3210  * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3211  * better pathkeys to sort on, then the volatile function Aggrefs would be
3212  * left to perform their sorts individually. To avoid this inconsistent
3213  * behavior which could make Aggref results depend on what other Aggrefs the
3214  * query contains, we always force Aggrefs with volatile functions to perform
3215  * their own sorts.
3216  */
3217 static void
3219 {
3220  List *grouppathkeys = root->group_pathkeys;
3221  List *bestpathkeys;
3222  Bitmapset *bestaggs;
3223  Bitmapset *unprocessed_aggs;
3224  ListCell *lc;
3225  int i;
3226 
3227  /* Shouldn't be here if there are grouping sets */
3228  Assert(root->parse->groupingSets == NIL);
3229  /* Shouldn't be here unless there are some ordered aggregates */
3230  Assert(root->numOrderedAggs > 0);
3231 
3232  /* Do nothing if disabled */
3234  return;
3235 
3236  /*
3237  * Make a first pass over all AggInfos to collect a Bitmapset containing
3238  * the indexes of all AggInfos to be processed below.
3239  */
3240  unprocessed_aggs = NULL;
3241  foreach(lc, root->agginfos)
3242  {
3243  AggInfo *agginfo = lfirst_node(AggInfo, lc);
3244  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3245 
3246  if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3247  continue;
3248 
3249  /* only add aggregates with a DISTINCT or ORDER BY */
3250  if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3251  unprocessed_aggs = bms_add_member(unprocessed_aggs,
3252  foreach_current_index(lc));
3253  }
3254 
3255  /*
3256  * Now process all the unprocessed_aggs to find the best set of pathkeys
3257  * for the given set of aggregates.
3258  *
3259  * On the first outer loop here 'bestaggs' will be empty. We'll populate
3260  * this during the first loop using the pathkeys for the very first
3261  * AggInfo then taking any stronger pathkeys from any other AggInfos with
3262  * a more strict set of compatible pathkeys. Once the outer loop is
3263  * complete, we mark off all the aggregates with compatible pathkeys then
3264  * remove those from the unprocessed_aggs and repeat the process to try to
3265  * find another set of pathkeys that are suitable for a larger number of
3266  * aggregates. The outer loop will stop when there are not enough
3267  * unprocessed aggregates for it to be possible to find a set of pathkeys
3268  * to suit a larger number of aggregates.
3269  */
3270  bestpathkeys = NIL;
3271  bestaggs = NULL;
3272  while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3273  {
3274  Bitmapset *aggindexes = NULL;
3275  List *currpathkeys = NIL;
3276 
3277  i = -1;
3278  while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3279  {
3280  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3281  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3282  List *sortlist;
3283  List *pathkeys;
3284 
3285  if (aggref->aggdistinct != NIL)
3286  sortlist = aggref->aggdistinct;
3287  else
3288  sortlist = aggref->aggorder;
3289 
3290  pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3291  aggref->args);
3292 
3293  /*
3294  * Ignore Aggrefs which have volatile functions in their ORDER BY
3295  * or DISTINCT clause.
3296  */
3297  if (has_volatile_pathkey(pathkeys))
3298  {
3299  unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3300  continue;
3301  }
3302 
3303  /*
3304  * When not set yet, take the pathkeys from the first unprocessed
3305  * aggregate.
3306  */
3307  if (currpathkeys == NIL)
3308  {
3309  currpathkeys = pathkeys;
3310 
3311  /* include the GROUP BY pathkeys, if they exist */
3312  if (grouppathkeys != NIL)
3313  currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3314  currpathkeys);
3315 
3316  /* record that we found pathkeys for this aggregate */
3317  aggindexes = bms_add_member(aggindexes, i);
3318  }
3319  else
3320  {
3321  /* now look for a stronger set of matching pathkeys */
3322 
3323  /* include the GROUP BY pathkeys, if they exist */
3324  if (grouppathkeys != NIL)
3325  pathkeys = append_pathkeys(list_copy(grouppathkeys),
3326  pathkeys);
3327 
3328  /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3329  switch (compare_pathkeys(currpathkeys, pathkeys))
3330  {
3331  case PATHKEYS_BETTER2:
3332  /* 'pathkeys' are stronger, use these ones instead */
3333  currpathkeys = pathkeys;
3334  /* FALLTHROUGH */
3335 
3336  case PATHKEYS_BETTER1:
3337  /* 'pathkeys' are less strict */
3338  /* FALLTHROUGH */
3339 
3340  case PATHKEYS_EQUAL:
3341  /* mark this aggregate as covered by 'currpathkeys' */
3342  aggindexes = bms_add_member(aggindexes, i);
3343  break;
3344 
3345  case PATHKEYS_DIFFERENT:
3346  break;
3347  }
3348  }
3349  }
3350 
3351  /* remove the aggregates that we've just processed */
3352  unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3353 
3354  /*
3355  * If this pass included more aggregates than the previous best then
3356  * use these ones as the best set.
3357  */
3358  if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3359  {
3360  bestaggs = aggindexes;
3361  bestpathkeys = currpathkeys;
3362  }
3363  }
3364 
3365  /*
3366  * If we found any ordered aggregates, update root->group_pathkeys to add
3367  * the best set of aggregate pathkeys. Note that bestpathkeys includes
3368  * the original GROUP BY pathkeys already.
3369  */
3370  if (bestpathkeys != NIL)
3371  root->group_pathkeys = bestpathkeys;
3372 
3373  /*
3374  * Now that we've found the best set of aggregates we can set the
3375  * presorted flag to indicate to the executor that it needn't bother
3376  * performing a sort for these Aggrefs. We're able to do this now as
3377  * there's no chance of a Hash Aggregate plan as create_grouping_paths
3378  * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3379  * of ordered aggregates.
3380  */
3381  i = -1;
3382  while ((i = bms_next_member(bestaggs, i)) >= 0)
3383  {
3384  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3385 
3386  foreach(lc, agginfo->aggrefs)
3387  {
3388  Aggref *aggref = lfirst_node(Aggref, lc);
3389 
3390  aggref->aggpresorted = true;
3391  }
3392  }
3393 }
3394 
3395 /*
3396  * Compute query_pathkeys and other pathkeys during plan generation
3397  */
3398 static void
3400 {
3401  Query *parse = root->parse;
3402  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3403  List *tlist = root->processed_tlist;
3404  List *activeWindows = qp_extra->activeWindows;
3405 
3406  /*
3407  * Calculate pathkeys that represent grouping/ordering and/or ordered
3408  * aggregate requirements.
3409  */
3410  if (qp_extra->gset_data)
3411  {
3412  /*
3413  * With grouping sets, just use the first RollupData's groupClause. We
3414  * don't make any effort to optimize grouping clauses when there are
3415  * grouping sets, nor can we combine aggregate ordering keys with
3416  * grouping.
3417  */
3418  List *rollups = qp_extra->gset_data->rollups;
3419  List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3420 
3421  if (grouping_is_sortable(groupClause))
3422  {
3424  groupClause,
3425  tlist);
3427  }
3428  else
3429  {
3430  root->group_pathkeys = NIL;
3431  root->num_groupby_pathkeys = 0;
3432  }
3433  }
3434  else if (parse->groupClause || root->numOrderedAggs > 0)
3435  {
3436  /*
3437  * With a plain GROUP BY list, we can remove any grouping items that
3438  * are proven redundant by EquivalenceClass processing. For example,
3439  * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3440  * especially common cases, but they're nearly free to detect. Note
3441  * that we remove redundant items from processed_groupClause but not
3442  * the original parse->groupClause.
3443  */
3444  bool sortable;
3445 
3446  root->group_pathkeys =
3448  &root->processed_groupClause,
3449  tlist,
3450  true,
3451  &sortable);
3452  if (!sortable)
3453  {
3454  /* Can't sort; no point in considering aggregate ordering either */
3455  root->group_pathkeys = NIL;
3456  root->num_groupby_pathkeys = 0;
3457  }
3458  else
3459  {
3461  /* If we have ordered aggs, consider adding onto group_pathkeys */
3462  if (root->numOrderedAggs > 0)
3464  }
3465  }
3466  else
3467  {
3468  root->group_pathkeys = NIL;
3469  root->num_groupby_pathkeys = 0;
3470  }
3471 
3472  /* We consider only the first (bottom) window in pathkeys logic */
3473  if (activeWindows != NIL)
3474  {
3475  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3476 
3478  wc,
3479  tlist);
3480  }
3481  else
3482  root->window_pathkeys = NIL;
3483 
3484  /*
3485  * As with GROUP BY, we can discard any DISTINCT items that are proven
3486  * redundant by EquivalenceClass processing. The non-redundant list is
3487  * kept in root->processed_distinctClause, leaving the original
3488  * parse->distinctClause alone.
3489  */
3490  if (parse->distinctClause)
3491  {
3492  bool sortable;
3493 
3494  /* Make a copy since pathkey processing can modify the list */
3495  root->processed_distinctClause = list_copy(parse->distinctClause);
3496  root->distinct_pathkeys =
3498  &root->processed_distinctClause,
3499  tlist,
3500  true,
3501  &sortable);
3502  if (!sortable)
3503  root->distinct_pathkeys = NIL;
3504  }
3505  else
3506  root->distinct_pathkeys = NIL;
3507 
3508  root->sort_pathkeys =
3510  parse->sortClause,
3511  tlist);
3512 
3513  /*
3514  * Figure out whether we want a sorted result from query_planner.
3515  *
3516  * If we have a sortable GROUP BY clause, then we want a result sorted
3517  * properly for grouping. Otherwise, if we have window functions to
3518  * evaluate, we try to sort for the first window. Otherwise, if there's a
3519  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3520  * we try to produce output that's sufficiently well sorted for the
3521  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3522  * by the ORDER BY clause.
3523  *
3524  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3525  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3526  * that might just leave us failing to exploit an available sort order at
3527  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3528  * much easier, since we know that the parser ensured that one is a
3529  * superset of the other.
3530  */
3531  if (root->group_pathkeys)
3532  root->query_pathkeys = root->group_pathkeys;
3533  else if (root->window_pathkeys)
3534  root->query_pathkeys = root->window_pathkeys;
3535  else if (list_length(root->distinct_pathkeys) >
3536  list_length(root->sort_pathkeys))
3537  root->query_pathkeys = root->distinct_pathkeys;
3538  else if (root->sort_pathkeys)
3539  root->query_pathkeys = root->sort_pathkeys;
3540  else
3541  root->query_pathkeys = NIL;
3542 }
3543 
3544 /*
3545  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3546  *
3547  * path_rows: number of output rows from scan/join step
3548  * gd: grouping sets data including list of grouping sets and their clauses
3549  * target_list: target list containing group clause references
3550  *
3551  * If doing grouping sets, we also annotate the gsets data with the estimates
3552  * for each set and each individual rollup list, with a view to later
3553  * determining whether some combination of them could be hashed instead.
3554  */
3555 static double
3557  double path_rows,
3558  grouping_sets_data *gd,
3559  List *target_list)
3560 {
3561  Query *parse = root->parse;
3562  double dNumGroups;
3563 
3564  if (parse->groupClause)
3565  {
3566  List *groupExprs;
3567 
3568  if (parse->groupingSets)
3569  {
3570  /* Add up the estimates for each grouping set */
3571  ListCell *lc;
3572 
3573  Assert(gd); /* keep Coverity happy */
3574 
3575  dNumGroups = 0;
3576 
3577  foreach(lc, gd->rollups)
3578  {
3579  RollupData *rollup = lfirst_node(RollupData, lc);
3580  ListCell *lc2;
3581  ListCell *lc3;
3582 
3583  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3584  target_list);
3585 
3586  rollup->numGroups = 0.0;
3587 
3588  forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3589  {
3590  List *gset = (List *) lfirst(lc2);
3592  double numGroups = estimate_num_groups(root,
3593  groupExprs,
3594  path_rows,
3595  &gset,
3596  NULL);
3597 
3598  gs->numGroups = numGroups;
3599  rollup->numGroups += numGroups;
3600  }
3601 
3602  dNumGroups += rollup->numGroups;
3603  }
3604 
3605  if (gd->hash_sets_idx)
3606  {
3607  ListCell *lc2;
3608 
3609  gd->dNumHashGroups = 0;
3610 
3611  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3612  target_list);
3613 
3614  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3615  {
3616  List *gset = (List *) lfirst(lc);
3618  double numGroups = estimate_num_groups(root,
3619  groupExprs,
3620  path_rows,
3621  &gset,
3622  NULL);
3623 
3624  gs->numGroups = numGroups;
3625  gd->dNumHashGroups += numGroups;
3626  }
3627 
3628  dNumGroups += gd->dNumHashGroups;
3629  }
3630  }
3631  else
3632  {
3633  /* Plain GROUP BY -- estimate based on optimized groupClause */
3635  target_list);
3636 
3637  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3638  NULL, NULL);
3639  }
3640  }
3641  else if (parse->groupingSets)
3642  {
3643  /* Empty grouping sets ... one result row for each one */
3644  dNumGroups = list_length(parse->groupingSets);
3645  }
3646  else if (parse->hasAggs || root->hasHavingQual)
3647  {
3648  /* Plain aggregation, one result row */
3649  dNumGroups = 1;
3650  }
3651  else
3652  {
3653  /* Not grouping */
3654  dNumGroups = 1;
3655  }
3656 
3657  return dNumGroups;
3658 }
3659 
3660 /*
3661  * create_grouping_paths
3662  *
3663  * Build a new upperrel containing Paths for grouping and/or aggregation.
3664  * Along the way, we also build an upperrel for Paths which are partially
3665  * grouped and/or aggregated. A partially grouped and/or aggregated path
3666  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3667  * the only partially grouped paths we build are also partial paths; that
3668  * is, they need a Gather and then a FinalizeAggregate.
3669  *
3670  * input_rel: contains the source-data Paths
3671  * target: the pathtarget for the result Paths to compute
3672  * gd: grouping sets data including list of grouping sets and their clauses
3673  *
3674  * Note: all Paths in input_rel are expected to return the target computed
3675  * by make_group_input_target.
3676  */
3677 static RelOptInfo *
3679  RelOptInfo *input_rel,
3680  PathTarget *target,
3681  bool target_parallel_safe,
3682  grouping_sets_data *gd)
3683 {
3684  Query *parse = root->parse;
3685  RelOptInfo *grouped_rel;
3686  RelOptInfo *partially_grouped_rel;
3687  AggClauseCosts agg_costs;
3688 
3689  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3690  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3691 
3692  /*
3693  * Create grouping relation to hold fully aggregated grouping and/or
3694  * aggregation paths.
3695  */
3696  grouped_rel = make_grouping_rel(root, input_rel, target,
3697  target_parallel_safe, parse->havingQual);
3698 
3699  /*
3700  * Create either paths for a degenerate grouping or paths for ordinary
3701  * grouping, as appropriate.
3702  */
3703  if (is_degenerate_grouping(root))
3704  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3705  else
3706  {
3707  int flags = 0;
3708  GroupPathExtraData extra;
3709 
3710  /*
3711  * Determine whether it's possible to perform sort-based
3712  * implementations of grouping. (Note that if processed_groupClause
3713  * is empty, grouping_is_sortable() is trivially true, and all the
3714  * pathkeys_contained_in() tests will succeed too, so that we'll
3715  * consider every surviving input path.)
3716  *
3717  * If we have grouping sets, we might be able to sort some but not all
3718  * of them; in this case, we need can_sort to be true as long as we
3719  * must consider any sorted-input plan.
3720  */
3721  if ((gd && gd->rollups != NIL)
3723  flags |= GROUPING_CAN_USE_SORT;
3724 
3725  /*
3726  * Determine whether we should consider hash-based implementations of
3727  * grouping.
3728  *
3729  * Hashed aggregation only applies if we're grouping. If we have
3730  * grouping sets, some groups might be hashable but others not; in
3731  * this case we set can_hash true as long as there is nothing globally
3732  * preventing us from hashing (and we should therefore consider plans
3733  * with hashes).
3734  *
3735  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3736  * BY aggregates. (Doing so would imply storing *all* the input
3737  * values in the hash table, and/or running many sorts in parallel,
3738  * either of which seems like a certain loser.) We similarly don't
3739  * support ordered-set aggregates in hashed aggregation, but that case
3740  * is also included in the numOrderedAggs count.
3741  *
3742  * Note: grouping_is_hashable() is much more expensive to check than
3743  * the other gating conditions, so we want to do it last.
3744  */
3745  if ((parse->groupClause != NIL &&
3746  root->numOrderedAggs == 0 &&
3748  flags |= GROUPING_CAN_USE_HASH;
3749 
3750  /*
3751  * Determine whether partial aggregation is possible.
3752  */
3753  if (can_partial_agg(root))
3754  flags |= GROUPING_CAN_PARTIAL_AGG;
3755 
3756  extra.flags = flags;
3757  extra.target_parallel_safe = target_parallel_safe;
3758  extra.havingQual = parse->havingQual;
3759  extra.targetList = parse->targetList;
3760  extra.partial_costs_set = false;
3761 
3762  /*
3763  * Determine whether partitionwise aggregation is in theory possible.
3764  * It can be disabled by the user, and for now, we don't try to
3765  * support grouping sets. create_ordinary_grouping_paths() will check
3766  * additional conditions, such as whether input_rel is partitioned.
3767  */
3768  if (enable_partitionwise_aggregate && !parse->groupingSets)
3770  else
3772 
3773  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3774  &agg_costs, gd, &extra,
3775  &partially_grouped_rel);
3776  }
3777 
3778  set_cheapest(grouped_rel);
3779  return grouped_rel;
3780 }
3781 
3782 /*
3783  * make_grouping_rel
3784  *
3785  * Create a new grouping rel and set basic properties.
3786  *
3787  * input_rel represents the underlying scan/join relation.
3788  * target is the output expected from the grouping relation.
3789  */
3790 static RelOptInfo *
3792  PathTarget *target, bool target_parallel_safe,
3793  Node *havingQual)
3794 {
3795  RelOptInfo *grouped_rel;
3796 
3797  if (IS_OTHER_REL(input_rel))
3798  {
3799  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3800  input_rel->relids);
3801  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3802  }
3803  else
3804  {
3805  /*
3806  * By tradition, the relids set for the main grouping relation is
3807  * NULL. (This could be changed, but might require adjustments
3808  * elsewhere.)
3809  */
3810  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3811  }
3812 
3813  /* Set target. */
3814  grouped_rel->reltarget = target;
3815 
3816  /*
3817  * If the input relation is not parallel-safe, then the grouped relation
3818  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3819  * target list and HAVING quals are parallel-safe.
3820  */
3821  if (input_rel->consider_parallel && target_parallel_safe &&
3822  is_parallel_safe(root, (Node *) havingQual))
3823  grouped_rel->consider_parallel = true;
3824 
3825  /*
3826  * If the input rel belongs to a single FDW, so does the grouped rel.
3827  */
3828  grouped_rel->serverid = input_rel->serverid;
3829  grouped_rel->userid = input_rel->userid;
3830  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3831  grouped_rel->fdwroutine = input_rel->fdwroutine;
3832 
3833  return grouped_rel;
3834 }
3835 
3836 /*
3837  * is_degenerate_grouping
3838  *
3839  * A degenerate grouping is one in which the query has a HAVING qual and/or
3840  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3841  * grouping sets are all empty).
3842  */
3843 static bool
3845 {
3846  Query *parse = root->parse;
3847 
3848  return (root->hasHavingQual || parse->groupingSets) &&
3849  !parse->hasAggs && parse->groupClause == NIL;
3850 }
3851 
3852 /*
3853  * create_degenerate_grouping_paths
3854  *
3855  * When the grouping is degenerate (see is_degenerate_grouping), we are
3856  * supposed to emit either zero or one row for each grouping set depending on
3857  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3858  * either HAVING or the targetlist, so we actually do not need the FROM table
3859  * at all! We can just throw away the plan-so-far and generate a Result node.
3860  * This is a sufficiently unusual corner case that it's not worth contorting
3861  * the structure of this module to avoid having to generate the earlier paths
3862  * in the first place.
3863  */
3864 static void
3866  RelOptInfo *grouped_rel)
3867 {
3868  Query *parse = root->parse;
3869  int nrows;
3870  Path *path;
3871 
3872  nrows = list_length(parse->groupingSets);
3873  if (nrows > 1)
3874  {
3875  /*
3876  * Doesn't seem worthwhile writing code to cons up a generate_series
3877  * or a values scan to emit multiple rows. Instead just make N clones
3878  * and append them. (With a volatile HAVING clause, this means you
3879  * might get between 0 and N output rows. Offhand I think that's
3880  * desired.)
3881  */
3882  List *paths = NIL;
3883 
3884  while (--nrows >= 0)
3885  {
3886  path = (Path *)
3887  create_group_result_path(root, grouped_rel,
3888  grouped_rel->reltarget,
3889  (List *) parse->havingQual);
3890  paths = lappend(paths, path);
3891  }
3892  path = (Path *)
3893  create_append_path(root,
3894  grouped_rel,
3895  paths,
3896  NIL,
3897  NIL,
3898  NULL,
3899  0,
3900  false,
3901  -1);
3902  }
3903  else
3904  {
3905  /* No grouping sets, or just one, so one output row */
3906  path = (Path *)
3907  create_group_result_path(root, grouped_rel,
3908  grouped_rel->reltarget,
3909  (List *) parse->havingQual);
3910  }
3911 
3912  add_path(grouped_rel, path);
3913 }
3914 
3915 /*
3916  * create_ordinary_grouping_paths
3917  *
3918  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3919  *
3920  * We need to consider sorted and hashed aggregation in the same function,
3921  * because otherwise (1) it would be harder to throw an appropriate error
3922  * message if neither way works, and (2) we should not allow hashtable size
3923  * considerations to dissuade us from using hashing if sorting is not possible.
3924  *
3925  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3926  * function creates, or to NULL if it doesn't create one.
3927  */
3928 static void
3930  RelOptInfo *grouped_rel,
3931  const AggClauseCosts *agg_costs,
3932  grouping_sets_data *gd,
3933  GroupPathExtraData *extra,
3934  RelOptInfo **partially_grouped_rel_p)
3935 {
3936  Path *cheapest_path = input_rel->cheapest_total_path;
3937  RelOptInfo *partially_grouped_rel = NULL;
3938  double dNumGroups;
3940 
3941  /*
3942  * If this is the topmost grouping relation or if the parent relation is
3943  * doing some form of partitionwise aggregation, then we may be able to do
3944  * it at this level also. However, if the input relation is not
3945  * partitioned, partitionwise aggregate is impossible.
3946  */
3947  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3948  IS_PARTITIONED_REL(input_rel))
3949  {
3950  /*
3951  * If this is the topmost relation or if the parent relation is doing
3952  * full partitionwise aggregation, then we can do full partitionwise
3953  * aggregation provided that the GROUP BY clause contains all of the
3954  * partitioning columns at this level. Otherwise, we can do at most
3955  * partial partitionwise aggregation. But if partial aggregation is
3956  * not supported in general then we can't use it for partitionwise
3957  * aggregation either.
3958  *
3959  * Check parse->groupClause not processed_groupClause, because it's
3960  * okay if some of the partitioning columns were proved redundant.
3961  */
3962  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3963  group_by_has_partkey(input_rel, extra->targetList,
3964  root->parse->groupClause))
3966  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3968  else
3970  }
3971 
3972  /*
3973  * Before generating paths for grouped_rel, we first generate any possible
3974  * partially grouped paths; that way, later code can easily consider both
3975  * parallel and non-parallel approaches to grouping.
3976  */
3977  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3978  {
3979  bool force_rel_creation;
3980 
3981  /*
3982  * If we're doing partitionwise aggregation at this level, force
3983  * creation of a partially_grouped_rel so we can add partitionwise
3984  * paths to it.
3985  */
3986  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3987 
3988  partially_grouped_rel =
3990  grouped_rel,
3991  input_rel,
3992  gd,
3993  extra,
3994  force_rel_creation);
3995  }
3996 
3997  /* Set out parameter. */
3998  *partially_grouped_rel_p = partially_grouped_rel;
3999 
4000  /* Apply partitionwise aggregation technique, if possible. */
4001  if (patype != PARTITIONWISE_AGGREGATE_NONE)
4002  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
4003  partially_grouped_rel, agg_costs,
4004  gd, patype, extra);
4005 
4006  /* If we are doing partial aggregation only, return. */
4008  {
4009  Assert(partially_grouped_rel);
4010 
4011  if (partially_grouped_rel->pathlist)
4012  set_cheapest(partially_grouped_rel);
4013 
4014  return;
4015  }
4016 
4017  /* Gather any partially grouped partial paths. */
4018  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4019  {
4020  gather_grouping_paths(root, partially_grouped_rel);
4021  set_cheapest(partially_grouped_rel);
4022  }
4023 
4024  /*
4025  * Estimate number of groups.
4026  */
4027  dNumGroups = get_number_of_groups(root,
4028  cheapest_path->rows,
4029  gd,
4030  extra->targetList);
4031 
4032  /* Build final grouping paths */
4033  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4034  partially_grouped_rel, agg_costs, gd,
4035  dNumGroups, extra);
4036 
4037  /* Give a helpful error if we failed to find any implementation */
4038  if (grouped_rel->pathlist == NIL)
4039  ereport(ERROR,
4040  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4041  errmsg("could not implement GROUP BY"),
4042  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4043 
4044  /*
4045  * If there is an FDW that's responsible for all baserels of the query,
4046  * let it consider adding ForeignPaths.
4047  */
4048  if (grouped_rel->fdwroutine &&
4049  grouped_rel->fdwroutine->GetForeignUpperPaths)
4050  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4051  input_rel, grouped_rel,
4052  extra);
4053 
4054  /* Let extensions possibly add some more paths */
4056  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4057  input_rel, grouped_rel,
4058  extra);
4059 }
4060 
4061 /*
4062  * For a given input path, consider the possible ways of doing grouping sets on
4063  * it, by combinations of hashing and sorting. This can be called multiple
4064  * times, so it's important that it not scribble on input. No result is
4065  * returned, but any generated paths are added to grouped_rel.
4066  */
4067 static void
4069  RelOptInfo *grouped_rel,
4070  Path *path,
4071  bool is_sorted,
4072  bool can_hash,
4073  grouping_sets_data *gd,
4074  const AggClauseCosts *agg_costs,
4075  double dNumGroups)
4076 {
4077  Query *parse = root->parse;
4078  Size hash_mem_limit = get_hash_memory_limit();
4079 
4080  /*
4081  * If we're not being offered sorted input, then only consider plans that
4082  * can be done entirely by hashing.
4083  *
4084  * We can hash everything if it looks like it'll fit in hash_mem. But if
4085  * the input is actually sorted despite not being advertised as such, we
4086  * prefer to make use of that in order to use less memory.
4087  *
4088  * If none of the grouping sets are sortable, then ignore the hash_mem
4089  * limit and generate a path anyway, since otherwise we'll just fail.
4090  */
4091  if (!is_sorted)
4092  {
4093  List *new_rollups = NIL;
4094  RollupData *unhashed_rollup = NULL;
4095  List *sets_data;
4096  List *empty_sets_data = NIL;
4097  List *empty_sets = NIL;
4098  ListCell *lc;
4099  ListCell *l_start = list_head(gd->rollups);
4100  AggStrategy strat = AGG_HASHED;
4101  double hashsize;
4102  double exclude_groups = 0.0;
4103 
4104  Assert(can_hash);
4105 
4106  /*
4107  * If the input is coincidentally sorted usefully (which can happen
4108  * even if is_sorted is false, since that only means that our caller
4109  * has set up the sorting for us), then save some hashtable space by
4110  * making use of that. But we need to watch out for degenerate cases:
4111  *
4112  * 1) If there are any empty grouping sets, then group_pathkeys might
4113  * be NIL if all non-empty grouping sets are unsortable. In this case,
4114  * there will be a rollup containing only empty groups, and the
4115  * pathkeys_contained_in test is vacuously true; this is ok.
4116  *
4117  * XXX: the above relies on the fact that group_pathkeys is generated
4118  * from the first rollup. If we add the ability to consider multiple
4119  * sort orders for grouping input, this assumption might fail.
4120  *
4121  * 2) If there are no empty sets and only unsortable sets, then the
4122  * rollups list will be empty (and thus l_start == NULL), and
4123  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4124  * pathkeys_contained_in test doesn't cause us to crash.
4125  */
4126  if (l_start != NULL &&
4128  {
4129  unhashed_rollup = lfirst_node(RollupData, l_start);
4130  exclude_groups = unhashed_rollup->numGroups;
4131  l_start = lnext(gd->rollups, l_start);
4132  }
4133 
4134  hashsize = estimate_hashagg_tablesize(root,
4135  path,
4136  agg_costs,
4137  dNumGroups - exclude_groups);
4138 
4139  /*
4140  * gd->rollups is empty if we have only unsortable columns to work
4141  * with. Override hash_mem in that case; otherwise, we'll rely on the
4142  * sorted-input case to generate usable mixed paths.
4143  */
4144  if (hashsize > hash_mem_limit && gd->rollups)
4145  return; /* nope, won't fit */
4146 
4147  /*
4148  * We need to burst the existing rollups list into individual grouping
4149  * sets and recompute a groupClause for each set.
4150  */
4151  sets_data = list_copy(gd->unsortable_sets);
4152 
4153  for_each_cell(lc, gd->rollups, l_start)
4154  {
4155  RollupData *rollup = lfirst_node(RollupData, lc);
4156 
4157  /*
4158  * If we find an unhashable rollup that's not been skipped by the
4159  * "actually sorted" check above, we can't cope; we'd need sorted
4160  * input (with a different sort order) but we can't get that here.
4161  * So bail out; we'll get a valid path from the is_sorted case
4162  * instead.
4163  *
4164  * The mere presence of empty grouping sets doesn't make a rollup
4165  * unhashable (see preprocess_grouping_sets), we handle those
4166  * specially below.
4167  */
4168  if (!rollup->hashable)
4169  return;
4170 
4171  sets_data = list_concat(sets_data, rollup->gsets_data);
4172  }
4173  foreach(lc, sets_data)
4174  {
4176  List *gset = gs->set;
4177  RollupData *rollup;
4178 
4179  if (gset == NIL)
4180  {
4181  /* Empty grouping sets can't be hashed. */
4182  empty_sets_data = lappend(empty_sets_data, gs);
4183  empty_sets = lappend(empty_sets, NIL);
4184  }
4185  else
4186  {
4187  rollup = makeNode(RollupData);
4188 
4189  rollup->groupClause = preprocess_groupclause(root, gset);
4190  rollup->gsets_data = list_make1(gs);
4191  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4192  rollup->gsets_data,
4193  gd->tleref_to_colnum_map);
4194  rollup->numGroups = gs->numGroups;
4195  rollup->hashable = true;
4196  rollup->is_hashed = true;
4197  new_rollups = lappend(new_rollups, rollup);
4198  }
4199  }
4200 
4201  /*
4202  * If we didn't find anything nonempty to hash, then bail. We'll
4203  * generate a path from the is_sorted case.
4204  */
4205  if (new_rollups == NIL)
4206  return;
4207 
4208  /*
4209  * If there were empty grouping sets they should have been in the
4210  * first rollup.
4211  */
4212  Assert(!unhashed_rollup || !empty_sets);
4213 
4214  if (unhashed_rollup)
4215  {
4216  new_rollups = lappend(new_rollups, unhashed_rollup);
4217  strat = AGG_MIXED;
4218  }
4219  else if (empty_sets)
4220  {
4221  RollupData *rollup = makeNode(RollupData);
4222 
4223  rollup->groupClause = NIL;
4224  rollup->gsets_data = empty_sets_data;
4225  rollup->gsets = empty_sets;
4226  rollup->numGroups = list_length(empty_sets);
4227  rollup->hashable = false;
4228  rollup->is_hashed = false;
4229  new_rollups = lappend(new_rollups, rollup);
4230  strat = AGG_MIXED;
4231  }
4232 
4233  add_path(grouped_rel, (Path *)
4235  grouped_rel,
4236  path,
4237  (List *) parse->havingQual,
4238  strat,
4239  new_rollups,
4240  agg_costs));
4241  return;
4242  }
4243 
4244  /*
4245  * If we have sorted input but nothing we can do with it, bail.
4246  */
4247  if (gd->rollups == NIL)
4248  return;
4249 
4250  /*
4251  * Given sorted input, we try and make two paths: one sorted and one mixed
4252  * sort/hash. (We need to try both because hashagg might be disabled, or
4253  * some columns might not be sortable.)
4254  *
4255  * can_hash is passed in as false if some obstacle elsewhere (such as
4256  * ordered aggs) means that we shouldn't consider hashing at all.
4257  */
4258  if (can_hash && gd->any_hashable)
4259  {
4260  List *rollups = NIL;
4261  List *hash_sets = list_copy(gd->unsortable_sets);
4262  double availspace = hash_mem_limit;
4263  ListCell *lc;
4264 
4265  /*
4266  * Account first for space needed for groups we can't sort at all.
4267  */
4268  availspace -= estimate_hashagg_tablesize(root,
4269  path,
4270  agg_costs,
4271  gd->dNumHashGroups);
4272 
4273  if (availspace > 0 && list_length(gd->rollups) > 1)
4274  {
4275  double scale;
4276  int num_rollups = list_length(gd->rollups);
4277  int k_capacity;
4278  int *k_weights = palloc(num_rollups * sizeof(int));
4279  Bitmapset *hash_items = NULL;
4280  int i;
4281 
4282  /*
4283  * We treat this as a knapsack problem: the knapsack capacity
4284  * represents hash_mem, the item weights are the estimated memory
4285  * usage of the hashtables needed to implement a single rollup,
4286  * and we really ought to use the cost saving as the item value;
4287  * however, currently the costs assigned to sort nodes don't
4288  * reflect the comparison costs well, and so we treat all items as
4289  * of equal value (each rollup we hash instead saves us one sort).
4290  *
4291  * To use the discrete knapsack, we need to scale the values to a
4292  * reasonably small bounded range. We choose to allow a 5% error
4293  * margin; we have no more than 4096 rollups in the worst possible
4294  * case, which with a 5% error margin will require a bit over 42MB
4295  * of workspace. (Anyone wanting to plan queries that complex had
4296  * better have the memory for it. In more reasonable cases, with
4297  * no more than a couple of dozen rollups, the memory usage will
4298  * be negligible.)
4299  *
4300  * k_capacity is naturally bounded, but we clamp the values for
4301  * scale and weight (below) to avoid overflows or underflows (or
4302  * uselessly trying to use a scale factor less than 1 byte).
4303  */
4304  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4305  k_capacity = (int) floor(availspace / scale);
4306 
4307  /*
4308  * We leave the first rollup out of consideration since it's the
4309  * one that matches the input sort order. We assign indexes "i"
4310  * to only those entries considered for hashing; the second loop,
4311  * below, must use the same condition.
4312  */
4313  i = 0;
4314  for_each_from(lc, gd->rollups, 1)
4315  {
4316  RollupData *rollup = lfirst_node(RollupData, lc);
4317 
4318  if (rollup->hashable)
4319  {
4320  double sz = estimate_hashagg_tablesize(root,
4321  path,
4322  agg_costs,
4323  rollup->numGroups);
4324 
4325  /*
4326  * If sz is enormous, but hash_mem (and hence scale) is
4327  * small, avoid integer overflow here.
4328  */
4329  k_weights[i] = (int) Min(floor(sz / scale),
4330  k_capacity + 1.0);
4331  ++i;
4332  }
4333  }
4334 
4335  /*
4336  * Apply knapsack algorithm; compute the set of items which
4337  * maximizes the value stored (in this case the number of sorts
4338  * saved) while keeping the total size (approximately) within
4339  * capacity.
4340  */
4341  if (i > 0)
4342  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4343 
4344  if (!bms_is_empty(hash_items))
4345  {
4346  rollups = list_make1(linitial(gd->rollups));
4347 
4348  i = 0;
4349  for_each_from(lc, gd->rollups, 1)
4350  {
4351  RollupData *rollup = lfirst_node(RollupData, lc);
4352 
4353  if (rollup->hashable)
4354  {
4355  if (bms_is_member(i, hash_items))
4356  hash_sets = list_concat(hash_sets,
4357  rollup->gsets_data);
4358  else
4359  rollups = lappend(rollups, rollup);
4360  ++i;
4361  }
4362  else
4363  rollups = lappend(rollups, rollup);
4364  }
4365  }
4366  }
4367 
4368  if (!rollups && hash_sets)
4369  rollups = list_copy(gd->rollups);
4370 
4371  foreach(lc, hash_sets)
4372  {
4374  RollupData *rollup = makeNode(RollupData);
4375 
4376  Assert(gs->set != NIL);
4377 
4378  rollup->groupClause = preprocess_groupclause(root, gs->set);
4379  rollup->gsets_data = list_make1(gs);
4380  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4381  rollup->gsets_data,
4382  gd->tleref_to_colnum_map);
4383  rollup->numGroups = gs->numGroups;
4384  rollup->hashable = true;
4385  rollup->is_hashed = true;
4386  rollups = lcons(rollup, rollups);
4387  }
4388 
4389  if (rollups)
4390  {
4391  add_path(grouped_rel, (Path *)
4393  grouped_rel,
4394  path,
4395  (List *) parse->havingQual,
4396  AGG_MIXED,
4397  rollups,
4398  agg_costs));
4399  }
4400  }
4401 
4402  /*
4403  * Now try the simple sorted case.
4404  */
4405  if (!gd->unsortable_sets)
4406  add_path(grouped_rel, (Path *)
4408  grouped_rel,
4409  path,
4410  (List *) parse->havingQual,
4411  AGG_SORTED,
4412  gd->rollups,
4413  agg_costs));
4414 }
4415 
4416 /*
4417  * create_window_paths
4418  *
4419  * Build a new upperrel containing Paths for window-function evaluation.
4420  *
4421  * input_rel: contains the source-data Paths
4422  * input_target: result of make_window_input_target
4423  * output_target: what the topmost WindowAggPath should return
4424  * wflists: result of find_window_functions
4425  * activeWindows: result of select_active_windows
4426  *
4427  * Note: all Paths in input_rel are expected to return input_target.
4428  */
4429 static RelOptInfo *
4431  RelOptInfo *input_rel,
4432  PathTarget *input_target,
4433  PathTarget *output_target,
4434  bool output_target_parallel_safe,
4435  WindowFuncLists *wflists,
4436  List *activeWindows)
4437 {
4438  RelOptInfo *window_rel;
4439  ListCell *lc;
4440 
4441  /* For now, do all work in the (WINDOW, NULL) upperrel */
4442  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4443 
4444  /*
4445  * If the input relation is not parallel-safe, then the window relation
4446  * can't be parallel-safe, either. Otherwise, we need to examine the
4447  * target list and active windows for non-parallel-safe constructs.
4448  */
4449  if (input_rel->consider_parallel && output_target_parallel_safe &&
4450  is_parallel_safe(root, (Node *) activeWindows))
4451  window_rel->consider_parallel = true;
4452 
4453  /*
4454  * If the input rel belongs to a single FDW, so does the window rel.
4455  */
4456  window_rel->serverid = input_rel->serverid;
4457  window_rel->userid = input_rel->userid;
4458  window_rel->useridiscurrent = input_rel->useridiscurrent;
4459  window_rel->fdwroutine = input_rel->fdwroutine;
4460 
4461  /*
4462  * Consider computing window functions starting from the existing
4463  * cheapest-total path (which will likely require a sort) as well as any
4464  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4465  */
4466  foreach(lc, input_rel->pathlist)
4467  {
4468  Path *path = (Path *) lfirst(lc);
4469  int presorted_keys;
4470 
4471  if (path == input_rel->cheapest_total_path ||
4473  &presorted_keys) ||
4474  presorted_keys > 0)
4476  window_rel,
4477  path,
4478  input_target,
4479  output_target,
4480  wflists,
4481  activeWindows);
4482  }
4483 
4484  /*
4485  * If there is an FDW that's responsible for all baserels of the query,
4486  * let it consider adding ForeignPaths.
4487  */
4488  if (window_rel->fdwroutine &&
4489  window_rel->fdwroutine->GetForeignUpperPaths)
4490  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4491  input_rel, window_rel,
4492  NULL);
4493 
4494  /* Let extensions possibly add some more paths */
4496  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4497  input_rel, window_rel, NULL);
4498 
4499  /* Now choose the best path(s) */
4500  set_cheapest(window_rel);
4501 
4502  return window_rel;
4503 }
4504 
4505 /*
4506  * Stack window-function implementation steps atop the given Path, and
4507  * add the result to window_rel.
4508  *
4509  * window_rel: upperrel to contain result
4510  * path: input Path to use (must return input_target)
4511  * input_target: result of make_window_input_target
4512  * output_target: what the topmost WindowAggPath should return
4513  * wflists: result of find_window_functions
4514  * activeWindows: result of select_active_windows
4515  */
4516 static void
4518  RelOptInfo *window_rel,
4519  Path *path,
4520  PathTarget *input_target,
4521  PathTarget *output_target,
4522  WindowFuncLists *wflists,
4523  List *activeWindows)
4524 {
4525  PathTarget *window_target;
4526  ListCell *l;
4527  List *topqual = NIL;
4528 
4529  /*
4530  * Since each window clause could require a different sort order, we stack
4531  * up a WindowAgg node for each clause, with sort steps between them as
4532  * needed. (We assume that select_active_windows chose a good order for
4533  * executing the clauses in.)
4534  *
4535  * input_target should contain all Vars and Aggs needed for the result.
4536  * (In some cases we wouldn't need to propagate all of these all the way
4537  * to the top, since they might only be needed as inputs to WindowFuncs.
4538  * It's probably not worth trying to optimize that though.) It must also
4539  * contain all window partitioning and sorting expressions, to ensure
4540  * they're computed only once at the bottom of the stack (that's critical
4541  * for volatile functions). As we climb up the stack, we'll add outputs
4542  * for the WindowFuncs computed at each level.
4543  */
4544  window_target = input_target;
4545 
4546  foreach(l, activeWindows)
4547  {
4549  List *window_pathkeys;
4550  int presorted_keys;
4551  bool is_sorted;
4552  bool topwindow;
4553 
4554  window_pathkeys = make_pathkeys_for_window(root,
4555  wc,
4556  root->processed_tlist);
4557 
4558  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4559  path->pathkeys,
4560  &presorted_keys);
4561 
4562  /* Sort if necessary */
4563  if (!is_sorted)
4564  {
4565  /*
4566  * No presorted keys or incremental sort disabled, just perform a
4567  * complete sort.
4568  */
4569  if (presorted_keys == 0 || !enable_incremental_sort)
4570  path = (Path *) create_sort_path(root, window_rel,
4571  path,
4572  window_pathkeys,
4573  -1.0);
4574  else
4575  {
4576  /*
4577  * Since we have presorted keys and incremental sort is
4578  * enabled, just use incremental sort.
4579  */
4580  path = (Path *) create_incremental_sort_path(root,
4581  window_rel,
4582  path,
4583  window_pathkeys,
4584  presorted_keys,
4585  -1.0);
4586  }
4587  }
4588 
4589  if (lnext(activeWindows, l))
4590  {
4591  /*
4592  * Add the current WindowFuncs to the output target for this
4593  * intermediate WindowAggPath. We must copy window_target to
4594  * avoid changing the previous path's target.
4595  *
4596  * Note: a WindowFunc adds nothing to the target's eval costs; but
4597  * we do need to account for the increase in tlist width.
4598  */
4599  ListCell *lc2;
4600 
4601  window_target = copy_pathtarget(window_target);
4602  foreach(lc2, wflists->windowFuncs[wc->winref])
4603  {
4604  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4605 
4606  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4607  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4608  }
4609  }
4610  else
4611  {
4612  /* Install the goal target in the topmost WindowAgg */
4613  window_target = output_target;
4614  }
4615 
4616  /* mark the final item in the list as the top-level window */
4617  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4618 
4619  /*
4620  * Accumulate all of the runConditions from each intermediate
4621  * WindowClause. The top-level WindowAgg must pass these as a qual so
4622  * that it filters out unwanted tuples correctly.
4623  */
4624  if (!topwindow)
4625  topqual = list_concat(topqual, wc->runCondition);
4626 
4627  path = (Path *)
4628  create_windowagg_path(root, window_rel, path, window_target,
4629  wflists->windowFuncs[wc->winref],
4630  wc, topwindow ? topqual : NIL, topwindow);
4631  }
4632 
4633  add_path(window_rel, path);
4634 }
4635 
4636 /*
4637  * create_distinct_paths
4638  *
4639  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4640  *
4641  * input_rel: contains the source-data Paths
4642  *
4643  * Note: input paths should already compute the desired pathtarget, since
4644  * Sort/Unique won't project anything.
4645  */
4646 static RelOptInfo *
4648 {
4649  RelOptInfo *distinct_rel;
4650 
4651  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4652  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4653 
4654  /*
4655  * We don't compute anything at this level, so distinct_rel will be
4656  * parallel-safe if the input rel is parallel-safe. In particular, if
4657  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4658  * output those expressions, and will not be parallel-safe unless those
4659  * expressions are parallel-safe.
4660  */
4661  distinct_rel->consider_parallel = input_rel->consider_parallel;
4662 
4663  /*
4664  * If the input rel belongs to a single FDW, so does the distinct_rel.
4665  */
4666  distinct_rel->serverid = input_rel->serverid;
4667  distinct_rel->userid = input_rel->userid;
4668  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4669  distinct_rel->fdwroutine = input_rel->fdwroutine;
4670 
4671  /* build distinct paths based on input_rel's pathlist */
4672  create_final_distinct_paths(root, input_rel, distinct_rel);
4673 
4674  /* now build distinct paths based on input_rel's partial_pathlist */
4675  create_partial_distinct_paths(root, input_rel, distinct_rel);
4676 
4677  /* Give a helpful error if we failed to create any paths */
4678  if (distinct_rel->pathlist == NIL)
4679  ereport(ERROR,
4680  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4681  errmsg("could not implement DISTINCT"),
4682  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4683 
4684  /*
4685  * If there is an FDW that's responsible for all baserels of the query,
4686  * let it consider adding ForeignPaths.
4687  */
4688  if (distinct_rel->fdwroutine &&
4689  distinct_rel->fdwroutine->GetForeignUpperPaths)
4690  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4692  input_rel,
4693  distinct_rel,
4694  NULL);
4695 
4696  /* Let extensions possibly add some more paths */
4698  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4699  distinct_rel, NULL);
4700 
4701  /* Now choose the best path(s) */
4702  set_cheapest(distinct_rel);
4703 
4704  return distinct_rel;
4705 }
4706 
4707 /*
4708  * create_partial_distinct_paths
4709  *
4710  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4711  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4712  * paths on top and add a final unique/aggregate path to remove any duplicate
4713  * produced from combining rows from parallel workers.
4714  */
4715 static void
4717  RelOptInfo *final_distinct_rel)
4718 {
4719  RelOptInfo *partial_distinct_rel;
4720  Query *parse;
4721  List *distinctExprs;
4722  double numDistinctRows;
4723  Path *cheapest_partial_path;
4724  ListCell *lc;
4725 
4726  /* nothing to do when there are no partial paths in the input rel */
4727  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4728  return;
4729 
4730  parse = root->parse;
4731 
4732  /* can't do parallel DISTINCT ON */
4733  if (parse->hasDistinctOn)
4734  return;
4735 
4736  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4737  NULL);
4738  partial_distinct_rel->reltarget = root->upper_targets[UPPERREL_PARTIAL_DISTINCT];
4739  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4740 
4741  /*
4742  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4743  */
4744  partial_distinct_rel->serverid = input_rel->serverid;
4745  partial_distinct_rel->userid = input_rel->userid;
4746  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4747  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4748 
4749  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4750 
4751  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4752  parse->targetList);
4753 
4754  /* estimate how many distinct rows we'll get from each worker */
4755  numDistinctRows = estimate_num_groups(root, distinctExprs,
4756  cheapest_partial_path->rows,
4757  NULL, NULL);
4758 
4759  /*
4760  * Try sorting the cheapest path and incrementally sorting any paths with
4761  * presorted keys and put a unique paths atop of those.
4762  */
4764  {
4765  foreach(lc, input_rel->partial_pathlist)
4766  {
4767  Path *input_path = (Path *) lfirst(lc);
4768  Path *sorted_path;
4769  bool is_sorted;
4770  int presorted_keys;
4771 
4773  input_path->pathkeys,
4774  &presorted_keys);
4775 
4776  if (is_sorted)
4777  sorted_path = input_path;
4778  else
4779  {
4780  /*
4781  * Try at least sorting the cheapest path and also try
4782  * incrementally sorting any path which is partially sorted
4783  * already (no need to deal with paths which have presorted
4784  * keys when incremental sort is disabled unless it's the
4785  * cheapest partial path).
4786  */
4787  if (input_path != cheapest_partial_path &&
4788  (presorted_keys == 0 || !enable_incremental_sort))
4789  continue;
4790 
4791  /*
4792  * We've no need to consider both a sort and incremental sort.
4793  * We'll just do a sort if there are no presorted keys and an
4794  * incremental sort when there are presorted keys.
4795  */
4796  if (presorted_keys == 0 || !enable_incremental_sort)
4797  sorted_path = (Path *) create_sort_path(root,
4798  partial_distinct_rel,
4799  input_path,
4800  root->distinct_pathkeys,
4801  -1.0);
4802  else
4803  sorted_path = (Path *) create_incremental_sort_path(root,
4804  partial_distinct_rel,
4805  input_path,
4806  root->distinct_pathkeys,
4807  presorted_keys,
4808  -1.0);
4809  }
4810 
4811  add_partial_path(partial_distinct_rel, (Path *)
4812  create_upper_unique_path(root, partial_distinct_rel,
4813  sorted_path,
4815  numDistinctRows));
4816  }
4817  }
4818 
4819  /*
4820  * Now try hash aggregate paths, if enabled and hashing is possible. Since
4821  * we're not on the hook to ensure we do our best to create at least one
4822  * path here, we treat enable_hashagg as a hard off-switch rather than the
4823  * slightly softer variant in create_final_distinct_paths.
4824  */
4826  {
4827  add_partial_path(partial_distinct_rel, (Path *)
4828  create_agg_path(root,
4829  partial_distinct_rel,
4830  cheapest_partial_path,
4831  cheapest_partial_path->pathtarget,
4832  AGG_HASHED,
4835  NIL,
4836  NULL,
4837  numDistinctRows));
4838  }
4839 
4840  /*
4841  * If there is an FDW that's responsible for all baserels of the query,
4842  * let it consider adding ForeignPaths.
4843  */
4844  if (partial_distinct_rel->fdwroutine &&
4845  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4846  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4848  input_rel,
4849  partial_distinct_rel,
4850  NULL);
4851 
4852  /* Let extensions possibly add some more partial paths */
4854  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4855  input_rel, partial_distinct_rel, NULL);
4856 
4857  if (partial_distinct_rel->partial_pathlist != NIL)
4858  {
4859  generate_gather_paths(root, partial_distinct_rel, true);
4860  set_cheapest(partial_distinct_rel);
4861 
4862  /*
4863  * Finally, create paths to distinctify the final result. This step
4864  * is needed to remove any duplicates due to combining rows from
4865  * parallel workers.
4866  */
4867  create_final_distinct_paths(root, partial_distinct_rel,
4868  final_distinct_rel);
4869  }
4870 }
4871 
4872 /*
4873  * create_final_distinct_paths
4874  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4875  *
4876  * input_rel: contains the source-data paths
4877  * distinct_rel: destination relation for storing created paths
4878  */
4879 static RelOptInfo *
4881  RelOptInfo *distinct_rel)
4882 {
4883  Query *parse = root->parse;
4884  Path *cheapest_input_path = input_rel->cheapest_total_path;
4885  double numDistinctRows;
4886  bool allow_hash;
4887 
4888  /* Estimate number of distinct rows there will be */
4889  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4890  root->hasHavingQual)
4891  {
4892  /*
4893  * If there was grouping or aggregation, use the number of input rows
4894  * as the estimated number of DISTINCT rows (ie, assume the input is
4895  * already mostly unique).
4896  */
4897  numDistinctRows = cheapest_input_path->rows;
4898  }
4899  else
4900  {
4901  /*
4902  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4903  */
4904  List *distinctExprs;
4905 
4906  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4907  parse->targetList);
4908  numDistinctRows = estimate_num_groups(root, distinctExprs,
4909  cheapest_input_path->rows,
4910  NULL, NULL);
4911  }
4912 
4913  /*
4914  * Consider sort-based implementations of DISTINCT, if possible.
4915  */
4917  {
4918  /*
4919  * Firstly, if we have any adequately-presorted paths, just stick a
4920  * Unique node on those. We also, consider doing an explicit sort of
4921  * the cheapest input path and Unique'ing that. If any paths have
4922  * presorted keys then we'll create an incremental sort atop of those
4923  * before adding a unique node on the top.
4924  *
4925  * When we have DISTINCT ON, we must sort by the more rigorous of
4926  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4927  * Also, if we do have to do an explicit sort, we might as well use
4928  * the more rigorous ordering to avoid a second sort later. (Note
4929  * that the parser will have ensured that one clause is a prefix of
4930  * the other.)
4931  */
4932  List *needed_pathkeys;
4933  ListCell *lc;
4934  double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
4935 
4936  if (parse->hasDistinctOn &&
4938  list_length(root->sort_pathkeys))
4939  needed_pathkeys = root->sort_pathkeys;
4940  else
4941  needed_pathkeys = root->distinct_pathkeys;
4942 
4943  foreach(lc, input_rel->pathlist)
4944  {
4945  Path *input_path = (Path *) lfirst(lc);
4946  Path *sorted_path;
4947  bool is_sorted;
4948  int presorted_keys;
4949 
4950  is_sorted = pathkeys_count_contained_in(needed_pathkeys,
4951  input_path->pathkeys,
4952  &presorted_keys);
4953 
4954  if (is_sorted)
4955  sorted_path = input_path;
4956  else
4957  {
4958  /*
4959  * Try at least sorting the cheapest path and also try
4960  * incrementally sorting any path which is partially sorted
4961  * already (no need to deal with paths which have presorted
4962  * keys when incremental sort is disabled unless it's the
4963  * cheapest input path).
4964  */
4965  if (input_path != cheapest_input_path &&
4966  (presorted_keys == 0 || !enable_incremental_sort))
4967  continue;
4968 
4969  /*
4970  * We've no need to consider both a sort and incremental sort.
4971  * We'll just do a sort if there are no presorted keys and an
4972  * incremental sort when there are presorted keys.
4973  */
4974  if (presorted_keys == 0 || !enable_incremental_sort)
4975  sorted_path = (Path *) create_sort_path(root,
4976  distinct_rel,
4977  input_path,
4978  needed_pathkeys,
4979  limittuples);
4980  else
4981  sorted_path = (Path *) create_incremental_sort_path(root,
4982  distinct_rel,
4983  input_path,
4984  needed_pathkeys,
4985  presorted_keys,
4986  limittuples);
4987  }
4988 
4989  /*
4990  * distinct_pathkeys may have become empty if all of the pathkeys
4991  * were determined to be redundant. If all of the pathkeys are
4992  * redundant then each DISTINCT target must only allow a single
4993  * value, therefore all resulting tuples must be identical (or at
4994  * least indistinguishable by an equality check). We can uniquify
4995  * these tuples simply by just taking the first tuple. All we do
4996  * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
4997  * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
4998  * list, so we must still only do this with paths which are
4999  * correctly sorted by sort_pathkeys.
5000  */
5001  if (root->distinct_pathkeys == NIL)
5002  {
5003  Node *limitCount;
5004 
5005  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5006  sizeof(int64),
5007  Int64GetDatum(1), false,
5008  FLOAT8PASSBYVAL);
5009 
5010  /*
5011  * If the query already has a LIMIT clause, then we could end
5012  * up with a duplicate LimitPath in the final plan. That does
5013  * not seem worth troubling over too much.
5014  */
5015  add_path(distinct_rel, (Path *)
5016  create_limit_path(root, distinct_rel, sorted_path,
5017  NULL, limitCount,
5018  LIMIT_OPTION_COUNT, 0, 1));
5019  }
5020  else
5021  {
5022  add_path(distinct_rel, (Path *)
5023  create_upper_unique_path(root, distinct_rel,
5024  sorted_path,
5026  numDistinctRows));
5027  }
5028  }
5029  }
5030 
5031  /*
5032  * Consider hash-based implementations of DISTINCT, if possible.
5033  *
5034  * If we were not able to make any other types of path, we *must* hash or
5035  * die trying. If we do have other choices, there are two things that
5036  * should prevent selection of hashing: if the query uses DISTINCT ON
5037  * (because it won't really have the expected behavior if we hash), or if
5038  * enable_hashagg is off.
5039  *
5040  * Note: grouping_is_hashable() is much more expensive to check than the
5041  * other gating conditions, so we want to do it last.
5042  */
5043  if (distinct_rel->pathlist == NIL)
5044  allow_hash = true; /* we have no alternatives */
5045  else if (parse->hasDistinctOn || !enable_hashagg)
5046  allow_hash = false; /* policy-based decision not to hash */
5047  else
5048  allow_hash = true; /* default */
5049 
5050  if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5051  {
5052  /* Generate hashed aggregate path --- no sort needed */
5053  add_path(distinct_rel, (Path *)
5054  create_agg_path(root,
5055  distinct_rel,
5056  cheapest_input_path,
5057  cheapest_input_path->pathtarget,
5058  AGG_HASHED,
5061  NIL,
5062  NULL,
5063  numDistinctRows));
5064  }
5065 
5066  return distinct_rel;
5067 }
5068 
5069 /*
5070  * create_ordered_paths
5071  *
5072  * Build a new upperrel containing Paths for ORDER BY evaluation.
5073  *
5074  * All paths in the result must satisfy the ORDER BY ordering.
5075  * The only new paths we need consider are an explicit full sort
5076  * and incremental sort on the cheapest-total existing path.
5077  *
5078  * input_rel: contains the source-data Paths
5079  * target: the output tlist the result Paths must emit
5080  * limit_tuples: estimated bound on the number of output tuples,
5081  * or -1 if no LIMIT or couldn't estimate
5082  *
5083  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5084  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5085  */
5086 static RelOptInfo *
5088  RelOptInfo *input_rel,
5089  PathTarget *target,
5090  bool target_parallel_safe,
5091  double limit_tuples)
5092 {
5093  Path *cheapest_input_path = input_rel->cheapest_total_path;
5094  RelOptInfo *ordered_rel;
5095  ListCell *lc;
5096 
5097  /* For now, do all work in the (ORDERED, NULL) upperrel */
5098  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5099 
5100  /*
5101  * If the input relation is not parallel-safe, then the ordered relation
5102  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5103  * target list is parallel-safe.
5104  */
5105  if (input_rel->consider_parallel && target_parallel_safe)
5106  ordered_rel->consider_parallel = true;
5107 
5108  /*
5109  * If the input rel belongs to a single FDW, so does the ordered_rel.
5110  */
5111  ordered_rel->serverid = input_rel->serverid;
5112  ordered_rel->userid = input_rel->userid;
5113  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5114  ordered_rel->fdwroutine = input_rel->fdwroutine;
5115 
5116  foreach(lc, input_rel->pathlist)
5117  {
5118  Path *input_path = (Path *) lfirst(lc);
5119  Path *sorted_path;
5120  bool is_sorted;
5121  int presorted_keys;
5122 
5123  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5124  input_path->pathkeys, &presorted_keys);
5125 
5126  if (is_sorted)
5127  sorted_path = input_path;
5128  else
5129  {
5130  /*
5131  * Try at least sorting the cheapest path and also try
5132  * incrementally sorting any path which is partially sorted
5133  * already (no need to deal with paths which have presorted keys
5134  * when incremental sort is disabled unless it's the cheapest
5135  * input path).
5136  */
5137  if (input_path != cheapest_input_path &&
5138  (presorted_keys == 0 || !enable_incremental_sort))
5139  continue;
5140 
5141  /*
5142  * We've no need to consider both a sort and incremental sort.
5143  * We'll just do a sort if there are no presorted keys and an
5144  * incremental sort when there are presorted keys.
5145  */
5146  if (presorted_keys == 0 || !enable_incremental_sort)
5147  sorted_path = (Path *) create_sort_path(root,
5148  ordered_rel,
5149  input_path,
5150  root->sort_pathkeys,
5151  limit_tuples);
5152  else
5153  sorted_path = (Path *) create_incremental_sort_path(root,
5154  ordered_rel,
5155  input_path,
5156  root->sort_pathkeys,
5157  presorted_keys,
5158  limit_tuples);
5159  }
5160 
5161  /* Add projection step if needed */
5162  if (sorted_path->pathtarget != target)
5163  sorted_path = apply_projection_to_path(root, ordered_rel,
5164  sorted_path, target);
5165 
5166  add_path(ordered_rel, sorted_path);
5167  }
5168 
5169  /*
5170  * generate_gather_paths() will have already generated a simple Gather
5171  * path for the best parallel path, if any, and the loop above will have
5172  * considered sorting it. Similarly, generate_gather_paths() will also
5173  * have generated order-preserving Gather Merge plans which can be used
5174  * without sorting if they happen to match the sort_pathkeys, and the loop
5175  * above will have handled those as well. However, there's one more
5176  * possibility: it may make sense to sort the cheapest partial path
5177  * according to the required output order and then use Gather Merge.
5178  */
5179  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5180  input_rel->partial_pathlist != NIL)
5181  {
5182  Path *cheapest_partial_path;
5183 
5184  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5185 
5186  /*
5187  * If cheapest partial path doesn't need a sort, this is redundant
5188  * with what's already been tried.
5189  */
5191  cheapest_partial_path->pathkeys))
5192  {
5193  Path *path;
5194  double total_groups;
5195 
5196  path = (Path *) create_sort_path(root,
5197  ordered_rel,
5198  cheapest_partial_path,
5199  root->sort_pathkeys,
5200  limit_tuples);
5201 
5202  total_groups = cheapest_partial_path->rows *
5203  cheapest_partial_path->parallel_workers;
5204  path = (Path *)
5205  create_gather_merge_path(root, ordered_rel,
5206  path,
5207  path->pathtarget,
5208  root->sort_pathkeys, NULL,
5209  &total_groups);
5210 
5211  /* Add projection step if needed */
5212  if (path->pathtarget != target)
5213  path = apply_projection_to_path(root, ordered_rel,
5214  path, target);
5215 
5216  add_path(ordered_rel, path);
5217  }
5218 
5219  /*
5220  * Consider incremental sort with a gather merge on partial paths.
5221  *
5222  * We can also skip the entire loop when we only have a single-item
5223  * sort_pathkeys because then we can't possibly have a presorted
5224  * prefix of the list without having the list be fully sorted.
5225  */
5227  {
5228  foreach(lc, input_rel->partial_pathlist)
5229  {
5230  Path *input_path = (Path *) lfirst(lc);
5231  Path *sorted_path;
5232  bool is_sorted;
5233  int presorted_keys;
5234  double total_groups;
5235 
5236  /*
5237  * We don't care if this is the cheapest partial path - we
5238  * can't simply skip it, because it may be partially sorted in
5239  * which case we want to consider adding incremental sort
5240  * (instead of full sort, which is what happens above).
5241  */
5242 
5243  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5244  input_path->pathkeys,
5245  &presorted_keys);
5246 
5247  /* No point in adding incremental sort on fully sorted paths. */
5248  if (is_sorted)
5249  continue;
5250 
5251  if (presorted_keys == 0)
5252  continue;
5253 
5254  /* Since we have presorted keys, consider incremental sort. */
5255  sorted_path = (Path *) create_incremental_sort_path(root,
5256  ordered_rel,
5257  input_path,
5258  root->sort_pathkeys,
5259  presorted_keys,
5260  limit_tuples);
5261  total_groups = input_path->rows *
5262  input_path->parallel_workers;
5263  sorted_path = (Path *)
5264  create_gather_merge_path(root, ordered_rel,
5265  sorted_path,
5266  sorted_path->pathtarget,
5267  root->sort_pathkeys, NULL,
5268  &total_groups);
5269 
5270  /* Add projection step if needed */
5271  if (sorted_path->pathtarget != target)
5272  sorted_path = apply_projection_to_path(root, ordered_rel,
5273  sorted_path, target);
5274 
5275  add_path(ordered_rel, sorted_path);
5276  }
5277  }
5278  }
5279 
5280  /*
5281  * If there is an FDW that's responsible for all baserels of the query,
5282  * let it consider adding ForeignPaths.
5283  */
5284  if (ordered_rel->fdwroutine &&
5285  ordered_rel->fdwroutine->GetForeignUpperPaths)
5286  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5287  input_rel, ordered_rel,
5288  NULL);
5289 
5290  /* Let extensions possibly add some more paths */
5292  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5293  input_rel, ordered_rel, NULL);
5294 
5295  /*
5296  * No need to bother with set_cheapest here; grouping_planner does not
5297  * need us to do it.
5298  */
5299  Assert(ordered_rel->pathlist != NIL);
5300 
5301  return ordered_rel;
5302 }
5303 
5304 
5305 /*
5306  * make_group_input_target
5307  * Generate appropriate PathTarget for initial input to grouping nodes.
5308  *
5309  * If there is grouping or aggregation, the scan/join subplan cannot emit
5310  * the query's final targetlist; for example, it certainly can't emit any
5311  * aggregate function calls. This routine generates the correct target
5312  * for the scan/join subplan.
5313  *
5314  * The query target list passed from the parser already contains entries
5315  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5316  * for variables used only in HAVING clauses; so we need to add those
5317  * variables to the subplan target list. Also, we flatten all expressions
5318  * except GROUP BY items into their component variables; other expressions
5319  * will be computed by the upper plan nodes rather than by the subplan.
5320  * For example, given a query like
5321  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5322  * we want to pass this targetlist to the subplan:
5323  * a+b,c,d
5324  * where the a+b target will be used by the Sort/Group steps, and the
5325  * other targets will be used for computing the final results.
5326  *
5327  * 'final_target' is the query's final target list (in PathTarget form)
5328  *
5329  * The result is the PathTarget to be computed by the Paths returned from
5330  * query_planner().
5331  */
5332 static PathTarget *
5334 {
5335  Query *parse = root->parse;
5336  PathTarget *input_target;
5337  List *non_group_cols;
5338  List *non_group_vars;
5339  int i;
5340  ListCell *lc;
5341 
5342  /*
5343  * We must build a target containing all grouping columns, plus any other
5344  * Vars mentioned in the query's targetlist and HAVING qual.
5345  */
5346  input_target = create_empty_pathtarget();
5347  non_group_cols = NIL;
5348 
5349  i = 0;
5350  foreach(lc, final_target->exprs)
5351  {
5352  Expr *expr = (Expr *) lfirst(lc);
5353  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5354 
5355  if (sgref && root->processed_groupClause &&
5357  root->processed_groupClause) != NULL)
5358  {
5359  /*
5360  * It's a grouping column, so add it to the input target as-is.
5361  */
5362  add_column_to_pathtarget(input_target, expr, sgref);
5363  }
5364  else
5365  {
5366  /*
5367  * Non-grouping column, so just remember the expression for later
5368  * call to pull_var_clause.
5369  */
5370  non_group_cols = lappend(non_group_cols, expr);
5371  }
5372 
5373  i++;
5374  }
5375 
5376  /*
5377  * If there's a HAVING clause, we'll need the Vars it uses, too.
5378  */
5379  if (parse->havingQual)
5380  non_group_cols = lappend(non_group_cols, parse->havingQual);
5381 
5382  /*
5383  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5384  * add them to the input target if not already present. (A Var used
5385  * directly as a GROUP BY item will be present already.) Note this
5386  * includes Vars used in resjunk items, so we are covering the needs of
5387  * ORDER BY and window specifications. Vars used within Aggrefs and
5388  * WindowFuncs will be pulled out here, too.
5389  */
5390  non_group_vars = pull_var_clause((Node *) non_group_cols,
5394  add_new_columns_to_pathtarget(input_target, non_group_vars);
5395 
5396  /* clean up cruft */
5397  list_free(non_group_vars);
5398  list_free(non_group_cols);
5399 
5400  /* XXX this causes some redundant cost calculation ... */
5401  return set_pathtarget_cost_width(root, input_target);
5402 }
5403 
5404 /*
5405  * make_partial_grouping_target
5406  * Generate appropriate PathTarget for output of partial aggregate
5407  * (or partial grouping, if there are no aggregates) nodes.
5408  *
5409  * A partial aggregation node needs to emit all the same aggregates that
5410  * a regular aggregation node would, plus any aggregates used in HAVING;
5411  * except that the Aggref nodes should be marked as partial aggregates.
5412  *
5413  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5414  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5415  * these would be Vars that are grouped by or used in grouping expressions.)
5416  *
5417  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5418  * havingQual represents the HAVING clause.
5419  */
5420 static PathTarget *
5422  PathTarget *grouping_target,
5423  Node *havingQual)
5424 {
5425  PathTarget *partial_target;
5426  List *non_group_cols;
5427  List *non_group_exprs;
5428  int i;
5429  ListCell *lc;
5430 
5431  partial_target = create_empty_pathtarget();
5432  non_group_cols = NIL;
5433 
5434  i = 0;
5435  foreach(lc, grouping_target->exprs)
5436  {
5437  Expr *expr = (Expr *) lfirst(lc);
5438  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5439 
5440  if (sgref && root->processed_groupClause &&
5442  root->processed_groupClause) != NULL)
5443  {
5444  /*
5445  * It's a grouping column, so add it to the partial_target as-is.
5446  * (This allows the upper agg step to repeat the grouping calcs.)
5447  */
5448  add_column_to_pathtarget(partial_target, expr, sgref);
5449  }
5450  else
5451  {
5452  /*
5453  * Non-grouping column, so just remember the expression for later
5454  * call to pull_var_clause.
5455  */
5456  non_group_cols = lappend(non_group_cols, expr);
5457  }
5458 
5459  i++;
5460  }
5461 
5462  /*
5463  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5464  */
5465  if (havingQual)
5466  non_group_cols = lappend(non_group_cols, havingQual);
5467 
5468  /*
5469  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5470  * non-group cols (plus HAVING), and add them to the partial_target if not
5471  * already present. (An expression used directly as a GROUP BY item will
5472  * be present already.) Note this includes Vars used in resjunk items, so
5473  * we are covering the needs of ORDER BY and window specifications.
5474  */
5475  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5479 
5480  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5481 
5482  /*
5483  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5484  * are at the top level of the target list, so we can just scan the list
5485  * rather than recursing through the expression trees.
5486  */
5487  foreach(lc, partial_target->exprs)
5488  {
5489  Aggref *aggref = (Aggref *) lfirst(lc);
5490 
5491  if (IsA(aggref, Aggref))
5492  {
5493  Aggref *newaggref;
5494 
5495  /*
5496  * We shouldn't need to copy the substructure of the Aggref node,
5497  * but flat-copy the node itself to avoid damaging other trees.
5498  */
5499  newaggref = makeNode(Aggref);
5500  memcpy(newaggref, aggref, sizeof(Aggref));
5501 
5502  /* For now, assume serialization is required */
5504 
5505  lfirst(lc) = newaggref;
5506  }
5507  }
5508 
5509  /* clean up cruft */
5510  list_free(non_group_exprs);
5511  list_free(non_group_cols);
5512 
5513  /* XXX this causes some redundant cost calculation ... */
5514  return set_pathtarget_cost_width(root, partial_target);
5515 }
5516 
5517 /*
5518  * mark_partial_aggref
5519  * Adjust an Aggref to make it represent a partial-aggregation step.
5520  *
5521  * The Aggref node is modified in-place; caller must do any copying required.
5522  */
5523 void
5525 {
5526  /* aggtranstype should be computed by this point */
5527  Assert(OidIsValid(agg->aggtranstype));
5528  /* ... but aggsplit should still be as the parser left it */
5529  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5530 
5531  /* Mark the Aggref with the intended partial-aggregation mode */
5532  agg->aggsplit = aggsplit;
5533 
5534  /*
5535  * Adjust result type if needed. Normally, a partial aggregate returns
5536  * the aggregate's transition type; but if that's INTERNAL and we're
5537  * serializing, it returns BYTEA instead.
5538  */
5539  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5540  {
5541  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5542  agg->aggtype = BYTEAOID;
5543  else
5544  agg->aggtype = agg->aggtranstype;
5545  }
5546 }
5547 
5548 /*
5549  * postprocess_setop_tlist
5550  * Fix up targetlist returned by plan_set_operations().
5551  *
5552  * We need to transpose sort key info from the orig_tlist into new_tlist.
5553  * NOTE: this would not be good enough if we supported resjunk sort keys
5554  * for results of set operations --- then, we'd need to project a whole
5555  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5556  * find any resjunk columns in orig_tlist.
5557  */
5558 static List *
5559 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5560 {
5561  ListCell *l;
5562  ListCell *orig_tlist_item = list_head(orig_tlist);
5563 
5564  foreach(l, new_tlist)
5565  {
5566  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5567  TargetEntry *orig_tle;
5568 
5569  /* ignore resjunk columns in setop result */
5570  if (new_tle->resjunk)
5571  continue;
5572 
5573  Assert(orig_tlist_item != NULL);
5574  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5575  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5576  if (orig_tle->resjunk) /* should not happen */
5577  elog(ERROR, "resjunk output columns are not implemented");
5578  Assert(new_tle->resno == orig_tle->resno);
5579  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5580  }
5581  if (orig_tlist_item != NULL)
5582  elog(ERROR, "resjunk output columns are not implemented");
5583  return new_tlist;
5584 }
5585 
5586 /*
5587  * optimize_window_clauses
5588  * Call each WindowFunc's prosupport function to see if we're able to
5589  * make any adjustments to any of the WindowClause's so that the executor
5590  * can execute the window functions in a more optimal way.
5591  *
5592  * Currently we only allow adjustments to the WindowClause's frameOptions. We
5593  * may allow more things to be done here in the future.
5594  */
5595 static void
5597 {
5598  List *windowClause = root->parse->windowClause;
5599  ListCell *lc;
5600 
5601  foreach(lc, windowClause)
5602  {
5604  ListCell *lc2;
5605  int optimizedFrameOptions = 0;
5606 
5607  Assert(wc->winref <= wflists->maxWinRef);
5608 
5609  /* skip any WindowClauses that have no WindowFuncs */
5610  if (wflists->windowFuncs[wc->winref] == NIL)
5611  continue;
5612 
5613  foreach(lc2, wflists->windowFuncs[wc->winref])
5614  {
5617  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5618  Oid prosupport;
5619 
5620  prosupport = get_func_support(wfunc->winfnoid);
5621 
5622  /* Check if there's a support function for 'wfunc' */
5623  if (!OidIsValid(prosupport))
5624  break; /* can't optimize this WindowClause */
5625 
5626  req.type = T_SupportRequestOptimizeWindowClause;
5627  req.window_clause = wc;
5628  req.window_func = wfunc;
5629  req.frameOptions = wc->frameOptions;
5630 
5631  /* call the support function */
5633  DatumGetPointer(OidFunctionCall1(prosupport,
5634  PointerGetDatum(&req)));
5635 
5636  /*
5637  * Skip to next WindowClause if the support function does not
5638  * support this request type.
5639  */
5640  if (res == NULL)
5641  break;
5642 
5643  /*
5644  * Save these frameOptions for the first WindowFunc for this
5645  * WindowClause.
5646  */
5647  if (foreach_current_index(lc2) == 0)
5648  optimizedFrameOptions = res->frameOptions;
5649 
5650  /*
5651  * On subsequent WindowFuncs, if the frameOptions are not the same
5652  * then we're unable to optimize the frameOptions for this
5653  * WindowClause.
5654  */
5655  else if (optimizedFrameOptions != res->frameOptions)
5656  break; /* skip to the next WindowClause, if any */
5657  }
5658 
5659  /* adjust the frameOptions if all WindowFunc's agree that it's ok */
5660  if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5661  {
5662  ListCell *lc3;
5663 
5664  /* apply the new frame options */
5665  wc->frameOptions = optimizedFrameOptions;
5666 
5667  /*
5668  * We now check to see if changing the frameOptions has caused
5669  * this WindowClause to be a duplicate of some other WindowClause.
5670  * This can only happen if we have multiple WindowClauses, so
5671  * don't bother if there's only 1.
5672  */
5673  if (list_length(windowClause) == 1)
5674  continue;
5675 
5676  /*
5677  * Do the duplicate check and reuse the existing WindowClause if
5678  * we find a duplicate.
5679  */
5680  foreach(lc3, windowClause)
5681  {
5682  WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5683 
5684  /* skip over the WindowClause we're currently editing */
5685  if (existing_wc == wc)
5686  continue;
5687 
5688  /*
5689  * Perform the same duplicate check that is done in
5690  * transformWindowFuncCall.
5691  */
5692  if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5693  equal(wc->orderClause, existing_wc->orderClause) &&
5694  wc->frameOptions == existing_wc->frameOptions &&
5695  equal(wc->startOffset, existing_wc->startOffset) &&
5696  equal(wc->endOffset, existing_wc->endOffset))
5697  {
5698  ListCell *lc4;
5699 
5700  /*
5701  * Now move each WindowFunc in 'wc' into 'existing_wc'.
5702  * This required adjusting each WindowFunc's winref and
5703  * moving the WindowFuncs in 'wc' to the list of
5704  * WindowFuncs in 'existing_wc'.
5705  */
5706  foreach(lc4, wflists->windowFuncs[wc->winref])
5707  {
5708  WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5709 
5710  wfunc->winref = existing_wc->winref;
5711  }
5712 
5713  /* move list items */
5714  wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5715  wflists->windowFuncs[wc->winref]);
5716  wflists->windowFuncs[wc->winref] = NIL;
5717 
5718  /*
5719  * transformWindowFuncCall() should have made sure there
5720  * are no other duplicates, so we needn't bother looking
5721  * any further.
5722  */
5723  break;
5724  }
5725  }
5726  }
5727  }
5728 }
5729 
5730 /*
5731  * select_active_windows
5732  * Create a list of the "active" window clauses (ie, those referenced
5733  * by non-deleted WindowFuncs) in the order they are to be executed.
5734  */
5735 static List *
5737 {
5738  List *windowClause = root->parse->windowClause;
5739  List *result = NIL;
5740  ListCell *lc;
5741  int nActive = 0;
5743  * list_length(windowClause));
5744 
5745  /* First, construct an array of the active windows */
5746  foreach(lc, windowClause)
5747  {
5749 
5750  /* It's only active if wflists shows some related WindowFuncs */
5751  Assert(wc->winref <= wflists->maxWinRef);
5752  if (wflists->windowFuncs[wc->winref] == NIL)
5753  continue;
5754 
5755  actives[nActive].wc = wc; /* original clause */
5756 
5757  /*
5758  * For sorting, we want the list of partition keys followed by the
5759  * list of sort keys. But pathkeys construction will remove duplicates
5760  * between the two, so we can as well (even though we can't detect all
5761  * of the duplicates, since some may come from ECs - that might mean
5762  * we miss optimization chances here). We must, however, ensure that
5763  * the order of entries is preserved with respect to the ones we do
5764  * keep.
5765  *
5766  * partitionClause and orderClause had their own duplicates removed in
5767  * parse analysis, so we're only concerned here with removing
5768  * orderClause entries that also appear in partitionClause.
5769  */
5770  actives[nActive].uniqueOrder =
5772  wc->orderClause);
5773  nActive++;
5774  }
5775 
5776  /*
5777  * Sort active windows by their partitioning/ordering clauses, ignoring
5778  * any framing clauses, so that the windows that need the same sorting are
5779  * adjacent in the list. When we come to generate paths, this will avoid
5780  * inserting additional Sort nodes.
5781  *
5782  * This is how we implement a specific requirement from the SQL standard,
5783  * which says that when two or more windows are order-equivalent (i.e.
5784  * have matching partition and order clauses, even if their names or
5785  * framing clauses differ), then all peer rows must be presented in the
5786  * same order in all of them. If we allowed multiple sort nodes for such
5787  * cases, we'd risk having the peer rows end up in different orders in
5788  * equivalent windows due to sort instability. (See General Rule 4 of
5789  * <window clause> in SQL2008 - SQL2016.)
5790  *
5791  * Additionally, if the entire list of clauses of one window is a prefix
5792  * of another, put first the window with stronger sorting requirements.
5793  * This way we will first sort for stronger window, and won't have to sort
5794  * again for the weaker one.
5795  */
5796  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5797 
5798  /* build ordered list of the original WindowClause nodes */
5799  for (int i = 0; i < nActive; i++)
5800  result = lappend(result, actives[i].wc);
5801 
5802  pfree(actives);
5803 
5804  return result;
5805 }
5806 
5807 /*
5808  * common_prefix_cmp
5809  * QSort comparison function for WindowClauseSortData
5810  *
5811  * Sort the windows by the required sorting clauses. First, compare the sort
5812  * clauses themselves. Second, if one window's clauses are a prefix of another
5813  * one's clauses, put the window with more sort clauses first.
5814  *
5815  * We purposefully sort by the highest tleSortGroupRef first. Since
5816  * tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
5817  * and because here we sort the lowest tleSortGroupRefs last, if a
5818  * WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
5819  * ORDER BY clause, this makes it more likely that the final WindowAgg will
5820  * provide presorted input for the query's DISTINCT or ORDER BY clause, thus
5821  * reducing the total number of sorts required for the query.
5822  */
5823 static int
5824 common_prefix_cmp(const void *a, const void *b)
5825 {
5826  const WindowClauseSortData *wcsa = a;
5827  const WindowClauseSortData *wcsb = b;
5828  ListCell *item_a;
5829  ListCell *item_b;
5830 
5831  forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
<