1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_aggregate.h"
28 #include "catalog/pg_constraint.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_proc.h"
31 #include "catalog/pg_type.h"
32 #include "executor/executor.h"
33 #include "executor/nodeAgg.h"
34 #include "foreign/fdwapi.h"
35 #include "jit/jit.h"
36 #include "lib/bipartite_match.h"
37 #include "lib/knapsack.h"
38 #include "miscadmin.h"
39 #include "nodes/makefuncs.h"
40 #include "nodes/nodeFuncs.h"
41 #ifdef OPTIMIZER_DEBUG
42 #include "nodes/print.h"
43 #endif
44 #include "nodes/supportnodes.h"
45 #include "optimizer/appendinfo.h"
46 #include "optimizer/clauses.h"
47 #include "optimizer/cost.h"
48 #include "optimizer/inherit.h"
49 #include "optimizer/optimizer.h"
50 #include "optimizer/paramassign.h"
51 #include "optimizer/pathnode.h"
52 #include "optimizer/paths.h"
53 #include "optimizer/plancat.h"
54 #include "optimizer/planmain.h"
55 #include "optimizer/planner.h"
56 #include "optimizer/prep.h"
57 #include "optimizer/subselect.h"
58 #include "optimizer/tlist.h"
59 #include "parser/analyze.h"
60 #include "parser/parse_agg.h"
61 #include "parser/parse_relation.h"
62 #include "parser/parsetree.h"
63 #include "partitioning/partdesc.h"
64 #include "rewrite/rewriteManip.h"
65 #include "storage/dsm_impl.h"
66 #include "utils/lsyscache.h"
67 #include "utils/rel.h"
68 #include "utils/selfuncs.h"
69 #include "utils/syscache.h"
70 
71 /* GUC parameters */
72 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
73 int debug_parallel_query = DEBUG_PARALLEL_OFF;
74 bool parallel_leader_participation = true;
75 
76 /* Hook for plugins to get control in planner() */
77 planner_hook_type planner_hook = NULL;
78 
79 /* Hook for plugins to get control when grouping_planner() plans upper rels */
80 create_upper_paths_hook_type create_upper_paths_hook = NULL;
81 
82 
83 /* Expression kind codes for preprocess_expression */
84 #define EXPRKIND_QUAL 0
85 #define EXPRKIND_TARGET 1
86 #define EXPRKIND_RTFUNC 2
87 #define EXPRKIND_RTFUNC_LATERAL 3
88 #define EXPRKIND_VALUES 4
89 #define EXPRKIND_VALUES_LATERAL 5
90 #define EXPRKIND_LIMIT 6
91 #define EXPRKIND_APPINFO 7
92 #define EXPRKIND_PHV 8
93 #define EXPRKIND_TABLESAMPLE 9
94 #define EXPRKIND_ARBITER_ELEM 10
95 #define EXPRKIND_TABLEFUNC 11
96 #define EXPRKIND_TABLEFUNC_LATERAL 12
97 
98 /*
99  * Data specific to grouping sets
100  */
101 typedef struct
102 {
103  List *rollups; /* list of RollupData */
104  List *hash_sets_idx; /* list of hashable grouping sets */
105  double dNumHashGroups; /* estimated number of hash groups */
106  bool any_hashable; /* true if any set is hashable */
107  Bitmapset *unsortable_refs; /* refs of unsortable grouping columns */
108  Bitmapset *unhashable_refs; /* refs of unhashable grouping columns */
109  List *unsortable_sets; /* sets containing unsortable columns */
110  int *tleref_to_colnum_map; /* tleref -> column number map */
111 } grouping_sets_data;
112 
113 /*
114  * Temporary structure for use during WindowClause reordering in order to be
115  * able to sort WindowClauses on partitioning/ordering prefix.
116  */
117 typedef struct
118 {
119  WindowClause *wc; /* The WindowClause being processed */
120  List *uniqueOrder; /* A List of unique ordering/partitioning
121  * clauses per Window */
122 } WindowClauseSortData;
123 
124 /* Passthrough data for standard_qp_callback */
125 typedef struct
126 {
127  List *activeWindows; /* active windows, if any */
128  grouping_sets_data *gset_data; /* grouping sets data, if any */
129 } standard_qp_extra;
130 
131 /* Local functions */
132 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
133 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
134 static void grouping_planner(PlannerInfo *root, double tuple_fraction);
135 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
136 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
137  int *tleref_to_colnum_map);
138 static void preprocess_rowmarks(PlannerInfo *root);
139 static double preprocess_limit(PlannerInfo *root,
140  double tuple_fraction,
141  int64 *offset_est, int64 *count_est);
142 static void remove_useless_groupby_columns(PlannerInfo *root);
143 static List *groupclause_apply_groupingset(PlannerInfo *root, List *force);
144 static List *extract_rollup_sets(List *groupingSets);
145 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
146 static void standard_qp_callback(PlannerInfo *root, void *extra);
147 static double get_number_of_groups(PlannerInfo *root,
148  double path_rows,
149  grouping_sets_data *gd,
150  List *target_list);
151 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
152  RelOptInfo *input_rel,
153  PathTarget *target,
154  bool target_parallel_safe,
155  grouping_sets_data *gd);
156 static bool is_degenerate_grouping(PlannerInfo *root);
157 static void create_degenerate_grouping_paths(PlannerInfo *root,
158  RelOptInfo *input_rel,
159  RelOptInfo *grouped_rel);
160 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
161  PathTarget *target, bool target_parallel_safe,
162  Node *havingQual);
163 static void create_ordinary_grouping_paths(PlannerInfo *root,
164  RelOptInfo *input_rel,
165  RelOptInfo *grouped_rel,
166  const AggClauseCosts *agg_costs,
167  grouping_sets_data *gd,
168  GroupPathExtraData *extra,
169  RelOptInfo **partially_grouped_rel_p);
170 static void consider_groupingsets_paths(PlannerInfo *root,
171  RelOptInfo *grouped_rel,
172  Path *path,
173  bool is_sorted,
174  bool can_hash,
175  grouping_sets_data *gd,
176  const AggClauseCosts *agg_costs,
177  double dNumGroups);
178 static RelOptInfo *create_window_paths(PlannerInfo *root,
179  RelOptInfo *input_rel,
180  PathTarget *input_target,
181  PathTarget *output_target,
182  bool output_target_parallel_safe,
183  WindowFuncLists *wflists,
184  List *activeWindows);
185 static void create_one_window_path(PlannerInfo *root,
186  RelOptInfo *window_rel,
187  Path *path,
188  PathTarget *input_target,
189  PathTarget *output_target,
190  WindowFuncLists *wflists,
191  List *activeWindows);
192 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
193  RelOptInfo *input_rel,
194  PathTarget *target);
195 static void create_partial_distinct_paths(PlannerInfo *root,
196  RelOptInfo *input_rel,
197  RelOptInfo *final_distinct_rel,
198  PathTarget *target);
199 static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
200  RelOptInfo *input_rel,
201  RelOptInfo *distinct_rel);
202 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
203  RelOptInfo *input_rel,
204  PathTarget *target,
205  bool target_parallel_safe,
206  double limit_tuples);
207 static PathTarget *make_group_input_target(PlannerInfo *root,
208  PathTarget *final_target);
209 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
210  PathTarget *grouping_target,
211  Node *havingQual);
212 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
213 static void optimize_window_clauses(PlannerInfo *root,
214  WindowFuncLists *wflists);
215 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
216 static PathTarget *make_window_input_target(PlannerInfo *root,
217  PathTarget *final_target,
218  List *activeWindows);
219 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
220  List *tlist);
221 static PathTarget *make_sort_input_target(PlannerInfo *root,
222  PathTarget *final_target,
223  bool *have_postponed_srfs);
224 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
225  List *targets, List *targets_contain_srfs);
226 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
227  RelOptInfo *grouped_rel,
228  RelOptInfo *partially_grouped_rel,
229  const AggClauseCosts *agg_costs,
230  grouping_sets_data *gd,
231  double dNumGroups,
232  GroupPathExtraData *extra);
233 static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
234  RelOptInfo *grouped_rel,
235  RelOptInfo *input_rel,
236  grouping_sets_data *gd,
237  GroupPathExtraData *extra,
238  bool force_rel_creation);
239 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
240 static bool can_partial_agg(PlannerInfo *root);
241 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
242  RelOptInfo *rel,
243  List *scanjoin_targets,
244  List *scanjoin_targets_contain_srfs,
245  bool scanjoin_target_parallel_safe,
246  bool tlist_same_exprs);
247 static void create_partitionwise_grouping_paths(PlannerInfo *root,
248  RelOptInfo *input_rel,
249  RelOptInfo *grouped_rel,
250  RelOptInfo *partially_grouped_rel,
251  const AggClauseCosts *agg_costs,
252  grouping_sets_data *gd,
253  PartitionwiseAggregateType patype,
254  GroupPathExtraData *extra);
255 static bool group_by_has_partkey(RelOptInfo *input_rel,
256  List *targetList,
257  List *groupClause);
258 static int common_prefix_cmp(const void *a, const void *b);
259 
260 
261 /*****************************************************************************
262  *
263  * Query optimizer entry point
264  *
265  * To support loadable plugins that monitor or modify planner behavior,
266  * we provide a hook variable that lets a plugin get control before and
267  * after the standard planning process. The plugin would normally call
268  * standard_planner().
269  *
270  * Note to plugin authors: standard_planner() scribbles on its Query input,
271  * so you'd better copy that data structure if you want to plan more than once.
272  *
273  *****************************************************************************/
274 PlannedStmt *
275 planner(Query *parse, const char *query_string, int cursorOptions,
276  ParamListInfo boundParams)
277 {
278  PlannedStmt *result;
279 
280  if (planner_hook)
281  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
282  else
283  result = standard_planner(parse, query_string, cursorOptions, boundParams);
284  return result;
285 }
286 
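/*
 * Example (not part of planner.c): a minimal sketch of how a loadable plugin
 * could use planner_hook, as described in the comment above planner().  The
 * names prev_planner_hook and my_planner are hypothetical; a real extension
 * would install the hook from its _PG_init() function.
 */
static planner_hook_type prev_planner_hook = NULL;

static PlannedStmt *
my_planner(Query *parse, const char *query_string, int cursorOptions,
           ParamListInfo boundParams)
{
    /* inspect (or copy) the Query here; standard_planner scribbles on it */
    if (prev_planner_hook)
        return prev_planner_hook(parse, query_string, cursorOptions, boundParams);
    return standard_planner(parse, query_string, cursorOptions, boundParams);
}

/* in _PG_init():  prev_planner_hook = planner_hook;  planner_hook = my_planner; */
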
287 PlannedStmt *
288 standard_planner(Query *parse, const char *query_string, int cursorOptions,
289  ParamListInfo boundParams)
290 {
291  PlannedStmt *result;
292  PlannerGlobal *glob;
293  double tuple_fraction;
294  PlannerInfo *root;
295  RelOptInfo *final_rel;
296  Path *best_path;
297  Plan *top_plan;
298  ListCell *lp,
299  *lr;
300 
301  /*
302  * Set up global state for this planner invocation. This data is needed
303  * across all levels of sub-Query that might exist in the given command,
304  * so we keep it in a separate struct that's linked to by each per-Query
305  * PlannerInfo.
306  */
307  glob = makeNode(PlannerGlobal);
308 
309  glob->boundParams = boundParams;
310  glob->subplans = NIL;
311  glob->subroots = NIL;
312  glob->rewindPlanIDs = NULL;
313  glob->finalrtable = NIL;
314  glob->finalrteperminfos = NIL;
315  glob->finalrowmarks = NIL;
316  glob->resultRelations = NIL;
317  glob->appendRelations = NIL;
318  glob->relationOids = NIL;
319  glob->invalItems = NIL;
320  glob->paramExecTypes = NIL;
321  glob->lastPHId = 0;
322  glob->lastRowMarkId = 0;
323  glob->lastPlanNodeId = 0;
324  glob->transientPlan = false;
325  glob->dependsOnRole = false;
326 
327  /*
328  * Assess whether it's feasible to use parallel mode for this query. We
329  * can't do this in a standalone backend, or if the command will try to
330  * modify any data, or if this is a cursor operation, or if GUCs are set
331  * to values that don't permit parallelism, or if parallel-unsafe
332  * functions are present in the query tree.
333  *
334  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
335  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
336  * the command is writing into a completely new table which workers won't
337  * be able to see. If the workers could see the table, the fact that
338  * group locking would cause them to ignore the leader's heavyweight GIN
339  * page locks would make this unsafe. We'll have to fix that somehow if
340  * we want to allow parallel inserts in general; updates and deletes have
341  * additional problems especially around combo CIDs.)
342  *
343  * For now, we don't try to use parallel mode if we're running inside a
344  * parallel worker. We might eventually be able to relax this
345  * restriction, but for now it seems best not to have parallel workers
346  * trying to create their own parallel workers.
347  */
348  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
349  IsUnderPostmaster &&
350  parse->commandType == CMD_SELECT &&
351  !parse->hasModifyingCTE &&
352  max_parallel_workers_per_gather > 0 &&
353  !IsParallelWorker())
354  {
355  /* all the cheap tests pass, so scan the query tree */
356  glob->maxParallelHazard = max_parallel_hazard(parse);
357  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
358  }
359  else
360  {
361  /* skip the query tree scan, just assume it's unsafe */
362  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
363  glob->parallelModeOK = false;
364  }
365 
366  /*
367  * glob->parallelModeNeeded is normally set to false here and changed to
368  * true during plan creation if a Gather or Gather Merge plan is actually
369  * created (cf. create_gather_plan, create_gather_merge_plan).
370  *
371  * However, if debug_parallel_query = on or debug_parallel_query =
372  * regress, then we impose parallel mode whenever it's safe to do so, even
373  * if the final plan doesn't use parallelism. It's not safe to do so if
374  * the query contains anything parallel-unsafe; parallelModeOK will be
375  * false in that case. Note that parallelModeOK can't change after this
376  * point. Otherwise, everything in the query is either parallel-safe or
377  * parallel-restricted, and in either case it should be OK to impose
378  * parallel-mode restrictions. If that ends up breaking something, then
379  * either some function the user included in the query is incorrectly
380  * labeled as parallel-safe or parallel-restricted when in reality it's
381  * parallel-unsafe, or else the query planner itself has a bug.
382  */
383  glob->parallelModeNeeded = glob->parallelModeOK &&
384  (debug_parallel_query != DEBUG_PARALLEL_OFF);
385 
386  /* Determine what fraction of the plan is likely to be scanned */
387  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
388  {
389  /*
390  * We have no real idea how many tuples the user will ultimately FETCH
391  * from a cursor, but it is often the case that he doesn't want 'em
392  * all, or would prefer a fast-start plan anyway so that he can
393  * process some of the tuples sooner. Use a GUC parameter to decide
394  * what fraction to optimize for.
395  */
396  tuple_fraction = cursor_tuple_fraction;
397 
398  /*
399  * We document cursor_tuple_fraction as simply being a fraction, which
400  * means the edge cases 0 and 1 have to be treated specially here. We
401  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
402  */
403  if (tuple_fraction >= 1.0)
404  tuple_fraction = 0.0;
405  else if (tuple_fraction <= 0.0)
406  tuple_fraction = 1e-10;
407  }
408  else
409  {
410  /* Default assumption is we need all the tuples */
411  tuple_fraction = 0.0;
412  }
413 
414  /* primary planning entry point (may recurse for subqueries) */
415  root = subquery_planner(glob, parse, NULL,
416  false, tuple_fraction);
417 
418  /* Select best Path and turn it into a Plan */
419  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
420  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
421 
422  top_plan = create_plan(root, best_path);
423 
424  /*
425  * If creating a plan for a scrollable cursor, make sure it can run
426  * backwards on demand. Add a Material node at the top at need.
427  */
428  if (cursorOptions & CURSOR_OPT_SCROLL)
429  {
430  if (!ExecSupportsBackwardScan(top_plan))
431  top_plan = materialize_finished_plan(top_plan);
432  }
433 
434  /*
435  * Optionally add a Gather node for testing purposes, provided this is
436  * actually a safe thing to do.
437  *
438  * We can add Gather even when top_plan has parallel-safe initPlans, but
439  * then we have to move the initPlans to the Gather node because of
440  * SS_finalize_plan's limitations. That would cause cosmetic breakage of
441  * regression tests when debug_parallel_query = regress, because initPlans
442  * that would normally appear on the top_plan move to the Gather, causing
443  * them to disappear from EXPLAIN output. That doesn't seem worth kluging
444  * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
445  */
446  if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
447  top_plan->parallel_safe &&
448  (top_plan->initPlan == NIL ||
449  debug_parallel_query != DEBUG_PARALLEL_REGRESS))
450  {
451  Gather *gather = makeNode(Gather);
452  Cost initplan_cost;
453  bool unsafe_initplans;
454 
455  gather->plan.targetlist = top_plan->targetlist;
456  gather->plan.qual = NIL;
457  gather->plan.lefttree = top_plan;
458  gather->plan.righttree = NULL;
459  gather->num_workers = 1;
460  gather->single_copy = true;
461  gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
462 
463  /* Transfer any initPlans to the new top node */
464  gather->plan.initPlan = top_plan->initPlan;
465  top_plan->initPlan = NIL;
466 
467  /*
468  * Since this Gather has no parallel-aware descendants to signal to,
469  * we don't need a rescan Param.
470  */
471  gather->rescan_param = -1;
472 
473  /*
474  * Ideally we'd use cost_gather here, but setting up dummy path data
475  * to satisfy it doesn't seem much cleaner than knowing what it does.
476  */
477  gather->plan.startup_cost = top_plan->startup_cost +
478  parallel_setup_cost;
479  gather->plan.total_cost = top_plan->total_cost +
480  parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
481  gather->plan.plan_rows = top_plan->plan_rows;
482  gather->plan.plan_width = top_plan->plan_width;
483  gather->plan.parallel_aware = false;
484  gather->plan.parallel_safe = false;
485 
486  /*
487  * Delete the initplans' cost from top_plan. We needn't add it to the
488  * Gather node, since the above coding already included it there.
489  */
490  SS_compute_initplan_cost(gather->plan.initPlan,
491  &initplan_cost, &unsafe_initplans);
492  top_plan->startup_cost -= initplan_cost;
493  top_plan->total_cost -= initplan_cost;
494 
495  /* use parallel mode for parallel plans. */
496  root->glob->parallelModeNeeded = true;
497 
498  top_plan = &gather->plan;
499  }
500 
501  /*
502  * If any Params were generated, run through the plan tree and compute
503  * each plan node's extParam/allParam sets. Ideally we'd merge this into
504  * set_plan_references' tree traversal, but for now it has to be separate
505  * because we need to visit subplans before not after main plan.
506  */
507  if (glob->paramExecTypes != NIL)
508  {
509  Assert(list_length(glob->subplans) == list_length(glob->subroots));
510  forboth(lp, glob->subplans, lr, glob->subroots)
511  {
512  Plan *subplan = (Plan *) lfirst(lp);
513  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
514 
515  SS_finalize_plan(subroot, subplan);
516  }
517  SS_finalize_plan(root, top_plan);
518  }
519 
520  /* final cleanup of the plan */
521  Assert(glob->finalrtable == NIL);
522  Assert(glob->finalrteperminfos == NIL);
523  Assert(glob->finalrowmarks == NIL);
524  Assert(glob->resultRelations == NIL);
525  Assert(glob->appendRelations == NIL);
526  top_plan = set_plan_references(root, top_plan);
527  /* ... and the subplans (both regular subplans and initplans) */
528  Assert(list_length(glob->subplans) == list_length(glob->subroots));
529  forboth(lp, glob->subplans, lr, glob->subroots)
530  {
531  Plan *subplan = (Plan *) lfirst(lp);
532  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
533 
534  lfirst(lp) = set_plan_references(subroot, subplan);
535  }
536 
537  /* build the PlannedStmt result */
538  result = makeNode(PlannedStmt);
539 
540  result->commandType = parse->commandType;
541  result->queryId = parse->queryId;
542  result->hasReturning = (parse->returningList != NIL);
543  result->hasModifyingCTE = parse->hasModifyingCTE;
544  result->canSetTag = parse->canSetTag;
545  result->transientPlan = glob->transientPlan;
546  result->dependsOnRole = glob->dependsOnRole;
547  result->parallelModeNeeded = glob->parallelModeNeeded;
548  result->planTree = top_plan;
549  result->rtable = glob->finalrtable;
550  result->permInfos = glob->finalrteperminfos;
551  result->resultRelations = glob->resultRelations;
552  result->appendRelations = glob->appendRelations;
553  result->subplans = glob->subplans;
554  result->rewindPlanIDs = glob->rewindPlanIDs;
555  result->rowMarks = glob->finalrowmarks;
556  result->relationOids = glob->relationOids;
557  result->invalItems = glob->invalItems;
558  result->paramExecTypes = glob->paramExecTypes;
559  /* utilityStmt should be null, but we might as well copy it */
560  result->utilityStmt = parse->utilityStmt;
561  result->stmt_location = parse->stmt_location;
562  result->stmt_len = parse->stmt_len;
563 
564  result->jitFlags = PGJIT_NONE;
565  if (jit_enabled && jit_above_cost >= 0 &&
566  top_plan->total_cost > jit_above_cost)
567  {
568  result->jitFlags |= PGJIT_PERFORM;
569 
570  /*
571  * Decide how much effort should be put into generating better code.
572  */
573  if (jit_optimize_above_cost >= 0 &&
574  top_plan->total_cost > jit_optimize_above_cost)
575  result->jitFlags |= PGJIT_OPT3;
576  if (jit_inline_above_cost >= 0 &&
577  top_plan->total_cost > jit_inline_above_cost)
578  result->jitFlags |= PGJIT_INLINE;
579 
580  /*
581  * Decide which operations should be JITed.
582  */
583  if (jit_expressions)
584  result->jitFlags |= PGJIT_EXPR;
585  if (jit_tuple_deforming)
586  result->jitFlags |= PGJIT_DEFORM;
587  }
588 
589  if (glob->partition_directory != NULL)
590  DestroyPartitionDirectory(glob->partition_directory);
591 
592  return result;
593 }
594 
595 
596 /*--------------------
597  * subquery_planner
598  * Invokes the planner on a subquery. We recurse to here for each
599  * sub-SELECT found in the query tree.
600  *
601  * glob is the global state for the current planner run.
602  * parse is the querytree produced by the parser & rewriter.
603  * parent_root is the immediate parent Query's info (NULL at the top level).
604  * hasRecursion is true if this is a recursive WITH query.
605  * tuple_fraction is the fraction of tuples we expect will be retrieved.
606  * tuple_fraction is interpreted as explained for grouping_planner, below.
607  *
608  * Basically, this routine does the stuff that should only be done once
609  * per Query object. It then calls grouping_planner. At one time,
610  * grouping_planner could be invoked recursively on the same Query object;
611  * that's not currently true, but we keep the separation between the two
612  * routines anyway, in case we need it again someday.
613  *
614  * subquery_planner will be called recursively to handle sub-Query nodes
615  * found within the query's expressions and rangetable.
616  *
617  * Returns the PlannerInfo struct ("root") that contains all data generated
618  * while planning the subquery. In particular, the Path(s) attached to
619  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
620  * cheapest way(s) to implement the query. The top level will select the
621  * best Path and pass it through createplan.c to produce a finished Plan.
622  *--------------------
623  */
624 PlannerInfo *
625 subquery_planner(PlannerGlobal *glob, Query *parse,
626  PlannerInfo *parent_root,
627  bool hasRecursion, double tuple_fraction)
628 {
629  PlannerInfo *root;
630  List *newWithCheckOptions;
631  List *newHaving;
632  bool hasOuterJoins;
633  bool hasResultRTEs;
634  RelOptInfo *final_rel;
635  ListCell *l;
636 
637  /* Create a PlannerInfo data structure for this subquery */
638  root = makeNode(PlannerInfo);
639  root->parse = parse;
640  root->glob = glob;
641  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
642  root->parent_root = parent_root;
643  root->plan_params = NIL;
644  root->outer_params = NULL;
645  root->planner_cxt = CurrentMemoryContext;
646  root->init_plans = NIL;
647  root->cte_plan_ids = NIL;
648  root->multiexpr_params = NIL;
649  root->join_domains = NIL;
650  root->eq_classes = NIL;
651  root->ec_merging_done = false;
652  root->last_rinfo_serial = 0;
653  root->all_result_relids =
654  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
655  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
656  root->append_rel_list = NIL;
657  root->row_identity_vars = NIL;
658  root->rowMarks = NIL;
659  memset(root->upper_rels, 0, sizeof(root->upper_rels));
660  memset(root->upper_targets, 0, sizeof(root->upper_targets));
661  root->processed_groupClause = NIL;
662  root->processed_distinctClause = NIL;
663  root->processed_tlist = NIL;
664  root->update_colnos = NIL;
665  root->grouping_map = NULL;
666  root->minmax_aggs = NIL;
667  root->qual_security_level = 0;
668  root->hasPseudoConstantQuals = false;
669  root->hasAlternativeSubPlans = false;
670  root->placeholdersFrozen = false;
671  root->hasRecursion = hasRecursion;
672  if (hasRecursion)
673  root->wt_param_id = assign_special_exec_param(root);
674  else
675  root->wt_param_id = -1;
676  root->non_recursive_path = NULL;
677  root->partColsUpdated = false;
678 
679  /*
680  * Create the top-level join domain. This won't have valid contents until
681  * deconstruct_jointree fills it in, but the node needs to exist before
682  * that so we can build EquivalenceClasses referencing it.
683  */
684  root->join_domains = list_make1(makeNode(JoinDomain));
685 
686  /*
687  * If there is a WITH list, process each WITH query and either convert it
688  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
689  */
690  if (parse->cteList)
691  SS_process_ctes(root);
692 
693  /*
694  * If it's a MERGE command, transform the joinlist as appropriate.
695  */
696  transform_MERGE_to_join(parse);
697 
698  /*
699  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
700  * that we don't need so many special cases to deal with that situation.
701  */
702  replace_empty_jointree(parse);
703 
704  /*
705  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
706  * to transform them into joins. Note that this step does not descend
707  * into subqueries; if we pull up any subqueries below, their SubLinks are
708  * processed just before pulling them up.
709  */
710  if (parse->hasSubLinks)
711  pull_up_sublinks(root);
712 
713  /*
714  * Scan the rangetable for function RTEs, do const-simplification on them,
715  * and then inline them if possible (producing subqueries that might get
716  * pulled up next). Recursion issues here are handled in the same way as
717  * for SubLinks.
718  */
719  preprocess_function_rtes(root);
720 
721  /*
722  * Check to see if any subqueries in the jointree can be merged into this
723  * query.
724  */
725  pull_up_subqueries(root);
726 
727  /*
728  * If this is a simple UNION ALL query, flatten it into an appendrel. We
729  * do this now because it requires applying pull_up_subqueries to the leaf
730  * queries of the UNION ALL, which weren't touched above because they
731  * weren't referenced by the jointree (they will be after we do this).
732  */
733  if (parse->setOperations)
734  flatten_simple_union_all(root);
735 
736  /*
737  * Survey the rangetable to see what kinds of entries are present. We can
738  * skip some later processing if relevant SQL features are not used; for
739  * example if there are no JOIN RTEs we can avoid the expense of doing
740  * flatten_join_alias_vars(). This must be done after we have finished
741  * adding rangetable entries, of course. (Note: actually, processing of
742  * inherited or partitioned rels can cause RTEs for their child tables to
743  * get added later; but those must all be RTE_RELATION entries, so they
744  * don't invalidate the conclusions drawn here.)
745  */
746  root->hasJoinRTEs = false;
747  root->hasLateralRTEs = false;
748  hasOuterJoins = false;
749  hasResultRTEs = false;
750  foreach(l, parse->rtable)
751  {
752  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
753 
754  switch (rte->rtekind)
755  {
756  case RTE_RELATION:
757  if (rte->inh)
758  {
759  /*
760  * Check to see if the relation actually has any children;
761  * if not, clear the inh flag so we can treat it as a
762  * plain base relation.
763  *
764  * Note: this could give a false-positive result, if the
765  * rel once had children but no longer does. We used to
766  * be able to clear rte->inh later on when we discovered
767  * that, but no more; we have to handle such cases as
768  * full-fledged inheritance.
769  */
770  rte->inh = has_subclass(rte->relid);
771  }
772  break;
773  case RTE_JOIN:
774  root->hasJoinRTEs = true;
775  if (IS_OUTER_JOIN(rte->jointype))
776  hasOuterJoins = true;
777  break;
778  case RTE_RESULT:
779  hasResultRTEs = true;
780  break;
781  default:
782  /* No work here for other RTE types */
783  break;
784  }
785 
786  if (rte->lateral)
787  root->hasLateralRTEs = true;
788 
789  /*
790  * We can also determine the maximum security level required for any
791  * securityQuals now. Addition of inheritance-child RTEs won't affect
792  * this, because child tables don't have their own securityQuals; see
793  * expand_single_inheritance_child().
794  */
795  if (rte->securityQuals)
796  root->qual_security_level = Max(root->qual_security_level,
797  list_length(rte->securityQuals));
798  }
799 
800  /*
801  * If we have now verified that the query target relation is
802  * non-inheriting, mark it as a leaf target.
803  */
804  if (parse->resultRelation)
805  {
806  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
807 
808  if (!rte->inh)
809  root->leaf_result_relids =
810  bms_make_singleton(parse->resultRelation);
811  }
812 
813  /*
814  * Preprocess RowMark information. We need to do this after subquery
815  * pullup, so that all base relations are present.
816  */
817  preprocess_rowmarks(root);
818 
819  /*
820  * Set hasHavingQual to remember if HAVING clause is present. Needed
821  * because preprocess_expression will reduce a constant-true condition to
822  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
823  */
824  root->hasHavingQual = (parse->havingQual != NULL);
825 
826  /*
827  * Do expression preprocessing on targetlist and quals, as well as other
828  * random expressions in the querytree. Note that we do not need to
829  * handle sort/group expressions explicitly, because they are actually
830  * part of the targetlist.
831  */
832  parse->targetList = (List *)
833  preprocess_expression(root, (Node *) parse->targetList,
834  EXPRKIND_TARGET);
835 
836  /* Constant-folding might have removed all set-returning functions */
837  if (parse->hasTargetSRFs)
838  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
839 
840  newWithCheckOptions = NIL;
841  foreach(l, parse->withCheckOptions)
842  {
843  WithCheckOption *wco = lfirst_node(WithCheckOption, l);
844 
845  wco->qual = preprocess_expression(root, wco->qual,
846  EXPRKIND_QUAL);
847  if (wco->qual != NULL)
848  newWithCheckOptions = lappend(newWithCheckOptions, wco);
849  }
850  parse->withCheckOptions = newWithCheckOptions;
851 
852  parse->returningList = (List *)
853  preprocess_expression(root, (Node *) parse->returningList,
854  EXPRKIND_TARGET);
855 
856  preprocess_qual_conditions(root, (Node *) parse->jointree);
857 
858  parse->havingQual = preprocess_expression(root, parse->havingQual,
859  EXPRKIND_QUAL);
860 
861  foreach(l, parse->windowClause)
862  {
863  WindowClause *wc = lfirst_node(WindowClause, l);
864 
865  /* partitionClause/orderClause are sort/group expressions */
866  wc->startOffset = preprocess_expression(root, wc->startOffset,
867  EXPRKIND_LIMIT);
868  wc->endOffset = preprocess_expression(root, wc->endOffset,
869  EXPRKIND_LIMIT);
870  wc->runCondition = (List *) preprocess_expression(root,
871  (Node *) wc->runCondition,
872  EXPRKIND_TARGET);
873  }
874 
875  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
876  EXPRKIND_LIMIT);
877  parse->limitCount = preprocess_expression(root, parse->limitCount,
878  EXPRKIND_LIMIT);
879 
880  if (parse->onConflict)
881  {
882  parse->onConflict->arbiterElems = (List *)
883  preprocess_expression(root,
884  (Node *) parse->onConflict->arbiterElems,
885  EXPRKIND_ARBITER_ELEM);
886  parse->onConflict->arbiterWhere =
887  preprocess_expression(root,
888  parse->onConflict->arbiterWhere,
889  EXPRKIND_QUAL);
890  parse->onConflict->onConflictSet = (List *)
891  preprocess_expression(root,
892  (Node *) parse->onConflict->onConflictSet,
893  EXPRKIND_TARGET);
894  parse->onConflict->onConflictWhere =
895  preprocess_expression(root,
896  parse->onConflict->onConflictWhere,
897  EXPRKIND_QUAL);
898  /* exclRelTlist contains only Vars, so no preprocessing needed */
899  }
900 
901  foreach(l, parse->mergeActionList)
902  {
903  MergeAction *action = (MergeAction *) lfirst(l);
904 
905  action->targetList = (List *)
906  preprocess_expression(root,
907  (Node *) action->targetList,
908  EXPRKIND_TARGET);
909  action->qual =
910  preprocess_expression(root,
911  (Node *) action->qual,
912  EXPRKIND_QUAL);
913  }
914 
915  root->append_rel_list = (List *)
916  preprocess_expression(root, (Node *) root->append_rel_list,
917  EXPRKIND_APPINFO);
918 
919  /* Also need to preprocess expressions within RTEs */
920  foreach(l, parse->rtable)
921  {
923  int kind;
924  ListCell *lcsq;
925 
926  if (rte->rtekind == RTE_RELATION)
927  {
928  if (rte->tablesample)
929  rte->tablesample = (TableSampleClause *)
930  preprocess_expression(root,
931  (Node *) rte->tablesample,
932  EXPRKIND_TABLESAMPLE);
933  }
934  else if (rte->rtekind == RTE_SUBQUERY)
935  {
936  /*
937  * We don't want to do all preprocessing yet on the subquery's
938  * expressions, since that will happen when we plan it. But if it
939  * contains any join aliases of our level, those have to get
940  * expanded now, because planning of the subquery won't do it.
941  * That's only possible if the subquery is LATERAL.
942  */
943  if (rte->lateral && root->hasJoinRTEs)
944  rte->subquery = (Query *)
945  flatten_join_alias_vars(root, root->parse,
946  (Node *) rte->subquery);
947  }
948  else if (rte->rtekind == RTE_FUNCTION)
949  {
950  /* Preprocess the function expression(s) fully */
951  kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
952  rte->functions = (List *)
953  preprocess_expression(root, (Node *) rte->functions, kind);
954  }
955  else if (rte->rtekind == RTE_TABLEFUNC)
956  {
957  /* Preprocess the function expression(s) fully */
958  kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
959  rte->tablefunc = (TableFunc *)
960  preprocess_expression(root, (Node *) rte->tablefunc, kind);
961  }
962  else if (rte->rtekind == RTE_VALUES)
963  {
964  /* Preprocess the values lists fully */
965  kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
966  rte->values_lists = (List *)
967  preprocess_expression(root, (Node *) rte->values_lists, kind);
968  }
969 
970  /*
971  * Process each element of the securityQuals list as if it were a
972  * separate qual expression (as indeed it is). We need to do it this
973  * way to get proper canonicalization of AND/OR structure. Note that
974  * this converts each element into an implicit-AND sublist.
975  */
976  foreach(lcsq, rte->securityQuals)
977  {
978  lfirst(lcsq) = preprocess_expression(root,
979  (Node *) lfirst(lcsq),
980  EXPRKIND_QUAL);
981  }
982  }
983 
984  /*
985  * Now that we are done preprocessing expressions, and in particular done
986  * flattening join alias variables, get rid of the joinaliasvars lists.
987  * They no longer match what expressions in the rest of the tree look
988  * like, because we have not preprocessed expressions in those lists (and
989  * do not want to; for example, expanding a SubLink there would result in
990  * a useless unreferenced subplan). Leaving them in place simply creates
991  * a hazard for later scans of the tree. We could try to prevent that by
992  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
993  * but that doesn't sound very reliable.
994  */
995  if (root->hasJoinRTEs)
996  {
997  foreach(l, parse->rtable)
998  {
999  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1000 
1001  rte->joinaliasvars = NIL;
1002  }
1003  }
1004 
1005  /*
1006  * In some cases we may want to transfer a HAVING clause into WHERE. We
1007  * cannot do so if the HAVING clause contains aggregates (obviously) or
1008  * volatile functions (since a HAVING clause is supposed to be executed
1009  * only once per group). We also can't do this if there are any nonempty
1010  * grouping sets; moving such a clause into WHERE would potentially change
1011  * the results, if any referenced column isn't present in all the grouping
1012  * sets. (If there are only empty grouping sets, then the HAVING clause
1013  * must be degenerate as discussed below.)
1014  *
1015  * Also, it may be that the clause is so expensive to execute that we're
1016  * better off doing it only once per group, despite the loss of
1017  * selectivity. This is hard to estimate short of doing the entire
1018  * planning process twice, so we use a heuristic: clauses containing
1019  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1020  * clause into WHERE, in hopes of eliminating tuples before aggregation
1021  * instead of after.
1022  *
1023  * If the query has explicit grouping then we can simply move such a
1024  * clause into WHERE; any group that fails the clause will not be in the
1025  * output because none of its tuples will reach the grouping or
1026  * aggregation stage. Otherwise we must have a degenerate (variable-free)
1027  * HAVING clause, which we put in WHERE so that query_planner() can use it
1028  * in a gating Result node, but also keep in HAVING to ensure that we
1029  * don't emit a bogus aggregated row. (This could be done better, but it
1030  * seems not worth optimizing.)
1031  *
1032  * Note that both havingQual and parse->jointree->quals are in
1033  * implicitly-ANDed-list form at this point, even though they are declared
1034  * as Node *.
1035  */
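 /*
  * Illustrative example (not from the original source): in
  *		SELECT a, sum(b) FROM t GROUP BY a HAVING a > 0 AND sum(b) > 10;
  * the clause "a > 0" contains no aggregates, volatile functions, or
  * subplans, and there are no grouping sets, so it can be moved to WHERE
  * and applied before aggregation, while "sum(b) > 10" must stay in HAVING.
  */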
1036  newHaving = NIL;
1037  foreach(l, (List *) parse->havingQual)
1038  {
1039  Node *havingclause = (Node *) lfirst(l);
1040 
1041  if ((parse->groupClause && parse->groupingSets) ||
1042  contain_agg_clause(havingclause) ||
1043  contain_volatile_functions(havingclause) ||
1044  contain_subplans(havingclause))
1045  {
1046  /* keep it in HAVING */
1047  newHaving = lappend(newHaving, havingclause);
1048  }
1049  else if (parse->groupClause && !parse->groupingSets)
1050  {
1051  /* move it to WHERE */
1052  parse->jointree->quals = (Node *)
1053  lappend((List *) parse->jointree->quals, havingclause);
1054  }
1055  else
1056  {
1057  /* put a copy in WHERE, keep it in HAVING */
1058  parse->jointree->quals = (Node *)
1059  lappend((List *) parse->jointree->quals,
1060  copyObject(havingclause));
1061  newHaving = lappend(newHaving, havingclause);
1062  }
1063  }
1064  parse->havingQual = (Node *) newHaving;
1065 
1066  /*
1067  * If we have any outer joins, try to reduce them to plain inner joins.
1068  * This step is most easily done after we've done expression
1069  * preprocessing.
1070  */
1071  if (hasOuterJoins)
1072  reduce_outer_joins(root);
1073 
1074  /*
1075  * If we have any RTE_RESULT relations, see if they can be deleted from
1076  * the jointree. We also rely on this processing to flatten single-child
1077  * FromExprs underneath outer joins. This step is most effectively done
1078  * after we've done expression preprocessing and outer join reduction.
1079  */
1080  if (hasResultRTEs || hasOuterJoins)
1081  remove_useless_result_rtes(root);
1082 
1083  /*
1084  * Do the main planning.
1085  */
1086  grouping_planner(root, tuple_fraction);
1087 
1088  /*
1089  * Capture the set of outer-level param IDs we have access to, for use in
1090  * extParam/allParam calculations later.
1091  */
1092  SS_identify_outer_params(root);
1093 
1094  /*
1095  * If any initPlans were created in this query level, adjust the surviving
1096  * Paths' costs and parallel-safety flags to account for them. The
1097  * initPlans won't actually get attached to the plan tree till
1098  * create_plan() runs, but we must include their effects now.
1099  */
1100  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1101  SS_charge_for_initplans(root, final_rel);
1102 
1103  /*
1104  * Make sure we've identified the cheapest Path for the final rel. (By
1105  * doing this here not in grouping_planner, we include initPlan costs in
1106  * the decision, though it's unlikely that will change anything.)
1107  */
1108  set_cheapest(final_rel);
1109 
1110  return root;
1111 }
1112 
1113 /*
1114  * preprocess_expression
1115  * Do subquery_planner's preprocessing work for an expression,
1116  * which can be a targetlist, a WHERE clause (including JOIN/ON
1117  * conditions), a HAVING clause, or a few other things.
1118  */
1119 static Node *
1120 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1121 {
1122  /*
1123  * Fall out quickly if expression is empty. This occurs often enough to
1124  * be worth checking. Note that null->null is the correct conversion for
1125  * implicit-AND result format, too.
1126  */
1127  if (expr == NULL)
1128  return NULL;
1129 
1130  /*
1131  * If the query has any join RTEs, replace join alias variables with
1132  * base-relation variables. We must do this first, since any expressions
1133  * we may extract from the joinaliasvars lists have not been preprocessed.
1134  * For example, if we did this after sublink processing, sublinks expanded
1135  * out from join aliases would not get processed. But we can skip this in
1136  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1137  * they can't contain any Vars of the current query level.
1138  */
1139  if (root->hasJoinRTEs &&
1140  !(kind == EXPRKIND_RTFUNC ||
1141  kind == EXPRKIND_VALUES ||
1142  kind == EXPRKIND_TABLESAMPLE ||
1143  kind == EXPRKIND_TABLEFUNC))
1144  expr = flatten_join_alias_vars(root, root->parse, expr);
1145 
1146  /*
1147  * Simplify constant expressions. For function RTEs, this was already
1148  * done by preprocess_function_rtes. (But note we must do it again for
1149  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1150  * un-simplified subexpressions inserted by flattening of subqueries or
1151  * join alias variables.)
1152  *
1153  * Note: an essential effect of this is to convert named-argument function
1154  * calls to positional notation and insert the current actual values of
1155  * any default arguments for functions. To ensure that happens, we *must*
1156  * process all expressions here. Previous PG versions sometimes skipped
1157  * const-simplification if it didn't seem worth the trouble, but we can't
1158  * do that anymore.
1159  *
1160  * Note: this also flattens nested AND and OR expressions into N-argument
1161  * form. All processing of a qual expression after this point must be
1162  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1163  * with AND directly under AND, nor OR directly under OR.
1164  */
1165  if (kind != EXPRKIND_RTFUNC)
1166  expr = eval_const_expressions(root, expr);
1167 
1168  /*
1169  * If it's a qual or havingQual, canonicalize it.
1170  */
1171  if (kind == EXPRKIND_QUAL)
1172  {
1173  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1174 
1175 #ifdef OPTIMIZER_DEBUG
1176  printf("After canonicalize_qual()\n");
1177  pprint(expr);
1178 #endif
1179  }
1180 
1181  /*
1182  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1183  * hashfuncid of any that might execute more quickly by using hash lookups
1184  * instead of a linear search.
1185  */
1186  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1187  {
1188  convert_saop_to_hashed_saop(expr);
1189  }
1190 
1191  /* Expand SubLinks to SubPlans */
1192  if (root->parse->hasSubLinks)
1193  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1194 
1195  /*
1196  * XXX do not insert anything here unless you have grokked the comments in
1197  * SS_replace_correlation_vars ...
1198  */
1199 
1200  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1201  if (root->query_level > 1)
1202  expr = SS_replace_correlation_vars(root, expr);
1203 
1204  /*
1205  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1206  * don't want to do this before eval_const_expressions, since the latter
1207  * would be unable to simplify a top-level AND correctly. Also,
1208  * SS_process_sublinks expects explicit-AND format.)
1209  */
1210  if (kind == EXPRKIND_QUAL)
1211  expr = (Node *) make_ands_implicit((Expr *) expr);
1212 
1213  return expr;
1214 }
1215 
1216 /*
1217  * preprocess_qual_conditions
1218  * Recursively scan the query's jointree and do subquery_planner's
1219  * preprocessing work on each qual condition found therein.
1220  */
1221 static void
1222 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1223 {
1224  if (jtnode == NULL)
1225  return;
1226  if (IsA(jtnode, RangeTblRef))
1227  {
1228  /* nothing to do here */
1229  }
1230  else if (IsA(jtnode, FromExpr))
1231  {
1232  FromExpr *f = (FromExpr *) jtnode;
1233  ListCell *l;
1234 
1235  foreach(l, f->fromlist)
1236  preprocess_qual_conditions(root, lfirst(l));
1237 
1238  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1239  }
1240  else if (IsA(jtnode, JoinExpr))
1241  {
1242  JoinExpr *j = (JoinExpr *) jtnode;
1243 
1244  preprocess_qual_conditions(root, j->larg);
1245  preprocess_qual_conditions(root, j->rarg);
1246 
1247  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1248  }
1249  else
1250  elog(ERROR, "unrecognized node type: %d",
1251  (int) nodeTag(jtnode));
1252 }
1253 
1254 /*
1255  * preprocess_phv_expression
1256  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1257  *
1258  * If a LATERAL subquery references an output of another subquery, and that
1259  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1260  * join, then we'll push the PlaceHolderVar expression down into the subquery
1261  * and later pull it back up during find_lateral_references, which runs after
1262  * subquery_planner has preprocessed all the expressions that were in the
1263  * current query level to start with. So we need to preprocess it then.
1264  */
1265 Expr *
1266 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1267 {
1268  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1269 }
1270 
1271 /*--------------------
1272  * grouping_planner
1273  * Perform planning steps related to grouping, aggregation, etc.
1274  *
1275  * This function adds all required top-level processing to the scan/join
1276  * Path(s) produced by query_planner.
1277  *
1278  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1279  * tuple_fraction is interpreted as follows:
1280  * 0: expect all tuples to be retrieved (normal case)
1281  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1282  * from the plan to be retrieved
1283  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1284  * expected to be retrieved (ie, a LIMIT specification)
1285  *
1286  * Returns nothing; the useful output is in the Paths we attach to the
1287  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1288  * root->processed_tlist contains the final processed targetlist.
1289  *
1290  * Note that we have not done set_cheapest() on the final rel; it's convenient
1291  * to leave this to the caller.
1292  *--------------------
1293  */
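/*
 * Illustrative sketch (not part of planner.c): how the tuple_fraction
 * convention described above translates into an expected row count for a
 * path that could return path_rows tuples.  The helper name is hypothetical.
 */
static double
expected_rows_from_fraction(double tuple_fraction, double path_rows)
{
    if (tuple_fraction >= 1.0)
        return Min(tuple_fraction, path_rows);  /* absolute count, e.g. LIMIT */
    if (tuple_fraction > 0.0)
        return tuple_fraction * path_rows;      /* fraction of available tuples */
    return path_rows;                           /* 0 means fetch all tuples */
}
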
1294 static void
1295 grouping_planner(PlannerInfo *root, double tuple_fraction)
1296 {
1297  Query *parse = root->parse;
1298  int64 offset_est = 0;
1299  int64 count_est = 0;
1300  double limit_tuples = -1.0;
1301  bool have_postponed_srfs = false;
1302  PathTarget *final_target;
1303  List *final_targets;
1304  List *final_targets_contain_srfs;
1305  bool final_target_parallel_safe;
1306  RelOptInfo *current_rel;
1307  RelOptInfo *final_rel;
1308  FinalPathExtraData extra;
1309  ListCell *lc;
1310 
1311  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1312  if (parse->limitCount || parse->limitOffset)
1313  {
1314  tuple_fraction = preprocess_limit(root, tuple_fraction,
1315  &offset_est, &count_est);
1316 
1317  /*
1318  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1319  * estimate the effects of using a bounded sort.
1320  */
1321  if (count_est > 0 && offset_est >= 0)
1322  limit_tuples = (double) count_est + (double) offset_est;
1323  }
1324 
1325  /* Make tuple_fraction accessible to lower-level routines */
1326  root->tuple_fraction = tuple_fraction;
1327 
1328  if (parse->setOperations)
1329  {
1330  /*
1331  * If there's a top-level ORDER BY, assume we have to fetch all the
1332  * tuples. This might be too simplistic given all the hackery below
1333  * to possibly avoid the sort; but the odds of accurate estimates here
1334  * are pretty low anyway. XXX try to get rid of this in favor of
1335  * letting plan_set_operations generate both fast-start and
1336  * cheapest-total paths.
1337  */
1338  if (parse->sortClause)
1339  root->tuple_fraction = 0.0;
1340 
1341  /*
1342  * Construct Paths for set operations. The results will not need any
1343  * work except perhaps a top-level sort and/or LIMIT. Note that any
1344  * special work for recursive unions is the responsibility of
1345  * plan_set_operations.
1346  */
1347  current_rel = plan_set_operations(root);
1348 
1349  /*
1350  * We should not need to call preprocess_targetlist, since we must be
1351  * in a SELECT query node. Instead, use the processed_tlist returned
1352  * by plan_set_operations (since this tells whether it returned any
1353  * resjunk columns!), and transfer any sort key information from the
1354  * original tlist.
1355  */
1356  Assert(parse->commandType == CMD_SELECT);
1357 
1358  /* for safety, copy processed_tlist instead of modifying in-place */
1359  root->processed_tlist =
1360  postprocess_setop_tlist(copyObject(root->processed_tlist),
1361  parse->targetList);
1362 
1363  /* Also extract the PathTarget form of the setop result tlist */
1364  final_target = current_rel->cheapest_total_path->pathtarget;
1365 
1366  /* And check whether it's parallel safe */
1367  final_target_parallel_safe =
1368  is_parallel_safe(root, (Node *) final_target->exprs);
1369 
1370  /* The setop result tlist couldn't contain any SRFs */
1371  Assert(!parse->hasTargetSRFs);
1372  final_targets = final_targets_contain_srfs = NIL;
1373 
1374  /*
1375  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1376  * checked already, but let's make sure).
1377  */
1378  if (parse->rowMarks)
1379  ereport(ERROR,
1380  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1381  /*------
1382  translator: %s is a SQL row locking clause such as FOR UPDATE */
1383  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1384  LCS_asString(linitial_node(RowMarkClause,
1385  parse->rowMarks)->strength))));
1386 
1387  /*
1388  * Calculate pathkeys that represent result ordering requirements
1389  */
1390  Assert(parse->distinctClause == NIL);
1391  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1392  parse->sortClause,
1393  root->processed_tlist);
1394  }
1395  else
1396  {
1397  /* No set operations, do regular planning */
1398  PathTarget *sort_input_target;
1399  List *sort_input_targets;
1400  List *sort_input_targets_contain_srfs;
1401  bool sort_input_target_parallel_safe;
1402  PathTarget *grouping_target;
1403  List *grouping_targets;
1404  List *grouping_targets_contain_srfs;
1405  bool grouping_target_parallel_safe;
1406  PathTarget *scanjoin_target;
1407  List *scanjoin_targets;
1408  List *scanjoin_targets_contain_srfs;
1409  bool scanjoin_target_parallel_safe;
1410  bool scanjoin_target_same_exprs;
1411  bool have_grouping;
1412  WindowFuncLists *wflists = NULL;
1413  List *activeWindows = NIL;
1414  grouping_sets_data *gset_data = NULL;
1415  standard_qp_extra qp_extra;
1416 
1417  /* A recursive query should always have setOperations */
1418  Assert(!root->hasRecursion);
1419 
1420  /* Preprocess grouping sets and GROUP BY clause, if any */
1421  if (parse->groupingSets)
1422  {
1423  gset_data = preprocess_grouping_sets(root);
1424  }
1425  else if (parse->groupClause)
1426  {
1427  /* Preprocess regular GROUP BY clause, if any */
1428  root->processed_groupClause = list_copy(parse->groupClause);
1429  /* Remove any redundant GROUP BY columns */
1430  remove_useless_groupby_columns(root);
1431  }
1432 
1433  /*
1434  * Preprocess targetlist. Note that much of the remaining planning
1435  * work will be done with the PathTarget representation of tlists, but
1436  * we must also maintain the full representation of the final tlist so
1437  * that we can transfer its decoration (resnames etc) to the topmost
1438  * tlist of the finished Plan. This is kept in processed_tlist.
1439  */
1440  preprocess_targetlist(root);
1441 
1442  /*
1443  * Mark all the aggregates with resolved aggtranstypes, and detect
1444  * aggregates that are duplicates or can share transition state. We
1445  * must do this before slicing and dicing the tlist into various
1446  * pathtargets, else some copies of the Aggref nodes might escape
1447  * being marked.
1448  */
1449  if (parse->hasAggs)
1450  {
1451  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1452  preprocess_aggrefs(root, (Node *) parse->havingQual);
1453  }
1454 
1455  /*
1456  * Locate any window functions in the tlist. (We don't need to look
1457  * anywhere else, since expressions used in ORDER BY will be in there
1458  * too.) Note that they could all have been eliminated by constant
1459  * folding, in which case we don't need to do any more work.
1460  */
1461  if (parse->hasWindowFuncs)
1462  {
1463  wflists = find_window_functions((Node *) root->processed_tlist,
1464  list_length(parse->windowClause));
1465  if (wflists->numWindowFuncs > 0)
1466  {
1467  /*
1468  * See if any modifications can be made to each WindowClause
1469  * to allow the executor to execute the WindowFuncs more
1470  * quickly.
1471  */
1472  optimize_window_clauses(root, wflists);
1473 
1474  activeWindows = select_active_windows(root, wflists);
1475  }
1476  else
1477  parse->hasWindowFuncs = false;
1478  }
1479 
1480  /*
1481  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1482  * adding logic between here and the query_planner() call. Anything
1483  * that is needed in MIN/MAX-optimizable cases will have to be
1484  * duplicated in planagg.c.
1485  */
1486  if (parse->hasAggs)
1487  preprocess_minmax_aggregates(root);
1488 
1489  /*
1490  * Figure out whether there's a hard limit on the number of rows that
1491  * query_planner's result subplan needs to return. Even if we know a
1492  * hard limit overall, it doesn't apply if the query has any
1493  * grouping/aggregation operations, or SRFs in the tlist.
1494  */
1495  if (parse->groupClause ||
1496  parse->groupingSets ||
1497  parse->distinctClause ||
1498  parse->hasAggs ||
1499  parse->hasWindowFuncs ||
1500  parse->hasTargetSRFs ||
1501  root->hasHavingQual)
1502  root->limit_tuples = -1.0;
1503  else
1504  root->limit_tuples = limit_tuples;
1505 
1506  /* Set up data needed by standard_qp_callback */
1507  qp_extra.activeWindows = activeWindows;
1508  qp_extra.gset_data = gset_data;
1509 
1510  /*
1511  * Generate the best unsorted and presorted paths for the scan/join
1512  * portion of this Query, ie the processing represented by the
1513  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1514  * We also generate (in standard_qp_callback) pathkey representations
1515  * of the query's sort clause, distinct clause, etc.
1516  */
1517  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1518 
1519  /*
1520  * Convert the query's result tlist into PathTarget format.
1521  *
1522  * Note: this cannot be done before query_planner() has performed
1523  * appendrel expansion, because that might add resjunk entries to
1524  * root->processed_tlist. Waiting till afterwards is also helpful
1525  * because the target width estimates can use per-Var width numbers
1526  * that were obtained within query_planner().
1527  */
1528  final_target = create_pathtarget(root, root->processed_tlist);
1529  final_target_parallel_safe =
1530  is_parallel_safe(root, (Node *) final_target->exprs);
1531 
1532  /*
1533  * If ORDER BY was given, consider whether we should use a post-sort
1534  * projection, and compute the adjusted target for preceding steps if
1535  * so.
1536  */
1537  if (parse->sortClause)
1538  {
1539  sort_input_target = make_sort_input_target(root,
1540  final_target,
1541  &have_postponed_srfs);
1542  sort_input_target_parallel_safe =
1543  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1544  }
1545  else
1546  {
1547  sort_input_target = final_target;
1548  sort_input_target_parallel_safe = final_target_parallel_safe;
1549  }
1550 
1551  /*
1552  * If we have window functions to deal with, the output from any
1553  * grouping step needs to be what the window functions want;
1554  * otherwise, it should be sort_input_target.
1555  */
1556  if (activeWindows)
1557  {
1558  grouping_target = make_window_input_target(root,
1559  final_target,
1560  activeWindows);
1561  grouping_target_parallel_safe =
1562  is_parallel_safe(root, (Node *) grouping_target->exprs);
1563  }
1564  else
1565  {
1566  grouping_target = sort_input_target;
1567  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1568  }
1569 
1570  /*
1571  * If we have grouping or aggregation to do, the topmost scan/join
1572  * plan node must emit what the grouping step wants; otherwise, it
1573  * should emit grouping_target.
1574  */
1575  have_grouping = (parse->groupClause || parse->groupingSets ||
1576  parse->hasAggs || root->hasHavingQual);
1577  if (have_grouping)
1578  {
1579  scanjoin_target = make_group_input_target(root, final_target);
1580  scanjoin_target_parallel_safe =
1581  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1582  }
1583  else
1584  {
1585  scanjoin_target = grouping_target;
1586  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1587  }
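 /*
  * Summary illustration (not in the original file): when all of the above
  * features are present, the targets computed so far form a chain
  *		scanjoin_target -> grouping_target -> sort_input_target -> final_target
  * where each later target may add expressions (aggregate results, window
  * function results, sort columns) evaluated on top of the previous step.
  */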
1588 
1589  /*
1590  * If there are any SRFs in the targetlist, we must separate each of
1591  * these PathTargets into SRF-computing and SRF-free targets. Replace
1592  * each of the named targets with a SRF-free version, and remember the
1593  * list of additional projection steps we need to add afterwards.
1594  */
1595  if (parse->hasTargetSRFs)
1596  {
1597  /* final_target doesn't recompute any SRFs in sort_input_target */
1598  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1599  &final_targets,
1600  &final_targets_contain_srfs);
1601  final_target = linitial_node(PathTarget, final_targets);
1602  Assert(!linitial_int(final_targets_contain_srfs));
1603  /* likewise for sort_input_target vs. grouping_target */
1604  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1605  &sort_input_targets,
1606  &sort_input_targets_contain_srfs);
1607  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1608  Assert(!linitial_int(sort_input_targets_contain_srfs));
1609  /* likewise for grouping_target vs. scanjoin_target */
1610  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1611  &grouping_targets,
1612  &grouping_targets_contain_srfs);
1613  grouping_target = linitial_node(PathTarget, grouping_targets);
1614  Assert(!linitial_int(grouping_targets_contain_srfs));
1615  /* scanjoin_target will not have any SRFs precomputed for it */
1616  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1617  &scanjoin_targets,
1618  &scanjoin_targets_contain_srfs);
1619  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1620  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1621  }
1622  else
1623  {
1624  /* initialize lists; for most of these, dummy values are OK */
1625  final_targets = final_targets_contain_srfs = NIL;
1626  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1627  grouping_targets = grouping_targets_contain_srfs = NIL;
1628  scanjoin_targets = list_make1(scanjoin_target);
1629  scanjoin_targets_contain_srfs = NIL;
1630  }
1631 
1632  /* Apply scan/join target. */
1633  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1634  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1635  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1636  scanjoin_targets_contain_srfs,
1637  scanjoin_target_parallel_safe,
1638  scanjoin_target_same_exprs);
1639 
1640  /*
1641  * Save the various upper-rel PathTargets we just computed into
1642  * root->upper_targets[]. The core code doesn't use this, but it
1643  * provides a convenient place for extensions to get at the info. For
1644  * consistency, we save all the intermediate targets, even though some
1645  * of the corresponding upperrels might not be needed for this query.
1646  */
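 /*
  * (Illustrative sketch, not part of the original source; variable names
  * here are hypothetical.)  An extension hooking create_upper_paths_hook
  * can read these saved targets, for example:
  *
  *		if (stage == UPPERREL_GROUP_AGG)
  *			custom_target = root->upper_targets[UPPERREL_GROUP_AGG];
  *
  * so that any custom grouped paths it adds emit the proper output columns.
  */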
1647  root->upper_targets[UPPERREL_FINAL] = final_target;
1648  root->upper_targets[UPPERREL_ORDERED] = final_target;
1649  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1650  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1651  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1652  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1653 
1654  /*
1655  * If we have grouping and/or aggregation, consider ways to implement
1656  * that. We build a new upperrel representing the output of this
1657  * phase.
1658  */
1659  if (have_grouping)
1660  {
1661  current_rel = create_grouping_paths(root,
1662  current_rel,
1663  grouping_target,
1664  grouping_target_parallel_safe,
1665  gset_data);
1666  /* Fix things up if grouping_target contains SRFs */
1667  if (parse->hasTargetSRFs)
1668  adjust_paths_for_srfs(root, current_rel,
1669  grouping_targets,
1670  grouping_targets_contain_srfs);
1671  }
1672 
1673  /*
1674  * If we have window functions, consider ways to implement those. We
1675  * build a new upperrel representing the output of this phase.
1676  */
1677  if (activeWindows)
1678  {
1679  current_rel = create_window_paths(root,
1680  current_rel,
1681  grouping_target,
1682  sort_input_target,
1683  sort_input_target_parallel_safe,
1684  wflists,
1685  activeWindows);
1686  /* Fix things up if sort_input_target contains SRFs */
1687  if (parse->hasTargetSRFs)
1688  adjust_paths_for_srfs(root, current_rel,
1689  sort_input_targets,
1690  sort_input_targets_contain_srfs);
1691  }
1692 
1693  /*
1694  * If there is a DISTINCT clause, consider ways to implement that. We
1695  * build a new upperrel representing the output of this phase.
1696  */
1697  if (parse->distinctClause)
1698  {
1699  current_rel = create_distinct_paths(root,
1700  current_rel,
1701  sort_input_target);
1702  }
1703  } /* end of if (setOperations) */
1704 
1705  /*
1706  * If ORDER BY was given, consider ways to implement that, and generate a
1707  * new upperrel containing only paths that emit the correct ordering and
1708  * project the correct final_target. We can apply the original
1709  * limit_tuples limit in sort costing here, but only if there are no
1710  * postponed SRFs.
1711  */
1712  if (parse->sortClause)
1713  {
1714  current_rel = create_ordered_paths(root,
1715  current_rel,
1716  final_target,
1717  final_target_parallel_safe,
1718  have_postponed_srfs ? -1.0 :
1719  limit_tuples);
1720  /* Fix things up if final_target contains SRFs */
1721  if (parse->hasTargetSRFs)
1722  adjust_paths_for_srfs(root, current_rel,
1723  final_targets,
1724  final_targets_contain_srfs);
1725  }
1726 
1727  /*
1728  * Now we are prepared to build the final-output upperrel.
1729  */
1730  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1731 
1732  /*
1733  * If the input rel is marked consider_parallel and there's nothing that's
1734  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1735  * consider_parallel as well. Note that if the query has rowMarks or is
1736  * not a SELECT, consider_parallel will be false for every relation in the
1737  * query.
1738  */
1739  if (current_rel->consider_parallel &&
1740  is_parallel_safe(root, parse->limitOffset) &&
1741  is_parallel_safe(root, parse->limitCount))
1742  final_rel->consider_parallel = true;
1743 
1744  /*
1745  * If the current_rel belongs to a single FDW, so does the final_rel.
1746  */
1747  final_rel->serverid = current_rel->serverid;
1748  final_rel->userid = current_rel->userid;
1749  final_rel->useridiscurrent = current_rel->useridiscurrent;
1750  final_rel->fdwroutine = current_rel->fdwroutine;
1751 
1752  /*
1753  * Generate paths for the final_rel. Insert all surviving paths, with
1754  * LockRows, Limit, and/or ModifyTable steps added if needed.
1755  */
1756  foreach(lc, current_rel->pathlist)
1757  {
1758  Path *path = (Path *) lfirst(lc);
1759 
1760  /*
1761  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1762  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1763  * here. If there are only non-locking rowmarks, they should be
1764  * handled by the ModifyTable node instead. However, root->rowMarks
1765  * is what goes into the LockRows node.)
1766  */
1767  if (parse->rowMarks)
1768  {
1769  path = (Path *) create_lockrows_path(root, final_rel, path,
1770  root->rowMarks,
1771  assign_special_exec_param(root));
1772  }
1773 
1774  /*
1775  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1776  */
1777  if (limit_needed(parse))
1778  {
1779  path = (Path *) create_limit_path(root, final_rel, path,
1780  parse->limitOffset,
1781  parse->limitCount,
1782  parse->limitOption,
1783  offset_est, count_est);
1784  }
1785 
1786  /*
1787  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1788  */
1789  if (parse->commandType != CMD_SELECT)
1790  {
1791  Index rootRelation;
1792  List *resultRelations = NIL;
1793  List *updateColnosLists = NIL;
1794  List *withCheckOptionLists = NIL;
1795  List *returningLists = NIL;
1796  List *mergeActionLists = NIL;
1797  List *rowMarks;
1798 
1799  if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1800  {
1801  /* Inherited UPDATE/DELETE/MERGE */
1802  RelOptInfo *top_result_rel = find_base_rel(root,
1803  parse->resultRelation);
1804  int resultRelation = -1;
1805 
1806  /* Pass the root result rel forward to the executor. */
1807  rootRelation = parse->resultRelation;
1808 
1809  /* Add only leaf children to ModifyTable. */
1810  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1811  resultRelation)) >= 0)
1812  {
1813  RelOptInfo *this_result_rel = find_base_rel(root,
1814  resultRelation);
1815 
1816  /*
1817  * Also exclude any leaf rels that have turned dummy since
1818  * being added to the list, for example, by being excluded
1819  * by constraint exclusion.
1820  */
1821  if (IS_DUMMY_REL(this_result_rel))
1822  continue;
1823 
1824  /* Build per-target-rel lists needed by ModifyTable */
1825  resultRelations = lappend_int(resultRelations,
1826  resultRelation);
1827  if (parse->commandType == CMD_UPDATE)
1828  {
1829  List *update_colnos = root->update_colnos;
1830 
1831  if (this_result_rel != top_result_rel)
1832  update_colnos =
1833  adjust_inherited_attnums_multilevel(root,
1834  update_colnos,
1835  this_result_rel->relid,
1836  top_result_rel->relid);
1837  updateColnosLists = lappend(updateColnosLists,
1838  update_colnos);
1839  }
1840  if (parse->withCheckOptions)
1841  {
1842  List *withCheckOptions = parse->withCheckOptions;
1843 
1844  if (this_result_rel != top_result_rel)
1845  withCheckOptions = (List *)
1846  adjust_appendrel_attrs_multilevel(root,
1847  (Node *) withCheckOptions,
1848  this_result_rel,
1849  top_result_rel);
1850  withCheckOptionLists = lappend(withCheckOptionLists,
1851  withCheckOptions);
1852  }
1853  if (parse->returningList)
1854  {
1855  List *returningList = parse->returningList;
1856 
1857  if (this_result_rel != top_result_rel)
1858  returningList = (List *)
1859  adjust_appendrel_attrs_multilevel(root,
1860  (Node *) returningList,
1861  this_result_rel,
1862  top_result_rel);
1863  returningLists = lappend(returningLists,
1864  returningList);
1865  }
1866  if (parse->mergeActionList)
1867  {
1868  ListCell *l;
1869  List *mergeActionList = NIL;
1870 
1871  /*
1872  * Copy MergeActions and translate stuff that
1873  * references attribute numbers.
1874  */
1875  foreach(l, parse->mergeActionList)
1876  {
1877  MergeAction *action = lfirst(l),
1878  *leaf_action = copyObject(action);
1879 
1880  leaf_action->qual =
1881  adjust_appendrel_attrs_multilevel(root,
1882  (Node *) action->qual,
1883  this_result_rel,
1884  top_result_rel);
1885  leaf_action->targetList = (List *)
1886  adjust_appendrel_attrs_multilevel(root,
1887  (Node *) action->targetList,
1888  this_result_rel,
1889  top_result_rel);
1890  if (leaf_action->commandType == CMD_UPDATE)
1891  leaf_action->updateColnos =
1892  adjust_inherited_attnums_multilevel(root,
1893  action->updateColnos,
1894  this_result_rel->relid,
1895  top_result_rel->relid);
1896  mergeActionList = lappend(mergeActionList,
1897  leaf_action);
1898  }
1899 
1900  mergeActionLists = lappend(mergeActionLists,
1901  mergeActionList);
1902  }
1903  }
1904 
1905  if (resultRelations == NIL)
1906  {
1907  /*
1908  * We managed to exclude every child rel, so generate a
1909  * dummy one-relation plan using info for the top target
1910  * rel (even though that may not be a leaf target).
1911  * Although it's clear that no data will be updated or
1912  * deleted, we still need to have a ModifyTable node so
1913  * that any statement triggers will be executed. (This
1914  * could be cleaner if we fixed nodeModifyTable.c to allow
1915  * zero target relations, but that probably wouldn't be a
1916  * net win.)
1917  */
1918  resultRelations = list_make1_int(parse->resultRelation);
1919  if (parse->commandType == CMD_UPDATE)
1920  updateColnosLists = list_make1(root->update_colnos);
1921  if (parse->withCheckOptions)
1922  withCheckOptionLists = list_make1(parse->withCheckOptions);
1923  if (parse->returningList)
1924  returningLists = list_make1(parse->returningList);
1925  if (parse->mergeActionList)
1926  mergeActionLists = list_make1(parse->mergeActionList);
1927  }
1928  }
1929  else
1930  {
1931  /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
1932  rootRelation = 0; /* there's no separate root rel */
1933  resultRelations = list_make1_int(parse->resultRelation);
1934  if (parse->commandType == CMD_UPDATE)
1935  updateColnosLists = list_make1(root->update_colnos);
1936  if (parse->withCheckOptions)
1937  withCheckOptionLists = list_make1(parse->withCheckOptions);
1938  if (parse->returningList)
1939  returningLists = list_make1(parse->returningList);
1940  if (parse->mergeActionList)
1941  mergeActionLists = list_make1(parse->mergeActionList);
1942  }
1943 
1944  /*
1945  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1946  * will have dealt with fetching non-locked marked rows, else we
1947  * need to have ModifyTable do that.
1948  */
1949  if (parse->rowMarks)
1950  rowMarks = NIL;
1951  else
1952  rowMarks = root->rowMarks;
1953 
1954  path = (Path *)
1955  create_modifytable_path(root, final_rel,
1956  path,
1957  parse->commandType,
1958  parse->canSetTag,
1959  parse->resultRelation,
1960  rootRelation,
1961  root->partColsUpdated,
1962  resultRelations,
1963  updateColnosLists,
1964  withCheckOptionLists,
1965  returningLists,
1966  rowMarks,
1967  parse->onConflict,
1968  mergeActionLists,
1969  assign_special_exec_param(root));
1970  }
1971 
1972  /* And shove it into final_rel */
1973  add_path(final_rel, path);
1974  }
1975 
1976  /*
1977  * Generate partial paths for final_rel, too, if outer query levels might
1978  * be able to make use of them.
1979  */
1980  if (final_rel->consider_parallel && root->query_level > 1 &&
1981  !limit_needed(parse))
1982  {
1983  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
1984  foreach(lc, current_rel->partial_pathlist)
1985  {
1986  Path *partial_path = (Path *) lfirst(lc);
1987 
1988  add_partial_path(final_rel, partial_path);
1989  }
1990  }
1991 
1992  extra.limit_needed = limit_needed(parse);
1993  extra.limit_tuples = limit_tuples;
1994  extra.count_est = count_est;
1995  extra.offset_est = offset_est;
1996 
1997  /*
1998  * If there is an FDW that's responsible for all baserels of the query,
1999  * let it consider adding ForeignPaths.
2000  */
2001  if (final_rel->fdwroutine &&
2002  final_rel->fdwroutine->GetForeignUpperPaths)
2003  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2004  current_rel, final_rel,
2005  &extra);
2006 
2007  /* Let extensions possibly add some more paths */
2008  if (create_upper_paths_hook)
2009  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2010  current_rel, final_rel, &extra);
2011 
2012  /* Note: currently, we leave it to callers to do set_cheapest() */
2013 }
2014 
2015 /*
2016  * Do preprocessing for groupingSets clause and related data. This handles the
2017  * preliminary steps of expanding the grouping sets, organizing them into lists
2018  * of rollups, and preparing annotations which will later be filled in with
2019  * size estimates.
2020  */
2021 static grouping_sets_data *
2022 preprocess_grouping_sets(PlannerInfo *root)
2023 {
2024  Query *parse = root->parse;
2025  List *sets;
2026  int maxref = 0;
2027  ListCell *lc_set;
2028  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2029 
2030  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2031 
2032  gd->any_hashable = false;
2033  gd->unhashable_refs = NULL;
2034  gd->unsortable_refs = NULL;
2035  gd->unsortable_sets = NIL;
2036 
2037  /*
2038  * We don't currently make any attempt to optimize the groupClause when
2039  * there are grouping sets, so just duplicate it in processed_groupClause.
2040  */
2041  root->processed_groupClause = parse->groupClause;
2042 
2043  if (parse->groupClause)
2044  {
2045  ListCell *lc;
2046 
2047  foreach(lc, parse->groupClause)
2048  {
2049  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2050  Index ref = gc->tleSortGroupRef;
2051 
2052  if (ref > maxref)
2053  maxref = ref;
2054 
2055  if (!gc->hashable)
2056  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2057 
2058  if (!OidIsValid(gc->sortop))
2059  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2060  }
2061  }
2062 
2063  /* Allocate workspace array for remapping */
2064  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2065 
2066  /*
2067  * If we have any unsortable sets, we must extract them before trying to
2068  * prepare rollups. Unsortable sets don't go through
2069  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2070  * here.
2071  */
2072  if (!bms_is_empty(gd->unsortable_refs))
2073  {
2074  List *sortable_sets = NIL;
2075  ListCell *lc;
2076 
2077  foreach(lc, parse->groupingSets)
2078  {
2079  List *gset = (List *) lfirst(lc);
2080 
2081  if (bms_overlap_list(gd->unsortable_refs, gset))
2082  {
2083  GroupingSetData *gs = makeNode(GroupingSetData);
2084 
2085  gs->set = gset;
2086  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2087 
2088  /*
2089  * We must enforce here that an unsortable set is hashable;
2090  * later code assumes this. Parse analysis only checks that
2091  * every individual column is either hashable or sortable.
2092  *
2093  * Note that passing this test doesn't guarantee we can
2094  * generate a plan; there might be other showstoppers.
2095  */
2096  if (bms_overlap_list(gd->unhashable_refs, gset))
2097  ereport(ERROR,
2098  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2099  errmsg("could not implement GROUP BY"),
2100  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2101  }
2102  else
2103  sortable_sets = lappend(sortable_sets, gset);
2104  }
2105 
2106  if (sortable_sets)
2107  sets = extract_rollup_sets(sortable_sets);
2108  else
2109  sets = NIL;
2110  }
2111  else
2112  sets = extract_rollup_sets(parse->groupingSets);
2113 
2114  foreach(lc_set, sets)
2115  {
2116  List *current_sets = (List *) lfirst(lc_set);
2117  RollupData *rollup = makeNode(RollupData);
2118  GroupingSetData *gs;
2119 
2120  /*
2121  * Reorder the current list of grouping sets into correct prefix
2122  * order. If only one aggregation pass is needed, try to make the
2123  * list match the ORDER BY clause; if more than one pass is needed, we
2124  * don't bother with that.
2125  *
2126  * Note that this reorders the sets from smallest-member-first to
2127  * largest-member-first, and applies the GroupingSetData annotations,
2128  * though the data will be filled in later.
2129  */
2130  current_sets = reorder_grouping_sets(current_sets,
2131  (list_length(sets) == 1
2132  ? parse->sortClause
2133  : NIL));
2134 
2135  /*
2136  * Get the initial (and therefore largest) grouping set.
2137  */
2138  gs = linitial_node(GroupingSetData, current_sets);
2139 
2140  /*
2141  * Order the groupClause appropriately. If the first grouping set is
2142  * empty, then the groupClause must also be empty; otherwise we have
2143  * to force the groupClause to match that grouping set's order.
2144  *
2145  * (The first grouping set can be empty even though parse->groupClause
2146  * is not empty only if all non-empty grouping sets are unsortable.
2147  * The groupClauses for hashed grouping sets are built later on.)
2148  */
2149  if (gs->set)
2150  rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
2151  else
2152  rollup->groupClause = NIL;
2153 
2154  /*
2155  * Is it hashable? We pretend empty sets are hashable even though we
2156  * actually force them not to be hashed later. But don't bother if
2157  * there's nothing but empty sets (since in that case we can't hash
2158  * anything).
2159  */
2160  if (gs->set &&
2161  !bms_overlap_list(gd->unhashable_refs, gs->set))
2162  {
2163  rollup->hashable = true;
2164  gd->any_hashable = true;
2165  }
2166 
2167  /*
2168  * Now that we've pinned down an order for the groupClause for this
2169  * list of grouping sets, we need to remap the entries in the grouping
2170  * sets from sortgrouprefs to plain indices (0-based) into the
2171  * groupClause for this collection of grouping sets. We keep the
2172  * original form for later use, though.
2173  */
2174  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2175  current_sets,
2176  gd->tleref_to_colnum_map);
2177  rollup->gsets_data = current_sets;
2178 
2179  gd->rollups = lappend(gd->rollups, rollup);
2180  }
2181 
2182  if (gd->unsortable_sets)
2183  {
2184  /*
2185  * We have not yet pinned down a groupclause for this, but we will
2186  * need index-based lists for estimation purposes. Construct
2187  * hash_sets_idx based on the entire original groupclause for now.
2188  */
2189  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2190  gd->unsortable_sets,
2191  gd->tleref_to_colnum_map);
2192  gd->any_hashable = true;
2193  }
2194 
2195  return gd;
2196 }
2197 
2198 /*
2199  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2200  * (without annotation) mapped to indexes into the given groupclause.
2201  */
2202 static List *
2203 remap_to_groupclause_idx(List *groupClause,
2204  List *gsets,
2205  int *tleref_to_colnum_map)
2206 {
2207  int ref = 0;
2208  List *result = NIL;
2209  ListCell *lc;
2210 
2211  foreach(lc, groupClause)
2212  {
2213  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2214 
2215  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2216  }
2217 
2218  foreach(lc, gsets)
2219  {
2220  List *set = NIL;
2221  ListCell *lc2;
2222  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2223 
2224  foreach(lc2, gs->set)
2225  {
2226  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2227  }
2228 
2229  result = lappend(result, set);
2230  }
2231 
2232  return result;
2233 }
2234 
2235 
2236 /*
2237  * preprocess_rowmarks - set up PlanRowMarks if needed
2238  */
2239 static void
2240 preprocess_rowmarks(PlannerInfo *root)
2241 {
2242  Query *parse = root->parse;
2243  Bitmapset *rels;
2244  List *prowmarks;
2245  ListCell *l;
2246  int i;
2247 
2248  if (parse->rowMarks)
2249  {
2250  /*
2251  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2252  * grouping, since grouping renders a reference to individual tuple
2253  * CTIDs invalid. This is also checked at parse time, but that's
2254  * insufficient because of rule substitution, query pullup, etc.
2255  */
2256  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2257  parse->rowMarks)->strength);
2258  }
2259  else
2260  {
2261  /*
2262  * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2263  * UPDATE/SHARE.
2264  */
2265  if (parse->commandType != CMD_UPDATE &&
2266  parse->commandType != CMD_DELETE &&
2267  parse->commandType != CMD_MERGE)
2268  return;
2269  }
2270 
2271  /*
2272  * We need to have rowmarks for all base relations except the target. We
2273  * make a bitmapset of all base rels and then remove the items we don't
2274  * need or have FOR [KEY] UPDATE/SHARE marks for.
2275  */
2276  rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2277  if (parse->resultRelation)
2278  rels = bms_del_member(rels, parse->resultRelation);
2279 
2280  /*
2281  * Convert RowMarkClauses to PlanRowMark representation.
2282  */
2283  prowmarks = NIL;
2284  foreach(l, parse->rowMarks)
2285  {
2286  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2287  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2288  PlanRowMark *newrc;
2289 
2290  /*
2291  * Currently, it is syntactically impossible to have FOR UPDATE et al
2292  * applied to an update/delete target rel. If that ever becomes
2293  * possible, we should drop the target from the PlanRowMark list.
2294  */
2295  Assert(rc->rti != parse->resultRelation);
2296 
2297  /*
2298  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2299  * can't support true locking. Subqueries that got flattened into the
2300  * main query should be ignored completely. Any that didn't will get
2301  * ROW_MARK_COPY items in the next loop.
2302  */
2303  if (rte->rtekind != RTE_RELATION)
2304  continue;
2305 
2306  rels = bms_del_member(rels, rc->rti);
2307 
2308  newrc = makeNode(PlanRowMark);
2309  newrc->rti = newrc->prti = rc->rti;
2310  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2311  newrc->markType = select_rowmark_type(rte, rc->strength);
2312  newrc->allMarkTypes = (1 << newrc->markType);
2313  newrc->strength = rc->strength;
2314  newrc->waitPolicy = rc->waitPolicy;
2315  newrc->isParent = false;
2316 
2317  prowmarks = lappend(prowmarks, newrc);
2318  }
2319 
2320  /*
2321  * Now, add rowmarks for any non-target, non-locked base relations.
2322  */
2323  i = 0;
2324  foreach(l, parse->rtable)
2325  {
2326  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2327  PlanRowMark *newrc;
2328 
2329  i++;
2330  if (!bms_is_member(i, rels))
2331  continue;
2332 
2333  newrc = makeNode(PlanRowMark);
2334  newrc->rti = newrc->prti = i;
2335  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2336  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2337  newrc->allMarkTypes = (1 << newrc->markType);
2338  newrc->strength = LCS_NONE;
2339  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2340  newrc->isParent = false;
2341 
2342  prowmarks = lappend(prowmarks, newrc);
2343  }
2344 
2345  root->rowMarks = prowmarks;
2346 }
2347 
2348 /*
2349  * Select RowMarkType to use for a given table
2350  */
2351 RowMarkType
2352 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2353 {
2354  if (rte->rtekind != RTE_RELATION)
2355  {
2356  /* If it's not a table at all, use ROW_MARK_COPY */
2357  return ROW_MARK_COPY;
2358  }
2359  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2360  {
2361  /* Let the FDW select the rowmark type, if it wants to */
2362  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2363 
2364  if (fdwroutine->GetForeignRowMarkType != NULL)
2365  return fdwroutine->GetForeignRowMarkType(rte, strength);
2366  /* Otherwise, use ROW_MARK_COPY by default */
2367  return ROW_MARK_COPY;
2368  }
2369  else
2370  {
2371  /* Regular table, apply the appropriate lock type */
2372  switch (strength)
2373  {
2374  case LCS_NONE:
2375 
2376  /*
2377  * We don't need a tuple lock, only the ability to re-fetch
2378  * the row.
2379  */
2380  return ROW_MARK_REFERENCE;
2381  break;
2382  case LCS_FORKEYSHARE:
2383  return ROW_MARK_KEYSHARE;
2384  break;
2385  case LCS_FORSHARE:
2386  return ROW_MARK_SHARE;
2387  break;
2388  case LCS_FORNOKEYUPDATE:
2389  return ROW_MARK_NOKEYEXCLUSIVE;
2390  break;
2391  case LCS_FORUPDATE:
2392  return ROW_MARK_EXCLUSIVE;
2393  break;
2394  }
2395  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2396  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2397  }
2398 }
2399 
2400 /*
2401  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2402  *
2403  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2404  * results back in *count_est and *offset_est. These variables are set to
2405  * 0 if the corresponding clause is not present, and -1 if it's present
2406  * but we couldn't estimate the value for it. (The "0" convention is OK
2407  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2408  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2409  * usual practice of never estimating less than one row.) These values will
2410  * be passed to create_limit_path, which see if you change this code.
2411  *
2412  * The return value is the suitably adjusted tuple_fraction to use for
2413  * planning the query. This adjustment is not overridable, since it reflects
2414  * plan actions that grouping_planner() will certainly take, not assumptions
2415  * about context.
2416  */
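 /*
  * (Worked example for illustration; not part of the original source.)
  * For "LIMIT 10 OFFSET 5" with constant clauses, *count_est = 10 and
  * *offset_est = 5, so limit_fraction below is the absolute count 15.
  * For "LIMIT $1" the value typically cannot be estimated, *count_est is
  * set to -1, and 10% of the plan's result is assumed instead.
  */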
2417 static double
2418 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2419  int64 *offset_est, int64 *count_est)
2420 {
2421  Query *parse = root->parse;
2422  Node *est;
2423  double limit_fraction;
2424 
2425  /* Should not be called unless LIMIT or OFFSET */
2426  Assert(parse->limitCount || parse->limitOffset);
2427 
2428  /*
2429  * Try to obtain the clause values. We use estimate_expression_value
2430  * primarily because it can sometimes do something useful with Params.
2431  */
2432  if (parse->limitCount)
2433  {
2434  est = estimate_expression_value(root, parse->limitCount);
2435  if (est && IsA(est, Const))
2436  {
2437  if (((Const *) est)->constisnull)
2438  {
2439  /* NULL indicates LIMIT ALL, ie, no limit */
2440  *count_est = 0; /* treat as not present */
2441  }
2442  else
2443  {
2444  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2445  if (*count_est <= 0)
2446  *count_est = 1; /* force to at least 1 */
2447  }
2448  }
2449  else
2450  *count_est = -1; /* can't estimate */
2451  }
2452  else
2453  *count_est = 0; /* not present */
2454 
2455  if (parse->limitOffset)
2456  {
2457  est = estimate_expression_value(root, parse->limitOffset);
2458  if (est && IsA(est, Const))
2459  {
2460  if (((Const *) est)->constisnull)
2461  {
2462  /* Treat NULL as no offset; the executor will too */
2463  *offset_est = 0; /* treat as not present */
2464  }
2465  else
2466  {
2467  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2468  if (*offset_est < 0)
2469  *offset_est = 0; /* treat as not present */
2470  }
2471  }
2472  else
2473  *offset_est = -1; /* can't estimate */
2474  }
2475  else
2476  *offset_est = 0; /* not present */
2477 
2478  if (*count_est != 0)
2479  {
2480  /*
2481  * A LIMIT clause limits the absolute number of tuples returned.
2482  * However, if it's not a constant LIMIT then we have to guess; for
2483  * lack of a better idea, assume 10% of the plan's result is wanted.
2484  */
2485  if (*count_est < 0 || *offset_est < 0)
2486  {
2487  /* LIMIT or OFFSET is an expression ... punt ... */
2488  limit_fraction = 0.10;
2489  }
2490  else
2491  {
2492  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2493  limit_fraction = (double) *count_est + (double) *offset_est;
2494  }
2495 
2496  /*
2497  * If we have absolute limits from both caller and LIMIT, use the
2498  * smaller value; likewise if they are both fractional. If one is
2499  * fractional and the other absolute, we can't easily determine which
2500  * is smaller, but we use the heuristic that the absolute will usually
2501  * be smaller.
2502  */
2503  if (tuple_fraction >= 1.0)
2504  {
2505  if (limit_fraction >= 1.0)
2506  {
2507  /* both absolute */
2508  tuple_fraction = Min(tuple_fraction, limit_fraction);
2509  }
2510  else
2511  {
2512  /* caller absolute, limit fractional; use caller's value */
2513  }
2514  }
2515  else if (tuple_fraction > 0.0)
2516  {
2517  if (limit_fraction >= 1.0)
2518  {
2519  /* caller fractional, limit absolute; use limit */
2520  tuple_fraction = limit_fraction;
2521  }
2522  else
2523  {
2524  /* both fractional */
2525  tuple_fraction = Min(tuple_fraction, limit_fraction);
2526  }
2527  }
2528  else
2529  {
2530  /* no info from caller, just use limit */
2531  tuple_fraction = limit_fraction;
2532  }
2533  }
2534  else if (*offset_est != 0 && tuple_fraction > 0.0)
2535  {
2536  /*
2537  * We have an OFFSET but no LIMIT. This acts entirely differently
2538  * from the LIMIT case: here, we need to increase rather than decrease
2539  * the caller's tuple_fraction, because the OFFSET acts to cause more
2540  * tuples to be fetched instead of fewer. This only matters if we got
2541  * a tuple_fraction > 0, however.
2542  *
2543  * As above, use 10% if OFFSET is present but unestimatable.
2544  */
2545  if (*offset_est < 0)
2546  limit_fraction = 0.10;
2547  else
2548  limit_fraction = (double) *offset_est;
2549 
2550  /*
2551  * If we have absolute counts from both caller and OFFSET, add them
2552  * together; likewise if they are both fractional. If one is
2553  * fractional and the other absolute, we want to take the larger, and
2554  * we heuristically assume that's the fractional one.
2555  */
2556  if (tuple_fraction >= 1.0)
2557  {
2558  if (limit_fraction >= 1.0)
2559  {
2560  /* both absolute, so add them together */
2561  tuple_fraction += limit_fraction;
2562  }
2563  else
2564  {
2565  /* caller absolute, limit fractional; use limit */
2566  tuple_fraction = limit_fraction;
2567  }
2568  }
2569  else
2570  {
2571  if (limit_fraction >= 1.0)
2572  {
2573  /* caller fractional, limit absolute; use caller's value */
2574  }
2575  else
2576  {
2577  /* both fractional, so add them together */
2578  tuple_fraction += limit_fraction;
2579  if (tuple_fraction >= 1.0)
2580  tuple_fraction = 0.0; /* assume fetch all */
2581  }
2582  }
2583  }
2584 
2585  return tuple_fraction;
2586 }
2587 
2588 /*
2589  * limit_needed - do we actually need a Limit plan node?
2590  *
2591  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2592  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2593  * locution for an optimization fence. (Because other places in the planner
2594  * merely check whether parse->limitOffset isn't NULL, it will still work as
2595  * an optimization fence --- we're just suppressing unnecessary run-time
2596  * overhead.)
2597  *
2598  * This might look like it could be merged into preprocess_limit, but there's
2599  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2600  * in preprocess_limit it's good enough to consider estimated values.
2601  */
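 /*
  * (Illustrative examples, not part of the original source.)  "OFFSET 0"
  * and "LIMIT ALL" (a constant-null limit) both return false here, so no
  * Limit node is added, yet the non-NULL limitOffset of "OFFSET 0" still
  * acts as an optimization fence elsewhere.  "LIMIT 10", "OFFSET 1", or a
  * non-constant "LIMIT $1" all return true.
  */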
2602 bool
2603 limit_needed(Query *parse)
2604 {
2605  Node *node;
2606 
2607  node = parse->limitCount;
2608  if (node)
2609  {
2610  if (IsA(node, Const))
2611  {
2612  /* NULL indicates LIMIT ALL, ie, no limit */
2613  if (!((Const *) node)->constisnull)
2614  return true; /* LIMIT with a constant value */
2615  }
2616  else
2617  return true; /* non-constant LIMIT */
2618  }
2619 
2620  node = parse->limitOffset;
2621  if (node)
2622  {
2623  if (IsA(node, Const))
2624  {
2625  /* Treat NULL as no offset; the executor would too */
2626  if (!((Const *) node)->constisnull)
2627  {
2628  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2629 
2630  if (offset != 0)
2631  return true; /* OFFSET with a nonzero value */
2632  }
2633  }
2634  else
2635  return true; /* non-constant OFFSET */
2636  }
2637 
2638  return false; /* don't need a Limit plan node */
2639 }
2640 
2641 
2642 /*
2643  * remove_useless_groupby_columns
2644  * Remove any columns in the GROUP BY clause that are redundant due to
2645  * being functionally dependent on other GROUP BY columns.
2646  *
2647  * Since some other DBMSes do not allow references to ungrouped columns, it's
2648  * not unusual to find all columns listed in GROUP BY even though listing the
2649  * primary-key columns would be sufficient. Deleting such excess columns
2650  * avoids redundant sorting work, so it's worth doing.
2651  *
2652  * Relcache invalidations will ensure that cached plans become invalidated
2653  * when the underlying index of the pkey constraint is dropped.
2654  *
2655  * Currently, we only make use of pkey constraints for this, however, we may
2656  * wish to take this further in the future and also use unique constraints
2657  * which have NOT NULL columns. In that case, plan invalidation will still
2658  * work since relations will receive a relcache invalidation when a NOT NULL
2659  * constraint is dropped.
2660  */
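 /*
  * (Illustrative example, not part of the original source.)  Given
  * "CREATE TABLE t (id int PRIMARY KEY, a int, b int)", the query
  * "SELECT id, a, b FROM t GROUP BY id, a, b" can be simplified here to
  * group on id alone, since a and b are functionally dependent on the
  * primary key.
  */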
2661 static void
2662 remove_useless_groupby_columns(PlannerInfo *root)
2663 {
2664  Query *parse = root->parse;
2665  Bitmapset **groupbyattnos;
2666  Bitmapset **surplusvars;
2667  ListCell *lc;
2668  int relid;
2669 
2670  /* No chance to do anything if there are less than two GROUP BY items */
2671  if (list_length(root->processed_groupClause) < 2)
2672  return;
2673 
2674  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2675  if (parse->groupingSets)
2676  return;
2677 
2678  /*
2679  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2680  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2681  * that are GROUP BY items.
2682  */
2683  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2684  (list_length(parse->rtable) + 1));
2685  foreach(lc, root->processed_groupClause)
2686  {
2687  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2688  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2689  Var *var = (Var *) tle->expr;
2690 
2691  /*
2692  * Ignore non-Vars and Vars from other query levels.
2693  *
2694  * XXX in principle, stable expressions containing Vars could also be
2695  * removed, if all the Vars are functionally dependent on other GROUP
2696  * BY items. But it's not clear that such cases occur often enough to
2697  * be worth troubling over.
2698  */
2699  if (!IsA(var, Var) ||
2700  var->varlevelsup > 0)
2701  continue;
2702 
2703  /* OK, remember we have this Var */
2704  relid = var->varno;
2705  Assert(relid <= list_length(parse->rtable));
2706  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2707  var->varattno - FirstLowInvalidHeapAttributeNumber);
2708 
2709 
2710  /*
2711  * Consider each relation and see if it is possible to remove some of its
2712  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2713  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2714  * of the column attnos of RTE k that are removable GROUP BY items.
2715  */
2716  surplusvars = NULL; /* don't allocate array unless required */
2717  relid = 0;
2718  foreach(lc, parse->rtable)
2719  {
2720  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2721  Bitmapset *relattnos;
2722  Bitmapset *pkattnos;
2723  Oid constraintOid;
2724 
2725  relid++;
2726 
2727  /* Only plain relations could have primary-key constraints */
2728  if (rte->rtekind != RTE_RELATION)
2729  continue;
2730 
2731  /*
2732  * We must skip inheritance parent tables as some of the child rels
2733  * may cause duplicate rows. This cannot happen with partitioned
2734  * tables, however.
2735  */
2736  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2737  continue;
2738 
2739  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2740  relattnos = groupbyattnos[relid];
2741  if (bms_membership(relattnos) != BMS_MULTIPLE)
2742  continue;
2743 
2744  /*
2745  * Can't remove any columns for this rel if there is no suitable
2746  * (i.e., nondeferrable) primary key constraint.
2747  */
2748  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2749  if (pkattnos == NULL)
2750  continue;
2751 
2752  /*
2753  * If the primary key is a proper subset of relattnos then we have
2754  * some items in the GROUP BY that can be removed.
2755  */
2756  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2757  {
2758  /*
2759  * To easily remember whether we've found anything to do, we don't
2760  * allocate the surplusvars[] array until we find something.
2761  */
2762  if (surplusvars == NULL)
2763  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2764  (list_length(parse->rtable) + 1));
2765 
2766  /* Remember the attnos of the removable columns */
2767  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2768  }
2769  }
2770 
2771  /*
2772  * If we found any surplus Vars, build a new GROUP BY clause without them.
2773  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2774  * markings, but that's harmless.)
2775  */
2776  if (surplusvars != NULL)
2777  {
2778  List *new_groupby = NIL;
2779 
2780  foreach(lc, root->processed_groupClause)
2781  {
2782  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2783  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2784  Var *var = (Var *) tle->expr;
2785 
2786  /*
2787  * New list must include non-Vars, outer Vars, and anything not
2788  * marked as surplus.
2789  */
2790  if (!IsA(var, Var) ||
2791  var->varlevelsup > 0 ||
2792  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2793  surplusvars[var->varno]))
2794  new_groupby = lappend(new_groupby, sgc);
2795  }
2796 
2797  root->processed_groupClause = new_groupby;
2798  }
2799 }
2800 
2801 /*
2802  * groupclause_apply_groupingset
2803  * Apply the order of GROUP BY clauses defined by grouping sets. Items
2804  * not in the grouping set are skipped.
2805  */
2806 static List *
2807 groupclause_apply_groupingset(PlannerInfo *root, List *gset)
2808 {
2809  Query *parse = root->parse;
2810  List *new_groupclause = NIL;
2811  ListCell *sl;
2812 
2813  foreach(sl, gset)
2814  {
2815  Index ref = lfirst_int(sl);
2816  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2817 
2818  new_groupclause = lappend(new_groupclause, cl);
2819  }
2820  return new_groupclause;
2821 }
2822 
2823 /*
2824  * Extract lists of grouping sets that can be implemented using a single
2825  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2826  *
2827  * Input must be sorted with smallest sets first. Result has each sublist
2828  * sorted with smallest sets first.
2829  *
2830  * We want to produce the absolute minimum possible number of lists here to
2831  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2832  * of finding the minimal partition of a partially-ordered set into chains
2833  * (which is what we need, taking the list of grouping sets as a poset ordered
2834  * by set inclusion) can be mapped to the problem of finding the maximum
2835  * cardinality matching on a bipartite graph, which is solvable in polynomial
2836  * time with a worst case of no worse than O(n^2.5) and usually much
2837  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2838  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2839  * half a second on my modest system even with optimization off and assertions
2840  * on.)
2841  */
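 /*
  * (Small example for illustration; not part of the original source.)
  * GROUPING SETS ((a), (c), (a,b), (a,b,c)), ordered by set inclusion, has
  * the minimal chain cover {(a) < (a,b) < (a,b,c)} and {(c)}, so the sets
  * can be computed with two rollup passes rather than four separate sorts.
  */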
2842 static List *
2843 extract_rollup_sets(List *groupingSets)
2844 {
2845  int num_sets_raw = list_length(groupingSets);
2846  int num_empty = 0;
2847  int num_sets = 0; /* distinct sets */
2848  int num_chains = 0;
2849  List *result = NIL;
2850  List **results;
2851  List **orig_sets;
2852  Bitmapset **set_masks;
2853  int *chains;
2854  short **adjacency;
2855  short *adjacency_buf;
2856  BipartiteMatchState *state;
2857  int i;
2858  int j;
2859  int j_size;
2860  ListCell *lc1 = list_head(groupingSets);
2861  ListCell *lc;
2862 
2863  /*
2864  * Start by stripping out empty sets. The algorithm doesn't require this,
2865  * but the planner currently needs all empty sets to be returned in the
2866  * first list, so we strip them here and add them back after.
2867  */
2868  while (lc1 && lfirst(lc1) == NIL)
2869  {
2870  ++num_empty;
2871  lc1 = lnext(groupingSets, lc1);
2872  }
2873 
2874  /* bail out now if it turns out that all we had were empty sets. */
2875  if (!lc1)
2876  return list_make1(groupingSets);
2877 
2878  /*----------
2879  * We don't strictly need to remove duplicate sets here, but if we don't,
2880  * they tend to become scattered through the result, which is a bit
2881  * confusing (and irritating if we ever decide to optimize them out).
2882  * So we remove them here and add them back after.
2883  *
2884  * For each non-duplicate set, we fill in the following:
2885  *
2886  * orig_sets[i] = list of the original set lists
2887  * set_masks[i] = bitmapset for testing inclusion
2888  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2889  *
2890  * chains[i] will be the result group this set is assigned to.
2891  *
2892  * We index all of these from 1 rather than 0 because it is convenient
2893  * to leave 0 free for the NIL node in the graph algorithm.
2894  *----------
2895  */
2896  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2897  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2898  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2899  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2900 
2901  j_size = 0;
2902  j = 0;
2903  i = 1;
2904 
2905  for_each_cell(lc, groupingSets, lc1)
2906  {
2907  List *candidate = (List *) lfirst(lc);
2908  Bitmapset *candidate_set = NULL;
2909  ListCell *lc2;
2910  int dup_of = 0;
2911 
2912  foreach(lc2, candidate)
2913  {
2914  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2915  }
2916 
2917  /* we can only be a dup if we're the same length as a previous set */
2918  if (j_size == list_length(candidate))
2919  {
2920  int k;
2921 
2922  for (k = j; k < i; ++k)
2923  {
2924  if (bms_equal(set_masks[k], candidate_set))
2925  {
2926  dup_of = k;
2927  break;
2928  }
2929  }
2930  }
2931  else if (j_size < list_length(candidate))
2932  {
2933  j_size = list_length(candidate);
2934  j = i;
2935  }
2936 
2937  if (dup_of > 0)
2938  {
2939  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2940  bms_free(candidate_set);
2941  }
2942  else
2943  {
2944  int k;
2945  int n_adj = 0;
2946 
2947  orig_sets[i] = list_make1(candidate);
2948  set_masks[i] = candidate_set;
2949 
2950  /* fill in adjacency list; no need to compare equal-size sets */
2951 
2952  for (k = j - 1; k > 0; --k)
2953  {
2954  if (bms_is_subset(set_masks[k], candidate_set))
2955  adjacency_buf[++n_adj] = k;
2956  }
2957 
2958  if (n_adj > 0)
2959  {
2960  adjacency_buf[0] = n_adj;
2961  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2962  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2963  }
2964  else
2965  adjacency[i] = NULL;
2966 
2967  ++i;
2968  }
2969  }
2970 
2971  num_sets = i - 1;
2972 
2973  /*
2974  * Apply the graph matching algorithm to do the work.
2975  */
2976  state = BipartiteMatch(num_sets, num_sets, adjacency);
2977 
2978  /*
2979  * Now, the state->pair* fields have the info we need to assign sets to
2980  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
2981  * pair_vu[v] = u (both will be true, but we check both so that we can do
2982  * it in one pass)
2983  */
2984  chains = palloc0((num_sets + 1) * sizeof(int));
2985 
2986  for (i = 1; i <= num_sets; ++i)
2987  {
2988  int u = state->pair_vu[i];
2989  int v = state->pair_uv[i];
2990 
2991  if (u > 0 && u < i)
2992  chains[i] = chains[u];
2993  else if (v > 0 && v < i)
2994  chains[i] = chains[v];
2995  else
2996  chains[i] = ++num_chains;
2997  }
2998 
2999  /* build result lists. */
3000  results = palloc0((num_chains + 1) * sizeof(List *));
3001 
3002  for (i = 1; i <= num_sets; ++i)
3003  {
3004  int c = chains[i];
3005 
3006  Assert(c > 0);
3007 
3008  results[c] = list_concat(results[c], orig_sets[i]);
3009  }
3010 
3011  /* push any empty sets back on the first list. */
3012  while (num_empty-- > 0)
3013  results[1] = lcons(NIL, results[1]);
3014 
3015  /* make result list */
3016  for (i = 1; i <= num_chains; ++i)
3017  result = lappend(result, results[i]);
3018 
3019  /*
3020  * Free all the things.
3021  *
3022  * (This is over-fussy for small sets but for large sets we could have
3023  * tied up a nontrivial amount of memory.)
3024  */
3025  BipartiteMatchFree(state);
3026  pfree(results);
3027  pfree(chains);
3028  for (i = 1; i <= num_sets; ++i)
3029  if (adjacency[i])
3030  pfree(adjacency[i]);
3031  pfree(adjacency);
3032  pfree(adjacency_buf);
3033  pfree(orig_sets);
3034  for (i = 1; i <= num_sets; ++i)
3035  bms_free(set_masks[i]);
3036  pfree(set_masks);
3037 
3038  return result;
3039 }
3040 
3041 /*
3042  * Reorder the elements of a list of grouping sets such that they have correct
3043  * prefix relationships. Also inserts the GroupingSetData annotations.
3044  *
3045  * The input must be ordered with smallest sets first; the result is returned
3046  * with largest sets first. Note that the result shares no list substructure
3047  * with the input, so it's safe for the caller to modify it later.
3048  *
3049  * If we're passed in a sortclause, we follow its order of columns to the
3050  * extent possible, to minimize the chance that we add unnecessary sorts.
3051  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3052  * gets implemented in one pass.)
3053  */
3054 static List *
3055 reorder_grouping_sets(List *groupingSets, List *sortclause)
3056 {
3057  ListCell *lc;
3058  List *previous = NIL;
3059  List *result = NIL;
3060 
3061  foreach(lc, groupingSets)
3062  {
3063  List *candidate = (List *) lfirst(lc);
3064  List *new_elems = list_difference_int(candidate, previous);
3065  GroupingSetData *gs = makeNode(GroupingSetData);
3066 
3067  while (list_length(sortclause) > list_length(previous) &&
3068  new_elems != NIL)
3069  {
3070  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3071  int ref = sc->tleSortGroupRef;
3072 
3073  if (list_member_int(new_elems, ref))
3074  {
3075  previous = lappend_int(previous, ref);
3076  new_elems = list_delete_int(new_elems, ref);
3077  }
3078  else
3079  {
3080  /* diverged from the sortclause; give up on it */
3081  sortclause = NIL;
3082  break;
3083  }
3084  }
3085 
3086  previous = list_concat(previous, new_elems);
3087 
3088  gs->set = list_copy(previous);
3089  result = lcons(gs, result);
3090  }
3091 
3092  list_free(previous);
3093 
3094  return result;
3095 }
3096 
3097 /*
3098  * has_volatile_pathkey
3099  * Returns true if any PathKey in 'keys' has an EquivalenceClass
3100  * containing a volatile function. Otherwise returns false.
3101  */
3102 static bool
3103 has_volatile_pathkey(List *keys)
3104 {
3105  ListCell *lc;
3106 
3107  foreach(lc, keys)
3108  {
3109  PathKey *pathkey = lfirst_node(PathKey, lc);
3110 
3111  if (pathkey->pk_eclass->ec_has_volatile)
3112  return true;
3113  }
3114 
3115  return false;
3116 }
3117 
3118 /*
3119  * adjust_group_pathkeys_for_groupagg
3120  * Add pathkeys to root->group_pathkeys to reflect the best set of
3121  * pre-ordered input for ordered aggregates.
3122  *
3123  * We define "best" as the pathkeys that suit the largest number of
3124  * aggregate functions. We find these by looking at the first ORDER BY /
3125  * DISTINCT aggregate and take the pathkeys for that before searching for
3126  * other aggregates that require the same or a more strict variation of the
3127  * same pathkeys. We then repeat that process for any remaining aggregates
3128  * with different pathkeys and if we find another set of pathkeys that suits a
3129  * larger number of aggregates then we select those pathkeys instead.
3130  *
3131  * When the best pathkeys are found we also mark each Aggref that can use
3132  * those pathkeys as aggpresorted = true.
3133  *
3134  * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3135  * volatile functions, we never make use of these pathkeys. We want to ensure
3136  * that sorts using volatile functions are done independently in each Aggref
3137  * rather than once at the query level. If we were to allow this then Aggrefs
3138  * with compatible sort orders would all transition their rows in the same
3139  * order if those pathkeys were deemed to be the best pathkeys to sort on.
3140  * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3141  * better pathkeys to sort on, then the volatile function Aggrefs would be
3142  * left to perform their sorts individually. To avoid this inconsistent
3143  * behavior which could make Aggref results depend on what other Aggrefs the
3144  * query contains, we always force Aggrefs with volatile functions to perform
3145  * their own sorts.
3146  */
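 /*
  * (Illustrative example, not part of the original source.)  In
  * "SELECT string_agg(v, ',' ORDER BY x), string_agg(v, ',' ORDER BY x, y)
  *  FROM tab", the pathkeys (x, y) required by the second aggregate also
  * satisfy the first, so both Aggrefs are marked aggpresorted and can share
  * a single presorted input rather than sorting separately.
  */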
3147 static void
3148 adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3149 {
3150  List *grouppathkeys = root->group_pathkeys;
3151  List *bestpathkeys;
3152  Bitmapset *bestaggs;
3153  Bitmapset *unprocessed_aggs;
3154  ListCell *lc;
3155  int i;
3156 
3157  /* Shouldn't be here if there are grouping sets */
3158  Assert(root->parse->groupingSets == NIL);
3159  /* Shouldn't be here unless there are some ordered aggregates */
3160  Assert(root->numOrderedAggs > 0);
3161 
3162  /* Do nothing if disabled */
3163  if (!enable_presorted_aggregate)
3164  return;
3165 
3166  /*
3167  * Make a first pass over all AggInfos to collect a Bitmapset containing
3168  * the indexes of all AggInfos to be processed below.
3169  */
3170  unprocessed_aggs = NULL;
3171  foreach(lc, root->agginfos)
3172  {
3173  AggInfo *agginfo = lfirst_node(AggInfo, lc);
3174  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3175 
3176  if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3177  continue;
3178 
3179  /* only add aggregates with a DISTINCT or ORDER BY */
3180  if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3181  unprocessed_aggs = bms_add_member(unprocessed_aggs,
3182  foreach_current_index(lc));
3183  }
3184 
3185  /*
3186  * Now process all the unprocessed_aggs to find the best set of pathkeys
3187  * for the given set of aggregates.
3188  *
3189  * On the first outer loop here 'bestaggs' will be empty. We'll populate
3190  * this during the first loop using the pathkeys for the very first
3191  * AggInfo then taking any stronger pathkeys from any other AggInfos with
3192  * a more strict set of compatible pathkeys. Once the outer loop is
3193  * complete, we mark off all the aggregates with compatible pathkeys then
3194  * remove those from the unprocessed_aggs and repeat the process to try to
3195  * find another set of pathkeys that are suitable for a larger number of
3196  * aggregates. The outer loop will stop when there are not enough
3197  * unprocessed aggregates for it to be possible to find a set of pathkeys
3198  * to suit a larger number of aggregates.
3199  */
3200  bestpathkeys = NIL;
3201  bestaggs = NULL;
3202  while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3203  {
3204  Bitmapset *aggindexes = NULL;
3205  List *currpathkeys = NIL;
3206 
3207  i = -1;
3208  while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3209  {
3210  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3211  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3212  List *sortlist;
3213  List *pathkeys;
3214 
3215  if (aggref->aggdistinct != NIL)
3216  sortlist = aggref->aggdistinct;
3217  else
3218  sortlist = aggref->aggorder;
3219 
3220  pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3221  aggref->args);
3222 
3223  /*
3224  * Ignore Aggrefs which have volatile functions in their ORDER BY
3225  * or DISTINCT clause.
3226  */
3227  if (has_volatile_pathkey(pathkeys))
3228  {
3229  unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3230  continue;
3231  }
3232 
3233  /*
3234  * When not set yet, take the pathkeys from the first unprocessed
3235  * aggregate.
3236  */
3237  if (currpathkeys == NIL)
3238  {
3239  currpathkeys = pathkeys;
3240 
3241  /* include the GROUP BY pathkeys, if they exist */
3242  if (grouppathkeys != NIL)
3243  currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3244  currpathkeys);
3245 
3246  /* record that we found pathkeys for this aggregate */
3247  aggindexes = bms_add_member(aggindexes, i);
3248  }
3249  else
3250  {
3251  /* now look for a stronger set of matching pathkeys */
3252 
3253  /* include the GROUP BY pathkeys, if they exist */
3254  if (grouppathkeys != NIL)
3255  pathkeys = append_pathkeys(list_copy(grouppathkeys),
3256  pathkeys);
3257 
3258  /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3259  switch (compare_pathkeys(currpathkeys, pathkeys))
3260  {
3261  case PATHKEYS_BETTER2:
3262  /* 'pathkeys' are stronger, use these ones instead */
3263  currpathkeys = pathkeys;
3264  /* FALLTHROUGH */
3265 
3266  case PATHKEYS_BETTER1:
3267  /* 'pathkeys' are less strict */
3268  /* FALLTHROUGH */
3269 
3270  case PATHKEYS_EQUAL:
3271  /* mark this aggregate as covered by 'currpathkeys' */
3272  aggindexes = bms_add_member(aggindexes, i);
3273  break;
3274 
3275  case PATHKEYS_DIFFERENT:
3276  break;
3277  }
3278  }
3279  }
3280 
3281  /* remove the aggregates that we've just processed */
3282  unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3283 
3284  /*
3285  * If this pass included more aggregates than the previous best then
3286  * use these ones as the best set.
3287  */
3288  if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3289  {
3290  bestaggs = aggindexes;
3291  bestpathkeys = currpathkeys;
3292  }
3293  }
3294 
3295  /*
3296  * If we found any ordered aggregates, update root->group_pathkeys to add
3297  * the best set of aggregate pathkeys. Note that bestpathkeys includes
3298  * the original GROUP BY pathkeys already.
3299  */
3300  if (bestpathkeys != NIL)
3301  root->group_pathkeys = bestpathkeys;
3302 
3303  /*
3304  * Now that we've found the best set of aggregates we can set the
3305  * presorted flag to indicate to the executor that it needn't bother
3306  * performing a sort for these Aggrefs. We're able to do this now as
3307  * there's no chance of a Hash Aggregate plan as create_grouping_paths
3308  * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3309  * of ordered aggregates.
3310  */
3311  i = -1;
3312  while ((i = bms_next_member(bestaggs, i)) >= 0)
3313  {
3314  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3315 
3316  foreach(lc, agginfo->aggrefs)
3317  {
3318  Aggref *aggref = lfirst_node(Aggref, lc);
3319 
3320  aggref->aggpresorted = true;
3321  }
3322  }
3323 }
3324 
3325 /*
3326  * Compute query_pathkeys and other pathkeys during plan generation
3327  */
3328 static void
3329 standard_qp_callback(PlannerInfo *root, void *extra)
3330 {
3331  Query *parse = root->parse;
3332  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3333  List *tlist = root->processed_tlist;
3334  List *activeWindows = qp_extra->activeWindows;
3335 
3336  /*
3337  * Calculate pathkeys that represent grouping/ordering and/or ordered
3338  * aggregate requirements.
3339  */
3340  if (qp_extra->gset_data)
3341  {
3342  /*
3343  * With grouping sets, just use the first RollupData's groupClause. We
3344  * don't make any effort to optimize grouping clauses when there are
3345  * grouping sets, nor can we combine aggregate ordering keys with
3346  * grouping.
3347  */
3348  List *rollups = qp_extra->gset_data->rollups;
3349  List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3350 
3351  if (grouping_is_sortable(groupClause))
3352  {
3353  root->group_pathkeys = make_pathkeys_for_sortclauses(root,
3354  groupClause,
3355  tlist);
3356  root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3357  }
3358  else
3359  {
3360  root->group_pathkeys = NIL;
3361  root->num_groupby_pathkeys = 0;
3362  }
3363  }
3364  else if (parse->groupClause || root->numOrderedAggs > 0)
3365  {
3366  /*
3367  * With a plain GROUP BY list, we can remove any grouping items that
3368  * are proven redundant by EquivalenceClass processing. For example,
3369  * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3370  * especially common cases, but they're nearly free to detect. Note
3371  * that we remove redundant items from processed_groupClause but not
3372  * the original parse->groupClause.
3373  */
3374  bool sortable;
3375 
3376  root->group_pathkeys =
3377  make_pathkeys_for_sortclauses_extended(root,
3378  &root->processed_groupClause,
3379  tlist,
3380  true,
3381  &sortable);
3382  if (!sortable)
3383  {
3384  /* Can't sort; no point in considering aggregate ordering either */
3385  root->group_pathkeys = NIL;
3386  root->num_groupby_pathkeys = 0;
3387  }
3388  else
3389  {
3390  root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3391  /* If we have ordered aggs, consider adding onto group_pathkeys */
3392  if (root->numOrderedAggs > 0)
3393  adjust_group_pathkeys_for_groupagg(root);
3394  }
3395  }
3396  else
3397  {
3398  root->group_pathkeys = NIL;
3399  root->num_groupby_pathkeys = 0;
3400  }
3401 
3402  /* We consider only the first (bottom) window in pathkeys logic */
3403  if (activeWindows != NIL)
3404  {
3405  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3406 
3407  root->window_pathkeys = make_pathkeys_for_window(root,
3408  wc,
3409  tlist);
3410  }
3411  else
3412  root->window_pathkeys = NIL;
3413 
3414  /*
3415  * As with GROUP BY, we can discard any DISTINCT items that are proven
3416  * redundant by EquivalenceClass processing. The non-redundant list is
3417  * kept in root->processed_distinctClause, leaving the original
3418  * parse->distinctClause alone.
3419  */
3420  if (parse->distinctClause)
3421  {
3422  bool sortable;
3423 
3424  /* Make a copy since pathkey processing can modify the list */
3425  root->processed_distinctClause = list_copy(parse->distinctClause);
3426  root->distinct_pathkeys =
3427  make_pathkeys_for_sortclauses_extended(root,
3428  &root->processed_distinctClause,
3429  tlist,
3430  true,
3431  &sortable);
3432  if (!sortable)
3433  root->distinct_pathkeys = NIL;
3434  }
3435  else
3436  root->distinct_pathkeys = NIL;
3437 
3438  root->sort_pathkeys =
3439  make_pathkeys_for_sortclauses(root,
3440  parse->sortClause,
3441  tlist);
3442 
3443  /*
3444  * Figure out whether we want a sorted result from query_planner.
3445  *
3446  * If we have a sortable GROUP BY clause, then we want a result sorted
3447  * properly for grouping. Otherwise, if we have window functions to
3448  * evaluate, we try to sort for the first window. Otherwise, if there's a
3449  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3450  * we try to produce output that's sufficiently well sorted for the
3451  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3452  * by the ORDER BY clause.
3453  *
3454  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3455  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3456  * that might just leave us failing to exploit an available sort order at
3457  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3458  * much easier, since we know that the parser ensured that one is a
3459  * superset of the other.
3460  */
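 /*
  * Illustrative example (not from the original source): for
  *		SELECT DISTINCT a, b FROM tab ORDER BY a;
  * distinct_pathkeys covers (a, b) while sort_pathkeys covers only (a), so
  * the test below chooses the longer distinct_pathkeys as query_pathkeys;
  * a path sorted by (a, b) then satisfies both the DISTINCT and the ORDER BY.
  */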
3461  if (root->group_pathkeys)
3462  root->query_pathkeys = root->group_pathkeys;
3463  else if (root->window_pathkeys)
3464  root->query_pathkeys = root->window_pathkeys;
3465  else if (list_length(root->distinct_pathkeys) >
3466  list_length(root->sort_pathkeys))
3467  root->query_pathkeys = root->distinct_pathkeys;
3468  else if (root->sort_pathkeys)
3469  root->query_pathkeys = root->sort_pathkeys;
3470  else
3471  root->query_pathkeys = NIL;
3472 }
3473 
3474 /*
3475  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3476  *
3477  * path_rows: number of output rows from scan/join step
3478  * gd: grouping sets data including list of grouping sets and their clauses
3479  * target_list: target list containing group clause references
3480  *
3481  * If doing grouping sets, we also annotate the gsets data with the estimates
3482  * for each set and each individual rollup list, with a view to later
3483  * determining whether some combination of them could be hashed instead.
3484  */
3485 static double
3486 get_number_of_groups(PlannerInfo *root,
3487  double path_rows,
3488  grouping_sets_data *gd,
3489  List *target_list)
3490 {
3491  Query *parse = root->parse;
3492  double dNumGroups;
3493 
3494  if (parse->groupClause)
3495  {
3496  List *groupExprs;
3497 
3498  if (parse->groupingSets)
3499  {
3500  /* Add up the estimates for each grouping set */
3501  ListCell *lc;
3502 
3503  Assert(gd); /* keep Coverity happy */
3504 
3505  dNumGroups = 0;
3506 
3507  foreach(lc, gd->rollups)
3508  {
3509  RollupData *rollup = lfirst_node(RollupData, lc);
3510  ListCell *lc2;
3511  ListCell *lc3;
3512 
3513  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3514  target_list);
3515 
3516  rollup->numGroups = 0.0;
3517 
3518  forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3519  {
3520  List *gset = (List *) lfirst(lc2);
3521  GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3522  double numGroups = estimate_num_groups(root,
3523  groupExprs,
3524  path_rows,
3525  &gset,
3526  NULL);
3527 
3528  gs->numGroups = numGroups;
3529  rollup->numGroups += numGroups;
3530  }
3531 
3532  dNumGroups += rollup->numGroups;
3533  }
3534 
3535  if (gd->hash_sets_idx)
3536  {
3537  ListCell *lc2;
3538 
3539  gd->dNumHashGroups = 0;
3540 
3541  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3542  target_list);
3543 
3544  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3545  {
3546  List *gset = (List *) lfirst(lc);
3547  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3548  double numGroups = estimate_num_groups(root,
3549  groupExprs,
3550  path_rows,
3551  &gset,
3552  NULL);
3553 
3554  gs->numGroups = numGroups;
3555  gd->dNumHashGroups += numGroups;
3556  }
3557 
3558  dNumGroups += gd->dNumHashGroups;
3559  }
3560  }
3561  else
3562  {
3563  /* Plain GROUP BY -- estimate based on optimized groupClause */
3564  groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3565  target_list);
3566 
3567  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3568  NULL, NULL);
3569  }
3570  }
3571  else if (parse->groupingSets)
3572  {
3573  /* Empty grouping sets ... one result row for each one */
3574  dNumGroups = list_length(parse->groupingSets);
3575  }
3576  else if (parse->hasAggs || root->hasHavingQual)
3577  {
3578  /* Plain aggregation, one result row */
3579  dNumGroups = 1;
3580  }
3581  else
3582  {
3583  /* Not grouping */
3584  dNumGroups = 1;
3585  }
3586 
3587  return dNumGroups;
3588 }
3589 
3590 /*
3591  * create_grouping_paths
3592  *
3593  * Build a new upperrel containing Paths for grouping and/or aggregation.
3594  * Along the way, we also build an upperrel for Paths which are partially
3595  * grouped and/or aggregated. A partially grouped and/or aggregated path
3596  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3597  * the only partially grouped paths we build are also partial paths; that
3598  * is, they need a Gather and then a FinalizeAggregate.
3599  *
3600  * input_rel: contains the source-data Paths
3601  * target: the pathtarget for the result Paths to compute
3602  * gd: grouping sets data including list of grouping sets and their clauses
3603  *
3604  * Note: all Paths in input_rel are expected to return the target computed
3605  * by make_group_input_target.
3606  */
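 /*
  * Illustrative plan shape (not from the original source): a partially
  * grouped and/or aggregated path produced here typically surfaces as
  *		Finalize HashAggregate
  *		  -> Gather
  *			   -> Partial HashAggregate
  *					-> Parallel Seq Scan
  * where the Partial step lives in the partially grouped upperrel and the
  * Finalize step completes the aggregation above the Gather.
  */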
3607 static RelOptInfo *
3608 create_grouping_paths(PlannerInfo *root,
3609  RelOptInfo *input_rel,
3610  PathTarget *target,
3611  bool target_parallel_safe,
3612  grouping_sets_data *gd)
3613 {
3614  Query *parse = root->parse;
3615  RelOptInfo *grouped_rel;
3616  RelOptInfo *partially_grouped_rel;
3617  AggClauseCosts agg_costs;
3618 
3619  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3620  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3621 
3622  /*
3623  * Create grouping relation to hold fully aggregated grouping and/or
3624  * aggregation paths.
3625  */
3626  grouped_rel = make_grouping_rel(root, input_rel, target,
3627  target_parallel_safe, parse->havingQual);
3628 
3629  /*
3630  * Create either paths for a degenerate grouping or paths for ordinary
3631  * grouping, as appropriate.
3632  */
3633  if (is_degenerate_grouping(root))
3634  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3635  else
3636  {
3637  int flags = 0;
3638  GroupPathExtraData extra;
3639 
3640  /*
3641  * Determine whether it's possible to perform sort-based
3642  * implementations of grouping. (Note that if processed_groupClause
3643  * is empty, grouping_is_sortable() is trivially true, and all the
3644  * pathkeys_contained_in() tests will succeed too, so that we'll
3645  * consider every surviving input path.)
3646  *
3647  * If we have grouping sets, we might be able to sort some but not all
3648  * of them; in this case, we need can_sort to be true as long as we
3649  * must consider any sorted-input plan.
3650  */
3651  if ((gd && gd->rollups != NIL)
3652  || grouping_is_sortable(root->processed_groupClause))
3653  flags |= GROUPING_CAN_USE_SORT;
3654 
3655  /*
3656  * Determine whether we should consider hash-based implementations of
3657  * grouping.
3658  *
3659  * Hashed aggregation only applies if we're grouping. If we have
3660  * grouping sets, some groups might be hashable but others not; in
3661  * this case we set can_hash true as long as there is nothing globally
3662  * preventing us from hashing (and we should therefore consider plans
3663  * with hashes).
3664  *
3665  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3666  * BY aggregates. (Doing so would imply storing *all* the input
3667  * values in the hash table, and/or running many sorts in parallel,
3668  * either of which seems like a certain loser.) We similarly don't
3669  * support ordered-set aggregates in hashed aggregation, but that case
3670  * is also included in the numOrderedAggs count.
3671  *
3672  * Note: grouping_is_hashable() is much more expensive to check than
3673  * the other gating conditions, so we want to do it last.
3674  */
3675  if ((parse->groupClause != NIL &&
3676  root->numOrderedAggs == 0 &&
3677  (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3678  flags |= GROUPING_CAN_USE_HASH;
3679 
3680  /*
3681  * Determine whether partial aggregation is possible.
3682  */
3683  if (can_partial_agg(root))
3684  flags |= GROUPING_CAN_PARTIAL_AGG;
3685 
3686  extra.flags = flags;
3687  extra.target_parallel_safe = target_parallel_safe;
3688  extra.havingQual = parse->havingQual;
3689  extra.targetList = parse->targetList;
3690  extra.partial_costs_set = false;
3691 
3692  /*
3693  * Determine whether partitionwise aggregation is in theory possible.
3694  * It can be disabled by the user, and for now, we don't try to
3695  * support grouping sets. create_ordinary_grouping_paths() will check
3696  * additional conditions, such as whether input_rel is partitioned.
3697  */
3698  if (enable_partitionwise_aggregate && !parse->groupingSets)
3699  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3700  else
3701  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3702 
3703  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3704  &agg_costs, gd, &extra,
3705  &partially_grouped_rel);
3706  }
3707 
3708  set_cheapest(grouped_rel);
3709  return grouped_rel;
3710 }
3711 
3712 /*
3713  * make_grouping_rel
3714  *
3715  * Create a new grouping rel and set basic properties.
3716  *
3717  * input_rel represents the underlying scan/join relation.
3718  * target is the output expected from the grouping relation.
3719  */
3720 static RelOptInfo *
3721 make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3722  PathTarget *target, bool target_parallel_safe,
3723  Node *havingQual)
3724 {
3725  RelOptInfo *grouped_rel;
3726 
3727  if (IS_OTHER_REL(input_rel))
3728  {
3729  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3730  input_rel->relids);
3731  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3732  }
3733  else
3734  {
3735  /*
3736  * By tradition, the relids set for the main grouping relation is
3737  * NULL. (This could be changed, but might require adjustments
3738  * elsewhere.)
3739  */
3740  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3741  }
3742 
3743  /* Set target. */
3744  grouped_rel->reltarget = target;
3745 
3746  /*
3747  * If the input relation is not parallel-safe, then the grouped relation
3748  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3749  * target list and HAVING quals are parallel-safe.
3750  */
3751  if (input_rel->consider_parallel && target_parallel_safe &&
3752  is_parallel_safe(root, (Node *) havingQual))
3753  grouped_rel->consider_parallel = true;
3754 
3755  /*
3756  * If the input rel belongs to a single FDW, so does the grouped rel.
3757  */
3758  grouped_rel->serverid = input_rel->serverid;
3759  grouped_rel->userid = input_rel->userid;
3760  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3761  grouped_rel->fdwroutine = input_rel->fdwroutine;
3762 
3763  return grouped_rel;
3764 }
3765 
3766 /*
3767  * is_degenerate_grouping
3768  *
3769  * A degenerate grouping is one in which the query has a HAVING qual and/or
3770  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3771  * grouping sets are all empty).
3772  */
3773 static bool
3774 is_degenerate_grouping(PlannerInfo *root)
3775 {
3776  Query *parse = root->parse;
3777 
3778  return (root->hasHavingQual || parse->groupingSets) &&
3779  !parse->hasAggs && parse->groupClause == NIL;
3780 }
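 /*
  * Illustrative example (not from the original source): a query such as
  *		SELECT 1 HAVING random() < 0.5;
  * has a HAVING qual but no aggregates and no GROUP BY, so it is degenerate
  * by the test above and is handled by create_degenerate_grouping_paths
  * below, emitting either zero rows or one row depending on the HAVING.
  */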
3781 
3782 /*
3783  * create_degenerate_grouping_paths
3784  *
3785  * When the grouping is degenerate (see is_degenerate_grouping), we are
3786  * supposed to emit either zero or one row for each grouping set depending on
3787  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3788  * either HAVING or the targetlist, so we actually do not need the FROM table
3789  * at all! We can just throw away the plan-so-far and generate a Result node.
3790  * This is a sufficiently unusual corner case that it's not worth contorting
3791  * the structure of this module to avoid having to generate the earlier paths
3792  * in the first place.
3793  */
3794 static void
3795 create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3796  RelOptInfo *grouped_rel)
3797 {
3798  Query *parse = root->parse;
3799  int nrows;
3800  Path *path;
3801 
3802  nrows = list_length(parse->groupingSets);
3803  if (nrows > 1)
3804  {
3805  /*
3806  * Doesn't seem worthwhile writing code to cons up a generate_series
3807  * or a values scan to emit multiple rows. Instead just make N clones
3808  * and append them. (With a volatile HAVING clause, this means you
3809  * might get between 0 and N output rows. Offhand I think that's
3810  * desired.)
3811  */
3812  List *paths = NIL;
3813 
3814  while (--nrows >= 0)
3815  {
3816  path = (Path *)
3817  create_group_result_path(root, grouped_rel,
3818  grouped_rel->reltarget,
3819  (List *) parse->havingQual);
3820  paths = lappend(paths, path);
3821  }
3822  path = (Path *)
3823  create_append_path(root,
3824  grouped_rel,
3825  paths,
3826  NIL,
3827  NIL,
3828  NULL,
3829  0,
3830  false,
3831  -1);
3832  }
3833  else
3834  {
3835  /* No grouping sets, or just one, so one output row */
3836  path = (Path *)
3837  create_group_result_path(root, grouped_rel,
3838  grouped_rel->reltarget,
3839  (List *) parse->havingQual);
3840  }
3841 
3842  add_path(grouped_rel, path);
3843 }
3844 
3845 /*
3846  * create_ordinary_grouping_paths
3847  *
3848  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3849  *
3850  * We need to consider sorted and hashed aggregation in the same function,
3851  * because otherwise (1) it would be harder to throw an appropriate error
3852  * message if neither way works, and (2) we should not allow hashtable size
3853  * considerations to dissuade us from using hashing if sorting is not possible.
3854  *
3855  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3856  * function creates, or to NULL if it doesn't create one.
3857  */
3858 static void
3859 create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3860  RelOptInfo *grouped_rel,
3861  const AggClauseCosts *agg_costs,
3862  grouping_sets_data *gd,
3863  GroupPathExtraData *extra,
3864  RelOptInfo **partially_grouped_rel_p)
3865 {
3866  Path *cheapest_path = input_rel->cheapest_total_path;
3867  RelOptInfo *partially_grouped_rel = NULL;
3868  double dNumGroups;
3869  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3870 
3871  /*
3872  * If this is the topmost grouping relation or if the parent relation is
3873  * doing some form of partitionwise aggregation, then we may be able to do
3874  * it at this level also. However, if the input relation is not
3875  * partitioned, partitionwise aggregate is impossible.
3876  */
3877  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3878  IS_PARTITIONED_REL(input_rel))
3879  {
3880  /*
3881  * If this is the topmost relation or if the parent relation is doing
3882  * full partitionwise aggregation, then we can do full partitionwise
3883  * aggregation provided that the GROUP BY clause contains all of the
3884  * partitioning columns at this level. Otherwise, we can do at most
3885  * partial partitionwise aggregation. But if partial aggregation is
3886  * not supported in general then we can't use it for partitionwise
3887  * aggregation either.
3888  *
3889  * Check parse->groupClause not processed_groupClause, because it's
3890  * okay if some of the partitioning columns were proved redundant.
3891  */
3892  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3893  group_by_has_partkey(input_rel, extra->targetList,
3894  root->parse->groupClause))
3895  patype = PARTITIONWISE_AGGREGATE_FULL;
3896  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3897  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3898  else
3899  patype = PARTITIONWISE_AGGREGATE_NONE;
3900  }
3901 
3902  /*
3903  * Before generating paths for grouped_rel, we first generate any possible
3904  * partially grouped paths; that way, later code can easily consider both
3905  * parallel and non-parallel approaches to grouping.
3906  */
3907  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3908  {
3909  bool force_rel_creation;
3910 
3911  /*
3912  * If we're doing partitionwise aggregation at this level, force
3913  * creation of a partially_grouped_rel so we can add partitionwise
3914  * paths to it.
3915  */
3916  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3917 
3918  partially_grouped_rel =
3919  create_partial_grouping_paths(root,
3920  grouped_rel,
3921  input_rel,
3922  gd,
3923  extra,
3924  force_rel_creation);
3925  }
3926 
3927  /* Set out parameter. */
3928  *partially_grouped_rel_p = partially_grouped_rel;
3929 
3930  /* Apply partitionwise aggregation technique, if possible. */
3931  if (patype != PARTITIONWISE_AGGREGATE_NONE)
3932  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3933  partially_grouped_rel, agg_costs,
3934  gd, patype, extra);
3935 
3936  /* If we are doing partial aggregation only, return. */
3937  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3938  {
3939  Assert(partially_grouped_rel);
3940 
3941  if (partially_grouped_rel->pathlist)
3942  set_cheapest(partially_grouped_rel);
3943 
3944  return;
3945  }
3946 
3947  /* Gather any partially grouped partial paths. */
3948  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
3949  {
3950  gather_grouping_paths(root, partially_grouped_rel);
3951  set_cheapest(partially_grouped_rel);
3952  }
3953 
3954  /*
3955  * Estimate number of groups.
3956  */
3957  dNumGroups = get_number_of_groups(root,
3958  cheapest_path->rows,
3959  gd,
3960  extra->targetList);
3961 
3962  /* Build final grouping paths */
3963  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
3964  partially_grouped_rel, agg_costs, gd,
3965  dNumGroups, extra);
3966 
3967  /* Give a helpful error if we failed to find any implementation */
3968  if (grouped_rel->pathlist == NIL)
3969  ereport(ERROR,
3970  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3971  errmsg("could not implement GROUP BY"),
3972  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
3973 
3974  /*
3975  * If there is an FDW that's responsible for all baserels of the query,
3976  * let it consider adding ForeignPaths.
3977  */
3978  if (grouped_rel->fdwroutine &&
3979  grouped_rel->fdwroutine->GetForeignUpperPaths)
3980  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
3981  input_rel, grouped_rel,
3982  extra);
3983 
3984  /* Let extensions possibly add some more paths */
3985  if (create_upper_paths_hook)
3986  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
3987  input_rel, grouped_rel,
3988  extra);
3989 }
3990 
3991 /*
3992  * For a given input path, consider the possible ways of doing grouping sets on
3993  * it, by combinations of hashing and sorting. This can be called multiple
3994  * times, so it's important that it not scribble on input. No result is
3995  * returned, but any generated paths are added to grouped_rel.
3996  */
3997 static void
3998 consider_groupingsets_paths(PlannerInfo *root,
3999  RelOptInfo *grouped_rel,
4000  Path *path,
4001  bool is_sorted,
4002  bool can_hash,
4003  grouping_sets_data *gd,
4004  const AggClauseCosts *agg_costs,
4005  double dNumGroups)
4006 {
4007  Query *parse = root->parse;
4008  Size hash_mem_limit = get_hash_memory_limit();
4009 
4010  /*
4011  * If we're not being offered sorted input, then only consider plans that
4012  * can be done entirely by hashing.
4013  *
4014  * We can hash everything if it looks like it'll fit in hash_mem. But if
4015  * the input is actually sorted despite not being advertised as such, we
4016  * prefer to make use of that in order to use less memory.
4017  *
4018  * If none of the grouping sets are sortable, then ignore the hash_mem
4019  * limit and generate a path anyway, since otherwise we'll just fail.
4020  */
4021  if (!is_sorted)
4022  {
4023  List *new_rollups = NIL;
4024  RollupData *unhashed_rollup = NULL;
4025  List *sets_data;
4026  List *empty_sets_data = NIL;
4027  List *empty_sets = NIL;
4028  ListCell *lc;
4029  ListCell *l_start = list_head(gd->rollups);
4030  AggStrategy strat = AGG_HASHED;
4031  double hashsize;
4032  double exclude_groups = 0.0;
4033 
4034  Assert(can_hash);
4035 
4036  /*
4037  * If the input is coincidentally sorted usefully (which can happen
4038  * even if is_sorted is false, since that only means that our caller
4039  * has set up the sorting for us), then save some hashtable space by
4040  * making use of that. But we need to watch out for degenerate cases:
4041  *
4042  * 1) If there are any empty grouping sets, then group_pathkeys might
4043  * be NIL if all non-empty grouping sets are unsortable. In this case,
4044  * there will be a rollup containing only empty groups, and the
4045  * pathkeys_contained_in test is vacuously true; this is ok.
4046  *
4047  * XXX: the above relies on the fact that group_pathkeys is generated
4048  * from the first rollup. If we add the ability to consider multiple
4049  * sort orders for grouping input, this assumption might fail.
4050  *
4051  * 2) If there are no empty sets and only unsortable sets, then the
4052  * rollups list will be empty (and thus l_start == NULL), and
4053  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4054  * pathkeys_contained_in test doesn't cause us to crash.
4055  */
4056  if (l_start != NULL &&
4057  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4058  {
4059  unhashed_rollup = lfirst_node(RollupData, l_start);
4060  exclude_groups = unhashed_rollup->numGroups;
4061  l_start = lnext(gd->rollups, l_start);
4062  }
4063 
4064  hashsize = estimate_hashagg_tablesize(root,
4065  path,
4066  agg_costs,
4067  dNumGroups - exclude_groups);
4068 
4069  /*
4070  * gd->rollups is empty if we have only unsortable columns to work
4071  * with. Override hash_mem in that case; otherwise, we'll rely on the
4072  * sorted-input case to generate usable mixed paths.
4073  */
4074  if (hashsize > hash_mem_limit && gd->rollups)
4075  return; /* nope, won't fit */
4076 
4077  /*
4078  * We need to burst the existing rollups list into individual grouping
4079  * sets and recompute a groupClause for each set.
4080  */
4081  sets_data = list_copy(gd->unsortable_sets);
4082 
4083  for_each_cell(lc, gd->rollups, l_start)
4084  {
4085  RollupData *rollup = lfirst_node(RollupData, lc);
4086 
4087  /*
4088  * If we find an unhashable rollup that's not been skipped by the
4089  * "actually sorted" check above, we can't cope; we'd need sorted
4090  * input (with a different sort order) but we can't get that here.
4091  * So bail out; we'll get a valid path from the is_sorted case
4092  * instead.
4093  *
4094  * The mere presence of empty grouping sets doesn't make a rollup
4095  * unhashable (see preprocess_grouping_sets); we handle those
4096  * specially below.
4097  */
4098  if (!rollup->hashable)
4099  return;
4100 
4101  sets_data = list_concat(sets_data, rollup->gsets_data);
4102  }
4103  foreach(lc, sets_data)
4104  {
4105  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4106  List *gset = gs->set;
4107  RollupData *rollup;
4108 
4109  if (gset == NIL)
4110  {
4111  /* Empty grouping sets can't be hashed. */
4112  empty_sets_data = lappend(empty_sets_data, gs);
4113  empty_sets = lappend(empty_sets, NIL);
4114  }
4115  else
4116  {
4117  rollup = makeNode(RollupData);
4118 
4119  rollup->groupClause = groupclause_apply_groupingset(root, gset);
4120  rollup->gsets_data = list_make1(gs);
4121  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4122  rollup->gsets_data,
4123  gd->tleref_to_colnum_map);
4124  rollup->numGroups = gs->numGroups;
4125  rollup->hashable = true;
4126  rollup->is_hashed = true;
4127  new_rollups = lappend(new_rollups, rollup);
4128  }
4129  }
4130 
4131  /*
4132  * If we didn't find anything nonempty to hash, then bail. We'll
4133  * generate a path from the is_sorted case.
4134  */
4135  if (new_rollups == NIL)
4136  return;
4137 
4138  /*
4139  * If there were empty grouping sets they should have been in the
4140  * first rollup.
4141  */
4142  Assert(!unhashed_rollup || !empty_sets);
4143 
4144  if (unhashed_rollup)
4145  {
4146  new_rollups = lappend(new_rollups, unhashed_rollup);
4147  strat = AGG_MIXED;
4148  }
4149  else if (empty_sets)
4150  {
4151  RollupData *rollup = makeNode(RollupData);
4152 
4153  rollup->groupClause = NIL;
4154  rollup->gsets_data = empty_sets_data;
4155  rollup->gsets = empty_sets;
4156  rollup->numGroups = list_length(empty_sets);
4157  rollup->hashable = false;
4158  rollup->is_hashed = false;
4159  new_rollups = lappend(new_rollups, rollup);
4160  strat = AGG_MIXED;
4161  }
4162 
4163  add_path(grouped_rel, (Path *)
4164  create_groupingsets_path(root,
4165  grouped_rel,
4166  path,
4167  (List *) parse->havingQual,
4168  strat,
4169  new_rollups,
4170  agg_costs));
4171  return;
4172  }
4173 
4174  /*
4175  * If we have sorted input but nothing we can do with it, bail.
4176  */
4177  if (gd->rollups == NIL)
4178  return;
4179 
4180  /*
4181  * Given sorted input, we try and make two paths: one sorted and one mixed
4182  * sort/hash. (We need to try both because hashagg might be disabled, or
4183  * some columns might not be sortable.)
4184  *
4185  * can_hash is passed in as false if some obstacle elsewhere (such as
4186  * ordered aggs) means that we shouldn't consider hashing at all.
4187  */
4188  if (can_hash && gd->any_hashable)
4189  {
4190  List *rollups = NIL;
4191  List *hash_sets = list_copy(gd->unsortable_sets);
4192  double availspace = hash_mem_limit;
4193  ListCell *lc;
4194 
4195  /*
4196  * Account first for space needed for groups we can't sort at all.
4197  */
4198  availspace -= estimate_hashagg_tablesize(root,
4199  path,
4200  agg_costs,
4201  gd->dNumHashGroups);
4202 
4203  if (availspace > 0 && list_length(gd->rollups) > 1)
4204  {
4205  double scale;
4206  int num_rollups = list_length(gd->rollups);
4207  int k_capacity;
4208  int *k_weights = palloc(num_rollups * sizeof(int));
4209  Bitmapset *hash_items = NULL;
4210  int i;
4211 
4212  /*
4213  * We treat this as a knapsack problem: the knapsack capacity
4214  * represents hash_mem, the item weights are the estimated memory
4215  * usage of the hashtables needed to implement a single rollup,
4216  * and we really ought to use the cost saving as the item value;
4217  * however, currently the costs assigned to sort nodes don't
4218  * reflect the comparison costs well, and so we treat all items as
4219  * of equal value (each rollup we hash instead saves us one sort).
4220  *
4221  * To use the discrete knapsack, we need to scale the values to a
4222  * reasonably small bounded range. We choose to allow a 5% error
4223  * margin; we have no more than 4096 rollups in the worst possible
4224  * case, which with a 5% error margin will require a bit over 42MB
4225  * of workspace. (Anyone wanting to plan queries that complex had
4226  * better have the memory for it. In more reasonable cases, with
4227  * no more than a couple of dozen rollups, the memory usage will
4228  * be negligible.)
4229  *
4230  * k_capacity is naturally bounded, but we clamp the values for
4231  * scale and weight (below) to avoid overflows or underflows (or
4232  * uselessly trying to use a scale factor less than 1 byte).
4233  */
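 /*
  * Worked example (not from the original source; the numbers are purely
  * illustrative): with availspace = 4MB and num_rollups = 4, the scale
  * computed below is Max(4194304 / 80.0, 1.0) = 52428.8, giving
  * k_capacity = floor(4194304 / 52428.8) = 80, i.e. 20 knapsack units per
  * rollup; each hashable rollup's estimated hashtable size is divided by
  * the same scale, which yields the 5% granularity described above.
  */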
4234  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4235  k_capacity = (int) floor(availspace / scale);
4236 
4237  /*
4238  * We leave the first rollup out of consideration since it's the
4239  * one that matches the input sort order. We assign indexes "i"
4240  * to only those entries considered for hashing; the second loop,
4241  * below, must use the same condition.
4242  */
4243  i = 0;
4244  for_each_from(lc, gd->rollups, 1)
4245  {
4246  RollupData *rollup = lfirst_node(RollupData, lc);
4247 
4248  if (rollup->hashable)
4249  {
4250  double sz = estimate_hashagg_tablesize(root,
4251  path,
4252  agg_costs,
4253  rollup->numGroups);
4254 
4255  /*
4256  * If sz is enormous, but hash_mem (and hence scale) is
4257  * small, avoid integer overflow here.
4258  */
4259  k_weights[i] = (int) Min(floor(sz / scale),
4260  k_capacity + 1.0);
4261  ++i;
4262  }
4263  }
4264 
4265  /*
4266  * Apply knapsack algorithm; compute the set of items which
4267  * maximizes the value stored (in this case the number of sorts
4268  * saved) while keeping the total size (approximately) within
4269  * capacity.
4270  */
4271  if (i > 0)
4272  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4273 
4274  if (!bms_is_empty(hash_items))
4275  {
4276  rollups = list_make1(linitial(gd->rollups));
4277 
4278  i = 0;
4279  for_each_from(lc, gd->rollups, 1)
4280  {
4281  RollupData *rollup = lfirst_node(RollupData, lc);
4282 
4283  if (rollup->hashable)
4284  {
4285  if (bms_is_member(i, hash_items))
4286  hash_sets = list_concat(hash_sets,
4287  rollup->gsets_data);
4288  else
4289  rollups = lappend(rollups, rollup);
4290  ++i;
4291  }
4292  else
4293  rollups = lappend(rollups, rollup);
4294  }
4295  }
4296  }
4297 
4298  if (!rollups && hash_sets)
4299  rollups = list_copy(gd->rollups);
4300 
4301  foreach(lc, hash_sets)
4302  {
4303  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4304  RollupData *rollup = makeNode(RollupData);
4305 
4306  Assert(gs->set != NIL);
4307 
4308  rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
4309  rollup->gsets_data = list_make1(gs);
4310  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4311  rollup->gsets_data,
4312  gd->tleref_to_colnum_map);
4313  rollup->numGroups = gs->numGroups;
4314  rollup->hashable = true;
4315  rollup->is_hashed = true;
4316  rollups = lcons(rollup, rollups);
4317  }
4318 
4319  if (rollups)
4320  {
4321  add_path(grouped_rel, (Path *)
4322  create_groupingsets_path(root,
4323  grouped_rel,
4324  path,
4325  (List *) parse->havingQual,
4326  AGG_MIXED,
4327  rollups,
4328  agg_costs));
4329  }
4330  }
4331 
4332  /*
4333  * Now try the simple sorted case.
4334  */
4335  if (!gd->unsortable_sets)
4336  add_path(grouped_rel, (Path *)
4337  create_groupingsets_path(root,
4338  grouped_rel,
4339  path,
4340  (List *) parse->havingQual,
4341  AGG_SORTED,
4342  gd->rollups,
4343  agg_costs));
4344 }
4345 
4346 /*
4347  * create_window_paths
4348  *
4349  * Build a new upperrel containing Paths for window-function evaluation.
4350  *
4351  * input_rel: contains the source-data Paths
4352  * input_target: result of make_window_input_target
4353  * output_target: what the topmost WindowAggPath should return
4354  * wflists: result of find_window_functions
4355  * activeWindows: result of select_active_windows
4356  *
4357  * Note: all Paths in input_rel are expected to return input_target.
4358  */
4359 static RelOptInfo *
4360 create_window_paths(PlannerInfo *root,
4361  RelOptInfo *input_rel,
4362  PathTarget *input_target,
4363  PathTarget *output_target,
4364  bool output_target_parallel_safe,
4365  WindowFuncLists *wflists,
4366  List *activeWindows)
4367 {
4368  RelOptInfo *window_rel;
4369  ListCell *lc;
4370 
4371  /* For now, do all work in the (WINDOW, NULL) upperrel */
4372  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4373 
4374  /*
4375  * If the input relation is not parallel-safe, then the window relation
4376  * can't be parallel-safe, either. Otherwise, we need to examine the
4377  * target list and active windows for non-parallel-safe constructs.
4378  */
4379  if (input_rel->consider_parallel && output_target_parallel_safe &&
4380  is_parallel_safe(root, (Node *) activeWindows))
4381  window_rel->consider_parallel = true;
4382 
4383  /*
4384  * If the input rel belongs to a single FDW, so does the window rel.
4385  */
4386  window_rel->serverid = input_rel->serverid;
4387  window_rel->userid = input_rel->userid;
4388  window_rel->useridiscurrent = input_rel->useridiscurrent;
4389  window_rel->fdwroutine = input_rel->fdwroutine;
4390 
4391  /*
4392  * Consider computing window functions starting from the existing
4393  * cheapest-total path (which will likely require a sort) as well as any
4394  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4395  */
4396  foreach(lc, input_rel->pathlist)
4397  {
4398  Path *path = (Path *) lfirst(lc);
4399  int presorted_keys;
4400 
4401  if (path == input_rel->cheapest_total_path ||
4402  pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4403  &presorted_keys) ||
4404  presorted_keys > 0)
4405  create_one_window_path(root,
4406  window_rel,
4407  path,
4408  input_target,
4409  output_target,
4410  wflists,
4411  activeWindows);
4412  }
4413 
4414  /*
4415  * If there is an FDW that's responsible for all baserels of the query,
4416  * let it consider adding ForeignPaths.
4417  */
4418  if (window_rel->fdwroutine &&
4419  window_rel->fdwroutine->GetForeignUpperPaths)
4420  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4421  input_rel, window_rel,
4422  NULL);
4423 
4424  /* Let extensions possibly add some more paths */
4425  if (create_upper_paths_hook)
4426  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4427  input_rel, window_rel, NULL);
4428 
4429  /* Now choose the best path(s) */
4430  set_cheapest(window_rel);
4431 
4432  return window_rel;
4433 }
4434 
4435 /*
4436  * Stack window-function implementation steps atop the given Path, and
4437  * add the result to window_rel.
4438  *
4439  * window_rel: upperrel to contain result
4440  * path: input Path to use (must return input_target)
4441  * input_target: result of make_window_input_target
4442  * output_target: what the topmost WindowAggPath should return
4443  * wflists: result of find_window_functions
4444  * activeWindows: result of select_active_windows
4445  */
4446 static void
4447 create_one_window_path(PlannerInfo *root,
4448  RelOptInfo *window_rel,
4449  Path *path,
4450  PathTarget *input_target,
4451  PathTarget *output_target,
4452  WindowFuncLists *wflists,
4453  List *activeWindows)
4454 {
4455  PathTarget *window_target;
4456  ListCell *l;
4457  List *topqual = NIL;
4458 
4459  /*
4460  * Since each window clause could require a different sort order, we stack
4461  * up a WindowAgg node for each clause, with sort steps between them as
4462  * needed. (We assume that select_active_windows chose a good order for
4463  * executing the clauses in.)
4464  *
4465  * input_target should contain all Vars and Aggs needed for the result.
4466  * (In some cases we wouldn't need to propagate all of these all the way
4467  * to the top, since they might only be needed as inputs to WindowFuncs.
4468  * It's probably not worth trying to optimize that though.) It must also
4469  * contain all window partitioning and sorting expressions, to ensure
4470  * they're computed only once at the bottom of the stack (that's critical
4471  * for volatile functions). As we climb up the stack, we'll add outputs
4472  * for the WindowFuncs computed at each level.
4473  */
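 /*
  * Illustrative plan shape (not from the original source): with two active
  * windows such as
  *		SUM(x) OVER (PARTITION BY a) and SUM(x) OVER (PARTITION BY b)
  * the loop below stacks
  *		WindowAgg (b) -> Sort (b) -> WindowAgg (a) -> Sort (a) -> scan
  * re-sorting between the WindowAgg levels because their orderings differ.
  */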
4474  window_target = input_target;
4475 
4476  foreach(l, activeWindows)
4477  {
4478  WindowClause *wc = lfirst_node(WindowClause, l);
4479  List *window_pathkeys;
4480  int presorted_keys;
4481  bool is_sorted;
4482  bool topwindow;
4483 
4484  window_pathkeys = make_pathkeys_for_window(root,
4485  wc,
4486  root->processed_tlist);
4487 
4488  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4489  path->pathkeys,
4490  &presorted_keys);
4491 
4492  /* Sort if necessary */
4493  if (!is_sorted)
4494  {
4495  /*
4496  * No presorted keys or incremental sort disabled, just perform a
4497  * complete sort.
4498  */
4499  if (presorted_keys == 0 || !enable_incremental_sort)
4500  path = (Path *) create_sort_path(root, window_rel,
4501  path,
4502  window_pathkeys,
4503  -1.0);
4504  else
4505  {
4506  /*
4507  * Since we have presorted keys and incremental sort is
4508  * enabled, just use incremental sort.
4509  */
4510  path = (Path *) create_incremental_sort_path(root,
4511  window_rel,
4512  path,
4513  window_pathkeys,
4514  presorted_keys,
4515  -1.0);
4516  }
4517  }
4518 
4519  if (lnext(activeWindows, l))
4520  {
4521  /*
4522  * Add the current WindowFuncs to the output target for this
4523  * intermediate WindowAggPath. We must copy window_target to
4524  * avoid changing the previous path's target.
4525  *
4526  * Note: a WindowFunc adds nothing to the target's eval costs; but
4527  * we do need to account for the increase in tlist width.
4528  */
4529  int64 tuple_width = window_target->width;
4530  ListCell *lc2;
4531 
4532  window_target = copy_pathtarget(window_target);
4533  foreach(lc2, wflists->windowFuncs[wc->winref])
4534  {
4535  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4536 
4537  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4538  tuple_width += get_typavgwidth(wfunc->wintype, -1);
4539  }
4540  window_target->width = clamp_width_est(tuple_width);
4541  }
4542  else
4543  {
4544  /* Install the goal target in the topmost WindowAgg */
4545  window_target = output_target;
4546  }
4547 
4548  /* mark the final item in the list as the top-level window */
4549  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4550 
4551  /*
4552  * Accumulate all of the runConditions from each intermediate
4553  * WindowClause. The top-level WindowAgg must pass these as a qual so
4554  * that it filters out unwanted tuples correctly.
4555  */
4556  if (!topwindow)
4557  topqual = list_concat(topqual, wc->runCondition);
4558 
4559  path = (Path *)
4560  create_windowagg_path(root, window_rel, path, window_target,
4561  wflists->windowFuncs[wc->winref],
4562  wc, topwindow ? topqual : NIL, topwindow);
4563  }
4564 
4565  add_path(window_rel, path);
4566 }
4567 
4568 /*
4569  * create_distinct_paths
4570  *
4571  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4572  *
4573  * input_rel: contains the source-data Paths
4574  * target: the pathtarget for the result Paths to compute
4575  *
4576  * Note: input paths should already compute the desired pathtarget, since
4577  * Sort/Unique won't project anything.
4578  */
4579 static RelOptInfo *
4580 create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4581  PathTarget *target)
4582 {
4583  RelOptInfo *distinct_rel;
4584 
4585  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4586  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4587 
4588  /*
4589  * We don't compute anything at this level, so distinct_rel will be
4590  * parallel-safe if the input rel is parallel-safe. In particular, if
4591  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4592  * output those expressions, and will not be parallel-safe unless those
4593  * expressions are parallel-safe.
4594  */
4595  distinct_rel->consider_parallel = input_rel->consider_parallel;
4596 
4597  /*
4598  * If the input rel belongs to a single FDW, so does the distinct_rel.
4599  */
4600  distinct_rel->serverid = input_rel->serverid;
4601  distinct_rel->userid = input_rel->userid;
4602  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4603  distinct_rel->fdwroutine = input_rel->fdwroutine;
4604 
4605  /* build distinct paths based on input_rel's pathlist */
4606  create_final_distinct_paths(root, input_rel, distinct_rel);
4607 
4608  /* now build distinct paths based on input_rel's partial_pathlist */
4609  create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4610 
4611  /* Give a helpful error if we failed to create any paths */
4612  if (distinct_rel->pathlist == NIL)
4613  ereport(ERROR,
4614  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4615  errmsg("could not implement DISTINCT"),
4616  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4617 
4618  /*
4619  * If there is an FDW that's responsible for all baserels of the query,
4620  * let it consider adding ForeignPaths.
4621  */
4622  if (distinct_rel->fdwroutine &&
4623  distinct_rel->fdwroutine->GetForeignUpperPaths)
4624  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4625  UPPERREL_DISTINCT,
4626  input_rel,
4627  distinct_rel,
4628  NULL);
4629 
4630  /* Let extensions possibly add some more paths */
4631  if (create_upper_paths_hook)
4632  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4633  distinct_rel, NULL);
4634 
4635  /* Now choose the best path(s) */
4636  set_cheapest(distinct_rel);
4637 
4638  return distinct_rel;
4639 }
4640 
4641 /*
4642  * create_partial_distinct_paths
4643  *
4644  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4645  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4646  * paths on top and add a final unique/aggregate path to remove any duplicate
4647  * produced from combining rows from parallel workers.
4648  */
4649 static void
4650 create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4651  RelOptInfo *final_distinct_rel,
4652  PathTarget *target)
4653 {
4654  RelOptInfo *partial_distinct_rel;
4655  Query *parse;
4656  List *distinctExprs;
4657  double numDistinctRows;
4658  Path *cheapest_partial_path;
4659  ListCell *lc;
4660 
4661  /* nothing to do when there are no partial paths in the input rel */
4662  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4663  return;
4664 
4665  parse = root->parse;
4666 
4667  /* can't do parallel DISTINCT ON */
4668  if (parse->hasDistinctOn)
4669  return;
4670 
4671  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4672  NULL);
4673  partial_distinct_rel->reltarget = target;
4674  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4675 
4676  /*
4677  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4678  */
4679  partial_distinct_rel->serverid = input_rel->serverid;
4680  partial_distinct_rel->userid = input_rel->userid;
4681  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4682  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4683 
4684  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4685 
4686  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4687  parse->targetList);
4688 
4689  /* estimate how many distinct rows we'll get from each worker */
4690  numDistinctRows = estimate_num_groups(root, distinctExprs,
4691  cheapest_partial_path->rows,
4692  NULL, NULL);
4693 
4694  /*
4695  * Try sorting the cheapest path and incrementally sorting any paths with
4696  * presorted keys and put unique paths atop of those.
4697  */
4698  if (grouping_is_sortable(root->processed_distinctClause))
4699  {
4700  foreach(lc, input_rel->partial_pathlist)
4701  {
4702  Path *input_path = (Path *) lfirst(lc);
4703  Path *sorted_path;
4704  bool is_sorted;
4705  int presorted_keys;
4706 
4707  is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
4708  input_path->pathkeys,
4709  &presorted_keys);
4710 
4711  if (is_sorted)
4712  sorted_path = input_path;
4713  else
4714  {
4715  /*
4716  * Try at least sorting the cheapest path and also try
4717  * incrementally sorting any path which is partially sorted
4718  * already (no need to deal with paths which have presorted
4719  * keys when incremental sort is disabled unless it's the
4720  * cheapest partial path).
4721  */
4722  if (input_path != cheapest_partial_path &&
4723  (presorted_keys == 0 || !enable_incremental_sort))
4724  continue;
4725 
4726  /*
4727  * We've no need to consider both a sort and incremental sort.
4728  * We'll just do a sort if there are no presorted keys and an
4729  * incremental sort when there are presorted keys.
4730  */
4731  if (presorted_keys == 0 || !enable_incremental_sort)
4732  sorted_path = (Path *) create_sort_path(root,
4733  partial_distinct_rel,
4734  input_path,
4735  root->distinct_pathkeys,
4736  -1.0);
4737  else
4738  sorted_path = (Path *) create_incremental_sort_path(root,
4739  partial_distinct_rel,
4740  input_path,
4741  root->distinct_pathkeys,
4742  presorted_keys,
4743  -1.0);
4744  }
4745 
4746  /*
4747  * An empty distinct_pathkeys means all tuples have the same value
4748  * for the DISTINCT clause. See create_final_distinct_paths()
4749  */
4750  if (root->distinct_pathkeys == NIL)
4751  {
4752  Node *limitCount;
4753 
4754  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4755  sizeof(int64),
4756  Int64GetDatum(1), false,
4757  FLOAT8PASSBYVAL);
4758 
4759  /*
4760  * Apply a LimitPath onto the partial path to restrict the
4761  * tuples from each worker to 1. create_final_distinct_paths
4762  * will need to apply an additional LimitPath to restrict this
4763  * to a single row after the Gather node. If the query
4764  * already has a LIMIT clause, then we could end up with three
4765  * Limit nodes in the final plan. Consolidating the top two
4766  * of these could be done, but does not seem worth troubling
4767  * over.
4768  */
4769  add_partial_path(partial_distinct_rel, (Path *)
4770  create_limit_path(root, partial_distinct_rel,
4771  sorted_path,
4772  NULL,
4773  limitCount,
4774  LIMIT_OPTION_COUNT,
4775  0, 1));
4776  }
4777  else
4778  {
4779  add_partial_path(partial_distinct_rel, (Path *)
4780  create_upper_unique_path(root, partial_distinct_rel,
4781  sorted_path,
4782  list_length(root->distinct_pathkeys),
4783  numDistinctRows));
4784  }
4785  }
4786  }
4787 
4788  /*
4789  * Now try hash aggregate paths, if enabled and hashing is possible. Since
4790  * we're not on the hook to ensure we do our best to create at least one
4791  * path here, we treat enable_hashagg as a hard off-switch rather than the
4792  * slightly softer variant in create_final_distinct_paths.
4793  */
4794  if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
4795  {
4796  add_partial_path(partial_distinct_rel, (Path *)
4797  create_agg_path(root,
4798  partial_distinct_rel,
4799  cheapest_partial_path,
4800  cheapest_partial_path->pathtarget,
4801  AGG_HASHED,
4802  AGGSPLIT_SIMPLE,
4803  root->processed_distinctClause,
4804  NIL,
4805  NULL,
4806  numDistinctRows));
4807  }
4808 
4809  /*
4810  * If there is an FDW that's responsible for all baserels of the query,
4811  * let it consider adding ForeignPaths.
4812  */
4813  if (partial_distinct_rel->fdwroutine &&
4814  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4815  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4816  UPPERREL_PARTIAL_DISTINCT,
4817  input_rel,
4818  partial_distinct_rel,
4819  NULL);
4820 
4821  /* Let extensions possibly add some more partial paths */
4822  if (create_upper_paths_hook)
4823  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4824  input_rel, partial_distinct_rel, NULL);
4825 
4826  if (partial_distinct_rel->partial_pathlist != NIL)
4827  {
4828  generate_useful_gather_paths(root, partial_distinct_rel, true);
4829  set_cheapest(partial_distinct_rel);
4830 
4831  /*
4832  * Finally, create paths to distinctify the final result. This step
4833  * is needed to remove any duplicates due to combining rows from
4834  * parallel workers.
4835  */
4836  create_final_distinct_paths(root, partial_distinct_rel,
4837  final_distinct_rel);
4838  }
4839 }
4840 
4841 /*
4842  * create_final_distinct_paths
4843  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4844  *
4845  * input_rel: contains the source-data paths
4846  * distinct_rel: destination relation for storing created paths
4847  */
4848 static RelOptInfo *
4849 create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4850  RelOptInfo *distinct_rel)
4851 {
4852  Query *parse = root->parse;
4853  Path *cheapest_input_path = input_rel->cheapest_total_path;
4854  double numDistinctRows;
4855  bool allow_hash;
4856 
4857  /* Estimate number of distinct rows there will be */
4858  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4859  root->hasHavingQual)
4860  {
4861  /*
4862  * If there was grouping or aggregation, use the number of input rows
4863  * as the estimated number of DISTINCT rows (ie, assume the input is
4864  * already mostly unique).
4865  */
4866  numDistinctRows = cheapest_input_path->rows;
4867  }
4868  else
4869  {
4870  /*
4871  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4872  */
4873  List *distinctExprs;
4874 
4875  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4876  parse->targetList);
4877  numDistinctRows = estimate_num_groups(root, distinctExprs,
4878  cheapest_input_path->rows,
4879  NULL, NULL);
4880  }
4881 
4882  /*
4883  * Consider sort-based implementations of DISTINCT, if possible.
4884  */
4885  if (grouping_is_sortable(root->processed_distinctClause))
4886  {
4887  /*
4888  * Firstly, if we have any adequately-presorted paths, just stick a
4889  * Unique node on those. We also consider doing an explicit sort of
4890  * the cheapest input path and Unique'ing that. If any paths have
4891  * presorted keys then we'll create an incremental sort atop of those
4892  * before adding a unique node on the top.
4893  *
4894  * When we have DISTINCT ON, we must sort by the more rigorous of
4895  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4896  * Also, if we do have to do an explicit sort, we might as well use
4897  * the more rigorous ordering to avoid a second sort later. (Note
4898  * that the parser will have ensured that one clause is a prefix of
4899  * the other.)
4900  */
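 /*
  * Illustrative example (not from the original source): for
  *		SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b DESC;
  * distinct_pathkeys covers only (a) while sort_pathkeys covers (a, b DESC),
  * so the test below selects sort_pathkeys as needed_pathkeys; one sort by
  * (a, b DESC) then serves both the DISTINCT ON and the final ORDER BY.
  */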
4901  List *needed_pathkeys;
4902  ListCell *lc;
4903  double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
4904 
4905  if (parse->hasDistinctOn &&
4906  list_length(root->distinct_pathkeys) <
4907  list_length(root->sort_pathkeys))
4908  needed_pathkeys = root->sort_pathkeys;
4909  else
4910  needed_pathkeys = root->distinct_pathkeys;
4911 
4912  foreach(lc, input_rel->pathlist)
4913  {
4914  Path *input_path = (Path *) lfirst(lc);
4915  Path *sorted_path;
4916  bool is_sorted;
4917  int presorted_keys;
4918 
4919  is_sorted = pathkeys_count_contained_in(needed_pathkeys,
4920  input_path->pathkeys,
4921  &presorted_keys);
4922 
4923  if (is_sorted)
4924  sorted_path = input_path;
4925  else
4926  {
4927  /*
4928  * Try at least sorting the cheapest path and also try
4929  * incrementally sorting any path which is partially sorted
4930  * already (no need to deal with paths which have presorted
4931  * keys when incremental sort is disabled unless it's the
4932  * cheapest input path).
4933  */
4934  if (input_path != cheapest_input_path &&
4935  (presorted_keys == 0 || !enable_incremental_sort))
4936  continue;
4937 
4938  /*
4939  * We've no need to consider both a sort and incremental sort.
4940  * We'll just do a sort if there are no presorted keys and an
4941  * incremental sort when there are presorted keys.
4942  */
4943  if (presorted_keys == 0 || !enable_incremental_sort)
4944  sorted_path = (Path *) create_sort_path(root,
4945  distinct_rel,
4946  input_path,
4947  needed_pathkeys,
4948  limittuples);
4949  else
4950  sorted_path = (Path *) create_incremental_sort_path(root,
4951  distinct_rel,
4952  input_path,
4953  needed_pathkeys,
4954  presorted_keys,
4955  limittuples);
4956  }
4957 
4958  /*
4959  * distinct_pathkeys may have become empty if all of the pathkeys
4960  * were determined to be redundant. If all of the pathkeys are
4961  * redundant then each DISTINCT target must only allow a single
4962  * value, therefore all resulting tuples must be identical (or at
4963  * least indistinguishable by an equality check). We can uniquify
4964  * these tuples simply by just taking the first tuple. All we do
4965  * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
4966  * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
4967  * list, so we must still only do this with paths which are
4968  * correctly sorted by sort_pathkeys.
4969  */
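 /*
  * Illustrative example (not from the original source): in
  *		SELECT DISTINCT a FROM tab WHERE a = 1;
  * the only DISTINCT key is made redundant by the WHERE equality, so
  * distinct_pathkeys ends up NIL and the LIMIT 1 path added below is enough
  * to return the single distinct row.
  */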
4970  if (root->distinct_pathkeys == NIL)
4971  {
4972  Node *limitCount;
4973 
4974  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4975  sizeof(int64),
4976  Int64GetDatum(1), false,
4977  FLOAT8PASSBYVAL);
4978 
4979  /*
4980  * If the query already has a LIMIT clause, then we could end
4981  * up with a duplicate LimitPath in the final plan. That does
4982  * not seem worth troubling over too much.
4983  */
4984  add_path(distinct_rel, (Path *)
4985  create_limit_path(root, distinct_rel, sorted_path,
4986  NULL, limitCount,
4987  LIMIT_OPTION_COUNT, 0, 1));
4988  }
4989  else
4990  {
4991  add_path(distinct_rel, (Path *)
4992  create_upper_unique_path(root, distinct_rel,
4993  sorted_path,
4994  list_length(root->distinct_pathkeys),
4995  numDistinctRows));
4996  }
4997  }
4998  }
4999 
5000  /*
5001  * Consider hash-based implementations of DISTINCT, if possible.
5002  *
5003  * If we were not able to make any other types of path, we *must* hash or
5004  * die trying. If we do have other choices, there are two things that
5005  * should prevent selection of hashing: if the query uses DISTINCT ON
5006  * (because it won't really have the expected behavior if we hash), or if
5007  * enable_hashagg is off.
5008  *
5009  * Note: grouping_is_hashable() is much more expensive to check than the
5010  * other gating conditions, so we want to do it last.
5011  */
5012  if (distinct_rel->pathlist == NIL)
5013  allow_hash = true; /* we have no alternatives */
5014  else if (parse->hasDistinctOn || !enable_hashagg)
5015  allow_hash = false; /* policy-based decision not to hash */
5016  else
5017  allow_hash = true; /* default */
5018 
5019  if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5020  {
5021  /* Generate hashed aggregate path --- no sort needed */
5022  add_path(distinct_rel, (Path *)
5023  create_agg_path(root,
5024  distinct_rel,
5025  cheapest_input_path,
5026  cheapest_input_path->pathtarget,
5027  AGG_HASHED,
5028  AGGSPLIT_SIMPLE,
5029  root->processed_distinctClause,
5030  NIL,
5031  NULL,
5032  numDistinctRows));
5033  }
5034 
5035  return distinct_rel;
5036 }
5037 
5038 /*
5039  * create_ordered_paths
5040  *
5041  * Build a new upperrel containing Paths for ORDER BY evaluation.
5042  *
5043  * All paths in the result must satisfy the ORDER BY ordering.
5044  * The only new paths we need consider are an explicit full sort
5045  * and incremental sort on the cheapest-total existing path.
5046  *
5047  * input_rel: contains the source-data Paths
5048  * target: the output tlist the result Paths must emit
5049  * limit_tuples: estimated bound on the number of output tuples,
5050  * or -1 if no LIMIT or couldn't estimate
5051  *
5052  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5053  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5054  */
5055 static RelOptInfo *
5056 create_ordered_paths(PlannerInfo *root,
5057  RelOptInfo *input_rel,
5058  PathTarget *target,
5059  bool target_parallel_safe,
5060  double limit_tuples)
5061 {
5062  Path *cheapest_input_path = input_rel->cheapest_total_path;
5063  RelOptInfo *ordered_rel;
5064  ListCell *lc;
5065 
5066  /* For now, do all work in the (ORDERED, NULL) upperrel */
5067  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5068 
5069  /*
5070  * If the input relation is not parallel-safe, then the ordered relation
5071  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5072  * target list is parallel-safe.
5073  */
5074  if (input_rel->consider_parallel && target_parallel_safe)
5075  ordered_rel->consider_parallel = true;
5076 
5077  /*
5078  * If the input rel belongs to a single FDW, so does the ordered_rel.
5079  */
5080  ordered_rel->serverid = input_rel->serverid;
5081  ordered_rel->userid = input_rel->userid;
5082  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5083  ordered_rel->fdwroutine = input_rel->fdwroutine;
5084 
5085  foreach(lc, input_rel->pathlist)
5086  {
5087  Path *input_path = (Path *) lfirst(lc);
5088  Path *sorted_path;
5089  bool is_sorted;
5090  int presorted_keys;
5091 
5092  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5093  input_path->pathkeys, &presorted_keys);
5094 
5095  if (is_sorted)
5096  sorted_path = input_path;
5097  else
5098  {
5099  /*
5100  * Try at least sorting the cheapest path and also try
5101  * incrementally sorting any path which is partially sorted
5102  * already (no need to deal with paths which have presorted keys
5103  * when incremental sort is disabled unless it's the cheapest
5104  * input path).
5105  */
5106  if (input_path != cheapest_input_path &&
5107  (presorted_keys == 0 || !enable_incremental_sort))
5108  continue;
5109 
5110  /*
5111  * We've no need to consider both a sort and incremental sort.
5112  * We'll just do a sort if there are no presorted keys and an
5113  * incremental sort when there are presorted keys.
5114  */
5115  if (presorted_keys == 0 || !enable_incremental_sort)
5116  sorted_path = (Path *) create_sort_path(root,
5117  ordered_rel,
5118  input_path,
5119  root->sort_pathkeys,
5120  limit_tuples);
5121  else
5122  sorted_path = (Path *) create_incremental_sort_path(root,
5123  ordered_rel,
5124  input_path,
5125  root->sort_pathkeys,
5126  presorted_keys,
5127  limit_tuples);
5128  }
5129 
5130  /* Add projection step if needed */
5131  if (sorted_path->pathtarget != target)
5132  sorted_path = apply_projection_to_path(root, ordered_rel,
5133  sorted_path, target);
5134 
5135  add_path(ordered_rel, sorted_path);
5136  }
5137 
5138  /*
5139  * generate_gather_paths() will have already generated a simple Gather
5140  * path for the best parallel path, if any, and the loop above will have
5141  * considered sorting it. Similarly, generate_gather_paths() will also
5142  * have generated order-preserving Gather Merge plans which can be used
5143  * without sorting if they happen to match the sort_pathkeys, and the loop
5144  * above will have handled those as well. However, there's one more
5145  * possibility: it may make sense to sort the cheapest partial path or
5146  * incrementally sort any partial path that is partially sorted according
5147  * to the required output order and then use Gather Merge.
5148  */
5149  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5150  input_rel->partial_pathlist != NIL)
5151  {
5152  Path *cheapest_partial_path;
5153 
5154  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5155 
5156  foreach(lc, input_rel->partial_pathlist)
5157  {
5158  Path *input_path = (Path *) lfirst(lc);
5159  Path *sorted_path;
5160  bool is_sorted;
5161  int presorted_keys;
5162  double total_groups;
5163 
5164  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5165  input_path->pathkeys,
5166  &presorted_keys);
5167 
5168  if (is_sorted)
5169  continue;
5170 
5171  /*
5172  * Try at least sorting the cheapest path and also try
5173  * incrementally sorting any path which is partially sorted
5174  * already (no need to deal with paths which have presorted keys
5175  * when incremental sort is disabled unless it's the cheapest
5176  * partial path).
5177  */
5178  if (input_path != cheapest_partial_path &&
5179  (presorted_keys == 0 || !enable_incremental_sort))
5180  continue;
5181 
5182  /*
5183  * We've no need to consider both a sort and incremental sort.
5184  * We'll just do a sort if there are no presorted keys and an
5185  * incremental sort when there are presorted keys.
5186  */
5187  if (presorted_keys == 0 || !enable_incremental_sort)
5188  sorted_path = (Path *) create_sort_path(root,
5189  ordered_rel,
5190  input_path,
5191  root->sort_pathkeys,
5192  limit_tuples);
5193  else
5194  sorted_path = (Path *) create_incremental_sort_path(root,
5195  ordered_rel,
5196  input_path,
5197  root->sort_pathkeys,
5198  presorted_keys,
5199  limit_tuples);
5200  total_groups = input_path->rows *
5201  input_path->parallel_workers;
5202  sorted_path = (Path *)
5203  create_gather_merge_path(root, ordered_rel,
5204  sorted_path,
5205  sorted_path->pathtarget,
5206  root->sort_pathkeys, NULL,
5207  &total_groups);
5208 
5209  /* Add projection step if needed */
5210  if (sorted_path->pathtarget != target)
5211  sorted_path = apply_projection_to_path(root, ordered_rel,
5212  sorted_path, target);
5213 
5214  add_path(ordered_rel, sorted_path);
5215  }
5216  }
5217 
5218  /*
5219  * If there is an FDW that's responsible for all baserels of the query,
5220  * let it consider adding ForeignPaths.
5221  */
5222  if (ordered_rel->fdwroutine &&
5223  ordered_rel->fdwroutine->GetForeignUpperPaths)
5224  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5225  input_rel, ordered_rel,
5226  NULL);
5227 
5228  /* Let extensions possibly add some more paths */
5229  if (create_upper_paths_hook)
5230  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5231  input_rel, ordered_rel, NULL);
5232 
5233  /*
5234  * No need to bother with set_cheapest here; grouping_planner does not
5235  * need us to do it.
5236  */
5237  Assert(ordered_rel->pathlist != NIL);
5238 
5239  return ordered_rel;
5240 }
5241 
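A minimal, self-contained sketch (using simplified stand-in values, not the real Path or PlannerInfo structures) of the decision rule the two loops above apply to each path that is not already fully sorted: non-cheapest paths are only worth keeping when incremental sort can reuse a presorted prefix, and the choice between a full Sort and an Incremental Sort hinges on whether any leading pathkeys are presorted.

#include <stdbool.h>
#include <stdio.h>

typedef enum
{
	ACTION_SKIP,				/* path not worth sorting here */
	ACTION_FULL_SORT,			/* add a Sort node */
	ACTION_INCREMENTAL_SORT		/* add an Incremental Sort node */
} SortAction;

/* Decision for an input path that is not already sorted as required. */
static SortAction
choose_sort_action(bool is_cheapest_input, int presorted_keys,
				   bool enable_incremental_sort)
{
	/*
	 * Non-cheapest paths are only interesting when they have a presorted
	 * prefix that incremental sort can reuse.
	 */
	if (!is_cheapest_input &&
		(presorted_keys == 0 || !enable_incremental_sort))
		return ACTION_SKIP;

	/* Full sort when nothing usable is presorted, incremental sort otherwise. */
	if (presorted_keys == 0 || !enable_incremental_sort)
		return ACTION_FULL_SORT;
	return ACTION_INCREMENTAL_SORT;
}

int
main(void)
{
	printf("%d\n", choose_sort_action(true, 0, true));		/* 1: full sort */
	printf("%d\n", choose_sort_action(false, 2, true));		/* 2: incremental sort */
	printf("%d\n", choose_sort_action(false, 2, false));	/* 0: skip */
	return 0;
}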
5242 
5243 /*
5244  * make_group_input_target
5245  * Generate appropriate PathTarget for initial input to grouping nodes.
5246  *
5247  * If there is grouping or aggregation, the scan/join subplan cannot emit
5248  * the query's final targetlist; for example, it certainly can't emit any
5249  * aggregate function calls. This routine generates the correct target
5250  * for the scan/join subplan.
5251  *
5252  * The query target list passed from the parser already contains entries
5253  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5254  * for variables used only in HAVING clauses; so we need to add those
5255  * variables to the subplan target list. Also, we flatten all expressions
5256  * except GROUP BY items into their component variables; other expressions
5257  * will be computed by the upper plan nodes rather than by the subplan.
5258  * For example, given a query like
5259  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5260  * we want to pass this targetlist to the subplan:
5261  * a+b,c,d
5262  * where the a+b target will be used by the Sort/Group steps, and the
5263  * other targets will be used for computing the final results.
5264  *
5265  * 'final_target' is the query's final target list (in PathTarget form)
5266  *
5267  * The result is the PathTarget to be computed by the Paths returned from
5268  * query_planner().
5269  */
5270 static PathTarget *
5271 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5272 {
5273  Query *parse = root->parse;
5274  PathTarget *input_target;
5275  List *non_group_cols;
5276  List *non_group_vars;
5277  int i;
5278  ListCell *lc;
5279 
5280  /*
5281  * We must build a target containing all grouping columns, plus any other
5282  * Vars mentioned in the query's targetlist and HAVING qual.
5283  */
5284  input_target = create_empty_pathtarget();
5285  non_group_cols = NIL;
5286 
5287  i = 0;
5288  foreach(lc, final_target->exprs)
5289  {
5290  Expr *expr = (Expr *) lfirst(lc);
5291  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5292 
5293  if (sgref && root->processed_groupClause &&
5294  get_sortgroupref_clause_noerr(sgref,
5295  root->processed_groupClause) != NULL)
5296  {
5297  /*
5298  * It's a grouping column, so add it to the input target as-is.
5299  */
5300  add_column_to_pathtarget(input_target, expr, sgref);
5301  }
5302  else
5303  {
5304  /*
5305  * Non-grouping column, so just remember the expression for later
5306  * call to pull_var_clause.
5307  */
5308  non_group_cols = lappend(non_group_cols, expr);
5309  }
5310 
5311  i++;
5312  }
5313 
5314  /*
5315  * If there's a HAVING clause, we'll need the Vars it uses, too.
5316  */
5317  if (parse->havingQual)
5318  non_group_cols = lappend(non_group_cols, parse->havingQual);
5319 
5320  /*
5321  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5322  * add them to the input target if not already present. (A Var used
5323  * directly as a GROUP BY item will be present already.) Note this
5324  * includes Vars used in resjunk items, so we are covering the needs of
5325  * ORDER BY and window specifications. Vars used within Aggrefs and
5326  * WindowFuncs will be pulled out here, too.
5327  */
5328  non_group_vars = pull_var_clause((Node *) non_group_cols,
5329  PVC_RECURSE_AGGREGATES |
5330  PVC_RECURSE_WINDOWFUNCS |
5331  PVC_INCLUDE_PLACEHOLDERS);
5332  add_new_columns_to_pathtarget(input_target, non_group_vars);
5333 
5334  /* clean up cruft */
5335  list_free(non_group_vars);
5336  list_free(non_group_cols);
5337 
5338  /* XXX this causes some redundant cost calculation ... */
5339  return set_pathtarget_cost_width(root, input_target);
5340 }
5341 
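A hypothetical, heavily simplified illustration of the split described above for SELECT a+b, SUM(c+d) ... GROUP BY a+b: grouping expressions are kept whole, while every other expression is flattened to its component variables. The DemoTargetEntry type and the hard-coded variable lists are invented for the example.

#include <stdio.h>

typedef struct
{
	const char *expr;			/* e.g. "a+b" or "SUM(c+d)" */
	int			sgref;			/* nonzero if it is a grouping column */
	const char *vars;			/* variables the expression uses, e.g. "c,d" */
} DemoTargetEntry;

int
main(void)
{
	/* SELECT a+b, SUM(c+d) FROM tab GROUP BY a+b; */
	DemoTargetEntry final_target[] = {
		{"a+b", 1, "a,b"},
		{"SUM(c+d)", 0, "c,d"},
	};
	int			n = sizeof(final_target) / sizeof(final_target[0]);

	printf("input target for scan/join subplan:");
	for (int i = 0; i < n; i++)
	{
		if (final_target[i].sgref != 0)
			printf(" %s", final_target[i].expr);	/* keep grouping expr whole */
		else
			printf(" %s", final_target[i].vars);	/* flatten to component Vars */
	}
	printf("\n");				/* prints: a+b c,d */
	return 0;
}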
5342 /*
5343  * make_partial_grouping_target
5344  * Generate appropriate PathTarget for output of partial aggregate
5345  * (or partial grouping, if there are no aggregates) nodes.
5346  *
5347  * A partial aggregation node needs to emit all the same aggregates that
5348  * a regular aggregation node would, plus any aggregates used in HAVING;
5349  * except that the Aggref nodes should be marked as partial aggregates.
5350  *
5351  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5352  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5353  * these would be Vars that are grouped by or used in grouping expressions.)
5354  *
5355  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5356  * havingQual represents the HAVING clause.
5357  */
5358 static PathTarget *
5359 make_partial_grouping_target(PlannerInfo *root,
5360  PathTarget *grouping_target,
5361  Node *havingQual)
5362 {
5363  PathTarget *partial_target;
5364  List *non_group_cols;
5365  List *non_group_exprs;
5366  int i;
5367  ListCell *lc;
5368 
5369  partial_target = create_empty_pathtarget();
5370  non_group_cols = NIL;
5371 
5372  i = 0;
5373  foreach(lc, grouping_target->exprs)
5374  {
5375  Expr *expr = (Expr *) lfirst(lc);
5376  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5377 
5378  if (sgref && root->processed_groupClause &&
5379  get_sortgroupref_clause_noerr(sgref,
5380  root->processed_groupClause) != NULL)
5381  {
5382  /*
5383  * It's a grouping column, so add it to the partial_target as-is.
5384  * (This allows the upper agg step to repeat the grouping calcs.)
5385  */
5386  add_column_to_pathtarget(partial_target, expr, sgref);
5387  }
5388  else
5389  {
5390  /*
5391  * Non-grouping column, so just remember the expression for later
5392  * call to pull_var_clause.
5393  */
5394  non_group_cols = lappend(non_group_cols, expr);
5395  }
5396 
5397  i++;
5398  }
5399 
5400  /*
5401  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5402  */
5403  if (havingQual)
5404  non_group_cols = lappend(non_group_cols, havingQual);
5405 
5406  /*
5407  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5408  * non-group cols (plus HAVING), and add them to the partial_target if not
5409  * already present. (An expression used directly as a GROUP BY item will
5410  * be present already.) Note this includes Vars used in resjunk items, so
5411  * we are covering the needs of ORDER BY and window specifications.
5412  */
5413  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5414  PVC_INCLUDE_AGGREGATES |
5415  PVC_RECURSE_WINDOWFUNCS |
5416  PVC_INCLUDE_PLACEHOLDERS);
5417 
5418  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5419 
5420  /*
5421  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5422  * are at the top level of the target list, so we can just scan the list
5423  * rather than recursing through the expression trees.
5424  */
5425  foreach(lc, partial_target->exprs)
5426  {
5427  Aggref *aggref = (Aggref *) lfirst(lc);
5428 
5429  if (IsA(aggref, Aggref))
5430  {
5431  Aggref *newaggref;
5432 
5433  /*
5434  * We shouldn't need to copy the substructure of the Aggref node,
5435  * but flat-copy the node itself to avoid damaging other trees.
5436  */
5437  newaggref = makeNode(Aggref);
5438  memcpy(newaggref, aggref, sizeof(Aggref));
5439 
5440  /* For now, assume serialization is required */
5441  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5442 
5443  lfirst(lc) = newaggref;
5444  }
5445  }
5446 
5447  /* clean up cruft */
5448  list_free(non_group_exprs);
5449  list_free(non_group_cols);
5450 
5451  /* XXX this causes some redundant cost calculation ... */
5452  return set_pathtarget_cost_width(root, partial_target);
5453 }
5454 
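The loop above relies on a flat copy of each Aggref: only the node itself is duplicated, its argument substructure stays shared, and the copy alone is switched into partial mode so that other trees referencing the original are left untouched. A small stand-alone sketch of that copy-then-modify pattern, using an invented DemoAggref type rather than the real node machinery.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct
{
	int			aggsplit;		/* stand-in for the Aggref's split mode */
	const char *args;			/* shared substructure, not copied */
} DemoAggref;

int
main(void)
{
	DemoAggref	orig = {0, "c+d"};
	DemoAggref *partial = malloc(sizeof(DemoAggref));

	memcpy(partial, &orig, sizeof(DemoAggref));	/* flat copy of the node only */
	partial->aggsplit = 1;						/* mark the copy as partial */

	printf("orig=%d partial=%d args shared=%s\n",
		   orig.aggsplit, partial->aggsplit,
		   (orig.args == partial->args) ? "yes" : "no");
	free(partial);
	return 0;
}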
5455 /*
5456  * mark_partial_aggref
5457  * Adjust an Aggref to make it represent a partial-aggregation step.
5458  *
5459  * The Aggref node is modified in-place; caller must do any copying required.
5460  */
5461 void
5462 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5463 {
5464  /* aggtranstype should be computed by this point */
5465  Assert(OidIsValid(agg->aggtranstype));
5466  /* ... but aggsplit should still be as the parser left it */
5467  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5468 
5469  /* Mark the Aggref with the intended partial-aggregation mode */
5470  agg->aggsplit = aggsplit;
5471 
5472  /*
5473  * Adjust result type if needed. Normally, a partial aggregate returns
5474  * the aggregate's transition type; but if that's INTERNAL and we're
5475  * serializing, it returns BYTEA instead.
5476  */
5477  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5478  {
5479  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5480  agg->aggtype = BYTEAOID;
5481  else
5482  agg->aggtype = agg->aggtranstype;
5483  }
5484 }
5485 
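A compact sketch of the result-type rule implemented above, with invented enum values standing in for type Oids and the AGGSPLIT macros: when the partial step skips finalization it emits the transition type, except that an INTERNAL transition type being serialized is emitted as BYTEA.

#include <stdbool.h>
#include <stdio.h>

typedef enum { TYPE_INT8, TYPE_INTERNAL, TYPE_BYTEA } DemoType;

static DemoType
partial_agg_result_type(DemoType aggtranstype, bool skip_final, bool serialize,
						DemoType final_result_type)
{
	if (!skip_final)
		return final_result_type;	/* finalizing step keeps the normal type */
	if (aggtranstype == TYPE_INTERNAL && serialize)
		return TYPE_BYTEA;			/* INTERNAL state must be serialized */
	return aggtranstype;			/* otherwise emit the transition type */
}

int
main(void)
{
	printf("%d\n", partial_agg_result_type(TYPE_INTERNAL, true, true, TYPE_INT8));	/* 2: BYTEA */
	printf("%d\n", partial_agg_result_type(TYPE_INT8, true, false, TYPE_INT8));	/* 0: INT8 */
	return 0;
}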
5486 /*
5487  * postprocess_setop_tlist
5488  * Fix up targetlist returned by plan_set_operations().
5489  *
5490  * We need to transpose sort key info from the orig_tlist into new_tlist.
5491  * NOTE: this would not be good enough if we supported resjunk sort keys
5492  * for results of set operations --- then, we'd need to project a whole
5493  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5494  * find any resjunk columns in orig_tlist.
5495  */
5496 static List *
5497 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5498 {
5499  ListCell *l;
5500  ListCell *orig_tlist_item = list_head(orig_tlist);
5501 
5502  foreach(l, new_tlist)
5503  {
5504  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5505  TargetEntry *orig_tle;
5506 
5507  /* ignore resjunk columns in setop result */
5508  if (new_tle->resjunk)
5509  continue;
5510 
5511  Assert(orig_tlist_item != NULL);
5512  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5513  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5514  if (orig_tle->resjunk) /* should not happen */
5515  elog(ERROR, "resjunk output columns are not implemented");
5516  Assert(new_tle->resno == orig_tle->resno);
5517  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5518  }
5519  if (orig_tlist_item != NULL)
5520  elog(ERROR, "resjunk output columns are not implemented");
5521  return new_tlist;
5522 }
5523 
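A self-contained sketch of the transposition above, with an invented DemoTle struct in place of TargetEntry: sort/group references are copied positionally from the original tlist onto the non-resjunk entries of the new tlist, and resjunk columns in the original (or leftover original entries) are treated as errors.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct
{
	bool		resjunk;
	unsigned int ressortgroupref;
} DemoTle;

static void
transpose_sortgrouprefs(DemoTle *new_tlist, int new_len,
						const DemoTle *orig_tlist, int orig_len)
{
	int			o = 0;

	for (int n = 0; n < new_len; n++)
	{
		if (new_tlist[n].resjunk)
			continue;			/* ignore resjunk columns in the setop result */
		if (o >= orig_len || orig_tlist[o].resjunk)
		{
			fprintf(stderr, "resjunk output columns are not implemented\n");
			exit(1);
		}
		new_tlist[n].ressortgroupref = orig_tlist[o].ressortgroupref;
		o++;
	}
	if (o != orig_len)			/* leftover original entries are unexpected */
	{
		fprintf(stderr, "resjunk output columns are not implemented\n");
		exit(1);
	}
}

int
main(void)
{
	DemoTle		orig[] = {{false, 7}, {false, 0}};
	DemoTle		newt[] = {{false, 0}, {false, 0}, {true, 0}};

	transpose_sortgrouprefs(newt, 3, orig, 2);
	printf("%u %u\n", newt[0].ressortgroupref, newt[1].ressortgroupref);	/* 7 0 */
	return 0;
}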
5524 /*
5525  * optimize_window_clauses
5526  * Call each WindowFunc's prosupport function to see if we're able to
5527  * make any adjustments to any of the WindowClauses so that the executor
5528  * can execute the window functions in a more optimal way.
5529  *
5530  * Currently we only allow adjustments to the WindowClause's frameOptions. We
5531  * may allow more things to be done here in the future.
5532  */
5533 static void
5534 optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
5535 {
5536  List *windowClause = root->parse->windowClause;
5537  ListCell *lc;
5538 
5539  foreach(lc, windowClause)
5540  {
5541  WindowClause *wc = lfirst_node(WindowClause, lc);
5542  ListCell *lc2;
5543  int optimizedFrameOptions = 0;
5544 
5545  Assert(wc->winref <= wflists->maxWinRef);
5546 
5547  /* skip any WindowClauses that have no WindowFuncs */
5548  if (wflists->windowFuncs[wc->winref] == NIL)
5549  continue;
5550 
5551  foreach(lc2, wflists->windowFuncs[wc->winref])
5552  {
5553  SupportRequestOptimizeWindowClause req;
5554  SupportRequestOptimizeWindowClause *res;
5555  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5556  Oid prosupport;
5557 
5558  prosupport = get_func_support(wfunc->winfnoid);
5559 
5560  /* Check if there's a support function for 'wfunc' */
5561  if (!OidIsValid(prosupport))
5562  break; /* can't optimize this WindowClause */
5563 
5564  req.type = T_SupportRequestOptimizeWindowClause;
5565  req.window_clause = wc;
5566  req.window_func = wfunc;
5567  req.frameOptions = wc->frameOptions;
5568 
5569  /* call the support function */
5570  res = (SupportRequestOptimizeWindowClause *)
5571  DatumGetPointer(OidFunctionCall1(prosupport,
5572  PointerGetDatum(&req)));
5573 
5574  /*
5575  * Skip to next WindowClause if the support function does not
5576  * support this request type.
5577  */
5578  if (res == NULL)
5579  break;
5580 
5581  /*
5582  * Save these frameOptions for the first WindowFunc for this
5583  * WindowClause.
5584  */
5585  if (foreach_current_index(lc2) == 0)
5586  optimizedFrameOptions = res->frameOptions;
5587 
5588  /*
5589  * On subsequent WindowFuncs, if the frameOptions are not the same
5590  * then we're unable to optimize the frameOptions for this
5591  * WindowClause.
5592  */
5593  else if (optimizedFrameOptions != res->frameOptions)
5594  break; /* skip to the next WindowClause, if any */
5595  }
5596 
5597  /* adjust the frameOptions if all WindowFuncs agree that it's ok */
5598  if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5599  {
5600  ListCell *lc3;
5601 
5602  /* apply the new frame options */
5603  wc->frameOptions = optimizedFrameOptions;
5604 
5605  /*
5606  * We now check to see if changing the frameOptions has caused
5607  * this WindowClause to be a duplicate of some other WindowClause.
5608  * This can only happen if we have multiple WindowClauses, so
5609  * don't bother if there's only 1.
5610  */
5611  if (list_length(windowClause) == 1)
5612  continue;
5613 
5614  /*
5615  * Do the duplicate check and reuse the existing WindowClause if
5616  * we find a duplicate.
5617  */
5618  foreach(lc3, windowClause)
5619  {
5620  WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5621 
5622  /* skip over the WindowClause we're currently editing */
5623  if (existing_wc == wc)
5624  continue;
5625 
5626  /*
5627  * Perform the same duplicate check that is done in
5628  * transformWindowFuncCall.
5629  */
5630  if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5631  equal(wc->orderClause, existing_wc->orderClause) &&
5632  wc->frameOptions == existing_wc->frameOptions &&
5633  equal(wc->startOffset, existing_wc->startOffset) &&
5634  equal(wc->endOffset, existing_wc->endOffset))
5635  {
5636  ListCell *lc4;
5637 
5638  /*
5639  * Now move each WindowFunc in 'wc' into 'existing_wc'.
5640  * This requires adjusting each WindowFunc's winref and
5641  * moving the WindowFuncs in 'wc' to the list of
5642  * WindowFuncs in 'existing_wc'.
5643  */
5644  foreach(lc4, wflists->windowFuncs[wc->winref])
5645  {
5646  WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5647 
5648  wfunc->winref = existing_wc->winref;
5649  }
5650 
5651  /* move list items */
5652  wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5653  wflists->windowFuncs[wc->winref]);
5654  wflists->windowFuncs[wc->winref] = NIL;
5655 
5656  /*
5657  * transformWindowFuncCall() should have made sure there
5658  * are no other duplicates, so we needn't bother looking
5659  * any further.
5660  */
5661  break;
5662  }
5663  }
5664  }
5665  }
5666 }
5667 
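The core consensus rule above can be stated on its own: a WindowClause's frameOptions are replaced only when every one of its WindowFuncs has a support function and all of them propose the same options. A toy sketch of that rule follows; the proposed[] array and the negative sentinel for "no support function" are invented for the example.

#include <stdio.h>

static int
agree_on_frame_options(const int *proposed, int nfuncs, int current)
{
	int			agreed = current;

	for (int i = 0; i < nfuncs; i++)
	{
		if (proposed[i] < 0)
			return current;		/* no support function: keep original options */
		if (i == 0)
			agreed = proposed[i];
		else if (proposed[i] != agreed)
			return current;		/* disagreement: keep original options */
	}
	return agreed;
}

int
main(void)
{
	int			a[] = {8, 8};
	int			b[] = {8, 16};

	printf("%d\n", agree_on_frame_options(a, 2, 2));	/* 8: all agree */
	printf("%d\n", agree_on_frame_options(b, 2, 2));	/* 2: disagreement */
	return 0;
}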
5668 /*
5669  * select_active_windows
5670  * Create a list of the "active" window clauses (ie, those referenced
5671  * by non-deleted WindowFuncs) in the order they are to be executed.
5672  */
5673 static List *
5674 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5675 {
5676  List *windowClause = root->parse->windowClause;
5677  List *result = NIL;
5678  ListCell *lc;
5679  int nActive = 0;
5680  WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5681  * list_length(windowClause));
5682 
5683  /* First, construct an array of the active windows */
5684  foreach(lc, windowClause)
5685  {
5686  WindowClause *wc = lfirst_node(WindowClause, lc);
5687 
5688  /* It's only active if wflists shows some related WindowFuncs */
5689  Assert(wc->winref <= wflists->maxWinRef);
5690  if (wflists->windowFuncs[wc->winref] == NIL)
5691  continue;
5692 
5693  actives[nActive].wc = wc; /* original clause */
5694 
5695  /*
5696  * For sorting, we want the list of partition keys followed by the
5697  * list of sort keys. But pathkeys construction will remove duplicates
5698  * between the two, so we can as well (even though we can't detect all
5699  * of the duplicates, since some may come from ECs - that might mean
5700  * we miss optimization chances here). We must, however, ensure that
5701  * the order of entries is preserved with respect to the ones we do
5702  * keep.
5703  *
5704  * partitionClause and orderClause had their own duplicates removed in
5705  * parse analysis, so we're only concerned here with removing
5706  * orderClause entries that also appear in partitionClause.
5707  */
5708  actives[nActive].uniqueOrder =
5709  list_concat_unique(list_copy(wc->partitionClause),
5710  wc->orderClause);
5711  nActive++;
5712  }
5713 
5714  /*
5715  * Sort active windows by their partitioning/ordering clauses, ignoring
5716  * any framing clauses, so that the windows that need the same sorting are
5717  * adjacent in the list. When we come to generate paths, this will avoid
5718  * inserting additional Sort nodes.
5719  *
5720  * This is how we implement a specific requirement from the SQL standard,
5721  * which says that when two or more windows are order-equivalent (i.e.
5722  * have matching partition and order clauses, even if their names or
5723  * framing clauses differ), then all peer rows must be presented in the
5724  * same order in all of them. If we allowed multiple sort nodes for such
5725  * cases, we'd risk having the peer rows end up in different orders in
5726  * equivalent windows due to sort instability. (See General Rule 4 of
5727  * <window clause> in SQL2008 - SQL2016.)
5728  *
5729  * Additionally, if the entire list of clauses of one window is a prefix
5730  * of another, put first the window with stronger sorting requirements.
5731  * This way we will first sort for stronger window, and won't have to sort
5732  * again for the weaker one.
5733  */
5734  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5735 
5736  /* build ordered list of the original WindowClause nodes */
5737  for (int i = 0; i < nActive; i++)
5738  result = lappend(result, actives[i].wc);
5739 
5740  pfree(actives);
5741 
5742  return result;
5743 }
5744 
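A small sketch of how each window's uniqueOrder list is assembled above, with clauses reduced to plain integers standing in for sort-group references: the partition keys come first, followed by any order keys not already present among them.

#include <stdbool.h>
#include <stdio.h>

static int
build_unique_order(const int *partition, int np, const int *order, int no,
				   int *out)
{
	int			len = 0;

	for (int i = 0; i < np; i++)
		out[len++] = partition[i];		/* partition keys first */
	for (int i = 0; i < no; i++)
	{
		bool		dup = false;

		for (int j = 0; j < np; j++)
			if (order[i] == partition[j])
				dup = true;
		if (!dup)
			out[len++] = order[i];		/* then non-duplicate order keys */
	}
	return len;
}

int
main(void)
{
	int			partition[] = {3, 5};
	int			order[] = {5, 2};
	int			out[4];
	int			len = build_unique_order(partition, 2, order, 2, out);

	for (int i = 0; i < len; i++)
		printf("%d ", out[i]);			/* prints: 3 5 2 */
	printf("\n");
	return 0;
}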
5745 /*
5746  * common_prefix_cmp
5747  * QSort comparison function for WindowClauseSortData
5748  *
5749  * Sort the windows by the required sorting clauses. First, compare the sort
5750  * clauses themselves. Second, if one window's clauses are a prefix of another
5751  * one's clauses, put the window with more sort clauses first.
5752  *
5753  * We purposefully sort by the highest tleSortGroupRef first. Since
5754  * tleSortGroupRefs are assigned for the query's DISTINCT and ORDER BY first
5755  * and because here we sort the lowest tleSortGroupRefs last, if a
5756  * WindowClause is sharing a tleSortGroupRef with the query's DISTINCT or
5757  * ORDER BY clause, this makes it more likely that the final WindowAgg will
5758  * provide presorted input for the query's DISTINCT or ORDER BY clause, thus
5759  * reducing the total number of sorts required for the query.
5760  */
5761 static int
5762 common_prefix_cmp(const void *a, const void *b)
5763 {
5764  const WindowClauseSortData *wcsa = a;
5765  const WindowClauseSortData *wcsb = b;
5766  ListCell *item_a;
5767  ListCell *item_b;
5768 
5769  forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5770  {
5771  SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5772  SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5773 
5774  if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5775  return -1;
5776  else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5777  return 1;
5778  else if (sca->sortop > scb->sortop)
5779  return -1;
5780  else if (sca->sortop < scb->sortop)
5781  return 1;
5782  else if (sca->nulls_first && !scb->nulls_first)
5783  return -1;
5784  else if (!sca->nulls_first && scb->nulls_first)
5785  return 1;
5786  /* no need to compare eqop, since it is fully determined by sortop */
5787  }
5788 
5789  if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5790  return -1;
5791  else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5792  return 1;
5793 
5794  return 0;
5795 }
5796 
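A stand-alone sketch of the comparator above, reduced to arrays of sort-group references (the DemoWindow type is invented): higher references sort first, and when one window's keys are a prefix of another's, the window with more keys, i.e. the stronger ordering, comes first.

#include <stdio.h>
#include <stdlib.h>

typedef struct
{
	const char *name;
	int			nkeys;
	int			keys[4];		/* stand-in for uniqueOrder's tleSortGroupRefs */
} DemoWindow;

static int
demo_prefix_cmp(const void *a, const void *b)
{
	const DemoWindow *wa = a;
	const DemoWindow *wb = b;
	int			n = wa->nkeys < wb->nkeys ? wa->nkeys : wb->nkeys;

	for (int i = 0; i < n; i++)
	{
		if (wa->keys[i] > wb->keys[i])
			return -1;			/* higher sort-group refs sort first */
		if (wa->keys[i] < wb->keys[i])
			return 1;
	}
	/* a prefix sorts after the window with the longer (stronger) ordering */
	if (wa->nkeys > wb->nkeys)
		return -1;
	if (wa->nkeys < wb->nkeys)
		return 1;
	return 0;
}

int
main(void)
{
	DemoWindow	wins[] = {
		{"w1", 1, {2}},
		{"w2", 2, {2, 1}},
		{"w3", 1, {3}},
	};

	qsort(wins, 3, sizeof(DemoWindow), demo_prefix_cmp);
	for (int i = 0; i < 3; i++)
		printf("%s ", wins[i].name);	/* prints: w3 w2 w1 */
	printf("\n");
	return 0;
}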
5797 /*
5798  * make_window_input_target
5799  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5800  *
5801  * When the query has window functions, this function computes the desired
5802  * target to be computed by the node just below the first WindowAgg.
5803  * This tlist must contain all values needed to evaluate the window functions,
5804  * compute the final target list, and perform any required final sort step.
5805  * If multiple WindowAggs are needed, each intermediate one adds its window
5806  * function results onto this base tlist; only the topmost WindowAgg computes
5807  * the actual desired target list.
5808  *
5809  * This function is much like make_group_input_target, though not quite enough
5810  * like it to share code. As in that function, we flatten most expressions
5811  * into their component variables. But we do not want to flatten window
5812  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5813  * evaluations of them, which would be bad (possibly even resulting in
5814  * inconsistent answers, if they contain volatile functions).
5815  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5816  * make_group_input_target, because we may no longer have access to the
5817  * individual Vars in them.
5818  *