PostgreSQL Source Code  git master
planner.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_aggregate.h"
28 #include "catalog/pg_constraint.h"
29 #include "catalog/pg_inherits.h"
30 #include "catalog/pg_proc.h"
31 #include "catalog/pg_type.h"
32 #include "executor/executor.h"
33 #include "executor/nodeAgg.h"
34 #include "foreign/fdwapi.h"
35 #include "jit/jit.h"
36 #include "lib/bipartite_match.h"
37 #include "lib/knapsack.h"
38 #include "miscadmin.h"
39 #include "nodes/makefuncs.h"
40 #include "nodes/nodeFuncs.h"
41 #ifdef OPTIMIZER_DEBUG
42 #include "nodes/print.h"
43 #endif
44 #include "nodes/supportnodes.h"
45 #include "optimizer/appendinfo.h"
46 #include "optimizer/clauses.h"
47 #include "optimizer/cost.h"
48 #include "optimizer/inherit.h"
49 #include "optimizer/optimizer.h"
50 #include "optimizer/paramassign.h"
51 #include "optimizer/pathnode.h"
52 #include "optimizer/paths.h"
53 #include "optimizer/plancat.h"
54 #include "optimizer/planmain.h"
55 #include "optimizer/planner.h"
56 #include "optimizer/prep.h"
57 #include "optimizer/subselect.h"
58 #include "optimizer/tlist.h"
59 #include "parser/analyze.h"
60 #include "parser/parse_agg.h"
61 #include "parser/parse_relation.h"
62 #include "parser/parsetree.h"
63 #include "partitioning/partdesc.h"
64 #include "rewrite/rewriteManip.h"
65 #include "storage/dsm_impl.h"
66 #include "utils/lsyscache.h"
67 #include "utils/rel.h"
68 #include "utils/selfuncs.h"
69 #include "utils/syscache.h"
70 
71 /* GUC parameters */
75 
76 /* Hook for plugins to get control in planner() */
78 
79 /* Hook for plugins to get control when grouping_planner() plans upper rels */
81 
82 
83 /* Expression kind codes for preprocess_expression */
84 #define EXPRKIND_QUAL 0
85 #define EXPRKIND_TARGET 1
86 #define EXPRKIND_RTFUNC 2
87 #define EXPRKIND_RTFUNC_LATERAL 3
88 #define EXPRKIND_VALUES 4
89 #define EXPRKIND_VALUES_LATERAL 5
90 #define EXPRKIND_LIMIT 6
91 #define EXPRKIND_APPINFO 7
92 #define EXPRKIND_PHV 8
93 #define EXPRKIND_TABLESAMPLE 9
94 #define EXPRKIND_ARBITER_ELEM 10
95 #define EXPRKIND_TABLEFUNC 11
96 #define EXPRKIND_TABLEFUNC_LATERAL 12
97 
98 /*
99  * Data specific to grouping sets
100  */
101 typedef struct
102 {
112 
113 /*
114  * Temporary structure for use during WindowClause reordering in order to be
115  * able to sort WindowClauses on partitioning/ordering prefix.
116  */
117 typedef struct
118 {
120  List *uniqueOrder; /* A List of unique ordering/partitioning
121  * clauses per Window */
123 
124 /* Passthrough data for standard_qp_callback */
125 typedef struct
126 {
127  List *activeWindows; /* active windows, if any */
128  grouping_sets_data *gset_data; /* grouping sets data, if any */
130 
131 /* Local functions */
132 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
133 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
134 static void grouping_planner(PlannerInfo *root, double tuple_fraction);
136 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
137  int *tleref_to_colnum_map);
138 static void preprocess_rowmarks(PlannerInfo *root);
139 static double preprocess_limit(PlannerInfo *root,
140  double tuple_fraction,
141  int64 *offset_est, int64 *count_est);
143 static List *preprocess_groupclause(PlannerInfo *root, List *force);
144 static List *extract_rollup_sets(List *groupingSets);
145 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
146 static void standard_qp_callback(PlannerInfo *root, void *extra);
147 static double get_number_of_groups(PlannerInfo *root,
148  double path_rows,
149  grouping_sets_data *gd,
150  List *target_list);
152  RelOptInfo *input_rel,
153  PathTarget *target,
154  bool target_parallel_safe,
155  grouping_sets_data *gd);
156 static bool is_degenerate_grouping(PlannerInfo *root);
158  RelOptInfo *input_rel,
159  RelOptInfo *grouped_rel);
160 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
161  PathTarget *target, bool target_parallel_safe,
162  Node *havingQual);
164  RelOptInfo *input_rel,
165  RelOptInfo *grouped_rel,
166  const AggClauseCosts *agg_costs,
167  grouping_sets_data *gd,
168  GroupPathExtraData *extra,
169  RelOptInfo **partially_grouped_rel_p);
170 static void consider_groupingsets_paths(PlannerInfo *root,
171  RelOptInfo *grouped_rel,
172  Path *path,
173  bool is_sorted,
174  bool can_hash,
175  grouping_sets_data *gd,
176  const AggClauseCosts *agg_costs,
177  double dNumGroups);
179  RelOptInfo *input_rel,
180  PathTarget *input_target,
181  PathTarget *output_target,
182  bool output_target_parallel_safe,
183  WindowFuncLists *wflists,
184  List *activeWindows);
185 static void create_one_window_path(PlannerInfo *root,
186  RelOptInfo *window_rel,
187  Path *path,
188  PathTarget *input_target,
189  PathTarget *output_target,
190  WindowFuncLists *wflists,
191  List *activeWindows);
193  RelOptInfo *input_rel);
195  RelOptInfo *input_rel,
196  RelOptInfo *final_distinct_rel);
198  RelOptInfo *input_rel,
199  RelOptInfo *distinct_rel);
201  RelOptInfo *input_rel,
202  PathTarget *target,
203  bool target_parallel_safe,
204  double limit_tuples);
206  PathTarget *final_target);
208  PathTarget *grouping_target,
209  Node *havingQual);
210 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
211 static void optimize_window_clauses(PlannerInfo *root,
212  WindowFuncLists *wflists);
213 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
215  PathTarget *final_target,
216  List *activeWindows);
218  List *tlist);
220  PathTarget *final_target,
221  bool *have_postponed_srfs);
222 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
223  List *targets, List *targets_contain_srfs);
224 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
225  RelOptInfo *grouped_rel,
226  RelOptInfo *partially_grouped_rel,
227  const AggClauseCosts *agg_costs,
228  grouping_sets_data *gd,
229  double dNumGroups,
230  GroupPathExtraData *extra);
232  RelOptInfo *grouped_rel,
233  RelOptInfo *input_rel,
234  grouping_sets_data *gd,
235  GroupPathExtraData *extra,
236  bool force_rel_creation);
237 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
238 static bool can_partial_agg(PlannerInfo *root);
240  RelOptInfo *rel,
241  List *scanjoin_targets,
242  List *scanjoin_targets_contain_srfs,
243  bool scanjoin_target_parallel_safe,
244  bool tlist_same_exprs);
246  RelOptInfo *input_rel,
247  RelOptInfo *grouped_rel,
248  RelOptInfo *partially_grouped_rel,
249  const AggClauseCosts *agg_costs,
250  grouping_sets_data *gd,
252  GroupPathExtraData *extra);
253 static bool group_by_has_partkey(RelOptInfo *input_rel,
254  List *targetList,
255  List *groupClause);
256 static int common_prefix_cmp(const void *a, const void *b);
257 
258 
259 /*****************************************************************************
260  *
261  * Query optimizer entry point
262  *
263  * To support loadable plugins that monitor or modify planner behavior,
264  * we provide a hook variable that lets a plugin get control before and
265  * after the standard planning process. The plugin would normally call
266  * standard_planner().
267  *
268  * Note to plugin authors: standard_planner() scribbles on its Query input,
269  * so you'd better copy that data structure if you want to plan more than once.
270  *
271  *****************************************************************************/
272 PlannedStmt *
273 planner(Query *parse, const char *query_string, int cursorOptions,
274  ParamListInfo boundParams)
275 {
276  PlannedStmt *result;
277 
278  if (planner_hook)
279  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
280  else
281  result = standard_planner(parse, query_string, cursorOptions, boundParams);
282  return result;
283 }
284 
285 PlannedStmt *
286 standard_planner(Query *parse, const char *query_string, int cursorOptions,
287  ParamListInfo boundParams)
288 {
289  PlannedStmt *result;
290  PlannerGlobal *glob;
291  double tuple_fraction;
292  PlannerInfo *root;
293  RelOptInfo *final_rel;
294  Path *best_path;
295  Plan *top_plan;
296  ListCell *lp,
297  *lr;
298 
299  /*
300  * Set up global state for this planner invocation. This data is needed
301  * across all levels of sub-Query that might exist in the given command,
302  * so we keep it in a separate struct that's linked to by each per-Query
303  * PlannerInfo.
304  */
305  glob = makeNode(PlannerGlobal);
306 
307  glob->boundParams = boundParams;
308  glob->subplans = NIL;
309  glob->subroots = NIL;
310  glob->rewindPlanIDs = NULL;
311  glob->finalrtable = NIL;
312  glob->finalrteperminfos = NIL;
313  glob->finalrowmarks = NIL;
314  glob->resultRelations = NIL;
315  glob->appendRelations = NIL;
316  glob->relationOids = NIL;
317  glob->invalItems = NIL;
318  glob->paramExecTypes = NIL;
319  glob->lastPHId = 0;
320  glob->lastRowMarkId = 0;
321  glob->lastPlanNodeId = 0;
322  glob->transientPlan = false;
323  glob->dependsOnRole = false;
324 
325  /*
326  * Assess whether it's feasible to use parallel mode for this query. We
327  * can't do this in a standalone backend, or if the command will try to
328  * modify any data, or if this is a cursor operation, or if GUCs are set
329  * to values that don't permit parallelism, or if parallel-unsafe
330  * functions are present in the query tree.
331  *
332  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
333  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
334  * the command is writing into a completely new table which workers won't
335  * be able to see. If the workers could see the table, the fact that
336  * group locking would cause them to ignore the leader's heavyweight GIN
337  * page locks would make this unsafe. We'll have to fix that somehow if
338  * we want to allow parallel inserts in general; updates and deletes have
339  * additional problems especially around combo CIDs.)
340  *
341  * For now, we don't try to use parallel mode if we're running inside a
342  * parallel worker. We might eventually be able to relax this
343  * restriction, but for now it seems best not to have parallel workers
344  * trying to create their own parallel workers.
345  */
346  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
348  parse->commandType == CMD_SELECT &&
349  !parse->hasModifyingCTE &&
351  !IsParallelWorker())
352  {
353  /* all the cheap tests pass, so scan the query tree */
355  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
356  }
357  else
358  {
359  /* skip the query tree scan, just assume it's unsafe */
360  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
361  glob->parallelModeOK = false;
362  }
363 
364  /*
365  * glob->parallelModeNeeded is normally set to false here and changed to
366  * true during plan creation if a Gather or Gather Merge plan is actually
367  * created (cf. create_gather_plan, create_gather_merge_plan).
368  *
369  * However, if debug_parallel_query = on or debug_parallel_query =
370  * regress, then we impose parallel mode whenever it's safe to do so, even
371  * if the final plan doesn't use parallelism. It's not safe to do so if
372  * the query contains anything parallel-unsafe; parallelModeOK will be
373  * false in that case. Note that parallelModeOK can't change after this
374  * point. Otherwise, everything in the query is either parallel-safe or
375  * parallel-restricted, and in either case it should be OK to impose
376  * parallel-mode restrictions. If that ends up breaking something, then
377  * either some function the user included in the query is incorrectly
378  * labeled as parallel-safe or parallel-restricted when in reality it's
379  * parallel-unsafe, or else the query planner itself has a bug.
380  */
381  glob->parallelModeNeeded = glob->parallelModeOK &&
383 
384  /* Determine what fraction of the plan is likely to be scanned */
385  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
386  {
387  /*
388  * We have no real idea how many tuples the user will ultimately FETCH
389  * from a cursor, but it is often the case that he doesn't want 'em
390  * all, or would prefer a fast-start plan anyway so that he can
391  * process some of the tuples sooner. Use a GUC parameter to decide
392  * what fraction to optimize for.
393  */
394  tuple_fraction = cursor_tuple_fraction;
395 
396  /*
397  * We document cursor_tuple_fraction as simply being a fraction, which
398  * means the edge cases 0 and 1 have to be treated specially here. We
399  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
400  */
401  if (tuple_fraction >= 1.0)
402  tuple_fraction = 0.0;
403  else if (tuple_fraction <= 0.0)
404  tuple_fraction = 1e-10;
405  }
406  else
407  {
408  /* Default assumption is we need all the tuples */
409  tuple_fraction = 0.0;
410  }
411 
412  /* primary planning entry point (may recurse for subqueries) */
413  root = subquery_planner(glob, parse, NULL,
414  false, tuple_fraction);
415 
416  /* Select best Path and turn it into a Plan */
417  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
418  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
419 
420  top_plan = create_plan(root, best_path);
421 
422  /*
423  * If creating a plan for a scrollable cursor, make sure it can run
424  * backwards on demand. Add a Material node at the top at need.
425  */
426  if (cursorOptions & CURSOR_OPT_SCROLL)
427  {
428  if (!ExecSupportsBackwardScan(top_plan))
429  top_plan = materialize_finished_plan(top_plan);
430  }
431 
432  /*
433  * Optionally add a Gather node for testing purposes, provided this is
434  * actually a safe thing to do.
435  *
436  * We can add Gather even when top_plan has parallel-safe initPlans, but
437  * then we have to move the initPlans to the Gather node because of
438  * SS_finalize_plan's limitations. That would cause cosmetic breakage of
439  * regression tests when debug_parallel_query = regress, because initPlans
440  * that would normally appear on the top_plan move to the Gather, causing
441  * them to disappear from EXPLAIN output. That doesn't seem worth kluging
442  * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
443  */
445  top_plan->parallel_safe &&
446  (top_plan->initPlan == NIL ||
448  {
449  Gather *gather = makeNode(Gather);
450  Cost initplan_cost;
451  bool unsafe_initplans;
452 
453  gather->plan.targetlist = top_plan->targetlist;
454  gather->plan.qual = NIL;
455  gather->plan.lefttree = top_plan;
456  gather->plan.righttree = NULL;
457  gather->num_workers = 1;
458  gather->single_copy = true;
460 
461  /* Transfer any initPlans to the new top node */
462  gather->plan.initPlan = top_plan->initPlan;
463  top_plan->initPlan = NIL;
464 
465  /*
466  * Since this Gather has no parallel-aware descendants to signal to,
467  * we don't need a rescan Param.
468  */
469  gather->rescan_param = -1;
470 
471  /*
472  * Ideally we'd use cost_gather here, but setting up dummy path data
473  * to satisfy it doesn't seem much cleaner than knowing what it does.
474  */
475  gather->plan.startup_cost = top_plan->startup_cost +
477  gather->plan.total_cost = top_plan->total_cost +
479  gather->plan.plan_rows = top_plan->plan_rows;
480  gather->plan.plan_width = top_plan->plan_width;
481  gather->plan.parallel_aware = false;
482  gather->plan.parallel_safe = false;
483 
484  /*
485  * Delete the initplans' cost from top_plan. We needn't add it to the
486  * Gather node, since the above coding already included it there.
487  */
489  &initplan_cost, &unsafe_initplans);
490  top_plan->startup_cost -= initplan_cost;
491  top_plan->total_cost -= initplan_cost;
492 
493  /* use parallel mode for parallel plans. */
494  root->glob->parallelModeNeeded = true;
495 
496  top_plan = &gather->plan;
497  }
498 
499  /*
500  * If any Params were generated, run through the plan tree and compute
501  * each plan node's extParam/allParam sets. Ideally we'd merge this into
502  * set_plan_references' tree traversal, but for now it has to be separate
503  * because we need to visit subplans before not after main plan.
504  */
505  if (glob->paramExecTypes != NIL)
506  {
507  Assert(list_length(glob->subplans) == list_length(glob->subroots));
508  forboth(lp, glob->subplans, lr, glob->subroots)
509  {
510  Plan *subplan = (Plan *) lfirst(lp);
511  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
512 
513  SS_finalize_plan(subroot, subplan);
514  }
515  SS_finalize_plan(root, top_plan);
516  }
517 
518  /* final cleanup of the plan */
519  Assert(glob->finalrtable == NIL);
520  Assert(glob->finalrteperminfos == NIL);
521  Assert(glob->finalrowmarks == NIL);
522  Assert(glob->resultRelations == NIL);
523  Assert(glob->appendRelations == NIL);
524  top_plan = set_plan_references(root, top_plan);
525  /* ... and the subplans (both regular subplans and initplans) */
526  Assert(list_length(glob->subplans) == list_length(glob->subroots));
527  forboth(lp, glob->subplans, lr, glob->subroots)
528  {
529  Plan *subplan = (Plan *) lfirst(lp);
530  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
531 
532  lfirst(lp) = set_plan_references(subroot, subplan);
533  }
534 
535  /* build the PlannedStmt result */
536  result = makeNode(PlannedStmt);
537 
538  result->commandType = parse->commandType;
539  result->queryId = parse->queryId;
540  result->hasReturning = (parse->returningList != NIL);
541  result->hasModifyingCTE = parse->hasModifyingCTE;
542  result->canSetTag = parse->canSetTag;
543  result->transientPlan = glob->transientPlan;
544  result->dependsOnRole = glob->dependsOnRole;
545  result->parallelModeNeeded = glob->parallelModeNeeded;
546  result->planTree = top_plan;
547  result->rtable = glob->finalrtable;
548  result->permInfos = glob->finalrteperminfos;
549  result->resultRelations = glob->resultRelations;
550  result->appendRelations = glob->appendRelations;
551  result->subplans = glob->subplans;
552  result->rewindPlanIDs = glob->rewindPlanIDs;
553  result->rowMarks = glob->finalrowmarks;
554  result->relationOids = glob->relationOids;
555  result->invalItems = glob->invalItems;
556  result->paramExecTypes = glob->paramExecTypes;
557  /* utilityStmt should be null, but we might as well copy it */
558  result->utilityStmt = parse->utilityStmt;
559  result->stmt_location = parse->stmt_location;
560  result->stmt_len = parse->stmt_len;
561 
562  result->jitFlags = PGJIT_NONE;
563  if (jit_enabled && jit_above_cost >= 0 &&
564  top_plan->total_cost > jit_above_cost)
565  {
566  result->jitFlags |= PGJIT_PERFORM;
567 
568  /*
569  * Decide how much effort should be put into generating better code.
570  */
571  if (jit_optimize_above_cost >= 0 &&
573  result->jitFlags |= PGJIT_OPT3;
574  if (jit_inline_above_cost >= 0 &&
575  top_plan->total_cost > jit_inline_above_cost)
576  result->jitFlags |= PGJIT_INLINE;
577 
578  /*
579  * Decide which operations should be JITed.
580  */
581  if (jit_expressions)
582  result->jitFlags |= PGJIT_EXPR;
584  result->jitFlags |= PGJIT_DEFORM;
585  }
586 
587  if (glob->partition_directory != NULL)
588  DestroyPartitionDirectory(glob->partition_directory);
589 
590  return result;
591 }
592 
593 
594 /*--------------------
595  * subquery_planner
596  * Invokes the planner on a subquery. We recurse to here for each
597  * sub-SELECT found in the query tree.
598  *
599  * glob is the global state for the current planner run.
600  * parse is the querytree produced by the parser & rewriter.
601  * parent_root is the immediate parent Query's info (NULL at the top level).
602  * hasRecursion is true if this is a recursive WITH query.
603  * tuple_fraction is the fraction of tuples we expect will be retrieved.
604  * tuple_fraction is interpreted as explained for grouping_planner, below.
605  *
606  * Basically, this routine does the stuff that should only be done once
607  * per Query object. It then calls grouping_planner. At one time,
608  * grouping_planner could be invoked recursively on the same Query object;
609  * that's not currently true, but we keep the separation between the two
610  * routines anyway, in case we need it again someday.
611  *
612  * subquery_planner will be called recursively to handle sub-Query nodes
613  * found within the query's expressions and rangetable.
614  *
615  * Returns the PlannerInfo struct ("root") that contains all data generated
616  * while planning the subquery. In particular, the Path(s) attached to
617  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
618  * cheapest way(s) to implement the query. The top level will select the
619  * best Path and pass it through createplan.c to produce a finished Plan.
620  *--------------------
621  */
622 PlannerInfo *
624  PlannerInfo *parent_root,
625  bool hasRecursion, double tuple_fraction)
626 {
627  PlannerInfo *root;
628  List *newWithCheckOptions;
629  List *newHaving;
630  bool hasOuterJoins;
631  bool hasResultRTEs;
632  RelOptInfo *final_rel;
633  ListCell *l;
634 
635  /* Create a PlannerInfo data structure for this subquery */
636  root = makeNode(PlannerInfo);
637  root->parse = parse;
638  root->glob = glob;
639  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
640  root->parent_root = parent_root;
641  root->plan_params = NIL;
642  root->outer_params = NULL;
643  root->planner_cxt = CurrentMemoryContext;
644  root->init_plans = NIL;
645  root->cte_plan_ids = NIL;
646  root->multiexpr_params = NIL;
647  root->join_domains = NIL;
648  root->eq_classes = NIL;
649  root->ec_merging_done = false;
650  root->last_rinfo_serial = 0;
651  root->all_result_relids =
652  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
653  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
654  root->append_rel_list = NIL;
655  root->row_identity_vars = NIL;
656  root->rowMarks = NIL;
657  memset(root->upper_rels, 0, sizeof(root->upper_rels));
658  memset(root->upper_targets, 0, sizeof(root->upper_targets));
659  root->processed_groupClause = NIL;
661  root->processed_tlist = NIL;
662  root->update_colnos = NIL;
663  root->grouping_map = NULL;
664  root->minmax_aggs = NIL;
665  root->qual_security_level = 0;
666  root->hasPseudoConstantQuals = false;
667  root->hasAlternativeSubPlans = false;
668  root->placeholdersFrozen = false;
669  root->hasRecursion = hasRecursion;
670  if (hasRecursion)
672  else
673  root->wt_param_id = -1;
674  root->non_recursive_path = NULL;
675  root->partColsUpdated = false;
676 
677  /*
678  * Create the top-level join domain. This won't have valid contents until
679  * deconstruct_jointree fills it in, but the node needs to exist before
680  * that so we can build EquivalenceClasses referencing it.
681  */
683 
684  /*
685  * If there is a WITH list, process each WITH query and either convert it
686  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
687  */
688  if (parse->cteList)
689  SS_process_ctes(root);
690 
691  /*
692  * If it's a MERGE command, transform the joinlist as appropriate.
693  */
695 
696  /*
697  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
698  * that we don't need so many special cases to deal with that situation.
699  */
701 
702  /*
703  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
704  * to transform them into joins. Note that this step does not descend
705  * into subqueries; if we pull up any subqueries below, their SubLinks are
706  * processed just before pulling them up.
707  */
708  if (parse->hasSubLinks)
709  pull_up_sublinks(root);
710 
711  /*
712  * Scan the rangetable for function RTEs, do const-simplification on them,
713  * and then inline them if possible (producing subqueries that might get
714  * pulled up next). Recursion issues here are handled in the same way as
715  * for SubLinks.
716  */
718 
719  /*
720  * Check to see if any subqueries in the jointree can be merged into this
721  * query.
722  */
723  pull_up_subqueries(root);
724 
725  /*
726  * If this is a simple UNION ALL query, flatten it into an appendrel. We
727  * do this now because it requires applying pull_up_subqueries to the leaf
728  * queries of the UNION ALL, which weren't touched above because they
729  * weren't referenced by the jointree (they will be after we do this).
730  */
731  if (parse->setOperations)
733 
734  /*
735  * Survey the rangetable to see what kinds of entries are present. We can
736  * skip some later processing if relevant SQL features are not used; for
737  * example if there are no JOIN RTEs we can avoid the expense of doing
738  * flatten_join_alias_vars(). This must be done after we have finished
739  * adding rangetable entries, of course. (Note: actually, processing of
740  * inherited or partitioned rels can cause RTEs for their child tables to
741  * get added later; but those must all be RTE_RELATION entries, so they
742  * don't invalidate the conclusions drawn here.)
743  */
744  root->hasJoinRTEs = false;
745  root->hasLateralRTEs = false;
746  hasOuterJoins = false;
747  hasResultRTEs = false;
748  foreach(l, parse->rtable)
749  {
751 
752  switch (rte->rtekind)
753  {
754  case RTE_RELATION:
755  if (rte->inh)
756  {
757  /*
758  * Check to see if the relation actually has any children;
759  * if not, clear the inh flag so we can treat it as a
760  * plain base relation.
761  *
762  * Note: this could give a false-positive result, if the
763  * rel once had children but no longer does. We used to
764  * be able to clear rte->inh later on when we discovered
765  * that, but no more; we have to handle such cases as
766  * full-fledged inheritance.
767  */
768  rte->inh = has_subclass(rte->relid);
769  }
770  break;
771  case RTE_JOIN:
772  root->hasJoinRTEs = true;
773  if (IS_OUTER_JOIN(rte->jointype))
774  hasOuterJoins = true;
775  break;
776  case RTE_RESULT:
777  hasResultRTEs = true;
778  break;
779  default:
780  /* No work here for other RTE types */
781  break;
782  }
783 
784  if (rte->lateral)
785  root->hasLateralRTEs = true;
786 
787  /*
788  * We can also determine the maximum security level required for any
789  * securityQuals now. Addition of inheritance-child RTEs won't affect
790  * this, because child tables don't have their own securityQuals; see
791  * expand_single_inheritance_child().
792  */
793  if (rte->securityQuals)
795  list_length(rte->securityQuals));
796  }
797 
798  /*
799  * If we have now verified that the query target relation is
800  * non-inheriting, mark it as a leaf target.
801  */
802  if (parse->resultRelation)
803  {
804  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
805 
806  if (!rte->inh)
807  root->leaf_result_relids =
808  bms_make_singleton(parse->resultRelation);
809  }
810 
811  /*
812  * Preprocess RowMark information. We need to do this after subquery
813  * pullup, so that all base relations are present.
814  */
815  preprocess_rowmarks(root);
816 
817  /*
818  * Set hasHavingQual to remember if HAVING clause is present. Needed
819  * because preprocess_expression will reduce a constant-true condition to
820  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
821  */
822  root->hasHavingQual = (parse->havingQual != NULL);
823 
824  /*
825  * Do expression preprocessing on targetlist and quals, as well as other
826  * random expressions in the querytree. Note that we do not need to
827  * handle sort/group expressions explicitly, because they are actually
828  * part of the targetlist.
829  */
830  parse->targetList = (List *)
831  preprocess_expression(root, (Node *) parse->targetList,
833 
834  /* Constant-folding might have removed all set-returning functions */
835  if (parse->hasTargetSRFs)
836  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
837 
838  newWithCheckOptions = NIL;
839  foreach(l, parse->withCheckOptions)
840  {
842 
843  wco->qual = preprocess_expression(root, wco->qual,
844  EXPRKIND_QUAL);
845  if (wco->qual != NULL)
846  newWithCheckOptions = lappend(newWithCheckOptions, wco);
847  }
848  parse->withCheckOptions = newWithCheckOptions;
849 
850  parse->returningList = (List *)
851  preprocess_expression(root, (Node *) parse->returningList,
853 
854  preprocess_qual_conditions(root, (Node *) parse->jointree);
855 
856  parse->havingQual = preprocess_expression(root, parse->havingQual,
857  EXPRKIND_QUAL);
858 
859  foreach(l, parse->windowClause)
860  {
862 
863  /* partitionClause/orderClause are sort/group expressions */
866  wc->endOffset = preprocess_expression(root, wc->endOffset,
868  wc->runCondition = (List *) preprocess_expression(root,
869  (Node *) wc->runCondition,
871  }
872 
873  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
875  parse->limitCount = preprocess_expression(root, parse->limitCount,
877 
878  if (parse->onConflict)
879  {
880  parse->onConflict->arbiterElems = (List *)
882  (Node *) parse->onConflict->arbiterElems,
884  parse->onConflict->arbiterWhere =
886  parse->onConflict->arbiterWhere,
887  EXPRKIND_QUAL);
888  parse->onConflict->onConflictSet = (List *)
890  (Node *) parse->onConflict->onConflictSet,
892  parse->onConflict->onConflictWhere =
894  parse->onConflict->onConflictWhere,
895  EXPRKIND_QUAL);
896  /* exclRelTlist contains only Vars, so no preprocessing needed */
897  }
898 
899  foreach(l, parse->mergeActionList)
900  {
902 
903  action->targetList = (List *)
905  (Node *) action->targetList,
907  action->qual =
909  (Node *) action->qual,
910  EXPRKIND_QUAL);
911  }
912 
913  root->append_rel_list = (List *)
916 
917  /* Also need to preprocess expressions within RTEs */
918  foreach(l, parse->rtable)
919  {
921  int kind;
922  ListCell *lcsq;
923 
924  if (rte->rtekind == RTE_RELATION)
925  {
926  if (rte->tablesample)
927  rte->tablesample = (TableSampleClause *)
929  (Node *) rte->tablesample,
931  }
932  else if (rte->rtekind == RTE_SUBQUERY)
933  {
934  /*
935  * We don't want to do all preprocessing yet on the subquery's
936  * expressions, since that will happen when we plan it. But if it
937  * contains any join aliases of our level, those have to get
938  * expanded now, because planning of the subquery won't do it.
939  * That's only possible if the subquery is LATERAL.
940  */
941  if (rte->lateral && root->hasJoinRTEs)
942  rte->subquery = (Query *)
943  flatten_join_alias_vars(root, root->parse,
944  (Node *) rte->subquery);
945  }
946  else if (rte->rtekind == RTE_FUNCTION)
947  {
948  /* Preprocess the function expression(s) fully */
950  rte->functions = (List *)
951  preprocess_expression(root, (Node *) rte->functions, kind);
952  }
953  else if (rte->rtekind == RTE_TABLEFUNC)
954  {
955  /* Preprocess the function expression(s) fully */
957  rte->tablefunc = (TableFunc *)
958  preprocess_expression(root, (Node *) rte->tablefunc, kind);
959  }
960  else if (rte->rtekind == RTE_VALUES)
961  {
962  /* Preprocess the values lists fully */
964  rte->values_lists = (List *)
965  preprocess_expression(root, (Node *) rte->values_lists, kind);
966  }
967 
968  /*
969  * Process each element of the securityQuals list as if it were a
970  * separate qual expression (as indeed it is). We need to do it this
971  * way to get proper canonicalization of AND/OR structure. Note that
972  * this converts each element into an implicit-AND sublist.
973  */
974  foreach(lcsq, rte->securityQuals)
975  {
976  lfirst(lcsq) = preprocess_expression(root,
977  (Node *) lfirst(lcsq),
978  EXPRKIND_QUAL);
979  }
980  }
981 
982  /*
983  * Now that we are done preprocessing expressions, and in particular done
984  * flattening join alias variables, get rid of the joinaliasvars lists.
985  * They no longer match what expressions in the rest of the tree look
986  * like, because we have not preprocessed expressions in those lists (and
987  * do not want to; for example, expanding a SubLink there would result in
988  * a useless unreferenced subplan). Leaving them in place simply creates
989  * a hazard for later scans of the tree. We could try to prevent that by
990  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
991  * but that doesn't sound very reliable.
992  */
993  if (root->hasJoinRTEs)
994  {
995  foreach(l, parse->rtable)
996  {
998 
999  rte->joinaliasvars = NIL;
1000  }
1001  }
1002 
1003  /*
1004  * In some cases we may want to transfer a HAVING clause into WHERE. We
1005  * cannot do so if the HAVING clause contains aggregates (obviously) or
1006  * volatile functions (since a HAVING clause is supposed to be executed
1007  * only once per group). We also can't do this if there are any nonempty
1008  * grouping sets; moving such a clause into WHERE would potentially change
1009  * the results, if any referenced column isn't present in all the grouping
1010  * sets. (If there are only empty grouping sets, then the HAVING clause
1011  * must be degenerate as discussed below.)
1012  *
1013  * Also, it may be that the clause is so expensive to execute that we're
1014  * better off doing it only once per group, despite the loss of
1015  * selectivity. This is hard to estimate short of doing the entire
1016  * planning process twice, so we use a heuristic: clauses containing
1017  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1018  * clause into WHERE, in hopes of eliminating tuples before aggregation
1019  * instead of after.
1020  *
1021  * If the query has explicit grouping then we can simply move such a
1022  * clause into WHERE; any group that fails the clause will not be in the
1023  * output because none of its tuples will reach the grouping or
1024  * aggregation stage. Otherwise we must have a degenerate (variable-free)
1025  * HAVING clause, which we put in WHERE so that query_planner() can use it
1026  * in a gating Result node, but also keep in HAVING to ensure that we
1027  * don't emit a bogus aggregated row. (This could be done better, but it
1028  * seems not worth optimizing.)
1029  *
1030  * Note that both havingQual and parse->jointree->quals are in
1031  * implicitly-ANDed-list form at this point, even though they are declared
1032  * as Node *.
1033  */
1034  newHaving = NIL;
1035  foreach(l, (List *) parse->havingQual)
1036  {
1037  Node *havingclause = (Node *) lfirst(l);
1038 
1039  if ((parse->groupClause && parse->groupingSets) ||
1040  contain_agg_clause(havingclause) ||
1041  contain_volatile_functions(havingclause) ||
1042  contain_subplans(havingclause))
1043  {
1044  /* keep it in HAVING */
1045  newHaving = lappend(newHaving, havingclause);
1046  }
1047  else if (parse->groupClause && !parse->groupingSets)
1048  {
1049  /* move it to WHERE */
1050  parse->jointree->quals = (Node *)
1051  lappend((List *) parse->jointree->quals, havingclause);
1052  }
1053  else
1054  {
1055  /* put a copy in WHERE, keep it in HAVING */
1056  parse->jointree->quals = (Node *)
1057  lappend((List *) parse->jointree->quals,
1058  copyObject(havingclause));
1059  newHaving = lappend(newHaving, havingclause);
1060  }
1061  }
1062  parse->havingQual = (Node *) newHaving;
1063 
1064  /*
1065  * If we have any outer joins, try to reduce them to plain inner joins.
1066  * This step is most easily done after we've done expression
1067  * preprocessing.
1068  */
1069  if (hasOuterJoins)
1070  reduce_outer_joins(root);
1071 
1072  /*
1073  * If we have any RTE_RESULT relations, see if they can be deleted from
1074  * the jointree. We also rely on this processing to flatten single-child
1075  * FromExprs underneath outer joins. This step is most effectively done
1076  * after we've done expression preprocessing and outer join reduction.
1077  */
1078  if (hasResultRTEs || hasOuterJoins)
1080 
1081  /*
1082  * Do the main planning.
1083  */
1084  grouping_planner(root, tuple_fraction);
1085 
1086  /*
1087  * Capture the set of outer-level param IDs we have access to, for use in
1088  * extParam/allParam calculations later.
1089  */
1091 
1092  /*
1093  * If any initPlans were created in this query level, adjust the surviving
1094  * Paths' costs and parallel-safety flags to account for them. The
1095  * initPlans won't actually get attached to the plan tree till
1096  * create_plan() runs, but we must include their effects now.
1097  */
1098  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1099  SS_charge_for_initplans(root, final_rel);
1100 
1101  /*
1102  * Make sure we've identified the cheapest Path for the final rel. (By
1103  * doing this here not in grouping_planner, we include initPlan costs in
1104  * the decision, though it's unlikely that will change anything.)
1105  */
1106  set_cheapest(final_rel);
1107 
1108  return root;
1109 }
1110 
1111 /*
1112  * preprocess_expression
1113  * Do subquery_planner's preprocessing work for an expression,
1114  * which can be a targetlist, a WHERE clause (including JOIN/ON
1115  * conditions), a HAVING clause, or a few other things.
1116  */
1117 static Node *
1118 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1119 {
1120  /*
1121  * Fall out quickly if expression is empty. This occurs often enough to
1122  * be worth checking. Note that null->null is the correct conversion for
1123  * implicit-AND result format, too.
1124  */
1125  if (expr == NULL)
1126  return NULL;
1127 
1128  /*
1129  * If the query has any join RTEs, replace join alias variables with
1130  * base-relation variables. We must do this first, since any expressions
1131  * we may extract from the joinaliasvars lists have not been preprocessed.
1132  * For example, if we did this after sublink processing, sublinks expanded
1133  * out from join aliases would not get processed. But we can skip this in
1134  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1135  * they can't contain any Vars of the current query level.
1136  */
1137  if (root->hasJoinRTEs &&
1138  !(kind == EXPRKIND_RTFUNC ||
1139  kind == EXPRKIND_VALUES ||
1140  kind == EXPRKIND_TABLESAMPLE ||
1141  kind == EXPRKIND_TABLEFUNC))
1142  expr = flatten_join_alias_vars(root, root->parse, expr);
1143 
1144  /*
1145  * Simplify constant expressions. For function RTEs, this was already
1146  * done by preprocess_function_rtes. (But note we must do it again for
1147  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1148  * un-simplified subexpressions inserted by flattening of subqueries or
1149  * join alias variables.)
1150  *
1151  * Note: an essential effect of this is to convert named-argument function
1152  * calls to positional notation and insert the current actual values of
1153  * any default arguments for functions. To ensure that happens, we *must*
1154  * process all expressions here. Previous PG versions sometimes skipped
1155  * const-simplification if it didn't seem worth the trouble, but we can't
1156  * do that anymore.
1157  *
1158  * Note: this also flattens nested AND and OR expressions into N-argument
1159  * form. All processing of a qual expression after this point must be
1160  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1161  * with AND directly under AND, nor OR directly under OR.
1162  */
1163  if (kind != EXPRKIND_RTFUNC)
1164  expr = eval_const_expressions(root, expr);
1165 
1166  /*
1167  * If it's a qual or havingQual, canonicalize it.
1168  */
1169  if (kind == EXPRKIND_QUAL)
1170  {
1171  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1172 
1173 #ifdef OPTIMIZER_DEBUG
1174  printf("After canonicalize_qual()\n");
1175  pprint(expr);
1176 #endif
1177  }
1178 
1179  /*
1180  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1181  * hashfuncid of any that might execute more quickly by using hash lookups
1182  * instead of a linear search.
1183  */
1184  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1185  {
1187  }
1188 
1189  /* Expand SubLinks to SubPlans */
1190  if (root->parse->hasSubLinks)
1191  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1192 
1193  /*
1194  * XXX do not insert anything here unless you have grokked the comments in
1195  * SS_replace_correlation_vars ...
1196  */
1197 
1198  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1199  if (root->query_level > 1)
1200  expr = SS_replace_correlation_vars(root, expr);
1201 
1202  /*
1203  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1204  * don't want to do this before eval_const_expressions, since the latter
1205  * would be unable to simplify a top-level AND correctly. Also,
1206  * SS_process_sublinks expects explicit-AND format.)
1207  */
1208  if (kind == EXPRKIND_QUAL)
1209  expr = (Node *) make_ands_implicit((Expr *) expr);
1210 
1211  return expr;
1212 }
1213 
1214 /*
1215  * preprocess_qual_conditions
1216  * Recursively scan the query's jointree and do subquery_planner's
1217  * preprocessing work on each qual condition found therein.
1218  */
1219 static void
1221 {
1222  if (jtnode == NULL)
1223  return;
1224  if (IsA(jtnode, RangeTblRef))
1225  {
1226  /* nothing to do here */
1227  }
1228  else if (IsA(jtnode, FromExpr))
1229  {
1230  FromExpr *f = (FromExpr *) jtnode;
1231  ListCell *l;
1232 
1233  foreach(l, f->fromlist)
1235 
1237  }
1238  else if (IsA(jtnode, JoinExpr))
1239  {
1240  JoinExpr *j = (JoinExpr *) jtnode;
1241 
1242  preprocess_qual_conditions(root, j->larg);
1243  preprocess_qual_conditions(root, j->rarg);
1244 
1245  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1246  }
1247  else
1248  elog(ERROR, "unrecognized node type: %d",
1249  (int) nodeTag(jtnode));
1250 }
1251 
1252 /*
1253  * preprocess_phv_expression
1254  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1255  *
1256  * If a LATERAL subquery references an output of another subquery, and that
1257  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1258  * join, then we'll push the PlaceHolderVar expression down into the subquery
1259  * and later pull it back up during find_lateral_references, which runs after
1260  * subquery_planner has preprocessed all the expressions that were in the
1261  * current query level to start with. So we need to preprocess it then.
1262  */
1263 Expr *
1265 {
1266  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1267 }
1268 
1269 /*--------------------
1270  * grouping_planner
1271  * Perform planning steps related to grouping, aggregation, etc.
1272  *
1273  * This function adds all required top-level processing to the scan/join
1274  * Path(s) produced by query_planner.
1275  *
1276  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1277  * tuple_fraction is interpreted as follows:
1278  * 0: expect all tuples to be retrieved (normal case)
1279  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1280  * from the plan to be retrieved
1281  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1282  * expected to be retrieved (ie, a LIMIT specification)
1283  *
1284  * Returns nothing; the useful output is in the Paths we attach to the
1285  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1286  * root->processed_tlist contains the final processed targetlist.
1287  *
1288  * Note that we have not done set_cheapest() on the final rel; it's convenient
1289  * to leave this to the caller.
1290  *--------------------
1291  */
1292 static void
1293 grouping_planner(PlannerInfo *root, double tuple_fraction)
1294 {
1295  Query *parse = root->parse;
1296  int64 offset_est = 0;
1297  int64 count_est = 0;
1298  double limit_tuples = -1.0;
1299  bool have_postponed_srfs = false;
1300  PathTarget *final_target;
1301  List *final_targets;
1302  List *final_targets_contain_srfs;
1303  bool final_target_parallel_safe;
1304  RelOptInfo *current_rel;
1305  RelOptInfo *final_rel;
1306  FinalPathExtraData extra;
1307  ListCell *lc;
1308 
1309  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1310  if (parse->limitCount || parse->limitOffset)
1311  {
1312  tuple_fraction = preprocess_limit(root, tuple_fraction,
1313  &offset_est, &count_est);
1314 
1315  /*
1316  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1317  * estimate the effects of using a bounded sort.
1318  */
1319  if (count_est > 0 && offset_est >= 0)
1320  limit_tuples = (double) count_est + (double) offset_est;
1321  }
1322 
1323  /* Make tuple_fraction accessible to lower-level routines */
1324  root->tuple_fraction = tuple_fraction;
1325 
1326  if (parse->setOperations)
1327  {
1328  /*
1329  * If there's a top-level ORDER BY, assume we have to fetch all the
1330  * tuples. This might be too simplistic given all the hackery below
1331  * to possibly avoid the sort; but the odds of accurate estimates here
1332  * are pretty low anyway. XXX try to get rid of this in favor of
1333  * letting plan_set_operations generate both fast-start and
1334  * cheapest-total paths.
1335  */
1336  if (parse->sortClause)
1337  root->tuple_fraction = 0.0;
1338 
1339  /*
1340  * Construct Paths for set operations. The results will not need any
1341  * work except perhaps a top-level sort and/or LIMIT. Note that any
1342  * special work for recursive unions is the responsibility of
1343  * plan_set_operations.
1344  */
1345  current_rel = plan_set_operations(root);
1346 
1347  /*
1348  * We should not need to call preprocess_targetlist, since we must be
1349  * in a SELECT query node. Instead, use the processed_tlist returned
1350  * by plan_set_operations (since this tells whether it returned any
1351  * resjunk columns!), and transfer any sort key information from the
1352  * original tlist.
1353  */
1354  Assert(parse->commandType == CMD_SELECT);
1355 
1356  /* for safety, copy processed_tlist instead of modifying in-place */
1357  root->processed_tlist =
1359  parse->targetList);
1360 
1361  /* Also extract the PathTarget form of the setop result tlist */
1362  final_target = current_rel->cheapest_total_path->pathtarget;
1363 
1364  /* And check whether it's parallel safe */
1365  final_target_parallel_safe =
1366  is_parallel_safe(root, (Node *) final_target->exprs);
1367 
1368  /* The setop result tlist couldn't contain any SRFs */
1369  Assert(!parse->hasTargetSRFs);
1370  final_targets = final_targets_contain_srfs = NIL;
1371 
1372  /*
1373  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1374  * checked already, but let's make sure).
1375  */
1376  if (parse->rowMarks)
1377  ereport(ERROR,
1378  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1379  /*------
1380  translator: %s is a SQL row locking clause such as FOR UPDATE */
1381  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1383  parse->rowMarks)->strength))));
1384 
1385  /*
1386  * Calculate pathkeys that represent result ordering requirements
1387  */
1388  Assert(parse->distinctClause == NIL);
1390  parse->sortClause,
1391  root->processed_tlist);
1392  }
1393  else
1394  {
1395  /* No set operations, do regular planning */
1396  PathTarget *sort_input_target;
1397  List *sort_input_targets;
1398  List *sort_input_targets_contain_srfs;
1399  bool sort_input_target_parallel_safe;
1400  PathTarget *grouping_target;
1401  List *grouping_targets;
1402  List *grouping_targets_contain_srfs;
1403  bool grouping_target_parallel_safe;
1404  PathTarget *scanjoin_target;
1405  List *scanjoin_targets;
1406  List *scanjoin_targets_contain_srfs;
1407  bool scanjoin_target_parallel_safe;
1408  bool scanjoin_target_same_exprs;
1409  bool have_grouping;
1410  WindowFuncLists *wflists = NULL;
1411  List *activeWindows = NIL;
1412  grouping_sets_data *gset_data = NULL;
1413  standard_qp_extra qp_extra;
1414 
1415  /* A recursive query should always have setOperations */
1416  Assert(!root->hasRecursion);
1417 
1418  /* Preprocess grouping sets and GROUP BY clause, if any */
1419  if (parse->groupingSets)
1420  {
1421  gset_data = preprocess_grouping_sets(root);
1422  }
1423  else if (parse->groupClause)
1424  {
1425  /* Preprocess regular GROUP BY clause, if any */
1427  /* Remove any redundant GROUP BY columns */
1429  }
1430 
1431  /*
1432  * Preprocess targetlist. Note that much of the remaining planning
1433  * work will be done with the PathTarget representation of tlists, but
1434  * we must also maintain the full representation of the final tlist so
1435  * that we can transfer its decoration (resnames etc) to the topmost
1436  * tlist of the finished Plan. This is kept in processed_tlist.
1437  */
1438  preprocess_targetlist(root);
1439 
1440  /*
1441  * Mark all the aggregates with resolved aggtranstypes, and detect
1442  * aggregates that are duplicates or can share transition state. We
1443  * must do this before slicing and dicing the tlist into various
1444  * pathtargets, else some copies of the Aggref nodes might escape
1445  * being marked.
1446  */
1447  if (parse->hasAggs)
1448  {
1449  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1450  preprocess_aggrefs(root, (Node *) parse->havingQual);
1451  }
1452 
1453  /*
1454  * Locate any window functions in the tlist. (We don't need to look
1455  * anywhere else, since expressions used in ORDER BY will be in there
1456  * too.) Note that they could all have been eliminated by constant
1457  * folding, in which case we don't need to do any more work.
1458  */
1459  if (parse->hasWindowFuncs)
1460  {
1461  wflists = find_window_functions((Node *) root->processed_tlist,
1462  list_length(parse->windowClause));
1463  if (wflists->numWindowFuncs > 0)
1464  {
1465  /*
1466  * See if any modifications can be made to each WindowClause
1467  * to allow the executor to execute the WindowFuncs more
1468  * quickly.
1469  */
1470  optimize_window_clauses(root, wflists);
1471 
1472  activeWindows = select_active_windows(root, wflists);
1473  }
1474  else
1475  parse->hasWindowFuncs = false;
1476  }
1477 
1478  /*
1479  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1480  * adding logic between here and the query_planner() call. Anything
1481  * that is needed in MIN/MAX-optimizable cases will have to be
1482  * duplicated in planagg.c.
1483  */
1484  if (parse->hasAggs)
1486 
1487  /*
1488  * Figure out whether there's a hard limit on the number of rows that
1489  * query_planner's result subplan needs to return. Even if we know a
1490  * hard limit overall, it doesn't apply if the query has any
1491  * grouping/aggregation operations, or SRFs in the tlist.
1492  */
1493  if (parse->groupClause ||
1494  parse->groupingSets ||
1495  parse->distinctClause ||
1496  parse->hasAggs ||
1497  parse->hasWindowFuncs ||
1498  parse->hasTargetSRFs ||
1499  root->hasHavingQual)
1500  root->limit_tuples = -1.0;
1501  else
1502  root->limit_tuples = limit_tuples;
1503 
1504  /* Set up data needed by standard_qp_callback */
1505  qp_extra.activeWindows = activeWindows;
1506  qp_extra.gset_data = gset_data;
1507 
1508  /*
1509  * Generate the best unsorted and presorted paths for the scan/join
1510  * portion of this Query, ie the processing represented by the
1511  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1512  * We also generate (in standard_qp_callback) pathkey representations
1513  * of the query's sort clause, distinct clause, etc.
1514  */
1515  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1516 
1517  /*
1518  * Convert the query's result tlist into PathTarget format.
1519  *
1520  * Note: this cannot be done before query_planner() has performed
1521  * appendrel expansion, because that might add resjunk entries to
1522  * root->processed_tlist. Waiting till afterwards is also helpful
1523  * because the target width estimates can use per-Var width numbers
1524  * that were obtained within query_planner().
1525  */
1526  final_target = create_pathtarget(root, root->processed_tlist);
1527  final_target_parallel_safe =
1528  is_parallel_safe(root, (Node *) final_target->exprs);
1529 
1530  /*
1531  * If ORDER BY was given, consider whether we should use a post-sort
1532  * projection, and compute the adjusted target for preceding steps if
1533  * so.
1534  */
1535  if (parse->sortClause)
1536  {
1537  sort_input_target = make_sort_input_target(root,
1538  final_target,
1539  &have_postponed_srfs);
1540  sort_input_target_parallel_safe =
1541  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1542  }
1543  else
1544  {
1545  sort_input_target = final_target;
1546  sort_input_target_parallel_safe = final_target_parallel_safe;
1547  }
1548 
1549  /*
1550  * If we have window functions to deal with, the output from any
1551  * grouping step needs to be what the window functions want;
1552  * otherwise, it should be sort_input_target.
1553  */
1554  if (activeWindows)
1555  {
1556  grouping_target = make_window_input_target(root,
1557  final_target,
1558  activeWindows);
1559  grouping_target_parallel_safe =
1560  is_parallel_safe(root, (Node *) grouping_target->exprs);
1561  }
1562  else
1563  {
1564  grouping_target = sort_input_target;
1565  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1566  }
1567 
1568  /*
1569  * If we have grouping or aggregation to do, the topmost scan/join
1570  * plan node must emit what the grouping step wants; otherwise, it
1571  * should emit grouping_target.
1572  */
1573  have_grouping = (parse->groupClause || parse->groupingSets ||
1574  parse->hasAggs || root->hasHavingQual);
1575  if (have_grouping)
1576  {
1577  scanjoin_target = make_group_input_target(root, final_target);
1578  scanjoin_target_parallel_safe =
1579  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1580  }
1581  else
1582  {
1583  scanjoin_target = grouping_target;
1584  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1585  }
1586 
1587  /*
1588  * If there are any SRFs in the targetlist, we must separate each of
1589  * these PathTargets into SRF-computing and SRF-free targets. Replace
1590  * each of the named targets with a SRF-free version, and remember the
1591  * list of additional projection steps we need to add afterwards.
1592  */
1593  if (parse->hasTargetSRFs)
1594  {
1595  /* final_target doesn't recompute any SRFs in sort_input_target */
1596  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1597  &final_targets,
1598  &final_targets_contain_srfs);
1599  final_target = linitial_node(PathTarget, final_targets);
1600  Assert(!linitial_int(final_targets_contain_srfs));
1601  /* likewise for sort_input_target vs. grouping_target */
1602  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1603  &sort_input_targets,
1604  &sort_input_targets_contain_srfs);
1605  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1606  Assert(!linitial_int(sort_input_targets_contain_srfs));
1607  /* likewise for grouping_target vs. scanjoin_target */
1608  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1609  &grouping_targets,
1610  &grouping_targets_contain_srfs);
1611  grouping_target = linitial_node(PathTarget, grouping_targets);
1612  Assert(!linitial_int(grouping_targets_contain_srfs));
1613  /* scanjoin_target will not have any SRFs precomputed for it */
1614  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1615  &scanjoin_targets,
1616  &scanjoin_targets_contain_srfs);
1617  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1618  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1619  }
1620  else
1621  {
1622  /* initialize lists; for most of these, dummy values are OK */
1623  final_targets = final_targets_contain_srfs = NIL;
1624  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1625  grouping_targets = grouping_targets_contain_srfs = NIL;
1626  scanjoin_targets = list_make1(scanjoin_target);
1627  scanjoin_targets_contain_srfs = NIL;
1628  }
1629 
1630  /* Apply scan/join target. */
1631  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1632  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1633  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1634  scanjoin_targets_contain_srfs,
1635  scanjoin_target_parallel_safe,
1636  scanjoin_target_same_exprs);
1637 
1638  /*
1639  * Save the various upper-rel PathTargets we just computed into
1640  * root->upper_targets[]. The core code doesn't use this, but it
1641  * provides a convenient place for extensions to get at the info. For
1642  * consistency, we save all the intermediate targets, even though some
1643  * of the corresponding upperrels might not be needed for this query.
1644  */
1645  root->upper_targets[UPPERREL_FINAL] = final_target;
1646  root->upper_targets[UPPERREL_ORDERED] = final_target;
1647  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1648  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1649  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1650  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1651 
1652  /*
1653  * If we have grouping and/or aggregation, consider ways to implement
1654  * that. We build a new upperrel representing the output of this
1655  * phase.
1656  */
1657  if (have_grouping)
1658  {
1659  current_rel = create_grouping_paths(root,
1660  current_rel,
1661  grouping_target,
1662  grouping_target_parallel_safe,
1663  gset_data);
1664  /* Fix things up if grouping_target contains SRFs */
1665  if (parse->hasTargetSRFs)
1666  adjust_paths_for_srfs(root, current_rel,
1667  grouping_targets,
1668  grouping_targets_contain_srfs);
1669  }
1670 
1671  /*
1672  * If we have window functions, consider ways to implement those. We
1673  * build a new upperrel representing the output of this phase.
1674  */
1675  if (activeWindows)
1676  {
1677  current_rel = create_window_paths(root,
1678  current_rel,
1679  grouping_target,
1680  sort_input_target,
1681  sort_input_target_parallel_safe,
1682  wflists,
1683  activeWindows);
1684  /* Fix things up if sort_input_target contains SRFs */
1685  if (parse->hasTargetSRFs)
1686  adjust_paths_for_srfs(root, current_rel,
1687  sort_input_targets,
1688  sort_input_targets_contain_srfs);
1689  }
1690 
1691  /*
1692  * If there is a DISTINCT clause, consider ways to implement that. We
1693  * build a new upperrel representing the output of this phase.
1694  */
1695  if (parse->distinctClause)
1696  {
1697  current_rel = create_distinct_paths(root,
1698  current_rel);
1699  }
1700  } /* end of if (setOperations) */
1701 
1702  /*
1703  * If ORDER BY was given, consider ways to implement that, and generate a
1704  * new upperrel containing only paths that emit the correct ordering and
1705  * project the correct final_target. We can apply the original
1706  * limit_tuples limit in sort costing here, but only if there are no
1707  * postponed SRFs.
1708  */
1709  if (parse->sortClause)
1710  {
1711  current_rel = create_ordered_paths(root,
1712  current_rel,
1713  final_target,
1714  final_target_parallel_safe,
1715  have_postponed_srfs ? -1.0 :
1716  limit_tuples);
1717  /* Fix things up if final_target contains SRFs */
1718  if (parse->hasTargetSRFs)
1719  adjust_paths_for_srfs(root, current_rel,
1720  final_targets,
1721  final_targets_contain_srfs);
1722  }
1723 
1724  /*
1725  * Now we are prepared to build the final-output upperrel.
1726  */
1727  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1728 
1729  /*
1730  * If the input rel is marked consider_parallel and there's nothing that's
1731  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1732  * consider_parallel as well. Note that if the query has rowMarks or is
1733  * not a SELECT, consider_parallel will be false for every relation in the
1734  * query.
1735  */
1736  if (current_rel->consider_parallel &&
1737  is_parallel_safe(root, parse->limitOffset) &&
1738  is_parallel_safe(root, parse->limitCount))
1739  final_rel->consider_parallel = true;
1740 
1741  /*
1742  * If the current_rel belongs to a single FDW, so does the final_rel.
1743  */
1744  final_rel->serverid = current_rel->serverid;
1745  final_rel->userid = current_rel->userid;
1746  final_rel->useridiscurrent = current_rel->useridiscurrent;
1747  final_rel->fdwroutine = current_rel->fdwroutine;
1748 
1749  /*
1750  * Generate paths for the final_rel. Insert all surviving paths, with
1751  * LockRows, Limit, and/or ModifyTable steps added if needed.
1752  */
1753  foreach(lc, current_rel->pathlist)
1754  {
1755  Path *path = (Path *) lfirst(lc);
1756 
1757  /*
1758  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1759  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1760  * here. If there are only non-locking rowmarks, they should be
1761  * handled by the ModifyTable node instead. However, root->rowMarks
1762  * is what goes into the LockRows node.)
1763  */
1764  if (parse->rowMarks)
1765  {
1766  path = (Path *) create_lockrows_path(root, final_rel, path,
1767  root->rowMarks,
1769  }
1770 
1771  /*
1772  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1773  */
1774  if (limit_needed(parse))
1775  {
1776  path = (Path *) create_limit_path(root, final_rel, path,
1777  parse->limitOffset,
1778  parse->limitCount,
1779  parse->limitOption,
1780  offset_est, count_est);
1781  }
1782 
1783  /*
1784  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1785  */
1786  if (parse->commandType != CMD_SELECT)
1787  {
1788  Index rootRelation;
1789  List *resultRelations = NIL;
1790  List *updateColnosLists = NIL;
1791  List *withCheckOptionLists = NIL;
1792  List *returningLists = NIL;
1793  List *mergeActionLists = NIL;
1794  List *rowMarks;
1795 
1797  {
1798  /* Inherited UPDATE/DELETE/MERGE */
1799  RelOptInfo *top_result_rel = find_base_rel(root,
1800  parse->resultRelation);
1801  int resultRelation = -1;
1802 
1803  /* Add only leaf children to ModifyTable. */
1804  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1805  resultRelation)) >= 0)
1806  {
1807  RelOptInfo *this_result_rel = find_base_rel(root,
1808  resultRelation);
1809 
1810  /*
1811  * Also exclude any leaf rels that have turned dummy since
1812  * being added to the list, for example, by being excluded
1813  * by constraint exclusion.
1814  */
1815  if (IS_DUMMY_REL(this_result_rel))
1816  continue;
1817 
1818  /* Build per-target-rel lists needed by ModifyTable */
1819  resultRelations = lappend_int(resultRelations,
1820  resultRelation);
1821  if (parse->commandType == CMD_UPDATE)
1822  {
1823  List *update_colnos = root->update_colnos;
1824 
1825  if (this_result_rel != top_result_rel)
1826  update_colnos =
1828  update_colnos,
1829  this_result_rel->relid,
1830  top_result_rel->relid);
1831  updateColnosLists = lappend(updateColnosLists,
1832  update_colnos);
1833  }
1834  if (parse->withCheckOptions)
1835  {
1836  List *withCheckOptions = parse->withCheckOptions;
1837 
1838  if (this_result_rel != top_result_rel)
1839  withCheckOptions = (List *)
1841  (Node *) withCheckOptions,
1842  this_result_rel,
1843  top_result_rel);
1844  withCheckOptionLists = lappend(withCheckOptionLists,
1845  withCheckOptions);
1846  }
1847  if (parse->returningList)
1848  {
1849  List *returningList = parse->returningList;
1850 
1851  if (this_result_rel != top_result_rel)
1852  returningList = (List *)
1854  (Node *) returningList,
1855  this_result_rel,
1856  top_result_rel);
1857  returningLists = lappend(returningLists,
1858  returningList);
1859  }
1860  if (parse->mergeActionList)
1861  {
1862  ListCell *l;
1863  List *mergeActionList = NIL;
1864 
1865  /*
1866  * Copy MergeActions and translate stuff that
1867  * references attribute numbers.
1868  */
1869  foreach(l, parse->mergeActionList)
1870  {
1871  MergeAction *action = lfirst(l),
1872  *leaf_action = copyObject(action);
1873 
1874  leaf_action->qual =
1876  (Node *) action->qual,
1877  this_result_rel,
1878  top_result_rel);
1879  leaf_action->targetList = (List *)
1881  (Node *) action->targetList,
1882  this_result_rel,
1883  top_result_rel);
1884  if (leaf_action->commandType == CMD_UPDATE)
1885  leaf_action->updateColnos =
1887  action->updateColnos,
1888  this_result_rel->relid,
1889  top_result_rel->relid);
1890  mergeActionList = lappend(mergeActionList,
1891  leaf_action);
1892  }
1893 
1894  mergeActionLists = lappend(mergeActionLists,
1895  mergeActionList);
1896  }
1897  }
1898 
1899  if (resultRelations == NIL)
1900  {
1901  /*
1902  * We managed to exclude every child rel, so generate a
1903  * dummy one-relation plan using info for the top target
1904  * rel (even though that may not be a leaf target).
1905  * Although it's clear that no data will be updated or
1906  * deleted, we still need to have a ModifyTable node so
1907  * that any statement triggers will be executed. (This
1908  * could be cleaner if we fixed nodeModifyTable.c to allow
1909  * zero target relations, but that probably wouldn't be a
1910  * net win.)
1911  */
1912  resultRelations = list_make1_int(parse->resultRelation);
1913  if (parse->commandType == CMD_UPDATE)
1914  updateColnosLists = list_make1(root->update_colnos);
1915  if (parse->withCheckOptions)
1916  withCheckOptionLists = list_make1(parse->withCheckOptions);
1917  if (parse->returningList)
1918  returningLists = list_make1(parse->returningList);
1919  if (parse->mergeActionList)
1920  mergeActionLists = list_make1(parse->mergeActionList);
1921  }
1922  }
1923  else
1924  {
1925  /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
1926  resultRelations = list_make1_int(parse->resultRelation);
1927  if (parse->commandType == CMD_UPDATE)
1928  updateColnosLists = list_make1(root->update_colnos);
1929  if (parse->withCheckOptions)
1930  withCheckOptionLists = list_make1(parse->withCheckOptions);
1931  if (parse->returningList)
1932  returningLists = list_make1(parse->returningList);
1933  if (parse->mergeActionList)
1934  mergeActionLists = list_make1(parse->mergeActionList);
1935  }
1936 
1937  /*
1938  * If target is a partition root table, we need to mark the
1939  * ModifyTable node appropriately for that.
1940  */
1941  if (rt_fetch(parse->resultRelation, parse->rtable)->relkind ==
1942  RELKIND_PARTITIONED_TABLE)
1943  rootRelation = parse->resultRelation;
1944  else
1945  rootRelation = 0;
1946 
1947  /*
1948  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1949  * will have dealt with fetching non-locked marked rows, else we
1950  * need to have ModifyTable do that.
1951  */
1952  if (parse->rowMarks)
1953  rowMarks = NIL;
1954  else
1955  rowMarks = root->rowMarks;
1956 
1957  path = (Path *)
1958  create_modifytable_path(root, final_rel,
1959  path,
1960  parse->commandType,
1961  parse->canSetTag,
1962  parse->resultRelation,
1963  rootRelation,
1964  root->partColsUpdated,
1965  resultRelations,
1966  updateColnosLists,
1967  withCheckOptionLists,
1968  returningLists,
1969  rowMarks,
1970  parse->onConflict,
1971  mergeActionLists,
1973  }
1974 
1975  /* And shove it into final_rel */
1976  add_path(final_rel, path);
1977  }
1978 
1979  /*
1980  * Generate partial paths for final_rel, too, if outer query levels might
1981  * be able to make use of them.
1982  */
1983  if (final_rel->consider_parallel && root->query_level > 1 &&
1984  !limit_needed(parse))
1985  {
1986  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
1987  foreach(lc, current_rel->partial_pathlist)
1988  {
1989  Path *partial_path = (Path *) lfirst(lc);
1990 
1991  add_partial_path(final_rel, partial_path);
1992  }
1993  }
1994 
1995  extra.limit_needed = limit_needed(parse);
1996  extra.limit_tuples = limit_tuples;
1997  extra.count_est = count_est;
1998  extra.offset_est = offset_est;
1999 
2000  /*
2001  * If there is an FDW that's responsible for all baserels of the query,
2002  * let it consider adding ForeignPaths.
2003  */
2004  if (final_rel->fdwroutine &&
2005  final_rel->fdwroutine->GetForeignUpperPaths)
2006  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2007  current_rel, final_rel,
2008  &extra);
2009 
2010  /* Let extensions possibly add some more paths */
2012  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2013  current_rel, final_rel, &extra);
2014 
2015  /* Note: currently, we leave it to callers to do set_cheapest() */
2016 }
2017 
2018 /*
2019  * Do preprocessing for groupingSets clause and related data. This handles the
2020  * preliminary steps of expanding the grouping sets, organizing them into lists
2021  * of rollups, and preparing annotations which will later be filled in with
2022  * size estimates.
2023  */
2024 static grouping_sets_data *
2026 {
2027  Query *parse = root->parse;
2028  List *sets;
2029  int maxref = 0;
2030  ListCell *lc_set;
2032 
2033  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2034 
2035  gd->any_hashable = false;
2036  gd->unhashable_refs = NULL;
2037  gd->unsortable_refs = NULL;
2038  gd->unsortable_sets = NIL;
2039 
2040  /*
2041  * We don't currently make any attempt to optimize the groupClause when
2042  * there are grouping sets, so just duplicate it in processed_groupClause.
2043  */
2044  root->processed_groupClause = parse->groupClause;
2045 
2046  if (parse->groupClause)
2047  {
2048  ListCell *lc;
2049 
2050  foreach(lc, parse->groupClause)
2051  {
2053  Index ref = gc->tleSortGroupRef;
2054 
2055  if (ref > maxref)
2056  maxref = ref;
2057 
2058  if (!gc->hashable)
2060 
2061  if (!OidIsValid(gc->sortop))
2063  }
2064  }
2065 
2066  /* Allocate workspace array for remapping */
2067  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2068 
2069  /*
2070  * If we have any unsortable sets, we must extract them before trying to
2071  * prepare rollups. Unsortable sets don't go through
2072  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2073  * here.
2074  */
2075  if (!bms_is_empty(gd->unsortable_refs))
2076  {
2077  List *sortable_sets = NIL;
2078  ListCell *lc;
2079 
2080  foreach(lc, parse->groupingSets)
2081  {
2082  List *gset = (List *) lfirst(lc);
2083 
2084  if (bms_overlap_list(gd->unsortable_refs, gset))
2085  {
2087 
2088  gs->set = gset;
2089  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2090 
2091  /*
2092  * We must enforce here that an unsortable set is hashable;
2093  * later code assumes this. Parse analysis only checks that
2094  * every individual column is either hashable or sortable.
2095  *
2096  * Note that passing this test doesn't guarantee we can
2097  * generate a plan; there might be other showstoppers.
2098  */
2099  if (bms_overlap_list(gd->unhashable_refs, gset))
2100  ereport(ERROR,
2101  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2102  errmsg("could not implement GROUP BY"),
2103  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2104  }
2105  else
2106  sortable_sets = lappend(sortable_sets, gset);
2107  }
2108 
2109  if (sortable_sets)
2110  sets = extract_rollup_sets(sortable_sets);
2111  else
2112  sets = NIL;
2113  }
2114  else
2115  sets = extract_rollup_sets(parse->groupingSets);
2116 
2117  foreach(lc_set, sets)
2118  {
2119  List *current_sets = (List *) lfirst(lc_set);
2120  RollupData *rollup = makeNode(RollupData);
2121  GroupingSetData *gs;
2122 
2123  /*
2124  * Reorder the current list of grouping sets into correct prefix
2125  * order. If only one aggregation pass is needed, try to make the
2126  * list match the ORDER BY clause; if more than one pass is needed, we
2127  * don't bother with that.
2128  *
2129  * Note that this reorders the sets from smallest-member-first to
2130  * largest-member-first, and applies the GroupingSetData annotations,
2131  * though the data will be filled in later.
2132  */
2133  current_sets = reorder_grouping_sets(current_sets,
2134  (list_length(sets) == 1
2135  ? parse->sortClause
2136  : NIL));
2137 
2138  /*
2139  * Get the initial (and therefore largest) grouping set.
2140  */
2141  gs = linitial_node(GroupingSetData, current_sets);
2142 
2143  /*
2144  * Order the groupClause appropriately. If the first grouping set is
2145  * empty, then the groupClause must also be empty; otherwise we have
2146  * to force the groupClause to match that grouping set's order.
2147  *
2148  * (The first grouping set can be empty even though parse->groupClause
2149  * is not empty only if all non-empty grouping sets are unsortable.
2150  * The groupClauses for hashed grouping sets are built later on.)
2151  */
2152  if (gs->set)
2153  rollup->groupClause = preprocess_groupclause(root, gs->set);
2154  else
2155  rollup->groupClause = NIL;
2156 
2157  /*
2158  * Is it hashable? We pretend empty sets are hashable even though we
2159  * actually force them not to be hashed later. But don't bother if
2160  * there's nothing but empty sets (since in that case we can't hash
2161  * anything).
2162  */
2163  if (gs->set &&
2165  {
2166  rollup->hashable = true;
2167  gd->any_hashable = true;
2168  }
2169 
2170  /*
2171  * Now that we've pinned down an order for the groupClause for this
2172  * list of grouping sets, we need to remap the entries in the grouping
2173  * sets from sortgrouprefs to plain indices (0-based) into the
2174  * groupClause for this collection of grouping sets. We keep the
2175  * original form for later use, though.
2176  */
2177  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2178  current_sets,
2179  gd->tleref_to_colnum_map);
2180  rollup->gsets_data = current_sets;
2181 
2182  gd->rollups = lappend(gd->rollups, rollup);
2183  }
2184 
2185  if (gd->unsortable_sets)
2186  {
2187  /*
2188  * We have not yet pinned down a groupclause for this, but we will
2189  * need index-based lists for estimation purposes. Construct
2190  * hash_sets_idx based on the entire original groupclause for now.
2191  */
2192  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2193  gd->unsortable_sets,
2194  gd->tleref_to_colnum_map);
2195  gd->any_hashable = true;
2196  }
2197 
2198  return gd;
2199 }
2200 
2201 /*
2202  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2203  * (without annotation) mapped to indexes into the given groupclause.
2204  */
2205 static List *
2207  List *gsets,
2208  int *tleref_to_colnum_map)
2209 {
2210  int ref = 0;
2211  List *result = NIL;
2212  ListCell *lc;
2213 
2214  foreach(lc, groupClause)
2215  {
2217 
2218  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2219  }
2220 
2221  foreach(lc, gsets)
2222  {
2223  List *set = NIL;
2224  ListCell *lc2;
2226 
2227  foreach(lc2, gs->set)
2228  {
2229  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2230  }
2231 
2232  result = lappend(result, set);
2233  }
2234 
2235  return result;
2236 }
2237 
2238 
2239 /*
2240  * preprocess_rowmarks - set up PlanRowMarks if needed
2241  */
2242 static void
2244 {
2245  Query *parse = root->parse;
2246  Bitmapset *rels;
2247  List *prowmarks;
2248  ListCell *l;
2249  int i;
2250 
2251  if (parse->rowMarks)
2252  {
2253  /*
2254  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2255  * grouping, since grouping renders a reference to individual tuple
2256  * CTIDs invalid. This is also checked at parse time, but that's
2257  * insufficient because of rule substitution, query pullup, etc.
2258  */
2260  parse->rowMarks)->strength);
2261  }
2262  else
2263  {
2264  /*
2265  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2266  * UPDATE/SHARE.
2267  */
2268  if (parse->commandType != CMD_UPDATE &&
2269  parse->commandType != CMD_DELETE)
2270  return;
2271  }
2272 
2273  /*
2274  * We need to have rowmarks for all base relations except the target. We
2275  * make a bitmapset of all base rels and then remove the items we don't
2276  * need or have FOR [KEY] UPDATE/SHARE marks for.
2277  */
2278  rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2279  if (parse->resultRelation)
2280  rels = bms_del_member(rels, parse->resultRelation);
2281 
2282  /*
2283  * Convert RowMarkClauses to PlanRowMark representation.
2284  */
2285  prowmarks = NIL;
2286  foreach(l, parse->rowMarks)
2287  {
2289  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2290  PlanRowMark *newrc;
2291 
2292  /*
2293  * Currently, it is syntactically impossible to have FOR UPDATE et al
2294  * applied to an update/delete target rel. If that ever becomes
2295  * possible, we should drop the target from the PlanRowMark list.
2296  */
2297  Assert(rc->rti != parse->resultRelation);
2298 
2299  /*
2300  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2301  * can't support true locking. Subqueries that got flattened into the
2302  * main query should be ignored completely. Any that didn't will get
2303  * ROW_MARK_COPY items in the next loop.
2304  */
2305  if (rte->rtekind != RTE_RELATION)
2306  continue;
2307 
2308  rels = bms_del_member(rels, rc->rti);
2309 
2310  newrc = makeNode(PlanRowMark);
2311  newrc->rti = newrc->prti = rc->rti;
2312  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2313  newrc->markType = select_rowmark_type(rte, rc->strength);
2314  newrc->allMarkTypes = (1 << newrc->markType);
2315  newrc->strength = rc->strength;
2316  newrc->waitPolicy = rc->waitPolicy;
2317  newrc->isParent = false;
2318 
2319  prowmarks = lappend(prowmarks, newrc);
2320  }
2321 
2322  /*
2323  * Now, add rowmarks for any non-target, non-locked base relations.
2324  */
2325  i = 0;
2326  foreach(l, parse->rtable)
2327  {
2329  PlanRowMark *newrc;
2330 
2331  i++;
2332  if (!bms_is_member(i, rels))
2333  continue;
2334 
2335  newrc = makeNode(PlanRowMark);
2336  newrc->rti = newrc->prti = i;
2337  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2338  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2339  newrc->allMarkTypes = (1 << newrc->markType);
2340  newrc->strength = LCS_NONE;
2341  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2342  newrc->isParent = false;
2343 
2344  prowmarks = lappend(prowmarks, newrc);
2345  }
2346 
2347  root->rowMarks = prowmarks;
2348 }
2349 
2350 /*
2351  * Select RowMarkType to use for a given table
2352  */
2355 {
2356  if (rte->rtekind != RTE_RELATION)
2357  {
2358  /* If it's not a table at all, use ROW_MARK_COPY */
2359  return ROW_MARK_COPY;
2360  }
2361  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2362  {
2363  /* Let the FDW select the rowmark type, if it wants to */
2364  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2365 
2366  if (fdwroutine->GetForeignRowMarkType != NULL)
2367  return fdwroutine->GetForeignRowMarkType(rte, strength);
2368  /* Otherwise, use ROW_MARK_COPY by default */
2369  return ROW_MARK_COPY;
2370  }
2371  else
2372  {
2373  /* Regular table, apply the appropriate lock type */
2374  switch (strength)
2375  {
2376  case LCS_NONE:
2377 
2378  /*
2379  * We don't need a tuple lock, only the ability to re-fetch
2380  * the row.
2381  */
2382  return ROW_MARK_REFERENCE;
2383  break;
2384  case LCS_FORKEYSHARE:
2385  return ROW_MARK_KEYSHARE;
2386  break;
2387  case LCS_FORSHARE:
2388  return ROW_MARK_SHARE;
2389  break;
2390  case LCS_FORNOKEYUPDATE:
2391  return ROW_MARK_NOKEYEXCLUSIVE;
2392  break;
2393  case LCS_FORUPDATE:
2394  return ROW_MARK_EXCLUSIVE;
2395  break;
2396  }
2397  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2398  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2399  }
2400 }
2401 
2402 /*
2403  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2404  *
2405  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2406  * results back in *count_est and *offset_est. These variables are set to
2407  * 0 if the corresponding clause is not present, and -1 if it's present
2408  * but we couldn't estimate the value for it. (The "0" convention is OK
2409  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2410  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2411  * usual practice of never estimating less than one row.) These values will
2412  * be passed to create_limit_path, which see if you change this code.
2413  *
2414  * The return value is the suitably adjusted tuple_fraction to use for
2415  * planning the query. This adjustment is not overridable, since it reflects
2416  * plan actions that grouping_planner() will certainly take, not assumptions
2417  * about context.
2418  */
2419 static double
2420 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2421  int64 *offset_est, int64 *count_est)
2422 {
2423  Query *parse = root->parse;
2424  Node *est;
2425  double limit_fraction;
2426 
2427  /* Should not be called unless LIMIT or OFFSET */
2428  Assert(parse->limitCount || parse->limitOffset);
2429 
2430  /*
2431  * Try to obtain the clause values. We use estimate_expression_value
2432  * primarily because it can sometimes do something useful with Params.
2433  */
2434  if (parse->limitCount)
2435  {
2436  est = estimate_expression_value(root, parse->limitCount);
2437  if (est && IsA(est, Const))
2438  {
2439  if (((Const *) est)->constisnull)
2440  {
2441  /* NULL indicates LIMIT ALL, ie, no limit */
2442  *count_est = 0; /* treat as not present */
2443  }
2444  else
2445  {
2446  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2447  if (*count_est <= 0)
2448  *count_est = 1; /* force to at least 1 */
2449  }
2450  }
2451  else
2452  *count_est = -1; /* can't estimate */
2453  }
2454  else
2455  *count_est = 0; /* not present */
2456 
2457  if (parse->limitOffset)
2458  {
2459  est = estimate_expression_value(root, parse->limitOffset);
2460  if (est && IsA(est, Const))
2461  {
2462  if (((Const *) est)->constisnull)
2463  {
2464  /* Treat NULL as no offset; the executor will too */
2465  *offset_est = 0; /* treat as not present */
2466  }
2467  else
2468  {
2469  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2470  if (*offset_est < 0)
2471  *offset_est = 0; /* treat as not present */
2472  }
2473  }
2474  else
2475  *offset_est = -1; /* can't estimate */
2476  }
2477  else
2478  *offset_est = 0; /* not present */
2479 
2480  if (*count_est != 0)
2481  {
2482  /*
2483  * A LIMIT clause limits the absolute number of tuples returned.
2484  * However, if it's not a constant LIMIT then we have to guess; for
2485  * lack of a better idea, assume 10% of the plan's result is wanted.
2486  */
2487  if (*count_est < 0 || *offset_est < 0)
2488  {
2489  /* LIMIT or OFFSET is an expression ... punt ... */
2490  limit_fraction = 0.10;
2491  }
2492  else
2493  {
2494  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2495  limit_fraction = (double) *count_est + (double) *offset_est;
2496  }
2497 
2498  /*
2499  * If we have absolute limits from both caller and LIMIT, use the
2500  * smaller value; likewise if they are both fractional. If one is
2501  * fractional and the other absolute, we can't easily determine which
2502  * is smaller, but we use the heuristic that the absolute will usually
2503  * be smaller.
2504  */
2505  if (tuple_fraction >= 1.0)
2506  {
2507  if (limit_fraction >= 1.0)
2508  {
2509  /* both absolute */
2510  tuple_fraction = Min(tuple_fraction, limit_fraction);
2511  }
2512  else
2513  {
2514  /* caller absolute, limit fractional; use caller's value */
2515  }
2516  }
2517  else if (tuple_fraction > 0.0)
2518  {
2519  if (limit_fraction >= 1.0)
2520  {
2521  /* caller fractional, limit absolute; use limit */
2522  tuple_fraction = limit_fraction;
2523  }
2524  else
2525  {
2526  /* both fractional */
2527  tuple_fraction = Min(tuple_fraction, limit_fraction);
2528  }
2529  }
2530  else
2531  {
2532  /* no info from caller, just use limit */
2533  tuple_fraction = limit_fraction;
2534  }
2535  }
2536  else if (*offset_est != 0 && tuple_fraction > 0.0)
2537  {
2538  /*
2539  * We have an OFFSET but no LIMIT. This acts entirely differently
2540  * from the LIMIT case: here, we need to increase rather than decrease
2541  * the caller's tuple_fraction, because the OFFSET acts to cause more
2542  * tuples to be fetched instead of fewer. This only matters if we got
2543  * a tuple_fraction > 0, however.
2544  *
2545  * As above, use 10% if OFFSET is present but unestimatable.
2546  */
2547  if (*offset_est < 0)
2548  limit_fraction = 0.10;
2549  else
2550  limit_fraction = (double) *offset_est;
2551 
2552  /*
2553  * If we have absolute counts from both caller and OFFSET, add them
2554  * together; likewise if they are both fractional. If one is
2555  * fractional and the other absolute, we want to take the larger, and
2556  * we heuristically assume that's the fractional one.
2557  */
2558  if (tuple_fraction >= 1.0)
2559  {
2560  if (limit_fraction >= 1.0)
2561  {
2562  /* both absolute, so add them together */
2563  tuple_fraction += limit_fraction;
2564  }
2565  else
2566  {
2567  /* caller absolute, limit fractional; use limit */
2568  tuple_fraction = limit_fraction;
2569  }
2570  }
2571  else
2572  {
2573  if (limit_fraction >= 1.0)
2574  {
2575  /* caller fractional, limit absolute; use caller's value */
2576  }
2577  else
2578  {
2579  /* both fractional, so add them together */
2580  tuple_fraction += limit_fraction;
2581  if (tuple_fraction >= 1.0)
2582  tuple_fraction = 0.0; /* assume fetch all */
2583  }
2584  }
2585  }
2586 
2587  return tuple_fraction;
2588 }
2589 
2590 /*
2591  * limit_needed - do we actually need a Limit plan node?
2592  *
2593  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2594  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2595  * locution for an optimization fence. (Because other places in the planner
2596  * merely check whether parse->limitOffset isn't NULL, it will still work as
2597  * an optimization fence --- we're just suppressing unnecessary run-time
2598  * overhead.)
2599  *
2600  * This might look like it could be merged into preprocess_limit, but there's
2601  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2602  * in preprocess_limit it's good enough to consider estimated values.
2603  */
2604 bool
2606 {
2607  Node *node;
2608 
2609  node = parse->limitCount;
2610  if (node)
2611  {
2612  if (IsA(node, Const))
2613  {
2614  /* NULL indicates LIMIT ALL, ie, no limit */
2615  if (!((Const *) node)->constisnull)
2616  return true; /* LIMIT with a constant value */
2617  }
2618  else
2619  return true; /* non-constant LIMIT */
2620  }
2621 
2622  node = parse->limitOffset;
2623  if (node)
2624  {
2625  if (IsA(node, Const))
2626  {
2627  /* Treat NULL as no offset; the executor would too */
2628  if (!((Const *) node)->constisnull)
2629  {
2630  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2631 
2632  if (offset != 0)
2633  return true; /* OFFSET with a nonzero value */
2634  }
2635  }
2636  else
2637  return true; /* non-constant OFFSET */
2638  }
2639 
2640  return false; /* don't need a Limit plan node */
2641 }
2642 
2643 
2644 /*
2645  * remove_useless_groupby_columns
2646  * Remove any columns in the GROUP BY clause that are redundant due to
2647  * being functionally dependent on other GROUP BY columns.
2648  *
2649  * Since some other DBMSes do not allow references to ungrouped columns, it's
2650  * not unusual to find all columns listed in GROUP BY even though listing the
2651  * primary-key columns would be sufficient. Deleting such excess columns
2652  * avoids redundant sorting work, so it's worth doing.
2653  *
2654  * Relcache invalidations will ensure that cached plans become invalidated
2655  * when the underlying index of the pkey constraint is dropped.
2656  *
2657  * Currently, we only make use of pkey constraints for this, however, we may
2658  * wish to take this further in the future and also use unique constraints
2659  * which have NOT NULL columns. In that case, plan invalidation will still
2660  * work since relations will receive a relcache invalidation when a NOT NULL
2661  * constraint is dropped.
2662  */
2663 static void
2665 {
2666  Query *parse = root->parse;
2667  Bitmapset **groupbyattnos;
2668  Bitmapset **surplusvars;
2669  ListCell *lc;
2670  int relid;
2671 
2672  /* No chance to do anything if there are less than two GROUP BY items */
2673  if (list_length(root->processed_groupClause) < 2)
2674  return;
2675 
2676  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2677  if (parse->groupingSets)
2678  return;
2679 
2680  /*
2681  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2682  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2683  * that are GROUP BY items.
2684  */
2685  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2686  (list_length(parse->rtable) + 1));
2687  foreach(lc, root->processed_groupClause)
2688  {
2690  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2691  Var *var = (Var *) tle->expr;
2692 
2693  /*
2694  * Ignore non-Vars and Vars from other query levels.
2695  *
2696  * XXX in principle, stable expressions containing Vars could also be
2697  * removed, if all the Vars are functionally dependent on other GROUP
2698  * BY items. But it's not clear that such cases occur often enough to
2699  * be worth troubling over.
2700  */
2701  if (!IsA(var, Var) ||
2702  var->varlevelsup > 0)
2703  continue;
2704 
2705  /* OK, remember we have this Var */
2706  relid = var->varno;
2707  Assert(relid <= list_length(parse->rtable));
2708  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2710  }
2711 
2712  /*
2713  * Consider each relation and see if it is possible to remove some of its
2714  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2715  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2716  * of the column attnos of RTE k that are removable GROUP BY items.
2717  */
2718  surplusvars = NULL; /* don't allocate array unless required */
2719  relid = 0;
2720  foreach(lc, parse->rtable)
2721  {
2723  Bitmapset *relattnos;
2724  Bitmapset *pkattnos;
2725  Oid constraintOid;
2726 
2727  relid++;
2728 
2729  /* Only plain relations could have primary-key constraints */
2730  if (rte->rtekind != RTE_RELATION)
2731  continue;
2732 
2733  /*
2734  * We must skip inheritance parent tables as some of the child rels
2735  * may cause duplicate rows. This cannot happen with partitioned
2736  * tables, however.
2737  */
2738  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2739  continue;
2740 
2741  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2742  relattnos = groupbyattnos[relid];
2743  if (bms_membership(relattnos) != BMS_MULTIPLE)
2744  continue;
2745 
2746  /*
2747  * Can't remove any columns for this rel if there is no suitable
2748  * (i.e., nondeferrable) primary key constraint.
2749  */
2750  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2751  if (pkattnos == NULL)
2752  continue;
2753 
2754  /*
2755  * If the primary key is a proper subset of relattnos then we have
2756  * some items in the GROUP BY that can be removed.
2757  */
2758  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2759  {
2760  /*
2761  * To easily remember whether we've found anything to do, we don't
2762  * allocate the surplusvars[] array until we find something.
2763  */
2764  if (surplusvars == NULL)
2765  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2766  (list_length(parse->rtable) + 1));
2767 
2768  /* Remember the attnos of the removable columns */
2769  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2770  }
2771  }
2772 
2773  /*
2774  * If we found any surplus Vars, build a new GROUP BY clause without them.
2775  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2776  * markings, but that's harmless.)
2777  */
2778  if (surplusvars != NULL)
2779  {
2780  List *new_groupby = NIL;
2781 
2782  foreach(lc, root->processed_groupClause)
2783  {
2785  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2786  Var *var = (Var *) tle->expr;
2787 
2788  /*
2789  * New list must include non-Vars, outer Vars, and anything not
2790  * marked as surplus.
2791  */
2792  if (!IsA(var, Var) ||
2793  var->varlevelsup > 0 ||
2795  surplusvars[var->varno]))
2796  new_groupby = lappend(new_groupby, sgc);
2797  }
2798 
2799  root->processed_groupClause = new_groupby;
2800  }
2801 }
2802 
2803 /*
2804  * preprocess_groupclause - do preparatory work on GROUP BY clause
2805  *
2806  * The idea here is to adjust the ordering of the GROUP BY elements
2807  * (which in itself is semantically insignificant) to match ORDER BY,
2808  * thereby allowing a single sort operation to both implement the ORDER BY
2809  * requirement and set up for a Unique step that implements GROUP BY.
2810  *
2811  * In principle it might be interesting to consider other orderings of the
2812  * GROUP BY elements, which could match the sort ordering of other
2813  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2814  * bother with that, though. Hashed grouping will frequently win anyway.
2815  *
2816  * Note: we need no comparable processing of the distinctClause because
2817  * the parser already enforced that that matches ORDER BY.
2818  *
2819  * Note: we return a fresh List, but its elements are the same
2820  * SortGroupClauses appearing in parse->groupClause. This is important
2821  * because later processing may modify the processed_groupClause list.
2822  *
2823  * For grouping sets, the order of items is instead forced to agree with that
2824  * of the grouping set (and items not in the grouping set are skipped). The
2825  * work of sorting the order of grouping set elements to match the ORDER BY if
2826  * possible is done elsewhere.
2827  */
2828 static List *
2830 {
2831  Query *parse = root->parse;
2832  List *new_groupclause = NIL;
2833  bool partial_match;
2834  ListCell *sl;
2835  ListCell *gl;
2836 
2837  /* For grouping sets, we need to force the ordering */
2838  if (force)
2839  {
2840  foreach(sl, force)
2841  {
2842  Index ref = lfirst_int(sl);
2843  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2844 
2845  new_groupclause = lappend(new_groupclause, cl);
2846  }
2847 
2848  return new_groupclause;
2849  }
2850 
2851  /* If no ORDER BY, nothing useful to do here */
2852  if (parse->sortClause == NIL)
2853  return list_copy(parse->groupClause);
2854 
2855  /*
2856  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2857  * items, but only as far as we can make a matching prefix.
2858  *
2859  * This code assumes that the sortClause contains no duplicate items.
2860  */
2861  foreach(sl, parse->sortClause)
2862  {
2864 
2865  foreach(gl, parse->groupClause)
2866  {
2868 
2869  if (equal(gc, sc))
2870  {
2871  new_groupclause = lappend(new_groupclause, gc);
2872  break;
2873  }
2874  }
2875  if (gl == NULL)
2876  break; /* no match, so stop scanning */
2877  }
2878 
2879  /* Did we match all of the ORDER BY list, or just some of it? */
2880  partial_match = (sl != NULL);
2881 
2882  /* If no match at all, no point in reordering GROUP BY */
2883  if (new_groupclause == NIL)
2884  return list_copy(parse->groupClause);
2885 
2886  /*
2887  * Add any remaining GROUP BY items to the new list, but only if we were
2888  * able to make a complete match. In other words, we only rearrange the
2889  * GROUP BY list if the result is that one list is a prefix of the other
2890  * --- otherwise there's no possibility of a common sort. Also, give up
2891  * if there are any non-sortable GROUP BY items, since then there's no
2892  * hope anyway.
2893  */
2894  foreach(gl, parse->groupClause)
2895  {
2897 
2898  if (list_member_ptr(new_groupclause, gc))
2899  continue; /* it matched an ORDER BY item */
2900  if (partial_match) /* give up, no common sort possible */
2901  return list_copy(parse->groupClause);
2902  if (!OidIsValid(gc->sortop)) /* give up, GROUP BY can't be sorted */
2903  return list_copy(parse->groupClause);
2904  new_groupclause = lappend(new_groupclause, gc);
2905  }
2906 
2907  /* Success --- install the rearranged GROUP BY list */
2908  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2909  return new_groupclause;
2910 }
2911 
2912 /*
2913  * Extract lists of grouping sets that can be implemented using a single
2914  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2915  *
2916  * Input must be sorted with smallest sets first. Result has each sublist
2917  * sorted with smallest sets first.
2918  *
2919  * We want to produce the absolute minimum possible number of lists here to
2920  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2921  * of finding the minimal partition of a partially-ordered set into chains
2922  * (which is what we need, taking the list of grouping sets as a poset ordered
2923  * by set inclusion) can be mapped to the problem of finding the maximum
2924  * cardinality matching on a bipartite graph, which is solvable in polynomial
2925  * time with a worst case of no worse than O(n^2.5) and usually much
2926  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2927  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2928  * half a second on my modest system even with optimization off and assertions
2929  * on.)
2930  */
2931 static List *
2933 {
2934  int num_sets_raw = list_length(groupingSets);
2935  int num_empty = 0;
2936  int num_sets = 0; /* distinct sets */
2937  int num_chains = 0;
2938  List *result = NIL;
2939  List **results;
2940  List **orig_sets;
2941  Bitmapset **set_masks;
2942  int *chains;
2943  short **adjacency;
2944  short *adjacency_buf;
2946  int i;
2947  int j;
2948  int j_size;
2949  ListCell *lc1 = list_head(groupingSets);
2950  ListCell *lc;
2951 
2952  /*
2953  * Start by stripping out empty sets. The algorithm doesn't require this,
2954  * but the planner currently needs all empty sets to be returned in the
2955  * first list, so we strip them here and add them back after.
2956  */
2957  while (lc1 && lfirst(lc1) == NIL)
2958  {
2959  ++num_empty;
2960  lc1 = lnext(groupingSets, lc1);
2961  }
2962 
2963  /* bail out now if it turns out that all we had were empty sets. */
2964  if (!lc1)
2965  return list_make1(groupingSets);
2966 
2967  /*----------
2968  * We don't strictly need to remove duplicate sets here, but if we don't,
2969  * they tend to become scattered through the result, which is a bit
2970  * confusing (and irritating if we ever decide to optimize them out).
2971  * So we remove them here and add them back after.
2972  *
2973  * For each non-duplicate set, we fill in the following:
2974  *
2975  * orig_sets[i] = list of the original set lists
2976  * set_masks[i] = bitmapset for testing inclusion
2977  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2978  *
2979  * chains[i] will be the result group this set is assigned to.
2980  *
2981  * We index all of these from 1 rather than 0 because it is convenient
2982  * to leave 0 free for the NIL node in the graph algorithm.
2983  *----------
2984  */
2985  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2986  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2987  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2988  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2989 
2990  j_size = 0;
2991  j = 0;
2992  i = 1;
2993 
2994  for_each_cell(lc, groupingSets, lc1)
2995  {
2996  List *candidate = (List *) lfirst(lc);
2997  Bitmapset *candidate_set = NULL;
2998  ListCell *lc2;
2999  int dup_of = 0;
3000 
3001  foreach(lc2, candidate)
3002  {
3003  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3004  }
3005 
3006  /* we can only be a dup if we're the same length as a previous set */
3007  if (j_size == list_length(candidate))
3008  {
3009  int k;
3010 
3011  for (k = j; k < i; ++k)
3012  {
3013  if (bms_equal(set_masks[k], candidate_set))
3014  {
3015  dup_of = k;
3016  break;
3017  }
3018  }
3019  }
3020  else if (j_size < list_length(candidate))
3021  {
3022  j_size = list_length(candidate);
3023  j = i;
3024  }
3025 
3026  if (dup_of > 0)
3027  {
3028  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3029  bms_free(candidate_set);
3030  }
3031  else
3032  {
3033  int k;
3034  int n_adj = 0;
3035 
3036  orig_sets[i] = list_make1(candidate);
3037  set_masks[i] = candidate_set;
3038 
3039  /* fill in adjacency list; no need to compare equal-size sets */
3040 
3041  for (k = j - 1; k > 0; --k)
3042  {
3043  if (bms_is_subset(set_masks[k], candidate_set))
3044  adjacency_buf[++n_adj] = k;
3045  }
3046 
3047  if (n_adj > 0)
3048  {
3049  adjacency_buf[0] = n_adj;
3050  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3051  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3052  }
3053  else
3054  adjacency[i] = NULL;
3055 
3056  ++i;
3057  }
3058  }
3059 
3060  num_sets = i - 1;
3061 
3062  /*
3063  * Apply the graph matching algorithm to do the work.
3064  */
3065  state = BipartiteMatch(num_sets, num_sets, adjacency);
3066 
3067  /*
3068  * Now, the state->pair* fields have the info we need to assign sets to
3069  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3070  * pair_vu[v] = u (both will be true, but we check both so that we can do
3071  * it in one pass)
3072  */
3073  chains = palloc0((num_sets + 1) * sizeof(int));
3074 
3075  for (i = 1; i <= num_sets; ++i)
3076  {
3077  int u = state->pair_vu[i];
3078  int v = state->pair_uv[i];
3079 
3080  if (u > 0 && u < i)
3081  chains[i] = chains[u];
3082  else if (v > 0 && v < i)
3083  chains[i] = chains[v];
3084  else
3085  chains[i] = ++num_chains;
3086  }
3087 
3088  /* build result lists. */
3089  results = palloc0((num_chains + 1) * sizeof(List *));
3090 
3091  for (i = 1; i <= num_sets; ++i)
3092  {
3093  int c = chains[i];
3094 
3095  Assert(c > 0);
3096 
3097  results[c] = list_concat(results[c], orig_sets[i]);
3098  }
3099 
3100  /* push any empty sets back on the first list. */
3101  while (num_empty-- > 0)
3102  results[1] = lcons(NIL, results[1]);
3103 
3104  /* make result list */
3105  for (i = 1; i <= num_chains; ++i)
3106  result = lappend(result, results[i]);
3107 
3108  /*
3109  * Free all the things.
3110  *
3111  * (This is over-fussy for small sets but for large sets we could have
3112  * tied up a nontrivial amount of memory.)
3113  */
3115  pfree(results);
3116  pfree(chains);
3117  for (i = 1; i <= num_sets; ++i)
3118  if (adjacency[i])
3119  pfree(adjacency[i]);
3120  pfree(adjacency);
3121  pfree(adjacency_buf);
3122  pfree(orig_sets);
3123  for (i = 1; i <= num_sets; ++i)
3124  bms_free(set_masks[i]);
3125  pfree(set_masks);
3126 
3127  return result;
3128 }
3129 
3130 /*
3131  * Reorder the elements of a list of grouping sets such that they have correct
3132  * prefix relationships. Also inserts the GroupingSetData annotations.
3133  *
3134  * The input must be ordered with smallest sets first; the result is returned
3135  * with largest sets first. Note that the result shares no list substructure
3136  * with the input, so it's safe for the caller to modify it later.
3137  *
3138  * If we're passed in a sortclause, we follow its order of columns to the
3139  * extent possible, to minimize the chance that we add unnecessary sorts.
3140  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3141  * gets implemented in one pass.)
3142  */
3143 static List *
3144 reorder_grouping_sets(List *groupingSets, List *sortclause)
3145 {
3146  ListCell *lc;
3147  List *previous = NIL;
3148  List *result = NIL;
3149 
3150  foreach(lc, groupingSets)
3151  {
3152  List *candidate = (List *) lfirst(lc);
3153  List *new_elems = list_difference_int(candidate, previous);
3155 
3156  while (list_length(sortclause) > list_length(previous) &&
3157  new_elems != NIL)
3158  {
3159  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3160  int ref = sc->tleSortGroupRef;
3161 
3162  if (list_member_int(new_elems, ref))
3163  {
3164  previous = lappend_int(previous, ref);
3165  new_elems = list_delete_int(new_elems, ref);
3166  }
3167  else
3168  {
3169  /* diverged from the sortclause; give up on it */
3170  sortclause = NIL;
3171  break;
3172  }
3173  }
3174 
3175  previous = list_concat(previous, new_elems);
3176 
3177  gs->set = list_copy(previous);
3178  result = lcons(gs, result);
3179  }
3180 
3181  list_free(previous);
3182 
3183  return result;
3184 }
3185 
3186 /*
3187  * has_volatile_pathkey
3188  * Returns true if any PathKey in 'keys' has an EquivalenceClass
3189  * containing a volatile function. Otherwise returns false.
3190  */
3191 static bool
3193 {
3194  ListCell *lc;
3195 
3196  foreach(lc, keys)
3197  {
3198  PathKey *pathkey = lfirst_node(PathKey, lc);
3199 
3200  if (pathkey->pk_eclass->ec_has_volatile)
3201  return true;
3202  }
3203 
3204  return false;
3205 }
3206 
3207 /*
3208  * adjust_group_pathkeys_for_groupagg
3209  * Add pathkeys to root->group_pathkeys to reflect the best set of
3210  * pre-ordered input for ordered aggregates.
3211  *
3212  * We define "best" as the pathkeys that suit the largest number of
3213  * aggregate functions. We find these by looking at the first ORDER BY /
3214  * DISTINCT aggregate and take the pathkeys for that before searching for
3215  * other aggregates that require the same or a more strict variation of the
3216  * same pathkeys. We then repeat that process for any remaining aggregates
3217  * with different pathkeys and if we find another set of pathkeys that suits a
3218  * larger number of aggregates then we select those pathkeys instead.
3219  *
3220  * When the best pathkeys are found we also mark each Aggref that can use
3221  * those pathkeys as aggpresorted = true.
3222  *
3223  * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3224  * volatile functions, we never make use of these pathkeys. We want to ensure
3225  * that sorts using volatile functions are done independently in each Aggref
3226  * rather than once at the query level. If we were to allow this then Aggrefs
3227  * with compatible sort orders would all transition their rows in the same
3228  * order if those pathkeys were deemed to be the best pathkeys to sort on.
3229  * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3230  * better pathkeys to sort on, then the volatile function Aggrefs would be
3231  * left to perform their sorts individually. To avoid this inconsistent
3232  * behavior which could make Aggref results depend on what other Aggrefs the
3233  * query contains, we always force Aggrefs with volatile functions to perform
3234  * their own sorts.
3235  */
3236 static void
3238 {
3239  List *grouppathkeys = root->group_pathkeys;
3240  List *bestpathkeys;
3241  Bitmapset *bestaggs;
3242  Bitmapset *unprocessed_aggs;
3243  ListCell *lc;
3244  int i;
3245 
3246  /* Shouldn't be here if there are grouping sets */
3247  Assert(root->parse->groupingSets == NIL);
3248  /* Shouldn't be here unless there are some ordered aggregates */
3249  Assert(root->numOrderedAggs > 0);
3250 
3251  /* Do nothing if disabled */
3253  return;
3254 
3255  /*
3256  * Make a first pass over all AggInfos to collect a Bitmapset containing
3257  * the indexes of all AggInfos to be processed below.
3258  */
3259  unprocessed_aggs = NULL;
3260  foreach(lc, root->agginfos)
3261  {
3262  AggInfo *agginfo = lfirst_node(AggInfo, lc);
3263  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3264 
3265  if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3266  continue;
3267 
3268  /* only add aggregates with a DISTINCT or ORDER BY */
3269  if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3270  unprocessed_aggs = bms_add_member(unprocessed_aggs,
3271  foreach_current_index(lc));
3272  }
3273 
3274  /*
3275  * Now process all the unprocessed_aggs to find the best set of pathkeys
3276  * for the given set of aggregates.
3277  *
3278  * On the first outer loop here 'bestaggs' will be empty. We'll populate
3279  * this during the first loop using the pathkeys for the very first
3280  * AggInfo then taking any stronger pathkeys from any other AggInfos with
3281  * a more strict set of compatible pathkeys. Once the outer loop is
3282  * complete, we mark off all the aggregates with compatible pathkeys then
3283  * remove those from the unprocessed_aggs and repeat the process to try to
3284  * find another set of pathkeys that are suitable for a larger number of
3285  * aggregates. The outer loop will stop when there are not enough
3286  * unprocessed aggregates for it to be possible to find a set of pathkeys
3287  * to suit a larger number of aggregates.
3288  */
3289  bestpathkeys = NIL;
3290  bestaggs = NULL;
3291  while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3292  {
3293  Bitmapset *aggindexes = NULL;
3294  List *currpathkeys = NIL;
3295 
3296  i = -1;
3297  while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3298  {
3299  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3300  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3301  List *sortlist;
3302  List *pathkeys;
3303 
3304  if (aggref->aggdistinct != NIL)
3305  sortlist = aggref->aggdistinct;
3306  else
3307  sortlist = aggref->aggorder;
3308 
3309  pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3310  aggref->args);
3311 
3312  /*
3313  * Ignore Aggrefs which have volatile functions in their ORDER BY
3314  * or DISTINCT clause.
3315  */
3316  if (has_volatile_pathkey(pathkeys))
3317  {
3318  unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3319  continue;
3320  }
3321 
3322  /*
3323  * When not set yet, take the pathkeys from the first unprocessed
3324  * aggregate.
3325  */
3326  if (currpathkeys == NIL)
3327  {
3328  currpathkeys = pathkeys;
3329 
3330  /* include the GROUP BY pathkeys, if they exist */
3331  if (grouppathkeys != NIL)
3332  currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3333  currpathkeys);
3334 
3335  /* record that we found pathkeys for this aggregate */
3336  aggindexes = bms_add_member(aggindexes, i);
3337  }
3338  else
3339  {
3340  /* now look for a stronger set of matching pathkeys */
3341 
3342  /* include the GROUP BY pathkeys, if they exist */
3343  if (grouppathkeys != NIL)
3344  pathkeys = append_pathkeys(list_copy(grouppathkeys),
3345  pathkeys);
3346 
3347  /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3348  switch (compare_pathkeys(currpathkeys, pathkeys))
3349  {
3350  case PATHKEYS_BETTER2:
3351  /* 'pathkeys' are stronger, use these ones instead */
3352  currpathkeys = pathkeys;
3353  /* FALLTHROUGH */
3354 
3355  case PATHKEYS_BETTER1:
3356  /* 'pathkeys' are less strict */
3357  /* FALLTHROUGH */
3358 
3359  case PATHKEYS_EQUAL:
3360  /* mark this aggregate as covered by 'currpathkeys' */
3361  aggindexes = bms_add_member(aggindexes, i);
3362  break;
3363 
3364  case PATHKEYS_DIFFERENT:
3365  break;
3366  }
3367  }
3368  }
3369 
3370  /* remove the aggregates that we've just processed */
3371  unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3372 
3373  /*
3374  * If this pass included more aggregates than the previous best then
3375  * use these ones as the best set.
3376  */
3377  if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3378  {
3379  bestaggs = aggindexes;
3380  bestpathkeys = currpathkeys;
3381  }
3382  }
3383 
3384  /*
3385  * If we found any ordered aggregates, update root->group_pathkeys to add
3386  * the best set of aggregate pathkeys. Note that bestpathkeys includes
3387  * the original GROUP BY pathkeys already.
3388  */
3389  if (bestpathkeys != NIL)
3390  root->group_pathkeys = bestpathkeys;
3391 
3392  /*
3393  * Now that we've found the best set of aggregates we can set the
3394  * presorted flag to indicate to the executor that it needn't bother
3395  * performing a sort for these Aggrefs. We're able to do this now as
3396  * there's no chance of a Hash Aggregate plan as create_grouping_paths
3397  * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3398  * of ordered aggregates.
3399  */
3400  i = -1;
3401  while ((i = bms_next_member(bestaggs, i)) >= 0)
3402  {
3403  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3404 
3405  foreach(lc, agginfo->aggrefs)
3406  {
3407  Aggref *aggref = lfirst_node(Aggref, lc);
3408 
3409  aggref->aggpresorted = true;
3410  }
3411  }
3412 }
3413 
3414 /*
3415  * Compute query_pathkeys and other pathkeys during plan generation
3416  */
3417 static void
3419 {
3420  Query *parse = root->parse;
3421  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3422  List *tlist = root->processed_tlist;
3423  List *activeWindows = qp_extra->activeWindows;
3424 
3425  /*
3426  * Calculate pathkeys that represent grouping/ordering and/or ordered
3427  * aggregate requirements.
3428  */
3429  if (qp_extra->gset_data)
3430  {
3431  /*
3432  * With grouping sets, just use the first RollupData's groupClause. We
3433  * don't make any effort to optimize grouping clauses when there are
3434  * grouping sets, nor can we combine aggregate ordering keys with
3435  * grouping.
3436  */
3437  List *rollups = qp_extra->gset_data->rollups;
3438  List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3439 
3440  if (grouping_is_sortable(groupClause))
3441  {
3443  groupClause,
3444  tlist);
3446  }
3447  else
3448  {
3449  root->group_pathkeys = NIL;
3450  root->num_groupby_pathkeys = 0;
3451  }
3452  }
3453  else if (parse->groupClause || root->numOrderedAggs > 0)
3454  {
3455  /*
3456  * With a plain GROUP BY list, we can remove any grouping items that
3457  * are proven redundant by EquivalenceClass processing. For example,
3458  * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3459  * especially common cases, but they're nearly free to detect. Note
3460  * that we remove redundant items from processed_groupClause but not
3461  * the original parse->groupClause.
3462  */
3463  bool sortable;
3464 
3465  root->group_pathkeys =
3467  &root->processed_groupClause,
3468  tlist,
3469  true,
3470  &sortable);
3471  if (!sortable)
3472  {
3473  /* Can't sort; no point in considering aggregate ordering either */
3474  root->group_pathkeys = NIL;
3475  root->num_groupby_pathkeys = 0;
3476  }
3477  else
3478  {
3480  /* If we have ordered aggs, consider adding onto group_pathkeys */
3481  if (root->numOrderedAggs > 0)
3483  }
3484  }
3485  else
3486  {
3487  root->group_pathkeys = NIL;
3488  root->num_groupby_pathkeys = 0;
3489  }
3490 
3491  /* We consider only the first (bottom) window in pathkeys logic */
3492  if (activeWindows != NIL)
3493  {
3494  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3495 
3497  wc,
3498  tlist);
3499  }
3500  else
3501  root->window_pathkeys = NIL;
3502 
3503  /*
3504  * As with GROUP BY, we can discard any DISTINCT items that are proven
3505  * redundant by EquivalenceClass processing. The non-redundant list is
3506  * kept in root->processed_distinctClause, leaving the original
3507  * parse->distinctClause alone.
3508  */
3509  if (parse->distinctClause)
3510  {
3511  bool sortable;
3512 
3513  /* Make a copy since pathkey processing can modify the list */
3514  root->processed_distinctClause = list_copy(parse->distinctClause);
3515  root->distinct_pathkeys =
3517  &root->processed_distinctClause,
3518  tlist,
3519  true,
3520  &sortable);
3521  if (!sortable)
3522  root->distinct_pathkeys = NIL;
3523  }
3524  else
3525  root->distinct_pathkeys = NIL;
3526 
3527  root->sort_pathkeys =
3529  parse->sortClause,
3530  tlist);
3531 
3532  /*
3533  * Figure out whether we want a sorted result from query_planner.
3534  *
3535  * If we have a sortable GROUP BY clause, then we want a result sorted
3536  * properly for grouping. Otherwise, if we have window functions to
3537  * evaluate, we try to sort for the first window. Otherwise, if there's a
3538  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3539  * we try to produce output that's sufficiently well sorted for the
3540  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3541  * by the ORDER BY clause.
3542  *
3543  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3544  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3545  * that might just leave us failing to exploit an available sort order at
3546  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3547  * much easier, since we know that the parser ensured that one is a
3548  * superset of the other.
3549  */
3550  if (root->group_pathkeys)
3551  root->query_pathkeys = root->group_pathkeys;
3552  else if (root->window_pathkeys)
3553  root->query_pathkeys = root->window_pathkeys;
3554  else if (list_length(root->distinct_pathkeys) >
3555  list_length(root->sort_pathkeys))
3556  root->query_pathkeys = root->distinct_pathkeys;
3557  else if (root->sort_pathkeys)
3558  root->query_pathkeys = root->sort_pathkeys;
3559  else
3560  root->query_pathkeys = NIL;
3561 }
3562 
3563 /*
3564  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3565  *
3566  * path_rows: number of output rows from scan/join step
3567  * gd: grouping sets data including list of grouping sets and their clauses
3568  * target_list: target list containing group clause references
3569  *
3570  * If doing grouping sets, we also annotate the gsets data with the estimates
3571  * for each set and each individual rollup list, with a view to later
3572  * determining whether some combination of them could be hashed instead.
3573  */
3574 static double
3576  double path_rows,
3577  grouping_sets_data *gd,
3578  List *target_list)
3579 {
3580  Query *parse = root->parse;
3581  double dNumGroups;
3582 
3583  if (parse->groupClause)
3584  {
3585  List *groupExprs;
3586 
3587  if (parse->groupingSets)
3588  {
3589  /* Add up the estimates for each grouping set */
3590  ListCell *lc;
3591 
3592  Assert(gd); /* keep Coverity happy */
3593 
3594  dNumGroups = 0;
3595 
3596  foreach(lc, gd->rollups)
3597  {
3598  RollupData *rollup = lfirst_node(RollupData, lc);
3599  ListCell *lc2;
3600  ListCell *lc3;
3601 
3602  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3603  target_list);
3604 
3605  rollup->numGroups = 0.0;
3606 
3607  forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3608  {
3609  List *gset = (List *) lfirst(lc2);
3611  double numGroups = estimate_num_groups(root,
3612  groupExprs,
3613  path_rows,
3614  &gset,
3615  NULL);
3616 
3617  gs->numGroups = numGroups;
3618  rollup->numGroups += numGroups;
3619  }
3620 
3621  dNumGroups += rollup->numGroups;
3622  }
3623 
3624  if (gd->hash_sets_idx)
3625  {
3626  ListCell *lc2;
3627 
3628  gd->dNumHashGroups = 0;
3629 
3630  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3631  target_list);
3632 
3633  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3634  {
3635  List *gset = (List *) lfirst(lc);
3637  double numGroups = estimate_num_groups(root,
3638  groupExprs,
3639  path_rows,
3640  &gset,
3641  NULL);
3642 
3643  gs->numGroups = numGroups;
3644  gd->dNumHashGroups += numGroups;
3645  }
3646 
3647  dNumGroups += gd->dNumHashGroups;
3648  }
3649  }
3650  else
3651  {
3652  /* Plain GROUP BY -- estimate based on optimized groupClause */
3654  target_list);
3655 
3656  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3657  NULL, NULL);
3658  }
3659  }
3660  else if (parse->groupingSets)
3661  {
3662  /* Empty grouping sets ... one result row for each one */
3663  dNumGroups = list_length(parse->groupingSets);
3664  }
3665  else if (parse->hasAggs || root->hasHavingQual)
3666  {
3667  /* Plain aggregation, one result row */
3668  dNumGroups = 1;
3669  }
3670  else
3671  {
3672  /* Not grouping */
3673  dNumGroups = 1;
3674  }
3675 
3676  return dNumGroups;
3677 }
3678 
3679 /*
3680  * create_grouping_paths
3681  *
3682  * Build a new upperrel containing Paths for grouping and/or aggregation.
3683  * Along the way, we also build an upperrel for Paths which are partially
3684  * grouped and/or aggregated. A partially grouped and/or aggregated path
3685  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3686  * the only partially grouped paths we build are also partial paths; that
3687  * is, they need a Gather and then a FinalizeAggregate.
3688  *
3689  * input_rel: contains the source-data Paths
3690  * target: the pathtarget for the result Paths to compute
3691  * gd: grouping sets data including list of grouping sets and their clauses
3692  *
3693  * Note: all Paths in input_rel are expected to return the target computed
3694  * by make_group_input_target.
3695  */
3696 static RelOptInfo *
3698  RelOptInfo *input_rel,
3699  PathTarget *target,
3700  bool target_parallel_safe,
3701  grouping_sets_data *gd)
3702 {
3703  Query *parse = root->parse;
3704  RelOptInfo *grouped_rel;
3705  RelOptInfo *partially_grouped_rel;
3706  AggClauseCosts agg_costs;
3707 
3708  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3709  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3710 
3711  /*
3712  * Create grouping relation to hold fully aggregated grouping and/or
3713  * aggregation paths.
3714  */
3715  grouped_rel = make_grouping_rel(root, input_rel, target,
3716  target_parallel_safe, parse->havingQual);
3717 
3718  /*
3719  * Create either paths for a degenerate grouping or paths for ordinary
3720  * grouping, as appropriate.
3721  */
3722  if (is_degenerate_grouping(root))
3723  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3724  else
3725  {
3726  int flags = 0;
3727  GroupPathExtraData extra;
3728 
3729  /*
3730  * Determine whether it's possible to perform sort-based
3731  * implementations of grouping. (Note that if processed_groupClause
3732  * is empty, grouping_is_sortable() is trivially true, and all the
3733  * pathkeys_contained_in() tests will succeed too, so that we'll
3734  * consider every surviving input path.)
3735  *
3736  * If we have grouping sets, we might be able to sort some but not all
3737  * of them; in this case, we need can_sort to be true as long as we
3738  * must consider any sorted-input plan.
3739  */
3740  if ((gd && gd->rollups != NIL)
3742  flags |= GROUPING_CAN_USE_SORT;
3743 
3744  /*
3745  * Determine whether we should consider hash-based implementations of
3746  * grouping.
3747  *
3748  * Hashed aggregation only applies if we're grouping. If we have
3749  * grouping sets, some groups might be hashable but others not; in
3750  * this case we set can_hash true as long as there is nothing globally
3751  * preventing us from hashing (and we should therefore consider plans
3752  * with hashes).
3753  *
3754  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3755  * BY aggregates. (Doing so would imply storing *all* the input
3756  * values in the hash table, and/or running many sorts in parallel,
3757  * either of which seems like a certain loser.) We similarly don't
3758  * support ordered-set aggregates in hashed aggregation, but that case
3759  * is also included in the numOrderedAggs count.
3760  *
3761  * Note: grouping_is_hashable() is much more expensive to check than
3762  * the other gating conditions, so we want to do it last.
3763  */
3764  if ((parse->groupClause != NIL &&
3765  root->numOrderedAggs == 0 &&
3767  flags |= GROUPING_CAN_USE_HASH;
3768 
3769  /*
3770  * Determine whether partial aggregation is possible.
3771  */
3772  if (can_partial_agg(root))
3773  flags |= GROUPING_CAN_PARTIAL_AGG;
3774 
3775  extra.flags = flags;
3776  extra.target_parallel_safe = target_parallel_safe;
3777  extra.havingQual = parse->havingQual;
3778  extra.targetList = parse->targetList;
3779  extra.partial_costs_set = false;
3780 
3781  /*
3782  * Determine whether partitionwise aggregation is in theory possible.
3783  * It can be disabled by the user, and for now, we don't try to
3784  * support grouping sets. create_ordinary_grouping_paths() will check
3785  * additional conditions, such as whether input_rel is partitioned.
3786  */
3787  if (enable_partitionwise_aggregate && !parse->groupingSets)
3789  else
3791 
3792  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3793  &agg_costs, gd, &extra,
3794  &partially_grouped_rel);
3795  }
3796 
3797  set_cheapest(grouped_rel);
3798  return grouped_rel;
3799 }
3800 
3801 /*
3802  * make_grouping_rel
3803  *
3804  * Create a new grouping rel and set basic properties.
3805  *
3806  * input_rel represents the underlying scan/join relation.
3807  * target is the output expected from the grouping relation.
3808  */
3809 static RelOptInfo *
3811  PathTarget *target, bool target_parallel_safe,
3812  Node *havingQual)
3813 {
3814  RelOptInfo *grouped_rel;
3815 
3816  if (IS_OTHER_REL(input_rel))
3817  {
3818  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3819  input_rel->relids);
3820  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3821  }
3822  else
3823  {
3824  /*
3825  * By tradition, the relids set for the main grouping relation is
3826  * NULL. (This could be changed, but might require adjustments
3827  * elsewhere.)
3828  */
3829  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3830  }
3831 
3832  /* Set target. */
3833  grouped_rel->reltarget = target;
3834 
3835  /*
3836  * If the input relation is not parallel-safe, then the grouped relation
3837  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3838  * target list and HAVING quals are parallel-safe.
3839  */
3840  if (input_rel->consider_parallel && target_parallel_safe &&
3841  is_parallel_safe(root, (Node *) havingQual))
3842  grouped_rel->consider_parallel = true;
3843 
3844  /*
3845  * If the input rel belongs to a single FDW, so does the grouped rel.
3846  */
3847  grouped_rel->serverid = input_rel->serverid;
3848  grouped_rel->userid = input_rel->userid;
3849  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3850  grouped_rel->fdwroutine = input_rel->fdwroutine;
3851 
3852  return grouped_rel;
3853 }
3854 
3855 /*
3856  * is_degenerate_grouping
3857  *
3858  * A degenerate grouping is one in which the query has a HAVING qual and/or
3859  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3860  * grouping sets are all empty).
3861  */
3862 static bool
3864 {
3865  Query *parse = root->parse;
3866 
3867  return (root->hasHavingQual || parse->groupingSets) &&
3868  !parse->hasAggs && parse->groupClause == NIL;
3869 }
3870 
3871 /*
3872  * create_degenerate_grouping_paths
3873  *
3874  * When the grouping is degenerate (see is_degenerate_grouping), we are
3875  * supposed to emit either zero or one row for each grouping set depending on
3876  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3877  * either HAVING or the targetlist, so we actually do not need the FROM table
3878  * at all! We can just throw away the plan-so-far and generate a Result node.
3879  * This is a sufficiently unusual corner case that it's not worth contorting
3880  * the structure of this module to avoid having to generate the earlier paths
3881  * in the first place.
3882  */
3883 static void
3885  RelOptInfo *grouped_rel)
3886 {
3887  Query *parse = root->parse;
3888  int nrows;
3889  Path *path;
3890 
3891  nrows = list_length(parse->groupingSets);
3892  if (nrows > 1)
3893  {
3894  /*
3895  * Doesn't seem worthwhile writing code to cons up a generate_series
3896  * or a values scan to emit multiple rows. Instead just make N clones
3897  * and append them. (With a volatile HAVING clause, this means you
3898  * might get between 0 and N output rows. Offhand I think that's
3899  * desired.)
3900  */
3901  List *paths = NIL;
3902 
3903  while (--nrows >= 0)
3904  {
3905  path = (Path *)
3906  create_group_result_path(root, grouped_rel,
3907  grouped_rel->reltarget,
3908  (List *) parse->havingQual);
3909  paths = lappend(paths, path);
3910  }
3911  path = (Path *)
3912  create_append_path(root,
3913  grouped_rel,
3914  paths,
3915  NIL,
3916  NIL,
3917  NULL,
3918  0,
3919  false,
3920  -1);
3921  }
3922  else
3923  {
3924  /* No grouping sets, or just one, so one output row */
3925  path = (Path *)
3926  create_group_result_path(root, grouped_rel,
3927  grouped_rel->reltarget,
3928  (List *) parse->havingQual);
3929  }
3930 
3931  add_path(grouped_rel, path);
3932 }
3933 
3934 /*
3935  * create_ordinary_grouping_paths
3936  *
3937  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3938  *
3939  * We need to consider sorted and hashed aggregation in the same function,
3940  * because otherwise (1) it would be harder to throw an appropriate error
3941  * message if neither way works, and (2) we should not allow hashtable size
3942  * considerations to dissuade us from using hashing if sorting is not possible.
3943  *
3944  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3945  * function creates, or to NULL if it doesn't create one.
3946  */
3947 static void
3949  RelOptInfo *grouped_rel,
3950  const AggClauseCosts *agg_costs,
3951  grouping_sets_data *gd,
3952  GroupPathExtraData *extra,
3953  RelOptInfo **partially_grouped_rel_p)
3954 {
3955  Path *cheapest_path = input_rel->cheapest_total_path;
3956  RelOptInfo *partially_grouped_rel = NULL;
3957  double dNumGroups;
3959 
3960  /*
3961  * If this is the topmost grouping relation or if the parent relation is
3962  * doing some form of partitionwise aggregation, then we may be able to do
3963  * it at this level also. However, if the input relation is not
3964  * partitioned, partitionwise aggregate is impossible.
3965  */
3966  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3967  IS_PARTITIONED_REL(input_rel))
3968  {
3969  /*
3970  * If this is the topmost relation or if the parent relation is doing
3971  * full partitionwise aggregation, then we can do full partitionwise
3972  * aggregation provided that the GROUP BY clause contains all of the
3973  * partitioning columns at this level. Otherwise, we can do at most
3974  * partial partitionwise aggregation. But if partial aggregation is
3975  * not supported in general then we can't use it for partitionwise
3976  * aggregation either.
3977  *
3978  * Check parse->groupClause not processed_groupClause, because it's
3979  * okay if some of the partitioning columns were proved redundant.
3980  */
3981  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3982  group_by_has_partkey(input_rel, extra->targetList,
3983  root->parse->groupClause))
3985  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3987  else
3989  }
3990 
3991  /*
3992  * Before generating paths for grouped_rel, we first generate any possible
3993  * partially grouped paths; that way, later code can easily consider both
3994  * parallel and non-parallel approaches to grouping.
3995  */
3996  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3997  {
3998  bool force_rel_creation;
3999 
4000  /*
4001  * If we're doing partitionwise aggregation at this level, force
4002  * creation of a partially_grouped_rel so we can add partitionwise
4003  * paths to it.
4004  */
4005  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
4006 
4007  partially_grouped_rel =
4009  grouped_rel,
4010  input_rel,
4011  gd,
4012  extra,
4013  force_rel_creation);
4014  }
4015 
4016  /* Set out parameter. */
4017  *partially_grouped_rel_p = partially_grouped_rel;
4018 
4019  /* Apply partitionwise aggregation technique, if possible. */
4020  if (patype != PARTITIONWISE_AGGREGATE_NONE)
4021  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
4022  partially_grouped_rel, agg_costs,
4023  gd, patype, extra);
4024 
4025  /* If we are doing partial aggregation only, return. */
4027  {
4028  Assert(partially_grouped_rel);
4029 
4030  if (partially_grouped_rel->pathlist)
4031  set_cheapest(partially_grouped_rel);
4032 
4033  return;
4034  }
4035 
4036  /* Gather any partially grouped partial paths. */
4037  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4038  {
4039  gather_grouping_paths(root, partially_grouped_rel);
4040  set_cheapest(partially_grouped_rel);
4041  }
4042 
4043  /*
4044  * Estimate number of groups.
4045  */
4046  dNumGroups = get_number_of_groups(root,
4047  cheapest_path->rows,
4048  gd,
4049  extra->targetList);
4050 
4051  /* Build final grouping paths */
4052  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4053  partially_grouped_rel, agg_costs, gd,
4054  dNumGroups, extra);
4055 
4056  /* Give a helpful error if we failed to find any implementation */
4057  if (grouped_rel->pathlist == NIL)
4058  ereport(ERROR,
4059  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4060  errmsg("could not implement GROUP BY"),
4061  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4062 
4063  /*
4064  * If there is an FDW that's responsible for all baserels of the query,
4065  * let it consider adding ForeignPaths.
4066  */
4067  if (grouped_rel->fdwroutine &&
4068  grouped_rel->fdwroutine->GetForeignUpperPaths)
4069  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4070  input_rel, grouped_rel,
4071  extra);
4072 
4073  /* Let extensions possibly add some more paths */
4075  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4076  input_rel, grouped_rel,
4077  extra);
4078 }
4079 
4080 /*
4081  * For a given input path, consider the possible ways of doing grouping sets on
4082  * it, by combinations of hashing and sorting. This can be called multiple
4083  * times, so it's important that it not scribble on input. No result is
4084  * returned, but any generated paths are added to grouped_rel.
4085  */
4086 static void
4088  RelOptInfo *grouped_rel,
4089  Path *path,
4090  bool is_sorted,
4091  bool can_hash,
4092  grouping_sets_data *gd,
4093  const AggClauseCosts *agg_costs,
4094  double dNumGroups)
4095 {
4096  Query *parse = root->parse;
4097  Size hash_mem_limit = get_hash_memory_limit();
4098 
4099  /*
4100  * If we're not being offered sorted input, then only consider plans that
4101  * can be done entirely by hashing.
4102  *
4103  * We can hash everything if it looks like it'll fit in hash_mem. But if
4104  * the input is actually sorted despite not being advertised as such, we
4105  * prefer to make use of that in order to use less memory.
4106  *
4107  * If none of the grouping sets are sortable, then ignore the hash_mem
4108  * limit and generate a path anyway, since otherwise we'll just fail.
4109  */
4110  if (!is_sorted)
4111  {
4112  List *new_rollups = NIL;
4113  RollupData *unhashed_rollup = NULL;
4114  List *sets_data;
4115  List *empty_sets_data = NIL;
4116  List *empty_sets = NIL;
4117  ListCell *lc;
4118  ListCell *l_start = list_head(gd->rollups);
4119  AggStrategy strat = AGG_HASHED;
4120  double hashsize;
4121  double exclude_groups = 0.0;
4122 
4123  Assert(can_hash);
4124 
4125  /*
4126  * If the input is coincidentally sorted usefully (which can happen
4127  * even if is_sorted is false, since that only means that our caller
4128  * has set up the sorting for us), then save some hashtable space by
4129  * making use of that. But we need to watch out for degenerate cases:
4130  *
4131  * 1) If there are any empty grouping sets, then group_pathkeys might
4132  * be NIL if all non-empty grouping sets are unsortable. In this case,
4133  * there will be a rollup containing only empty groups, and the
4134  * pathkeys_contained_in test is vacuously true; this is ok.
4135  *
4136  * XXX: the above relies on the fact that group_pathkeys is generated
4137  * from the first rollup. If we add the ability to consider multiple
4138  * sort orders for grouping input, this assumption might fail.
4139  *
4140  * 2) If there are no empty sets and only unsortable sets, then the
4141  * rollups list will be empty (and thus l_start == NULL), and
4142  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4143  * pathkeys_contained_in test doesn't cause us to crash.
4144  */
4145  if (l_start != NULL &&
4147  {
4148  unhashed_rollup = lfirst_node(RollupData, l_start);
4149  exclude_groups = unhashed_rollup->numGroups;
4150  l_start = lnext(gd->rollups, l_start);
4151  }
4152 
4153  hashsize = estimate_hashagg_tablesize(root,
4154  path,
4155  agg_costs,
4156  dNumGroups - exclude_groups);
4157 
4158  /*
4159  * gd->rollups is empty if we have only unsortable columns to work
4160  * with. Override hash_mem in that case; otherwise, we'll rely on the
4161  * sorted-input case to generate usable mixed paths.
4162  */
4163  if (hashsize > hash_mem_limit && gd->rollups)
4164  return; /* nope, won't fit */
4165 
4166  /*
4167  * We need to burst the existing rollups list into individual grouping
4168  * sets and recompute a groupClause for each set.
4169  */
4170  sets_data = list_copy(gd->unsortable_sets);
4171 
4172  for_each_cell(lc, gd->rollups, l_start)
4173  {
4174  RollupData *rollup = lfirst_node(RollupData, lc);
4175 
4176  /*
4177  * If we find an unhashable rollup that's not been skipped by the
4178  * "actually sorted" check above, we can't cope; we'd need sorted
4179  * input (with a different sort order) but we can't get that here.
4180  * So bail out; we'll get a valid path from the is_sorted case
4181  * instead.
4182  *
4183  * The mere presence of empty grouping sets doesn't make a rollup
4184  * unhashable (see preprocess_grouping_sets), we handle those
4185  * specially below.
4186  */
4187  if (!rollup->hashable)
4188  return;
4189 
4190  sets_data = list_concat(sets_data, rollup->gsets_data);
4191  }
4192  foreach(lc, sets_data)
4193  {
4195  List *gset = gs->set;
4196  RollupData *rollup;
4197 
4198  if (gset == NIL)
4199  {
4200  /* Empty grouping sets can't be hashed. */
4201  empty_sets_data = lappend(empty_sets_data, gs);
4202  empty_sets = lappend(empty_sets, NIL);
4203  }
4204  else
4205  {
4206  rollup = makeNode(RollupData);
4207 
4208  rollup->groupClause = preprocess_groupclause(root, gset);
4209  rollup->gsets_data = list_make1(gs);
4210  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4211  rollup->gsets_data,
4212  gd->tleref_to_colnum_map);
4213  rollup->numGroups = gs->numGroups;
4214  rollup->hashable = true;
4215  rollup->is_hashed = true;
4216  new_rollups = lappend(new_rollups, rollup);
4217  }
4218  }
4219 
4220  /*
4221  * If we didn't find anything nonempty to hash, then bail. We'll
4222  * generate a path from the is_sorted case.
4223  */
4224  if (new_rollups == NIL)
4225  return;
4226 
4227  /*
4228  * If there were empty grouping sets they should have been in the
4229  * first rollup.
4230  */
4231  Assert(!unhashed_rollup || !empty_sets);
4232 
4233  if (unhashed_rollup)
4234  {
4235  new_rollups = lappend(new_rollups, unhashed_rollup);
4236  strat = AGG_MIXED;
4237  }
4238  else if (empty_sets)
4239  {
4240  RollupData *rollup = makeNode(RollupData);
4241 
4242  rollup->groupClause = NIL;
4243  rollup->gsets_data = empty_sets_data;
4244  rollup->gsets = empty_sets;
4245  rollup->numGroups = list_length(empty_sets);
4246  rollup->hashable = false;
4247  rollup->is_hashed = false;
4248  new_rollups = lappend(new_rollups, rollup);
4249  strat = AGG_MIXED;
4250  }
4251 
4252  add_path(grouped_rel, (Path *)
4254  grouped_rel,
4255  path,
4256  (List *) parse->havingQual,
4257  strat,
4258  new_rollups,
4259  agg_costs));
4260  return;
4261  }
4262 
4263  /*
4264  * If we have sorted input but nothing we can do with it, bail.
4265  */
4266  if (gd->rollups == NIL)
4267  return;
4268 
4269  /*
4270  * Given sorted input, we try and make two paths: one sorted and one mixed
4271  * sort/hash. (We need to try both because hashagg might be disabled, or
4272  * some columns might not be sortable.)
4273  *
4274  * can_hash is passed in as false if some obstacle elsewhere (such as
4275  * ordered aggs) means that we shouldn't consider hashing at all.
4276  */
4277  if (can_hash && gd->any_hashable)
4278  {
4279  List *rollups = NIL;
4280  List *hash_sets = list_copy(gd->unsortable_sets);
4281  double availspace = hash_mem_limit;
4282  ListCell *lc;
4283 
4284  /*
4285  * Account first for space needed for groups we can't sort at all.
4286  */
4287  availspace -= estimate_hashagg_tablesize(root,
4288  path,
4289  agg_costs,
4290  gd->dNumHashGroups);
4291 
4292  if (availspace > 0 && list_length(gd->rollups) > 1)
4293  {
4294  double scale;
4295  int num_rollups = list_length(gd->rollups);
4296  int k_capacity;
4297  int *k_weights = palloc(num_rollups * sizeof(int));
4298  Bitmapset *hash_items = NULL;
4299  int i;
4300 
4301  /*
4302  * We treat this as a knapsack problem: the knapsack capacity
4303  * represents hash_mem, the item weights are the estimated memory
4304  * usage of the hashtables needed to implement a single rollup,
4305  * and we really ought to use the cost saving as the item value;
4306  * however, currently the costs assigned to sort nodes don't
4307  * reflect the comparison costs well, and so we treat all items as
4308  * of equal value (each rollup we hash instead saves us one sort).
4309  *
4310  * To use the discrete knapsack, we need to scale the values to a
4311  * reasonably small bounded range. We choose to allow a 5% error
4312  * margin; we have no more than 4096 rollups in the worst possible
4313  * case, which with a 5% error margin will require a bit over 42MB
4314  * of workspace. (Anyone wanting to plan queries that complex had
4315  * better have the memory for it. In more reasonable cases, with
4316  * no more than a couple of dozen rollups, the memory usage will
4317  * be negligible.)
4318  *
4319  * k_capacity is naturally bounded, but we clamp the values for
4320  * scale and weight (below) to avoid overflows or underflows (or
4321  * uselessly trying to use a scale factor less than 1 byte).
4322  */
4323  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4324  k_capacity = (int) floor(availspace / scale);
4325 
4326  /*
4327  * We leave the first rollup out of consideration since it's the
4328  * one that matches the input sort order. We assign indexes "i"
4329  * to only those entries considered for hashing; the second loop,
4330  * below, must use the same condition.
4331  */
4332  i = 0;
4333  for_each_from(lc, gd->rollups, 1)
4334  {
4335  RollupData *rollup = lfirst_node(RollupData, lc);
4336 
4337  if (rollup->hashable)
4338  {
4339  double sz = estimate_hashagg_tablesize(root,
4340  path,
4341  agg_costs,
4342  rollup->numGroups);
4343 
4344  /*
4345  * If sz is enormous, but hash_mem (and hence scale) is
4346  * small, avoid integer overflow here.
4347  */
4348  k_weights[i] = (int) Min(floor(sz / scale),
4349  k_capacity + 1.0);
4350  ++i;
4351  }
4352  }
4353 
4354  /*
4355  * Apply knapsack algorithm; compute the set of items which
4356  * maximizes the value stored (in this case the number of sorts
4357  * saved) while keeping the total size (approximately) within
4358  * capacity.
4359  */
4360  if (i > 0)
4361  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4362 
4363  if (!bms_is_empty(hash_items))
4364  {
4365  rollups = list_make1(linitial(gd->rollups));
4366 
4367  i = 0;
4368  for_each_from(lc, gd->rollups, 1)
4369  {
4370  RollupData *rollup = lfirst_node(RollupData, lc);
4371 
4372  if (rollup->hashable)
4373  {
4374  if (bms_is_member(i, hash_items))
4375  hash_sets = list_concat(hash_sets,
4376  rollup->gsets_data);
4377  else
4378  rollups = lappend(rollups, rollup);
4379  ++i;
4380  }
4381  else
4382  rollups = lappend(rollups, rollup);
4383  }
4384  }
4385  }
4386 
4387  if (!rollups && hash_sets)
4388  rollups = list_copy(gd->rollups);
4389 
4390  foreach(lc, hash_sets)
4391  {
4393  RollupData *rollup = makeNode(RollupData);
4394 
4395  Assert(gs->set != NIL);
4396 
4397  rollup->groupClause = preprocess_groupclause(root, gs->set);
4398  rollup->gsets_data = list_make1(gs);
4399  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4400  rollup->gsets_data,
4401  gd->tleref_to_colnum_map);
4402  rollup->numGroups = gs->numGroups;
4403  rollup->hashable = true;
4404  rollup->is_hashed = true;
4405  rollups = lcons(rollup, rollups);
4406  }
4407 
4408  if (rollups)
4409  {
4410  add_path(grouped_rel, (Path *)
4412  grouped_rel,
4413  path,
4414  (List *) parse->havingQual,
4415  AGG_MIXED,
4416  rollups,
4417  agg_costs));
4418  }
4419  }
4420 
4421  /*
4422  * Now try the simple sorted case.
4423  */
4424  if (!gd->unsortable_sets)
4425  add_path(grouped_rel, (Path *)
4427  grouped_rel,
4428  path,
4429  (List *) parse->havingQual,
4430  AGG_SORTED,
4431  gd->rollups,
4432  agg_costs));
4433 }
4434 
4435 /*
4436  * create_window_paths
4437  *
4438  * Build a new upperrel containing Paths for window-function evaluation.
4439  *
4440  * input_rel: contains the source-data Paths
4441  * input_target: result of make_window_input_target
4442  * output_target: what the topmost WindowAggPath should return
4443  * wflists: result of find_window_functions
4444  * activeWindows: result of select_active_windows
4445  *
4446  * Note: all Paths in input_rel are expected to return input_target.
4447  */
4448 static RelOptInfo *
4450  RelOptInfo *input_rel,
4451  PathTarget *input_target,
4452  PathTarget *output_target,
4453  bool output_target_parallel_safe,
4454  WindowFuncLists *wflists,
4455  List *activeWindows)
4456 {
4457  RelOptInfo *window_rel;
4458  ListCell *lc;
4459 
4460  /* For now, do all work in the (WINDOW, NULL) upperrel */
4461  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4462 
4463  /*
4464  * If the input relation is not parallel-safe, then the window relation
4465  * can't be parallel-safe, either. Otherwise, we need to examine the
4466  * target list and active windows for non-parallel-safe constructs.
4467  */
4468  if (input_rel->consider_parallel && output_target_parallel_safe &&
4469  is_parallel_safe(root, (Node *) activeWindows))
4470  window_rel->consider_parallel = true;
4471 
4472  /*
4473  * If the input rel belongs to a single FDW, so does the window rel.
4474  */
4475  window_rel->serverid = input_rel->serverid;
4476  window_rel->userid = input_rel->userid;
4477  window_rel->useridiscurrent = input_rel->useridiscurrent;
4478  window_rel->fdwroutine = input_rel->fdwroutine;
4479 
4480  /*
4481  * Consider computing window functions starting from the existing
4482  * cheapest-total path (which will likely require a sort) as well as any
4483  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4484  */
4485  foreach(lc, input_rel->pathlist)
4486  {
4487  Path *path = (Path *) lfirst(lc);
4488  int presorted_keys;
4489 
4490  if (path == input_rel->cheapest_total_path ||
4492  &presorted_keys) ||
4493  presorted_keys > 0)
4495  window_rel,
4496  path,
4497  input_target,
4498  output_target,
4499  wflists,
4500  activeWindows);
4501  }
4502 
4503  /*
4504  * If there is an FDW that's responsible for all baserels of the query,
4505  * let it consider adding ForeignPaths.
4506  */
4507  if (window_rel->fdwroutine &&
4508  window_rel->fdwroutine->GetForeignUpperPaths)
4509  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4510  input_rel, window_rel,
4511  NULL);
4512 
4513  /* Let extensions possibly add some more paths */
4515  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4516  input_rel, window_rel, NULL);
4517 
4518  /* Now choose the best path(s) */
4519  set_cheapest(window_rel);
4520 
4521  return window_rel;
4522 }
4523 
4524 /*
4525  * Stack window-function implementation steps atop the given Path, and
4526  * add the result to window_rel.
4527  *
4528  * window_rel: upperrel to contain result
4529  * path: input Path to use (must return input_target)
4530  * input_target: result of make_window_input_target
4531  * output_target: what the topmost WindowAggPath should return
4532  * wflists: result of find_window_functions
4533  * activeWindows: result of select_active_windows
4534  */
4535 static void
4537  RelOptInfo *window_rel,
4538  Path *path,
4539  PathTarget *input_target,
4540  PathTarget *output_target,
4541  WindowFuncLists *wflists,
4542  List *activeWindows)
4543 {
4544  PathTarget *window_target;
4545  ListCell *l;
4546  List *topqual = NIL;
4547 
4548  /*
4549  * Since each window clause could require a different sort order, we stack
4550  * up a WindowAgg node for each clause, with sort steps between them as
4551  * needed. (We assume that select_active_windows chose a good order for
4552  * executing the clauses in.)
4553  *
4554  * input_target should contain all Vars and Aggs needed for the result.
4555  * (In some cases we wouldn't need to propagate all of these all the way
4556  * to the top, since they might only be needed as inputs to WindowFuncs.
4557  * It's probably not worth trying to optimize that though.) It must also
4558  * contain all window partitioning and sorting expressions, to ensure
4559  * they're computed only once at the bottom of the stack (that's critical
4560  * for volatile functions). As we climb up the stack, we'll add outputs
4561  * for the WindowFuncs computed at each level.
4562  */
4563  window_target = input_target;
4564 
4565  foreach(l, activeWindows)
4566  {
4568  List *window_pathkeys;
4569  int presorted_keys;
4570  bool is_sorted;
4571  bool topwindow;
4572 
4573  window_pathkeys = make_pathkeys_for_window(root,
4574  wc,
4575  root->processed_tlist);
4576 
4577  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4578  path->pathkeys,
4579  &presorted_keys);
4580 
4581  /* Sort if necessary */
4582  if (!is_sorted)
4583  {
4584  /*
4585  * No presorted keys or incremental sort disabled, just perform a
4586  * complete sort.
4587  */
4588  if (presorted_keys == 0 || !enable_incremental_sort)
4589  path = (Path *) create_sort_path(root, window_rel,
4590  path,
4591  window_pathkeys,
4592  -1.0);
4593  else
4594  {
4595  /*
4596  * Since we have presorted keys and incremental sort is
4597  * enabled, just use incremental sort.
4598  */
4599  path = (Path *) create_incremental_sort_path(root,
4600  window_rel,
4601  path,
4602  window_pathkeys,
4603  presorted_keys,
4604  -1.0);
4605  }
4606  }
4607 
4608  if (lnext(activeWindows, l))
4609  {
4610  /*
4611  * Add the current WindowFuncs to the output target for this
4612  * intermediate WindowAggPath. We must copy window_target to
4613  * avoid changing the previous path's target.
4614  *
4615  * Note: a WindowFunc adds nothing to the target's eval costs; but
4616  * we do need to account for the increase in tlist width.
4617  */
4618  ListCell *lc2;
4619 
4620  window_target = copy_pathtarget(window_target);
4621  foreach(lc2, wflists->windowFuncs[wc->winref])
4622  {
4623  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4624 
4625  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4626  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4627  }
4628  }
4629  else
4630  {
4631  /* Install the goal target in the topmost WindowAgg */
4632  window_target = output_target;
4633  }
4634 
4635  /* mark the final item in the list as the top-level window */
4636  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4637 
4638  /*
4639  * Accumulate all of the runConditions from each intermediate
4640  * WindowClause. The top-level WindowAgg must pass these as a qual so
4641  * that it filters out unwanted tuples correctly.
4642  */
4643  if (!topwindow)
4644  topqual = list_concat(topqual, wc->runCondition);
4645 
4646  path = (Path *)
4647  create_windowagg_path(root, window_rel, path, window_target,
4648  wflists->windowFuncs[wc->winref],
4649  wc, topwindow ? topqual : NIL, topwindow);
4650  }
4651 
4652  add_path(window_rel, path);
4653 }
4654 
4655 /*
4656  * create_distinct_paths
4657  *
4658  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4659  *
4660  * input_rel: contains the source-data Paths
4661  *
4662  * Note: input paths should already compute the desired pathtarget, since
4663  * Sort/Unique won't project anything.
4664  */
4665 static RelOptInfo *
4667 {
4668  RelOptInfo *distinct_rel;
4669 
4670  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4671  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4672 
4673  /*
4674  * We don't compute anything at this level, so distinct_rel will be
4675  * parallel-safe if the input rel is parallel-safe. In particular, if
4676  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4677  * output those expressions, and will not be parallel-safe unless those
4678  * expressions are parallel-safe.
4679  */
4680  distinct_rel->consider_parallel = input_rel->consider_parallel;
4681 
4682  /*
4683  * If the input rel belongs to a single FDW, so does the distinct_rel.
4684  */
4685  distinct_rel->serverid = input_rel->serverid;
4686  distinct_rel->userid = input_rel->userid;
4687  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4688  distinct_rel->fdwroutine = input_rel->fdwroutine;
4689 
4690  /* build distinct paths based on input_rel's pathlist */
4691  create_final_distinct_paths(root, input_rel, distinct_rel);
4692 
4693  /* now build distinct paths based on input_rel's partial_pathlist */
4694  create_partial_distinct_paths(root, input_rel, distinct_rel);
4695 
4696  /* Give a helpful error if we failed to create any paths */
4697  if (distinct_rel->pathlist == NIL)
4698  ereport(ERROR,
4699  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4700  errmsg("could not implement DISTINCT"),
4701  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4702 
4703  /*
4704  * If there is an FDW that's responsible for all baserels of the query,
4705  * let it consider adding ForeignPaths.
4706  */
4707  if (distinct_rel->fdwroutine &&
4708  distinct_rel->fdwroutine->GetForeignUpperPaths)
4709  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4711  input_rel,
4712  distinct_rel,
4713  NULL);
4714 
4715  /* Let extensions possibly add some more paths */
4717  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4718  distinct_rel, NULL);
4719 
4720  /* Now choose the best path(s) */
4721  set_cheapest(distinct_rel);
4722 
4723  return distinct_rel;
4724 }
4725 
4726 /*
4727  * create_partial_distinct_paths
4728  *
4729  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4730  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4731  * paths on top and add a final unique/aggregate path to remove any duplicate
4732  * produced from combining rows from parallel workers.
4733  */
4734 static void
4736  RelOptInfo *final_distinct_rel)
4737 {
4738  RelOptInfo *partial_distinct_rel;
4739  Query *parse;
4740  List *distinctExprs;
4741  double numDistinctRows;
4742  Path *cheapest_partial_path;
4743  ListCell *lc;
4744 
4745  /* nothing to do when there are no partial paths in the input rel */
4746  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4747  return;
4748 
4749  parse = root->parse;
4750 
4751  /* can't do parallel DISTINCT ON */
4752  if (parse->hasDistinctOn)
4753  return;
4754 
4755  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4756  NULL);
4757  partial_distinct_rel->reltarget = root->upper_targets[UPPERREL_PARTIAL_DISTINCT];
4758  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4759 
4760  /*
4761  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4762  */
4763  partial_distinct_rel->serverid = input_rel->serverid;
4764  partial_distinct_rel->userid = input_rel->userid;
4765  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4766  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4767 
4768  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4769 
4770  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4771  parse->targetList);
4772 
4773  /* estimate how many distinct rows we'll get from each worker */
4774  numDistinctRows = estimate_num_groups(root, distinctExprs,
4775  cheapest_partial_path->rows,
4776  NULL, NULL);
4777 
4778  /*
4779  * Try sorting the cheapest path and incrementally sorting any paths with
4780  * presorted keys and put a unique paths atop of those.
4781  */
4783  {
4784  foreach(lc, input_rel->partial_pathlist)
4785  {
4786  Path *input_path = (Path *) lfirst(lc);
4787  Path *sorted_path;
4788  bool is_sorted;
4789  int presorted_keys;
4790 
4792  input_path->pathkeys,
4793  &presorted_keys);
4794 
4795  if (is_sorted)
4796  sorted_path = input_path;
4797  else
4798  {
4799  /*
4800  * Try at least sorting the cheapest path and also try
4801  * incrementally sorting any path which is partially sorted
4802  * already (no need to deal with paths which have presorted
4803  * keys when incremental sort is disabled unless it's the
4804  * cheapest partial path).
4805  */
4806  if (input_path != cheapest_partial_path &&
4807  (presorted_keys == 0 || !enable_incremental_sort))
4808  continue;
4809 
4810  /*
4811  * We've no need to consider both a sort and incremental sort.
4812  * We'll just do a sort if there are no presorted keys and an
4813  * incremental sort when there are presorted keys.
4814  */
4815  if (presorted_keys == 0 || !enable_incremental_sort)
4816  sorted_path = (Path *) create_sort_path(root,
4817  partial_distinct_rel,
4818  input_path,
4819  root->distinct_pathkeys,
4820  -1.0);
4821  else
4822  sorted_path = (Path *) create_incremental_sort_path(root,
4823  partial_distinct_rel,
4824  input_path,
4825  root->distinct_pathkeys,
4826  presorted_keys,
4827  -1.0);
4828  }
4829 
4830  add_partial_path(partial_distinct_rel, (Path *)
4831  create_upper_unique_path(root, partial_distinct_rel,
4832  sorted_path,
4834  numDistinctRows));
4835  }
4836  }
4837 
4838  /*
4839  * Now try hash aggregate paths, if enabled and hashing is possible. Since
4840  * we're not on the hook to ensure we do our best to create at least one
4841  * path here, we treat enable_hashagg as a hard off-switch rather than the
4842  * slightly softer variant in create_final_distinct_paths.
4843  */
4845  {
4846  add_partial_path(partial_distinct_rel, (Path *)
4847  create_agg_path(root,
4848  partial_distinct_rel,
4849  cheapest_partial_path,
4850  cheapest_partial_path->pathtarget,
4851  AGG_HASHED,
4854  NIL,
4855  NULL,
4856  numDistinctRows));
4857  }
4858 
4859  /*
4860  * If there is an FDW that's responsible for all baserels of the query,
4861  * let it consider adding ForeignPaths.
4862  */
4863  if (partial_distinct_rel->fdwroutine &&
4864  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4865  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4867  input_rel,
4868  partial_distinct_rel,
4869  NULL);
4870 
4871  /* Let extensions possibly add some more partial paths */
4873  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4874  input_rel, partial_distinct_rel, NULL);
4875 
4876  if (partial_distinct_rel->partial_pathlist != NIL)
4877  {
4878  generate_gather_paths(root, partial_distinct_rel, true);
4879  set_cheapest(partial_distinct_rel);
4880 
4881  /*
4882  * Finally, create paths to distinctify the final result. This step
4883  * is needed to remove any duplicates due to combining rows from
4884  * parallel workers.
4885  */
4886  create_final_distinct_paths(root, partial_distinct_rel,
4887  final_distinct_rel);
4888  }
4889 }
4890 
4891 /*
4892  * create_final_distinct_paths
4893  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4894  *
4895  * input_rel: contains the source-data paths
4896  * distinct_rel: destination relation for storing created paths
4897  */
4898 static RelOptInfo *
4900  RelOptInfo *distinct_rel)
4901 {
4902  Query *parse = root->parse;
4903  Path *cheapest_input_path = input_rel->cheapest_total_path;
4904  double numDistinctRows;
4905  bool allow_hash;
4906 
4907  /* Estimate number of distinct rows there will be */
4908  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4909  root->hasHavingQual)
4910  {
4911  /*
4912  * If there was grouping or aggregation, use the number of input rows
4913  * as the estimated number of DISTINCT rows (ie, assume the input is
4914  * already mostly unique).
4915  */
4916  numDistinctRows = cheapest_input_path->rows;
4917  }
4918  else
4919  {
4920  /*
4921  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4922  */
4923  List *distinctExprs;
4924 
4925  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4926  parse->targetList);
4927  numDistinctRows = estimate_num_groups(root, distinctExprs,
4928  cheapest_input_path->rows,
4929  NULL, NULL);
4930  }
4931 
4932  /*
4933  * Consider sort-based implementations of DISTINCT, if possible.
4934  */
4936  {
4937  /*
4938  * Firstly, if we have any adequately-presorted paths, just stick a
4939  * Unique node on those. We also, consider doing an explicit sort of
4940  * the cheapest input path and Unique'ing that. If any paths have
4941  * presorted keys then we'll create an incremental sort atop of those
4942  * before adding a unique node on the top.
4943  *
4944  * When we have DISTINCT ON, we must sort by the more rigorous of
4945  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4946  * Also, if we do have to do an explicit sort, we might as well use
4947  * the more rigorous ordering to avoid a second sort later. (Note
4948  * that the parser will have ensured that one clause is a prefix of
4949  * the other.)
4950  */
4951  List *needed_pathkeys;
4952  ListCell *lc;
4953  double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
4954 
4955  if (parse->hasDistinctOn &&
4957  list_length(root->sort_pathkeys))
4958  needed_pathkeys = root->sort_pathkeys;
4959  else
4960  needed_pathkeys = root->distinct_pathkeys;
4961 
4962  foreach(lc, input_rel->pathlist)
4963  {
4964  Path *input_path = (Path *) lfirst(lc);
4965  Path *sorted_path;
4966  bool is_sorted;
4967  int presorted_keys;
4968 
4969  is_sorted = pathkeys_count_contained_in(needed_pathkeys,
4970  input_path->pathkeys,
4971  &presorted_keys);
4972 
4973  if (is_sorted)
4974  sorted_path = input_path;
4975  else
4976  {
4977  /*
4978  * Try at least sorting the cheapest path and also try
4979  * incrementally sorting any path which is partially sorted
4980  * already (no need to deal with paths which have presorted
4981  * keys when incremental sort is disabled unless it's the
4982  * cheapest input path).
4983  */
4984  if (input_path != cheapest_input_path &&
4985  (presorted_keys == 0 || !enable_incremental_sort))
4986  continue;
4987 
4988  /*
4989  * We've no need to consider both a sort and incremental sort.
4990  * We'll just do a sort if there are no presorted keys and an
4991  * incremental sort when there are presorted keys.
4992  */
4993  if (presorted_keys == 0 || !enable_incremental_sort)
4994  sorted_path = (Path *) create_sort_path(root,
4995  distinct_rel,
4996  input_path,
4997  needed_pathkeys,
4998  limittuples);
4999  else
5000  sorted_path = (Path *) create_incremental_sort_path(root,
5001  distinct_rel,
5002  input_path,
5003  needed_pathkeys,
5004  presorted_keys,
5005  limittuples);
5006  }
5007 
5008  /*
5009  * distinct_pathkeys may have become empty if all of the pathkeys
5010  * were determined to be redundant. If all of the pathkeys are
5011  * redundant then each DISTINCT target must only allow a single
5012  * value, therefore all resulting tuples must be identical (or at
5013  * least indistinguishable by an equality check). We can uniquify
5014  * these tuples simply by just taking the first tuple. All we do
5015  * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
5016  * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
5017  * list, so we must still only do this with paths which are
5018  * correctly sorted by sort_pathkeys.
5019  */
5020  if (root->distinct_pathkeys == NIL)
5021  {
5022  Node *limitCount;
5023 
5024  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5025  sizeof(int64),
5026  Int64GetDatum(1), false,
5027  FLOAT8PASSBYVAL);
5028 
5029  /*
5030  * If the query already has a LIMIT clause, then we could end
5031  * up with a duplicate LimitPath in the final plan. That does
5032  * not seem worth troubling over too much.
5033  */
5034  add_path(distinct_rel, (Path *)
5035  create_limit_path(root, distinct_rel, sorted_path,
5036  NULL, limitCount,
5037  LIMIT_OPTION_COUNT, 0, 1));
5038  }
5039  else
5040  {
5041  add_path(distinct_rel, (Path *)
5042  create_upper_unique_path(root, distinct_rel,
5043  sorted_path,
5045  numDistinctRows));
5046  }
5047  }
5048  }
5049 
5050  /*
5051  * Consider hash-based implementations of DISTINCT, if possible.
5052  *
5053  * If we were not able to make any other types of path, we *must* hash or
5054  * die trying. If we do have other choices, there are two things that
5055  * should prevent selection of hashing: if the query uses DISTINCT ON
5056  * (because it won't really have the expected behavior if we hash), or if
5057  * enable_hashagg is off.
5058  *
5059  * Note: grouping_is_hashable() is much more expensive to check than the
5060  * other gating conditions, so we want to do it last.
5061  */
5062  if (distinct_rel->pathlist == NIL)
5063  allow_hash = true; /* we have no alternatives */
5064  else if (parse->hasDistinctOn || !enable_hashagg)
5065  allow_hash = false; /* policy-based decision not to hash */
5066  else
5067  allow_hash = true; /* default */
5068 
5069  if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5070  {
5071  /* Generate hashed aggregate path --- no sort needed */
5072  add_path(distinct_rel, (Path *)
5073  create_agg_path(root,
5074  distinct_rel,
5075  cheapest_input_path,
5076  cheapest_input_path->pathtarget,
5077  AGG_HASHED,
5080  NIL,
5081  NULL,
5082  numDistinctRows));
5083  }
5084 
5085  return distinct_rel;
5086 }
5087 
5088 /*
5089  * create_ordered_paths
5090  *
5091  * Build a new upperrel containing Paths for ORDER BY evaluation.
5092  *
5093  * All paths in the result must satisfy the ORDER BY ordering.
5094  * The only new paths we need consider are an explicit full sort
5095  * and incremental sort on the cheapest-total existing path.
5096  *
5097  * input_rel: contains the source-data Paths
5098  * target: the output tlist the result Paths must emit
5099  * limit_tuples: estimated bound on the number of output tuples,
5100  * or -1 if no LIMIT or couldn't estimate
5101  *
5102  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5103  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5104  */
5105 static RelOptInfo *
5107  RelOptInfo *input_rel,
5108  PathTarget *target,
5109  bool target_parallel_safe,
5110  double limit_tuples)
5111 {
5112  Path *cheapest_input_path = input_rel->cheapest_total_path;
5113  RelOptInfo *ordered_rel;
5114  ListCell *lc;
5115 
5116  /* For now, do all work in the (ORDERED, NULL) upperrel */
5117  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5118 
5119  /*
5120  * If the input relation is not parallel-safe, then the ordered relation
5121  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5122  * target list is parallel-safe.
5123  */
5124  if (input_rel->consider_parallel && target_parallel_safe)
5125  ordered_rel->consider_parallel = true;
5126 
5127  /*
5128  * If the input rel belongs to a single FDW, so does the ordered_rel.
5129  */
5130  ordered_rel->serverid = input_rel->serverid;
5131  ordered_rel->userid = input_rel->userid;
5132  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5133  ordered_rel->fdwroutine = input_rel->fdwroutine;
5134 
5135  foreach(lc, input_rel->pathlist)
5136  {
5137  Path *input_path = (Path *) lfirst(lc);
5138  Path *sorted_path;
5139  bool is_sorted;
5140  int presorted_keys;
5141 
5142  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5143  input_path->pathkeys, &presorted_keys);
5144 
5145  if (is_sorted)
5146  sorted_path = input_path;
5147  else
5148  {
5149  /*
5150  * Try at least sorting the cheapest path and also try
5151  * incrementally sorting any path which is partially sorted
5152  * already (no need to deal with paths which have presorted keys
5153  * when incremental sort is disabled unless it's the cheapest
5154  * input path).
5155  */
5156  if (input_path != cheapest_input_path &&
5157  (presorted_keys == 0 || !enable_incremental_sort))
5158  continue;
5159 
5160  /*
5161  * We've no need to consider both a sort and incremental sort.
5162  * We'll just do a sort if there are no presorted keys and an
5163  * incremental sort when there are presorted keys.
5164  */
5165  if (presorted_keys == 0 || !enable_incremental_sort)
5166  sorted_path = (Path *) create_sort_path(root,
5167  ordered_rel,
5168  input_path,
5169  root->sort_pathkeys,
5170  limit_tuples);
5171  else
5172  sorted_path = (Path *) create_incremental_sort_path(root,
5173  ordered_rel,
5174  input_path,
5175  root->sort_pathkeys,
5176  presorted_keys,
5177  limit_tuples);
5178  }
5179 
5180  /* Add projection step if needed */
5181  if (sorted_path->pathtarget != target)
5182  sorted_path = apply_projection_to_path(root, ordered_rel,
5183  sorted_path, target);
5184 
5185  add_path(ordered_rel, sorted_path);
5186  }
5187 
5188  /*
5189  * generate_gather_paths() will have already generated a simple Gather
5190  * path for the best parallel path, if any, and the loop above will have
5191  * considered sorting it. Similarly, generate_gather_paths() will also
5192  * have generated order-preserving Gather Merge plans which can be used
5193  * without sorting if they happen to match the sort_pathkeys, and the loop
5194  * above will have handled those as well. However, there's one more
5195  * possibility: it may make sense to sort the cheapest partial path
5196  * according to the required output order and then use Gather Merge.
5197  */
5198  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5199  input_rel->partial_pathlist != NIL)
5200  {
5201  Path *cheapest_partial_path;
5202 
5203  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5204 
5205  /*
5206  * If cheapest partial path doesn't need a sort, this is redundant
5207  * with what's already been tried.
5208  */
5210  cheapest_partial_path->pathkeys))
5211  {
5212  Path *path;
5213  double total_groups;
5214 
5215  path = (Path *) create_sort_path(root,
5216  ordered_rel,
5217  cheapest_partial_path,
5218  root->sort_pathkeys,
5219  limit_tuples);
5220 
5221  total_groups = cheapest_partial_path->rows *
5222  cheapest_partial_path->parallel_workers;
5223  path = (Path *)
5224  create_gather_merge_path(root, ordered_rel,
5225  path,
5226  path->pathtarget,
5227  root->sort_pathkeys, NULL,
5228  &total_groups);
5229 
5230  /* Add projection step if needed */
5231  if (path->pathtarget != target)
5232  path = apply_projection_to_path(root, ordered_rel,
5233  path, target);
5234 
5235  add_path(ordered_rel, path);
5236  }
5237 
5238  /*
5239  * Consider incremental sort with a gather merge on partial paths.
5240  *
5241  * We can also skip the entire loop when we only have a single-item
5242  * sort_pathkeys because then we can't possibly have a presorted
5243  * prefix of the list without having the list be fully sorted.
5244  */
5246  {
5247  foreach(lc, input_rel->partial_pathlist)
5248  {
5249  Path *input_path = (Path *) lfirst(lc);
5250  Path *sorted_path;
5251  bool is_sorted;
5252  int presorted_keys;
5253  double total_groups;
5254 
5255  /*
5256  * We don't care if this is the cheapest partial path - we
5257  * can't simply skip it, because it may be partially sorted in
5258  * which case we want to consider adding incremental sort
5259  * (instead of full sort, which is what happens above).
5260  */
5261 
5262  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5263  input_path->pathkeys,
5264  &presorted_keys);
5265 
5266  /* No point in adding incremental sort on fully sorted paths. */
5267  if (is_sorted)
5268  continue;
5269 
5270  if (presorted_keys == 0)
5271  continue;
5272 
5273  /* Since we have presorted keys, consider incremental sort. */
5274  sorted_path = (Path *) create_incremental_sort_path(root,
5275  ordered_rel,
5276  input_path,
5277  root->sort_pathkeys,
5278  presorted_keys,
5279  limit_tuples);
5280  total_groups = input_path->rows *
5281  input_path->parallel_workers;
5282  sorted_path = (Path *)
5283  create_gather_merge_path(root, ordered_rel,
5284  sorted_path,
5285  sorted_path->pathtarget,
5286  root->sort_pathkeys, NULL,
5287  &total_groups);
5288 
5289  /* Add projection step if needed */
5290  if (sorted_path->pathtarget != target)
5291  sorted_path = apply_projection_to_path(root, ordered_rel,
5292  sorted_path, target);
5293 
5294  add_path(ordered_rel, sorted_path);
5295  }
5296  }
5297  }
5298 
5299  /*
5300  * If there is an FDW that's responsible for all baserels of the query,
5301  * let it consider adding ForeignPaths.
5302  */
5303  if (ordered_rel->fdwroutine &&
5304  ordered_rel->fdwroutine->GetForeignUpperPaths)
5305  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5306  input_rel, ordered_rel,
5307  NULL);
5308 
5309  /* Let extensions possibly add some more paths */
5311  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5312  input_rel, ordered_rel, NULL);
5313 
5314  /*
5315  * No need to bother with set_cheapest here; grouping_planner does not
5316  * need us to do it.
5317  */
5318  Assert(ordered_rel->pathlist != NIL);
5319 
5320  return ordered_rel;
5321 }
5322 
5323 
5324 /*
5325  * make_group_input_target
5326  * Generate appropriate PathTarget for initial input to grouping nodes.
5327  *
5328  * If there is grouping or aggregation, the scan/join subplan cannot emit
5329  * the query's final targetlist; for example, it certainly can't emit any
5330  * aggregate function calls. This routine generates the correct target
5331  * for the scan/join subplan.
5332  *
5333  * The query target list passed from the parser already contains entries
5334  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5335  * for variables used only in HAVING clauses; so we need to add those
5336  * variables to the subplan target list. Also, we flatten all expressions
5337  * except GROUP BY items into their component variables; other expressions
5338  * will be computed by the upper plan nodes rather than by the subplan.
5339  * For example, given a query like
5340  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5341  * we want to pass this targetlist to the subplan:
5342  * a+b,c,d
5343  * where the a+b target will be used by the Sort/Group steps, and the
5344  * other targets will be used for computing the final results.
5345  *
5346  * 'final_target' is the query's final target list (in PathTarget form)
5347  *
5348  * The result is the PathTarget to be computed by the Paths returned from
5349  * query_planner().
5350  */
5351 static PathTarget *
5353 {
5354  Query *parse = root->parse;
5355  PathTarget *input_target;
5356  List *non_group_cols;
5357  List *non_group_vars;
5358  int i;
5359  ListCell *lc;
5360 
5361  /*
5362  * We must build a target containing all grouping columns, plus any other
5363  * Vars mentioned in the query's targetlist and HAVING qual.
5364  */
5365  input_target = create_empty_pathtarget();
5366  non_group_cols = NIL;
5367 
5368  i = 0;
5369  foreach(lc, final_target->exprs)
5370  {
5371  Expr *expr = (Expr *) lfirst(lc);
5372  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5373 
5374  if (sgref && root->processed_groupClause &&
5376  root->processed_groupClause) != NULL)
5377  {
5378  /*
5379  * It's a grouping column, so add it to the input target as-is.
5380  */
5381  add_column_to_pathtarget(input_target, expr, sgref);
5382  }
5383  else
5384  {
5385  /*
5386  * Non-grouping column, so just remember the expression for later
5387  * call to pull_var_clause.
5388  */
5389  non_group_cols = lappend(non_group_cols, expr);
5390  }
5391 
5392  i++;
5393  }
5394 
5395  /*
5396  * If there's a HAVING clause, we'll need the Vars it uses, too.
5397  */
5398  if (parse->havingQual)
5399  non_group_cols = lappend(non_group_cols, parse->havingQual);
5400 
5401  /*
5402  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5403  * add them to the input target if not already present. (A Var used
5404  * directly as a GROUP BY item will be present already.) Note this
5405  * includes Vars used in resjunk items, so we are covering the needs of
5406  * ORDER BY and window specifications. Vars used within Aggrefs and
5407  * WindowFuncs will be pulled out here, too.
5408  */
5409  non_group_vars = pull_var_clause((Node *) non_group_cols,
5413  add_new_columns_to_pathtarget(input_target, non_group_vars);
5414 
5415  /* clean up cruft */
5416  list_free(non_group_vars);
5417  list_free(non_group_cols);
5418 
5419  /* XXX this causes some redundant cost calculation ... */
5420  return set_pathtarget_cost_width(root, input_target);
5421 }
5422 
5423 /*
5424  * make_partial_grouping_target
5425  * Generate appropriate PathTarget for output of partial aggregate
5426  * (or partial grouping, if there are no aggregates) nodes.
5427  *
5428  * A partial aggregation node needs to emit all the same aggregates that
5429  * a regular aggregation node would, plus any aggregates used in HAVING;
5430  * except that the Aggref nodes should be marked as partial aggregates.
5431  *
5432  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5433  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5434  * these would be Vars that are grouped by or used in grouping expressions.)
5435  *
5436  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5437  * havingQual represents the HAVING clause.
5438  */
5439 static PathTarget *
5441  PathTarget *grouping_target,
5442  Node *havingQual)
5443 {
5444  PathTarget *partial_target;
5445  List *non_group_cols;
5446  List *non_group_exprs;
5447  int i;
5448  ListCell *lc;
5449 
5450  partial_target = create_empty_pathtarget();
5451  non_group_cols = NIL;
5452 
5453  i = 0;
5454  foreach(lc, grouping_target->exprs)
5455  {
5456  Expr *expr = (Expr *) lfirst(lc);
5457  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5458 
5459  if (sgref && root->processed_groupClause &&
5461  root->processed_groupClause) != NULL)
5462  {
5463  /*
5464  * It's a grouping column, so add it to the partial_target as-is.
5465  * (This allows the upper agg step to repeat the grouping calcs.)
5466  */
5467  add_column_to_pathtarget(partial_target, expr, sgref);
5468  }
5469  else
5470  {
5471  /*
5472  * Non-grouping column, so just remember the expression for later
5473  * call to pull_var_clause.
5474  */
5475  non_group_cols = lappend(non_group_cols, expr);
5476  }
5477 
5478  i++;
5479  }
5480 
5481  /*
5482  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5483  */
5484  if (havingQual)
5485  non_group_cols = lappend(non_group_cols, havingQual);
5486 
5487  /*
5488  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5489  * non-group cols (plus HAVING), and add them to the partial_target if not
5490  * already present. (An expression used directly as a GROUP BY item will
5491  * be present already.) Note this includes Vars used in resjunk items, so
5492  * we are covering the needs of ORDER BY and window specifications.
5493  */
5494  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5498 
5499  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5500 
5501  /*
5502  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5503  * are at the top level of the target list, so we can just scan the list
5504  * rather than recursing through the expression trees.
5505  */
5506  foreach(lc, partial_target->exprs)
5507  {
5508  Aggref *aggref = (Aggref *) lfirst(lc);
5509 
5510  if (IsA(aggref, Aggref))
5511  {
5512  Aggref *newaggref;
5513 
5514  /*
5515  * We shouldn't need to copy the substructure of the Aggref node,
5516  * but flat-copy the node itself to avoid damaging other trees.
5517  */
5518  newaggref = makeNode(Aggref);
5519  memcpy(newaggref, aggref, sizeof(Aggref));
5520 
5521  /* For now, assume serialization is required */
5523 
5524  lfirst(lc) = newaggref;
5525  }
5526  }
5527 
5528  /* clean up cruft */
5529  list_free(non_group_exprs);
5530  list_free(non_group_cols);
5531 
5532  /* XXX this causes some redundant cost calculation ... */
5533  return set_pathtarget_cost_width(root, partial_target);
5534 }
5535 
5536 /*
5537  * mark_partial_aggref
5538  * Adjust an Aggref to make it represent a partial-aggregation step.
5539  *
5540  * The Aggref node is modified in-place; caller must do any copying required.
5541  */
5542 void
5544 {
5545  /* aggtranstype should be computed by this point */
5546  Assert(OidIsValid(agg->aggtranstype));
5547  /* ... but aggsplit should still be as the parser left it */
5548  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5549 
5550  /* Mark the Aggref with the intended partial-aggregation mode */
5551  agg->aggsplit = aggsplit;
5552 
5553  /*
5554  * Adjust result type if needed. Normally, a partial aggregate returns
5555  * the aggregate's transition type; but if that's INTERNAL and we're
5556  * serializing, it returns BYTEA instead.
5557  */
5558  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5559  {
5560  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5561  agg->aggtype = BYTEAOID;
5562  else
5563  agg->aggtype = agg->aggtranstype;
5564  }
5565 }
5566 
5567 /*
5568  * postprocess_setop_tlist
5569  * Fix up targetlist returned by plan_set_operations().
5570  *
5571  * We need to transpose sort key info from the orig_tlist into new_tlist.
5572  * NOTE: this would not be good enough if we supported resjunk sort keys
5573  * for results of set operations --- then, we'd need to project a whole
5574  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5575  * find any resjunk columns in orig_tlist.
5576  */
5577 static List *
5578 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5579 {
5580  ListCell *l;
5581  ListCell *orig_tlist_item = list_head(orig_tlist);
5582 
5583  foreach(l, new_tlist)
5584  {
5585  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5586  TargetEntry *orig_tle;
5587 
5588  /* ignore resjunk columns in setop result */
5589  if (new_tle->resjunk)
5590  continue;
5591 
5592  Assert(orig_tlist_item != NULL);
5593  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5594  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5595  if (orig_tle->resjunk) /* should not happen */
5596  elog(ERROR, "resjunk output columns are not implemented");
5597  Assert(new_tle->resno == orig_tle->resno);
5598  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5599  }
5600  if (orig_tlist_item != NULL)
5601  elog(ERROR, "resjunk output columns are not implemented");
5602  return new_tlist;
5603 }
5604 
5605 /*
5606  * optimize_window_clauses
5607  * Call each WindowFunc's prosupport function to see if we're able to
5608  * make any adjustments to any of the WindowClause's so that the executor
5609  * can execute the window functions in a more optimal way.
5610  *
5611  * Currently we only allow adjustments to the WindowClause's frameOptions. We
5612  * may allow more things to be done here in the future.
5613  */
5614 static void
5616 {
5617  List *windowClause = root->parse->windowClause;
5618  ListCell *lc;
5619 
5620  foreach(lc, windowClause)
5621  {
5623  ListCell *lc2;
5624  int optimizedFrameOptions = 0;
5625 
5626  Assert(wc->winref <= wflists->maxWinRef);
5627 
5628  /* skip any WindowClauses that have no WindowFuncs */
5629  if (wflists->windowFuncs[wc->winref] == NIL)
5630  continue;
5631 
5632  foreach(lc2, wflists->windowFuncs[wc->winref])
5633  {
5636  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5637  Oid prosupport;
5638 
5639  prosupport = get_func_support(wfunc->winfnoid);
5640 
5641  /* Check if there's a support function for 'wfunc' */
5642  if (!OidIsValid(prosupport))
5643  break; /* can't optimize this WindowClause */
5644 
5645  req.type = T_SupportRequestOptimizeWindowClause;
5646  req.window_clause = wc;
5647  req.window_func = wfunc;
5648  req.frameOptions = wc->frameOptions;
5649 
5650  /* call the support function */
5652  DatumGetPointer(OidFunctionCall1(prosupport,
5653  PointerGetDatum(&req)));
5654 
5655  /*
5656  * Skip to next WindowClause if the support function does not
5657  * support this request type.
5658  */
5659  if (res == NULL)
5660  break;
5661 
5662  /*
5663  * Save these frameOptions for the first WindowFunc for this
5664  * WindowClause.
5665  */
5666  if (foreach_current_index(lc2) == 0)
5667  optimizedFrameOptions = res->frameOptions;
5668 
5669  /*
5670  * On subsequent WindowFuncs, if the frameOptions are not the same
5671  * then we're unable to optimize the frameOptions for this
5672  * WindowClause.
5673  */
5674  else if (optimizedFrameOptions != res->frameOptions)
5675  break; /* skip to the next WindowClause, if any */
5676  }
5677 
5678  /* adjust the frameOptions if all WindowFunc's agree that it's ok */
5679  if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5680  {
5681  ListCell *lc3;
5682 
5683  /* apply the new frame options */
5684  wc->frameOptions = optimizedFrameOptions;
5685 
5686  /*
5687  * We now check to see if changing the frameOptions has caused
5688  * this WindowClause to be a duplicate of some other WindowClause.
5689  * This can only happen if we have multiple WindowClauses, so
5690  * don't bother if there's only 1.
5691  */
5692  if (list_length(windowClause) == 1)
5693  continue;
5694 
5695  /*
5696  * Do the duplicate check and reuse the existing WindowClause if
5697  * we find a duplicate.
5698  */
5699  foreach(lc3, windowClause)
5700  {
5701  WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5702 
5703  /* skip over the WindowClause we're currently editing */
5704  if (existing_wc == wc)
5705  continue;
5706 
5707  /*
5708  * Perform the same duplicate check that is done in
5709  * transformWindowFuncCall.
5710  */
5711  if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5712  equal(wc->orderClause, existing_wc->orderClause) &&
5713  wc->frameOptions == existing_wc->frameOptions &&
5714  equal(wc->startOffset, existing_wc->startOffset) &&
5715  equal(wc->endOffset, existing_wc->endOffset))
5716  {
5717  ListCell *lc4;
5718 
5719  /*
5720  * Now move each WindowFunc in 'wc' into 'existing_wc'.
5721  * This required adjusting each WindowFunc's winref and
5722  * moving the WindowFuncs in 'wc' to the list of
5723  * WindowFuncs in 'existing_wc'.
5724  */
5725  foreach(lc4, wflists->windowFuncs[wc->winref])
5726  {
5727  WindowFunc *wfunc = lfirst_node(WindowFunc, lc4);
5728 
5729  wfunc->winref = existing_wc->winref;
5730  }
5731 
5732  /* move list items */
5733  wflists->windowFuncs[existing_wc->winref] = list_concat(wflists->windowFuncs[existing_wc->winref],
5734  wflists->windowFuncs[wc->winref]);
5735  wflists->windowFuncs[wc->winref] = NIL;
5736 
5737  /*
5738  * transformWindowFuncCall() should have made sure there
5739  * are no other duplicates, so we needn't bother looking
5740  * any further.
5741  */
5742  break;
5743  }
5744  }
5745  }
5746  }
5747 }
5748 
5749 /*
5750  * select_active_windows
5751  * Create a list of the "active" window clauses (ie, those referenced
5752  * by non-deleted WindowFuncs) in the order they are to be executed.
5753  */
5754 static List *
5756 {
5757  List *windowClause = root->parse->windowClause;
5758  List *result = NIL;
5759  ListCell *lc;
5760  int nActive = 0;
5762  * list_length(windowClause));
5763 
5764  /* First, construct an array of the active windows */
5765  foreach(lc, windowClause)
5766  {
5768 
5769  /* It's only active if wflists shows some related WindowFuncs */
5770  Assert(wc->winref <= wflists->maxWinRef);
5771  if (wflists->windowFuncs[wc->winref] == NIL)
5772  continue;
5773 
5774  actives[nActive].wc = wc; /* original clause */
5775 
5776  /*
5777  * For sorting, we want the list of partition keys followed by the
5778  * list of sort keys. But pathkeys construction will remove duplicates
5779  * between the two, so we can as well (even though we can't detect all
5780  * of the duplicates, since some may come from ECs - that might mean
5781  * we miss optimization chances here). We must, however, ensure that
5782  * the order of entries is preserved with respect to the ones we do
5783  * keep.
5784  *
5785  * partitionClause and orderClause had their own duplicates removed in
5786  * parse analysis, so we're only concerned here with removing
5787  * orderClause entries that also appear in partitionClause.
5788  */
5789  actives[nActive].uniqueOrder =
5791  wc->orderClause);
5792  nActive++;
5793  }
5794 
5795  /*
5796  * Sort active windows by their partitioning/ordering clauses, ignoring
5797  * any framing clauses, so that the windows that need the same sorting are
5798  * adjacent in the list. When we come to generate paths, this will avoid
5799  * inserting additional Sort nodes.
5800  *
5801  * This is how we implement a specific requirement from the SQL standard,
5802  * which says that when two or more windows are order-equivalent (i.e.
5803  * have matching partition and order clauses, even if their names or
5804  * framing clauses differ), then all peer rows must be presented in the
5805  * same order in all of them. If we allowed multiple sort nodes for such
5806  * cases, we'd risk having the peer rows end up in different orders in
5807  * equivalent windows due to sort instability. (See General Rule 4 of
5808  * <window clause> in SQL2008 - SQL2016.)
5809  *
5810  * Additionally, if the entire list of clauses of one window is a prefix
5811  * of another, put first the window with stronger sorting requirements.
5812  * This way we will first sort for stronger window, and won't have to sort
5813  * again for the weaker one.
5814  */
5815  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5816 
5817  /* build ordered list of the original WindowClause nodes */
5818  for (int i = 0; i < nActive; i++)
5819  result = lappend(result, actives[i].wc);
5820 
5821  pfree(actives);
5822 
5823  return result;
5824 }
5825 
5826 /*
5827  * common_prefix_cmp
5828  * QSort comparison function for WindowClauseSortData
5829  *
5830  * Sort the windows by the required sorting clauses. First, compare the sort
5831  * clauses themselves. Second, if one window's clauses are a prefix of another
5832  * one's clauses, put the window with more sort clauses first.