1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/parallel.h"
23 #include "access/sysattr.h"
24 #include "access/table.h"
25 #include "catalog/pg_aggregate.h"
26 #include "catalog/pg_constraint.h"
27 #include "catalog/pg_inherits.h"
28 #include "catalog/pg_proc.h"
29 #include "catalog/pg_type.h"
30 #include "executor/executor.h"
31 #include "foreign/fdwapi.h"
32 #include "jit/jit.h"
33 #include "lib/bipartite_match.h"
34 #include "lib/knapsack.h"
35 #include "miscadmin.h"
36 #include "nodes/makefuncs.h"
37 #include "nodes/nodeFuncs.h"
38 #ifdef OPTIMIZER_DEBUG
39 #include "nodes/print.h"
40 #endif
41 #include "nodes/supportnodes.h"
42 #include "optimizer/appendinfo.h"
43 #include "optimizer/clauses.h"
44 #include "optimizer/cost.h"
45 #include "optimizer/optimizer.h"
46 #include "optimizer/paramassign.h"
47 #include "optimizer/pathnode.h"
48 #include "optimizer/paths.h"
49 #include "optimizer/plancat.h"
50 #include "optimizer/planmain.h"
51 #include "optimizer/planner.h"
52 #include "optimizer/prep.h"
53 #include "optimizer/subselect.h"
54 #include "optimizer/tlist.h"
55 #include "parser/analyze.h"
56 #include "parser/parse_agg.h"
57 #include "parser/parse_clause.h"
58 #include "parser/parse_relation.h"
59 #include "parser/parsetree.h"
60 #include "partitioning/partdesc.h"
61 #include "utils/lsyscache.h"
62 #include "utils/rel.h"
63 #include "utils/selfuncs.h"
64 
65 /* GUC parameters */
66 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
67 int debug_parallel_query = DEBUG_PARALLEL_OFF;
68 bool parallel_leader_participation = true;
69 
70 /* Hook for plugins to get control in planner() */
71 planner_hook_type planner_hook = NULL;
72 
73 /* Hook for plugins to get control when grouping_planner() plans upper rels */
74 create_upper_paths_hook_type create_upper_paths_hook = NULL;
75 
76 
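The create_upper_paths_hook above is invoked once per upper relation kind while grouping_planner() builds the post-scan/join processing stages, after the core paths for that stage exist. A minimal sketch of an extension callback, assuming a hypothetical my_upper_paths() registered from a hypothetical _PG_init(); PG_MODULE_MAGIC, PGDLLEXPORT, and error handling are omitted:

#include "postgres.h"
#include "optimizer/planner.h"

static create_upper_paths_hook_type prev_create_upper_paths_hook = NULL;

/* Hypothetical callback: chain to any previous hook, then observe grouping */
static void
my_upper_paths(PlannerInfo *root, UpperRelationKind stage,
               RelOptInfo *input_rel, RelOptInfo *output_rel, void *extra)
{
    if (prev_create_upper_paths_hook)
        prev_create_upper_paths_hook(root, stage, input_rel, output_rel, extra);

    if (stage == UPPERREL_GROUP_AGG)
        elog(DEBUG1, "grouping rel currently has %d path(s)",
             list_length(output_rel->pathlist));
}

void
_PG_init(void)
{
    prev_create_upper_paths_hook = create_upper_paths_hook;
    create_upper_paths_hook = my_upper_paths;
}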
77 /* Expression kind codes for preprocess_expression */
78 #define EXPRKIND_QUAL 0
79 #define EXPRKIND_TARGET 1
80 #define EXPRKIND_RTFUNC 2
81 #define EXPRKIND_RTFUNC_LATERAL 3
82 #define EXPRKIND_VALUES 4
83 #define EXPRKIND_VALUES_LATERAL 5
84 #define EXPRKIND_LIMIT 6
85 #define EXPRKIND_APPINFO 7
86 #define EXPRKIND_PHV 8
87 #define EXPRKIND_TABLESAMPLE 9
88 #define EXPRKIND_ARBITER_ELEM 10
89 #define EXPRKIND_TABLEFUNC 11
90 #define EXPRKIND_TABLEFUNC_LATERAL 12
91 
92 /*
93  * Data specific to grouping sets
94  */
95 typedef struct
96 {
97  List *rollups;
98  List *hash_sets_idx;
99  double dNumHashGroups;
100  bool any_hashable;
101  Bitmapset *unsortable_refs;
102  Bitmapset *unhashable_refs;
103  List *unsortable_sets;
104  int *tleref_to_colnum_map;
105 } grouping_sets_data;
106 
107 /*
108  * Temporary structure for use during WindowClause reordering in order to be
109  * able to sort WindowClauses on partitioning/ordering prefix.
110  */
111 typedef struct
112 {
113  WindowClause *wc; /* The WindowClause being processed */
114  List *uniqueOrder; /* A List of unique ordering/partitioning
115  * clauses per Window */
116 } WindowClauseSortData;
117 
118 /* Passthrough data for standard_qp_callback */
119 typedef struct
120 {
121  List *activeWindows; /* active windows, if any */
122  grouping_sets_data *gset_data; /* grouping sets data, if any */
123  SetOperationStmt *setop; /* parent set operation or NULL if not a
124  * subquery belonging to a set operation */
125 } standard_qp_extra;
126 
127 /* Local functions */
128 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
129 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
130 static void grouping_planner(PlannerInfo *root, double tuple_fraction,
131  SetOperationStmt *setops);
132 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
133 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
134  int *tleref_to_colnum_map);
135 static void preprocess_rowmarks(PlannerInfo *root);
136 static double preprocess_limit(PlannerInfo *root,
137  double tuple_fraction,
138  int64 *offset_est, int64 *count_est);
139 static void remove_useless_groupby_columns(PlannerInfo *root);
140 static List *preprocess_groupclause(PlannerInfo *root, List *force);
141 static List *extract_rollup_sets(List *groupingSets);
142 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
143 static void standard_qp_callback(PlannerInfo *root, void *extra);
144 static double get_number_of_groups(PlannerInfo *root,
145  double path_rows,
146  grouping_sets_data *gd,
147  List *target_list);
148 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
149  RelOptInfo *input_rel,
150  PathTarget *target,
151  bool target_parallel_safe,
152  grouping_sets_data *gd);
153 static bool is_degenerate_grouping(PlannerInfo *root);
154 static void create_degenerate_grouping_paths(PlannerInfo *root,
155  RelOptInfo *input_rel,
156  RelOptInfo *grouped_rel);
157 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
158  PathTarget *target, bool target_parallel_safe,
159  Node *havingQual);
160 static void create_ordinary_grouping_paths(PlannerInfo *root,
161  RelOptInfo *input_rel,
162  RelOptInfo *grouped_rel,
163  const AggClauseCosts *agg_costs,
164  grouping_sets_data *gd,
165  GroupPathExtraData *extra,
166  RelOptInfo **partially_grouped_rel_p);
167 static void consider_groupingsets_paths(PlannerInfo *root,
168  RelOptInfo *grouped_rel,
169  Path *path,
170  bool is_sorted,
171  bool can_hash,
172  grouping_sets_data *gd,
173  const AggClauseCosts *agg_costs,
174  double dNumGroups);
175 static RelOptInfo *create_window_paths(PlannerInfo *root,
176  RelOptInfo *input_rel,
177  PathTarget *input_target,
178  PathTarget *output_target,
179  bool output_target_parallel_safe,
180  WindowFuncLists *wflists,
181  List *activeWindows);
182 static void create_one_window_path(PlannerInfo *root,
183  RelOptInfo *window_rel,
184  Path *path,
185  PathTarget *input_target,
186  PathTarget *output_target,
187  WindowFuncLists *wflists,
188  List *activeWindows);
189 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
190  RelOptInfo *input_rel,
191  PathTarget *target);
192 static void create_partial_distinct_paths(PlannerInfo *root,
193  RelOptInfo *input_rel,
194  RelOptInfo *final_distinct_rel,
195  PathTarget *target);
196 static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
197  RelOptInfo *input_rel,
198  RelOptInfo *distinct_rel);
199 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
200  RelOptInfo *input_rel,
201  PathTarget *target,
202  bool target_parallel_safe,
203  double limit_tuples);
204 static PathTarget *make_group_input_target(PlannerInfo *root,
205  PathTarget *final_target);
206 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
207  PathTarget *grouping_target,
208  Node *havingQual);
209 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
210 static void optimize_window_clauses(PlannerInfo *root,
211  WindowFuncLists *wflists);
212 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
213 static PathTarget *make_window_input_target(PlannerInfo *root,
214  PathTarget *final_target,
215  List *activeWindows);
216 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
217  List *tlist);
218 static PathTarget *make_sort_input_target(PlannerInfo *root,
219  PathTarget *final_target,
220  bool *have_postponed_srfs);
221 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
222  List *targets, List *targets_contain_srfs);
223 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
224  RelOptInfo *grouped_rel,
225  RelOptInfo *partially_grouped_rel,
226  const AggClauseCosts *agg_costs,
227  grouping_sets_data *gd,
228  double dNumGroups,
229  GroupPathExtraData *extra);
230 static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
231  RelOptInfo *grouped_rel,
232  RelOptInfo *input_rel,
233  grouping_sets_data *gd,
234  GroupPathExtraData *extra,
235  bool force_rel_creation);
236 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
237 static bool can_partial_agg(PlannerInfo *root);
238 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
239  RelOptInfo *rel,
240  List *scanjoin_targets,
241  List *scanjoin_targets_contain_srfs,
242  bool scanjoin_target_parallel_safe,
243  bool tlist_same_exprs);
244 static void create_partitionwise_grouping_paths(PlannerInfo *root,
245  RelOptInfo *input_rel,
246  RelOptInfo *grouped_rel,
247  RelOptInfo *partially_grouped_rel,
248  const AggClauseCosts *agg_costs,
249  grouping_sets_data *gd,
250  PartitionwiseAggregateType patype,
251  GroupPathExtraData *extra);
252 static bool group_by_has_partkey(RelOptInfo *input_rel,
253  List *targetList,
254  List *groupClause);
255 static int common_prefix_cmp(const void *a, const void *b);
256 static List *generate_setop_child_grouplist(SetOperationStmt *op,
257  List *targetlist);
258 
259 
260 /*****************************************************************************
261  *
262  * Query optimizer entry point
263  *
264  * To support loadable plugins that monitor or modify planner behavior,
265  * we provide a hook variable that lets a plugin get control before and
266  * after the standard planning process. The plugin would normally call
267  * standard_planner().
268  *
269  * Note to plugin authors: standard_planner() scribbles on its Query input,
270  * so you'd better copy that data structure if you want to plan more than once.
271  *
272  *****************************************************************************/
273 PlannedStmt *
274 planner(Query *parse, const char *query_string, int cursorOptions,
275  ParamListInfo boundParams)
276 {
277  PlannedStmt *result;
278 
279  if (planner_hook)
280  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
281  else
282  result = standard_planner(parse, query_string, cursorOptions, boundParams);
283  return result;
284 }
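As the header comment above notes, a loadable module can interpose on this entry point by saving and setting planner_hook, normally delegating to standard_planner() (or to a previously installed hook). A minimal sketch, assuming a hypothetical extension with its own _PG_init(); PG_MODULE_MAGIC, PGDLLEXPORT, and error handling are omitted:

#include "postgres.h"
#include "optimizer/planner.h"

static planner_hook_type prev_planner_hook = NULL;

/* Hypothetical wrapper: simply delegates, then returns the finished plan */
static PlannedStmt *
my_planner(Query *parse, const char *query_string, int cursorOptions,
           ParamListInfo boundParams)
{
    PlannedStmt *result;

    /* chain to the previous hook if one exists, else the standard planner */
    if (prev_planner_hook)
        result = prev_planner_hook(parse, query_string, cursorOptions,
                                   boundParams);
    else
        result = standard_planner(parse, query_string, cursorOptions,
                                  boundParams);

    return result;
}

void
_PG_init(void)
{
    prev_planner_hook = planner_hook;
    planner_hook = my_planner;
}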
285 
286 PlannedStmt *
287 standard_planner(Query *parse, const char *query_string, int cursorOptions,
288  ParamListInfo boundParams)
289 {
290  PlannedStmt *result;
291  PlannerGlobal *glob;
292  double tuple_fraction;
293  PlannerInfo *root;
294  RelOptInfo *final_rel;
295  Path *best_path;
296  Plan *top_plan;
297  ListCell *lp,
298  *lr;
299 
300  /*
301  * Set up global state for this planner invocation. This data is needed
302  * across all levels of sub-Query that might exist in the given command,
303  * so we keep it in a separate struct that's linked to by each per-Query
304  * PlannerInfo.
305  */
306  glob = makeNode(PlannerGlobal);
307 
308  glob->boundParams = boundParams;
309  glob->subplans = NIL;
310  glob->subpaths = NIL;
311  glob->subroots = NIL;
312  glob->rewindPlanIDs = NULL;
313  glob->finalrtable = NIL;
314  glob->finalrteperminfos = NIL;
315  glob->finalrowmarks = NIL;
316  glob->resultRelations = NIL;
317  glob->appendRelations = NIL;
318  glob->relationOids = NIL;
319  glob->invalItems = NIL;
320  glob->paramExecTypes = NIL;
321  glob->lastPHId = 0;
322  glob->lastRowMarkId = 0;
323  glob->lastPlanNodeId = 0;
324  glob->transientPlan = false;
325  glob->dependsOnRole = false;
326 
327  /*
328  * Assess whether it's feasible to use parallel mode for this query. We
329  * can't do this in a standalone backend, or if the command will try to
330  * modify any data, or if this is a cursor operation, or if GUCs are set
331  * to values that don't permit parallelism, or if parallel-unsafe
332  * functions are present in the query tree.
333  *
334  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
335  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
336  * the command is writing into a completely new table which workers won't
337  * be able to see. If the workers could see the table, the fact that
338  * group locking would cause them to ignore the leader's heavyweight GIN
339  * page locks would make this unsafe. We'll have to fix that somehow if
340  * we want to allow parallel inserts in general; updates and deletes have
341  * additional problems especially around combo CIDs.)
342  *
343  * For now, we don't try to use parallel mode if we're running inside a
344  * parallel worker. We might eventually be able to relax this
345  * restriction, but for now it seems best not to have parallel workers
346  * trying to create their own parallel workers.
347  */
348  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
349  IsUnderPostmaster &&
350  parse->commandType == CMD_SELECT &&
351  !parse->hasModifyingCTE &&
352  max_parallel_workers_per_gather > 0 &&
353  !IsParallelWorker())
354  {
355  /* all the cheap tests pass, so scan the query tree */
356  glob->maxParallelHazard = max_parallel_hazard(parse);
357  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
358  }
359  else
360  {
361  /* skip the query tree scan, just assume it's unsafe */
362  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
363  glob->parallelModeOK = false;
364  }
365 
366  /*
367  * glob->parallelModeNeeded is normally set to false here and changed to
368  * true during plan creation if a Gather or Gather Merge plan is actually
369  * created (cf. create_gather_plan, create_gather_merge_plan).
370  *
371  * However, if debug_parallel_query = on or debug_parallel_query =
372  * regress, then we impose parallel mode whenever it's safe to do so, even
373  * if the final plan doesn't use parallelism. It's not safe to do so if
374  * the query contains anything parallel-unsafe; parallelModeOK will be
375  * false in that case. Note that parallelModeOK can't change after this
376  * point. Otherwise, everything in the query is either parallel-safe or
377  * parallel-restricted, and in either case it should be OK to impose
378  * parallel-mode restrictions. If that ends up breaking something, then
379  * either some function the user included in the query is incorrectly
380  * labeled as parallel-safe or parallel-restricted when in reality it's
381  * parallel-unsafe, or else the query planner itself has a bug.
382  */
383  glob->parallelModeNeeded = glob->parallelModeOK &&
384  (debug_parallel_query != DEBUG_PARALLEL_OFF);
385 
386  /* Determine what fraction of the plan is likely to be scanned */
387  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
388  {
389  /*
390  * We have no real idea how many tuples the user will ultimately FETCH
391  * from a cursor, but it is often the case that he doesn't want 'em
392  * all, or would prefer a fast-start plan anyway so that he can
393  * process some of the tuples sooner. Use a GUC parameter to decide
394  * what fraction to optimize for.
395  */
396  tuple_fraction = cursor_tuple_fraction;
397 
398  /*
399  * We document cursor_tuple_fraction as simply being a fraction, which
400  * means the edge cases 0 and 1 have to be treated specially here. We
401  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
402  */
403  if (tuple_fraction >= 1.0)
404  tuple_fraction = 0.0;
405  else if (tuple_fraction <= 0.0)
406  tuple_fraction = 1e-10;
407  }
408  else
409  {
410  /* Default assumption is we need all the tuples */
411  tuple_fraction = 0.0;
412  }
413 
414  /* primary planning entry point (may recurse for subqueries) */
415  root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
416 
417  /* Select best Path and turn it into a Plan */
418  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
419  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
420 
421  top_plan = create_plan(root, best_path);
422 
423  /*
424  * If creating a plan for a scrollable cursor, make sure it can run
425  * backwards on demand. Add a Material node at the top at need.
426  */
427  if (cursorOptions & CURSOR_OPT_SCROLL)
428  {
429  if (!ExecSupportsBackwardScan(top_plan))
430  top_plan = materialize_finished_plan(top_plan);
431  }
432 
433  /*
434  * Optionally add a Gather node for testing purposes, provided this is
435  * actually a safe thing to do.
436  *
437  * We can add Gather even when top_plan has parallel-safe initPlans, but
438  * then we have to move the initPlans to the Gather node because of
439  * SS_finalize_plan's limitations. That would cause cosmetic breakage of
440  * regression tests when debug_parallel_query = regress, because initPlans
441  * that would normally appear on the top_plan move to the Gather, causing
442  * them to disappear from EXPLAIN output. That doesn't seem worth kluging
443  * EXPLAIN to hide, so skip it when debug_parallel_query = regress.
444  */
445  if (debug_parallel_query != DEBUG_PARALLEL_OFF &&
446  top_plan->parallel_safe &&
447  (top_plan->initPlan == NIL ||
448  debug_parallel_query != DEBUG_PARALLEL_REGRESS))
449  {
450  Gather *gather = makeNode(Gather);
451  Cost initplan_cost;
452  bool unsafe_initplans;
453 
454  gather->plan.targetlist = top_plan->targetlist;
455  gather->plan.qual = NIL;
456  gather->plan.lefttree = top_plan;
457  gather->plan.righttree = NULL;
458  gather->num_workers = 1;
459  gather->single_copy = true;
460  gather->invisible = (debug_parallel_query == DEBUG_PARALLEL_REGRESS);
461 
462  /* Transfer any initPlans to the new top node */
463  gather->plan.initPlan = top_plan->initPlan;
464  top_plan->initPlan = NIL;
465 
466  /*
467  * Since this Gather has no parallel-aware descendants to signal to,
468  * we don't need a rescan Param.
469  */
470  gather->rescan_param = -1;
471 
472  /*
473  * Ideally we'd use cost_gather here, but setting up dummy path data
474  * to satisfy it doesn't seem much cleaner than knowing what it does.
475  */
476  gather->plan.startup_cost = top_plan->startup_cost +
477  parallel_setup_cost;
478  gather->plan.total_cost = top_plan->total_cost +
479  parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
480  gather->plan.plan_rows = top_plan->plan_rows;
481  gather->plan.plan_width = top_plan->plan_width;
482  gather->plan.parallel_aware = false;
483  gather->plan.parallel_safe = false;
484 
485  /*
486  * Delete the initplans' cost from top_plan. We needn't add it to the
487  * Gather node, since the above coding already included it there.
488  */
489  SS_compute_initplan_cost(gather->plan.initPlan,
490  &initplan_cost, &unsafe_initplans);
491  top_plan->startup_cost -= initplan_cost;
492  top_plan->total_cost -= initplan_cost;
493 
494  /* use parallel mode for parallel plans. */
495  root->glob->parallelModeNeeded = true;
496 
497  top_plan = &gather->plan;
498  }
499 
500  /*
501  * If any Params were generated, run through the plan tree and compute
502  * each plan node's extParam/allParam sets. Ideally we'd merge this into
503  * set_plan_references' tree traversal, but for now it has to be separate
504  * because we need to visit subplans before not after main plan.
505  */
506  if (glob->paramExecTypes != NIL)
507  {
508  Assert(list_length(glob->subplans) == list_length(glob->subroots));
509  forboth(lp, glob->subplans, lr, glob->subroots)
510  {
511  Plan *subplan = (Plan *) lfirst(lp);
512  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
513 
514  SS_finalize_plan(subroot, subplan);
515  }
516  SS_finalize_plan(root, top_plan);
517  }
518 
519  /* final cleanup of the plan */
520  Assert(glob->finalrtable == NIL);
521  Assert(glob->finalrteperminfos == NIL);
522  Assert(glob->finalrowmarks == NIL);
523  Assert(glob->resultRelations == NIL);
524  Assert(glob->appendRelations == NIL);
525  top_plan = set_plan_references(root, top_plan);
526  /* ... and the subplans (both regular subplans and initplans) */
527  Assert(list_length(glob->subplans) == list_length(glob->subroots));
528  forboth(lp, glob->subplans, lr, glob->subroots)
529  {
530  Plan *subplan = (Plan *) lfirst(lp);
531  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
532 
533  lfirst(lp) = set_plan_references(subroot, subplan);
534  }
535 
536  /* build the PlannedStmt result */
537  result = makeNode(PlannedStmt);
538 
539  result->commandType = parse->commandType;
540  result->queryId = parse->queryId;
541  result->hasReturning = (parse->returningList != NIL);
542  result->hasModifyingCTE = parse->hasModifyingCTE;
543  result->canSetTag = parse->canSetTag;
544  result->transientPlan = glob->transientPlan;
545  result->dependsOnRole = glob->dependsOnRole;
546  result->parallelModeNeeded = glob->parallelModeNeeded;
547  result->planTree = top_plan;
548  result->rtable = glob->finalrtable;
549  result->permInfos = glob->finalrteperminfos;
550  result->resultRelations = glob->resultRelations;
551  result->appendRelations = glob->appendRelations;
552  result->subplans = glob->subplans;
553  result->rewindPlanIDs = glob->rewindPlanIDs;
554  result->rowMarks = glob->finalrowmarks;
555  result->relationOids = glob->relationOids;
556  result->invalItems = glob->invalItems;
557  result->paramExecTypes = glob->paramExecTypes;
558  /* utilityStmt should be null, but we might as well copy it */
559  result->utilityStmt = parse->utilityStmt;
560  result->stmt_location = parse->stmt_location;
561  result->stmt_len = parse->stmt_len;
562 
563  result->jitFlags = PGJIT_NONE;
564  if (jit_enabled && jit_above_cost >= 0 &&
565  top_plan->total_cost > jit_above_cost)
566  {
567  result->jitFlags |= PGJIT_PERFORM;
568 
569  /*
570  * Decide how much effort should be put into generating better code.
571  */
572  if (jit_optimize_above_cost >= 0 &&
573  top_plan->total_cost > jit_optimize_above_cost)
574  result->jitFlags |= PGJIT_OPT3;
575  if (jit_inline_above_cost >= 0 &&
576  top_plan->total_cost > jit_inline_above_cost)
577  result->jitFlags |= PGJIT_INLINE;
578 
579  /*
580  * Decide which operations should be JITed.
581  */
582  if (jit_expressions)
583  result->jitFlags |= PGJIT_EXPR;
584  if (jit_tuple_deforming)
585  result->jitFlags |= PGJIT_DEFORM;
586  }
587 
588  if (glob->partition_directory != NULL)
589  DestroyPartitionDirectory(glob->partition_directory);
590 
591  return result;
592 }
593 
594 
595 /*--------------------
596  * subquery_planner
597  * Invokes the planner on a subquery. We recurse to here for each
598  * sub-SELECT found in the query tree.
599  *
600  * glob is the global state for the current planner run.
601  * parse is the querytree produced by the parser & rewriter.
602  * parent_root is the immediate parent Query's info (NULL at the top level).
603  * hasRecursion is true if this is a recursive WITH query.
604  * tuple_fraction is the fraction of tuples we expect will be retrieved.
605  * tuple_fraction is interpreted as explained for grouping_planner, below.
606  * setops is used for set operation subqueries to provide the subquery with
607  * the context in which it's being used so that Paths correctly sorted for the
608  * set operation can be generated. NULL when not planning a set operation
609  * child.
610  *
611  * Basically, this routine does the stuff that should only be done once
612  * per Query object. It then calls grouping_planner. At one time,
613  * grouping_planner could be invoked recursively on the same Query object;
614  * that's not currently true, but we keep the separation between the two
615  * routines anyway, in case we need it again someday.
616  *
617  * subquery_planner will be called recursively to handle sub-Query nodes
618  * found within the query's expressions and rangetable.
619  *
620  * Returns the PlannerInfo struct ("root") that contains all data generated
621  * while planning the subquery. In particular, the Path(s) attached to
622  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
623  * cheapest way(s) to implement the query. The top level will select the
624  * best Path and pass it through createplan.c to produce a finished Plan.
625  *--------------------
626  */
627 PlannerInfo *
628 subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
629  bool hasRecursion, double tuple_fraction,
630  SetOperationStmt *setops)
631 {
632  PlannerInfo *root;
633  List *newWithCheckOptions;
634  List *newHaving;
635  bool hasOuterJoins;
636  bool hasResultRTEs;
637  RelOptInfo *final_rel;
638  ListCell *l;
639 
640  /* Create a PlannerInfo data structure for this subquery */
641  root = makeNode(PlannerInfo);
642  root->parse = parse;
643  root->glob = glob;
644  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
645  root->parent_root = parent_root;
646  root->plan_params = NIL;
647  root->outer_params = NULL;
648  root->planner_cxt = CurrentMemoryContext;
649  root->init_plans = NIL;
650  root->cte_plan_ids = NIL;
651  root->multiexpr_params = NIL;
652  root->join_domains = NIL;
653  root->eq_classes = NIL;
654  root->ec_merging_done = false;
655  root->last_rinfo_serial = 0;
656  root->all_result_relids =
657  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
658  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
659  root->append_rel_list = NIL;
660  root->row_identity_vars = NIL;
661  root->rowMarks = NIL;
662  memset(root->upper_rels, 0, sizeof(root->upper_rels));
663  memset(root->upper_targets, 0, sizeof(root->upper_targets));
664  root->processed_groupClause = NIL;
665  root->processed_distinctClause = NIL;
666  root->processed_tlist = NIL;
667  root->update_colnos = NIL;
668  root->grouping_map = NULL;
669  root->minmax_aggs = NIL;
670  root->qual_security_level = 0;
671  root->hasPseudoConstantQuals = false;
672  root->hasAlternativeSubPlans = false;
673  root->placeholdersFrozen = false;
674  root->hasRecursion = hasRecursion;
675  if (hasRecursion)
676  root->wt_param_id = assign_special_exec_param(root);
677  else
678  root->wt_param_id = -1;
679  root->non_recursive_path = NULL;
680  root->partColsUpdated = false;
681 
682  /*
683  * Create the top-level join domain. This won't have valid contents until
684  * deconstruct_jointree fills it in, but the node needs to exist before
685  * that so we can build EquivalenceClasses referencing it.
686  */
687  root->join_domains = list_make1(makeNode(JoinDomain));
688 
689  /*
690  * If there is a WITH list, process each WITH query and either convert it
691  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
692  */
693  if (parse->cteList)
694  SS_process_ctes(root);
695 
696  /*
697  * If it's a MERGE command, transform the joinlist as appropriate.
698  */
699  transform_MERGE_to_join(parse);
700 
701  /*
702  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
703  * that we don't need so many special cases to deal with that situation.
704  */
705  replace_empty_jointree(parse);
706 
707  /*
708  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
709  * to transform them into joins. Note that this step does not descend
710  * into subqueries; if we pull up any subqueries below, their SubLinks are
711  * processed just before pulling them up.
712  */
713  if (parse->hasSubLinks)
714  pull_up_sublinks(root);
715 
716  /*
717  * Scan the rangetable for function RTEs, do const-simplification on them,
718  * and then inline them if possible (producing subqueries that might get
719  * pulled up next). Recursion issues here are handled in the same way as
720  * for SubLinks.
721  */
722  preprocess_function_rtes(root);
723 
724  /*
725  * Check to see if any subqueries in the jointree can be merged into this
726  * query.
727  */
728  pull_up_subqueries(root);
729 
730  /*
731  * If this is a simple UNION ALL query, flatten it into an appendrel. We
732  * do this now because it requires applying pull_up_subqueries to the leaf
733  * queries of the UNION ALL, which weren't touched above because they
734  * weren't referenced by the jointree (they will be after we do this).
735  */
736  if (parse->setOperations)
737  flatten_simple_union_all(root);
738 
739  /*
740  * Survey the rangetable to see what kinds of entries are present. We can
741  * skip some later processing if relevant SQL features are not used; for
742  * example if there are no JOIN RTEs we can avoid the expense of doing
743  * flatten_join_alias_vars(). This must be done after we have finished
744  * adding rangetable entries, of course. (Note: actually, processing of
745  * inherited or partitioned rels can cause RTEs for their child tables to
746  * get added later; but those must all be RTE_RELATION entries, so they
747  * don't invalidate the conclusions drawn here.)
748  */
749  root->hasJoinRTEs = false;
750  root->hasLateralRTEs = false;
751  hasOuterJoins = false;
752  hasResultRTEs = false;
753  foreach(l, parse->rtable)
754  {
755  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
756 
757  switch (rte->rtekind)
758  {
759  case RTE_RELATION:
760  if (rte->inh)
761  {
762  /*
763  * Check to see if the relation actually has any children;
764  * if not, clear the inh flag so we can treat it as a
765  * plain base relation.
766  *
767  * Note: this could give a false-positive result, if the
768  * rel once had children but no longer does. We used to
769  * be able to clear rte->inh later on when we discovered
770  * that, but no more; we have to handle such cases as
771  * full-fledged inheritance.
772  */
773  rte->inh = has_subclass(rte->relid);
774  }
775  break;
776  case RTE_JOIN:
777  root->hasJoinRTEs = true;
778  if (IS_OUTER_JOIN(rte->jointype))
779  hasOuterJoins = true;
780  break;
781  case RTE_RESULT:
782  hasResultRTEs = true;
783  break;
784  default:
785  /* No work here for other RTE types */
786  break;
787  }
788 
789  if (rte->lateral)
790  root->hasLateralRTEs = true;
791 
792  /*
793  * We can also determine the maximum security level required for any
794  * securityQuals now. Addition of inheritance-child RTEs won't affect
795  * this, because child tables don't have their own securityQuals; see
796  * expand_single_inheritance_child().
797  */
798  if (rte->securityQuals)
799  root->qual_security_level = Max(root->qual_security_level,
800  list_length(rte->securityQuals));
801  }
802 
803  /*
804  * If we have now verified that the query target relation is
805  * non-inheriting, mark it as a leaf target.
806  */
807  if (parse->resultRelation)
808  {
809  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
810 
811  if (!rte->inh)
812  root->leaf_result_relids =
813  bms_make_singleton(parse->resultRelation);
814  }
815 
816  /*
817  * Preprocess RowMark information. We need to do this after subquery
818  * pullup, so that all base relations are present.
819  */
820  preprocess_rowmarks(root);
821 
822  /*
823  * Set hasHavingQual to remember if HAVING clause is present. Needed
824  * because preprocess_expression will reduce a constant-true condition to
825  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
826  */
827  root->hasHavingQual = (parse->havingQual != NULL);
828 
829  /*
830  * Do expression preprocessing on targetlist and quals, as well as other
831  * random expressions in the querytree. Note that we do not need to
832  * handle sort/group expressions explicitly, because they are actually
833  * part of the targetlist.
834  */
835  parse->targetList = (List *)
836  preprocess_expression(root, (Node *) parse->targetList,
837  EXPRKIND_TARGET);
838 
839  /* Constant-folding might have removed all set-returning functions */
840  if (parse->hasTargetSRFs)
841  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
842 
843  newWithCheckOptions = NIL;
844  foreach(l, parse->withCheckOptions)
845  {
846  WithCheckOption *wco = lfirst_node(WithCheckOption, l);
847 
848  wco->qual = preprocess_expression(root, wco->qual,
849  EXPRKIND_QUAL);
850  if (wco->qual != NULL)
851  newWithCheckOptions = lappend(newWithCheckOptions, wco);
852  }
853  parse->withCheckOptions = newWithCheckOptions;
854 
855  parse->returningList = (List *)
856  preprocess_expression(root, (Node *) parse->returningList,
857  EXPRKIND_TARGET);
858 
859  preprocess_qual_conditions(root, (Node *) parse->jointree);
860 
861  parse->havingQual = preprocess_expression(root, parse->havingQual,
862  EXPRKIND_QUAL);
863 
864  foreach(l, parse->windowClause)
865  {
866  WindowClause *wc = lfirst_node(WindowClause, l);
867 
868  /* partitionClause/orderClause are sort/group expressions */
869  wc->startOffset = preprocess_expression(root, wc->startOffset,
870  EXPRKIND_LIMIT);
871  wc->endOffset = preprocess_expression(root, wc->endOffset,
872  EXPRKIND_LIMIT);
873  }
874 
875  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
876  EXPRKIND_LIMIT);
877  parse->limitCount = preprocess_expression(root, parse->limitCount,
878  EXPRKIND_LIMIT);
879 
880  if (parse->onConflict)
881  {
882  parse->onConflict->arbiterElems = (List *)
883  preprocess_expression(root,
884  (Node *) parse->onConflict->arbiterElems,
885  EXPRKIND_ARBITER_ELEM);
886  parse->onConflict->arbiterWhere =
887  preprocess_expression(root,
888  parse->onConflict->arbiterWhere,
889  EXPRKIND_QUAL);
890  parse->onConflict->onConflictSet = (List *)
891  preprocess_expression(root,
892  (Node *) parse->onConflict->onConflictSet,
893  EXPRKIND_TARGET);
894  parse->onConflict->onConflictWhere =
895  preprocess_expression(root,
896  parse->onConflict->onConflictWhere,
897  EXPRKIND_QUAL);
898  /* exclRelTlist contains only Vars, so no preprocessing needed */
899  }
900 
901  foreach(l, parse->mergeActionList)
902  {
903  MergeAction *action = (MergeAction *) lfirst(l);
904 
905  action->targetList = (List *)
906  preprocess_expression(root,
907  (Node *) action->targetList,
908  EXPRKIND_TARGET);
909  action->qual =
910  preprocess_expression(root,
911  (Node *) action->qual,
912  EXPRKIND_QUAL);
913  }
914 
915  parse->mergeJoinCondition =
916  preprocess_expression(root, parse->mergeJoinCondition, EXPRKIND_QUAL);
917 
918  root->append_rel_list = (List *)
919  preprocess_expression(root, (Node *) root->append_rel_list,
920  EXPRKIND_APPINFO);
921 
922  /* Also need to preprocess expressions within RTEs */
923  foreach(l, parse->rtable)
924  {
925  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
926  int kind;
927  ListCell *lcsq;
928 
929  if (rte->rtekind == RTE_RELATION)
930  {
931  if (rte->tablesample)
932  rte->tablesample = (TableSampleClause *)
933  preprocess_expression(root,
934  (Node *) rte->tablesample,
935  EXPRKIND_TABLESAMPLE);
936  }
937  else if (rte->rtekind == RTE_SUBQUERY)
938  {
939  /*
940  * We don't want to do all preprocessing yet on the subquery's
941  * expressions, since that will happen when we plan it. But if it
942  * contains any join aliases of our level, those have to get
943  * expanded now, because planning of the subquery won't do it.
944  * That's only possible if the subquery is LATERAL.
945  */
946  if (rte->lateral && root->hasJoinRTEs)
947  rte->subquery = (Query *)
948  flatten_join_alias_vars(root, root->parse,
949  (Node *) rte->subquery);
950  }
951  else if (rte->rtekind == RTE_FUNCTION)
952  {
953  /* Preprocess the function expression(s) fully */
954  kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
955  rte->functions = (List *)
956  preprocess_expression(root, (Node *) rte->functions, kind);
957  }
958  else if (rte->rtekind == RTE_TABLEFUNC)
959  {
960  /* Preprocess the function expression(s) fully */
961  kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
962  rte->tablefunc = (TableFunc *)
963  preprocess_expression(root, (Node *) rte->tablefunc, kind);
964  }
965  else if (rte->rtekind == RTE_VALUES)
966  {
967  /* Preprocess the values lists fully */
968  kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
969  rte->values_lists = (List *)
970  preprocess_expression(root, (Node *) rte->values_lists, kind);
971  }
972 
973  /*
974  * Process each element of the securityQuals list as if it were a
975  * separate qual expression (as indeed it is). We need to do it this
976  * way to get proper canonicalization of AND/OR structure. Note that
977  * this converts each element into an implicit-AND sublist.
978  */
979  foreach(lcsq, rte->securityQuals)
980  {
981  lfirst(lcsq) = preprocess_expression(root,
982  (Node *) lfirst(lcsq),
983  EXPRKIND_QUAL);
984  }
985  }
986 
987  /*
988  * Now that we are done preprocessing expressions, and in particular done
989  * flattening join alias variables, get rid of the joinaliasvars lists.
990  * They no longer match what expressions in the rest of the tree look
991  * like, because we have not preprocessed expressions in those lists (and
992  * do not want to; for example, expanding a SubLink there would result in
993  * a useless unreferenced subplan). Leaving them in place simply creates
994  * a hazard for later scans of the tree. We could try to prevent that by
995  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
996  * but that doesn't sound very reliable.
997  */
998  if (root->hasJoinRTEs)
999  {
1000  foreach(l, parse->rtable)
1001  {
1002  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
1003 
1004  rte->joinaliasvars = NIL;
1005  }
1006  }
1007 
1008  /*
1009  * In some cases we may want to transfer a HAVING clause into WHERE. We
1010  * cannot do so if the HAVING clause contains aggregates (obviously) or
1011  * volatile functions (since a HAVING clause is supposed to be executed
1012  * only once per group). We also can't do this if there are any nonempty
1013  * grouping sets; moving such a clause into WHERE would potentially change
1014  * the results, if any referenced column isn't present in all the grouping
1015  * sets. (If there are only empty grouping sets, then the HAVING clause
1016  * must be degenerate as discussed below.)
1017  *
1018  * Also, it may be that the clause is so expensive to execute that we're
1019  * better off doing it only once per group, despite the loss of
1020  * selectivity. This is hard to estimate short of doing the entire
1021  * planning process twice, so we use a heuristic: clauses containing
1022  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
1023  * clause into WHERE, in hopes of eliminating tuples before aggregation
1024  * instead of after.
1025  *
1026  * If the query has explicit grouping then we can simply move such a
1027  * clause into WHERE; any group that fails the clause will not be in the
1028  * output because none of its tuples will reach the grouping or
1029  * aggregation stage. Otherwise we must have a degenerate (variable-free)
1030  * HAVING clause, which we put in WHERE so that query_planner() can use it
1031  * in a gating Result node, but also keep in HAVING to ensure that we
1032  * don't emit a bogus aggregated row. (This could be done better, but it
1033  * seems not worth optimizing.)
1034  *
1035  * Note that both havingQual and parse->jointree->quals are in
1036  * implicitly-ANDed-list form at this point, even though they are declared
1037  * as Node *.
1038  */
1039  newHaving = NIL;
1040  foreach(l, (List *) parse->havingQual)
1041  {
1042  Node *havingclause = (Node *) lfirst(l);
1043 
1044  if ((parse->groupClause && parse->groupingSets) ||
1045  contain_agg_clause(havingclause) ||
1046  contain_volatile_functions(havingclause) ||
1047  contain_subplans(havingclause))
1048  {
1049  /* keep it in HAVING */
1050  newHaving = lappend(newHaving, havingclause);
1051  }
1052  else if (parse->groupClause && !parse->groupingSets)
1053  {
1054  /* move it to WHERE */
1055  parse->jointree->quals = (Node *)
1056  lappend((List *) parse->jointree->quals, havingclause);
1057  }
1058  else
1059  {
1060  /* put a copy in WHERE, keep it in HAVING */
1061  parse->jointree->quals = (Node *)
1062  lappend((List *) parse->jointree->quals,
1063  copyObject(havingclause));
1064  newHaving = lappend(newHaving, havingclause);
1065  }
1066  }
1067  parse->havingQual = (Node *) newHaving;
1068 
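A concrete illustration of the decision rule implemented just above (hypothetical query, not taken from the source):

/*
 * For "SELECT a, sum(b) FROM t GROUP BY a HAVING a > 10 AND sum(b) > 100":
 *   - "a > 10" contains no aggregates, volatile functions, or subplans, and
 *     the query has a plain GROUP BY with no grouping sets, so the loop moves
 *     it into WHERE, filtering rows before aggregation;
 *   - "sum(b) > 100" contains an aggregate, so it stays in HAVING and is
 *     evaluated once per group.
 */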
1069  /*
1070  * If we have any outer joins, try to reduce them to plain inner joins.
1071  * This step is most easily done after we've done expression
1072  * preprocessing.
1073  */
1074  if (hasOuterJoins)
1075  reduce_outer_joins(root);
1076 
1077  /*
1078  * If we have any RTE_RESULT relations, see if they can be deleted from
1079  * the jointree. We also rely on this processing to flatten single-child
1080  * FromExprs underneath outer joins. This step is most effectively done
1081  * after we've done expression preprocessing and outer join reduction.
1082  */
1083  if (hasResultRTEs || hasOuterJoins)
1084  remove_useless_result_rtes(root);
1085 
1086  /*
1087  * Do the main planning.
1088  */
1089  grouping_planner(root, tuple_fraction, setops);
1090 
1091  /*
1092  * Capture the set of outer-level param IDs we have access to, for use in
1093  * extParam/allParam calculations later.
1094  */
1095  SS_identify_outer_params(root);
1096 
1097  /*
1098  * If any initPlans were created in this query level, adjust the surviving
1099  * Paths' costs and parallel-safety flags to account for them. The
1100  * initPlans won't actually get attached to the plan tree till
1101  * create_plan() runs, but we must include their effects now.
1102  */
1103  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1104  SS_charge_for_initplans(root, final_rel);
1105 
1106  /*
1107  * Make sure we've identified the cheapest Path for the final rel. (By
1108  * doing this here not in grouping_planner, we include initPlan costs in
1109  * the decision, though it's unlikely that will change anything.)
1110  */
1111  set_cheapest(final_rel);
1112 
1113  return root;
1114 }
1115 
1116 /*
1117  * preprocess_expression
1118  * Do subquery_planner's preprocessing work for an expression,
1119  * which can be a targetlist, a WHERE clause (including JOIN/ON
1120  * conditions), a HAVING clause, or a few other things.
1121  */
1122 static Node *
1123 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1124 {
1125  /*
1126  * Fall out quickly if expression is empty. This occurs often enough to
1127  * be worth checking. Note that null->null is the correct conversion for
1128  * implicit-AND result format, too.
1129  */
1130  if (expr == NULL)
1131  return NULL;
1132 
1133  /*
1134  * If the query has any join RTEs, replace join alias variables with
1135  * base-relation variables. We must do this first, since any expressions
1136  * we may extract from the joinaliasvars lists have not been preprocessed.
1137  * For example, if we did this after sublink processing, sublinks expanded
1138  * out from join aliases would not get processed. But we can skip this in
1139  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1140  * they can't contain any Vars of the current query level.
1141  */
1142  if (root->hasJoinRTEs &&
1143  !(kind == EXPRKIND_RTFUNC ||
1144  kind == EXPRKIND_VALUES ||
1145  kind == EXPRKIND_TABLESAMPLE ||
1146  kind == EXPRKIND_TABLEFUNC))
1147  expr = flatten_join_alias_vars(root, root->parse, expr);
1148 
1149  /*
1150  * Simplify constant expressions. For function RTEs, this was already
1151  * done by preprocess_function_rtes. (But note we must do it again for
1152  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1153  * un-simplified subexpressions inserted by flattening of subqueries or
1154  * join alias variables.)
1155  *
1156  * Note: an essential effect of this is to convert named-argument function
1157  * calls to positional notation and insert the current actual values of
1158  * any default arguments for functions. To ensure that happens, we *must*
1159  * process all expressions here. Previous PG versions sometimes skipped
1160  * const-simplification if it didn't seem worth the trouble, but we can't
1161  * do that anymore.
1162  *
1163  * Note: this also flattens nested AND and OR expressions into N-argument
1164  * form. All processing of a qual expression after this point must be
1165  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1166  * with AND directly under AND, nor OR directly under OR.
1167  */
1168  if (kind != EXPRKIND_RTFUNC)
1169  expr = eval_const_expressions(root, expr);
1170 
1171  /*
1172  * If it's a qual or havingQual, canonicalize it.
1173  */
1174  if (kind == EXPRKIND_QUAL)
1175  {
1176  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1177 
1178 #ifdef OPTIMIZER_DEBUG
1179  printf("After canonicalize_qual()\n");
1180  pprint(expr);
1181 #endif
1182  }
1183 
1184  /*
1185  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1186  * hashfuncid of any that might execute more quickly by using hash lookups
1187  * instead of a linear search.
1188  */
1189  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1190  {
1191  convert_saop_to_hashed_saop(expr);
1192  }
1193 
1194  /* Expand SubLinks to SubPlans */
1195  if (root->parse->hasSubLinks)
1196  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1197 
1198  /*
1199  * XXX do not insert anything here unless you have grokked the comments in
1200  * SS_replace_correlation_vars ...
1201  */
1202 
1203  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1204  if (root->query_level > 1)
1205  expr = SS_replace_correlation_vars(root, expr);
1206 
1207  /*
1208  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1209  * don't want to do this before eval_const_expressions, since the latter
1210  * would be unable to simplify a top-level AND correctly. Also,
1211  * SS_process_sublinks expects explicit-AND format.)
1212  */
1213  if (kind == EXPRKIND_QUAL)
1214  expr = (Node *) make_ands_implicit((Expr *) expr);
1215 
1216  return expr;
1217 }
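To make the implicit-AND convention described above concrete, a hedged fragment (hypothetical; assumes `root` and a `whereClause` Node are in scope, as they would be in subquery_planner):

/* Hypothetical illustration of the implicit-AND result format */
Node       *pre;
List       *quals;

pre = preprocess_expression(root, whereClause, EXPRKIND_QUAL);
quals = (List *) pre;               /* each list member is one ANDed clause */

/* For a clause like "a = 1 AND b = 2" we expect two list entries */
Assert(list_length(quals) == 2);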
1218 
1219 /*
1220  * preprocess_qual_conditions
1221  * Recursively scan the query's jointree and do subquery_planner's
1222  * preprocessing work on each qual condition found therein.
1223  */
1224 static void
1225 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1226 {
1227  if (jtnode == NULL)
1228  return;
1229  if (IsA(jtnode, RangeTblRef))
1230  {
1231  /* nothing to do here */
1232  }
1233  else if (IsA(jtnode, FromExpr))
1234  {
1235  FromExpr *f = (FromExpr *) jtnode;
1236  ListCell *l;
1237 
1238  foreach(l, f->fromlist)
1239  preprocess_qual_conditions(root, (Node *) lfirst(l));
1240 
1241  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1242  }
1243  else if (IsA(jtnode, JoinExpr))
1244  {
1245  JoinExpr *j = (JoinExpr *) jtnode;
1246 
1247  preprocess_qual_conditions(root, j->larg);
1248  preprocess_qual_conditions(root, j->rarg);
1249 
1250  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1251  }
1252  else
1253  elog(ERROR, "unrecognized node type: %d",
1254  (int) nodeTag(jtnode));
1255 }
1256 
1257 /*
1258  * preprocess_phv_expression
1259  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1260  *
1261  * If a LATERAL subquery references an output of another subquery, and that
1262  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1263  * join, then we'll push the PlaceHolderVar expression down into the subquery
1264  * and later pull it back up during find_lateral_references, which runs after
1265  * subquery_planner has preprocessed all the expressions that were in the
1266  * current query level to start with. So we need to preprocess it then.
1267  */
1268 Expr *
1269 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1270 {
1271  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1272 }
1273 
1274 /*--------------------
1275  * grouping_planner
1276  * Perform planning steps related to grouping, aggregation, etc.
1277  *
1278  * This function adds all required top-level processing to the scan/join
1279  * Path(s) produced by query_planner.
1280  *
1281  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1282  * tuple_fraction is interpreted as follows:
1283  * 0: expect all tuples to be retrieved (normal case)
1284  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1285  * from the plan to be retrieved
1286  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1287  * expected to be retrieved (ie, a LIMIT specification).
1288  * setops is used for set operation subqueries to provide the subquery with
1289  * the context in which it's being used so that Paths correctly sorted for the
1290  * set operation can be generated. NULL when not planning a set operation
1291  * child.
1292  *
1293  * Returns nothing; the useful output is in the Paths we attach to the
1294  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1295  * root->processed_tlist contains the final processed targetlist.
1296  *
1297  * Note that we have not done set_cheapest() on the final rel; it's convenient
1298  * to leave this to the caller.
1299  *--------------------
1300  */
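A small illustration of the tuple_fraction contract described above (hypothetical call sites with illustrative values, not from the source):

/* Hypothetical callers, interpreting tuple_fraction per the contract above */
grouping_planner(root, 0.0, NULL);   /* fetch everything (normal case) */
grouping_planner(root, 0.1, NULL);   /* optimize for the first 10% of rows */
grouping_planner(root, 50.0, NULL);  /* optimize for about 50 rows (LIMIT 50) */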
1301 static void
1302 grouping_planner(PlannerInfo *root, double tuple_fraction,
1303  SetOperationStmt *setops)
1304 {
1305  Query *parse = root->parse;
1306  int64 offset_est = 0;
1307  int64 count_est = 0;
1308  double limit_tuples = -1.0;
1309  bool have_postponed_srfs = false;
1310  PathTarget *final_target;
1311  List *final_targets;
1312  List *final_targets_contain_srfs;
1313  bool final_target_parallel_safe;
1314  RelOptInfo *current_rel;
1315  RelOptInfo *final_rel;
1316  FinalPathExtraData extra;
1317  ListCell *lc;
1318 
1319  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1320  if (parse->limitCount || parse->limitOffset)
1321  {
1322  tuple_fraction = preprocess_limit(root, tuple_fraction,
1323  &offset_est, &count_est);
1324 
1325  /*
1326  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1327  * estimate the effects of using a bounded sort.
1328  */
1329  if (count_est > 0 && offset_est >= 0)
1330  limit_tuples = (double) count_est + (double) offset_est;
1331  }
1332 
1333  /* Make tuple_fraction accessible to lower-level routines */
1334  root->tuple_fraction = tuple_fraction;
1335 
1336  if (parse->setOperations)
1337  {
1338  /*
1339  * Construct Paths for set operations. The results will not need any
1340  * work except perhaps a top-level sort and/or LIMIT. Note that any
1341  * special work for recursive unions is the responsibility of
1342  * plan_set_operations.
1343  */
1344  current_rel = plan_set_operations(root);
1345 
1346  /*
1347  * We should not need to call preprocess_targetlist, since we must be
1348  * in a SELECT query node. Instead, use the processed_tlist returned
1349  * by plan_set_operations (since this tells whether it returned any
1350  * resjunk columns!), and transfer any sort key information from the
1351  * original tlist.
1352  */
1353  Assert(parse->commandType == CMD_SELECT);
1354 
1355  /* for safety, copy processed_tlist instead of modifying in-place */
1356  root->processed_tlist =
1357  postprocess_setop_tlist(copyObject(root->processed_tlist),
1358  parse->targetList);
1359 
1360  /* Also extract the PathTarget form of the setop result tlist */
1361  final_target = current_rel->cheapest_total_path->pathtarget;
1362 
1363  /* And check whether it's parallel safe */
1364  final_target_parallel_safe =
1365  is_parallel_safe(root, (Node *) final_target->exprs);
1366 
1367  /* The setop result tlist couldn't contain any SRFs */
1368  Assert(!parse->hasTargetSRFs);
1369  final_targets = final_targets_contain_srfs = NIL;
1370 
1371  /*
1372  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1373  * checked already, but let's make sure).
1374  */
1375  if (parse->rowMarks)
1376  ereport(ERROR,
1377  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1378  /*------
1379  translator: %s is a SQL row locking clause such as FOR UPDATE */
1380  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1381  LCS_asString(linitial_node(RowMarkClause,
1382  parse->rowMarks)->strength))));
1383 
1384  /*
1385  * Calculate pathkeys that represent result ordering requirements
1386  */
1387  Assert(parse->distinctClause == NIL);
1388  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1389  parse->sortClause,
1390  root->processed_tlist);
1391  }
1392  else
1393  {
1394  /* No set operations, do regular planning */
1395  PathTarget *sort_input_target;
1396  List *sort_input_targets;
1397  List *sort_input_targets_contain_srfs;
1398  bool sort_input_target_parallel_safe;
1399  PathTarget *grouping_target;
1400  List *grouping_targets;
1401  List *grouping_targets_contain_srfs;
1402  bool grouping_target_parallel_safe;
1403  PathTarget *scanjoin_target;
1404  List *scanjoin_targets;
1405  List *scanjoin_targets_contain_srfs;
1406  bool scanjoin_target_parallel_safe;
1407  bool scanjoin_target_same_exprs;
1408  bool have_grouping;
1409  WindowFuncLists *wflists = NULL;
1410  List *activeWindows = NIL;
1411  grouping_sets_data *gset_data = NULL;
1412  standard_qp_extra qp_extra;
1413 
1414  /* A recursive query should always have setOperations */
1415  Assert(!root->hasRecursion);
1416 
1417  /* Preprocess grouping sets and GROUP BY clause, if any */
1418  if (parse->groupingSets)
1419  {
1420  gset_data = preprocess_grouping_sets(root);
1421  }
1422  else if (parse->groupClause)
1423  {
1424  /* Preprocess regular GROUP BY clause, if any */
1425  root->processed_groupClause = list_copy(parse->groupClause);
1426  /* Remove any redundant GROUP BY columns */
1427  remove_useless_groupby_columns(root);
1428  }
1429 
1430  /*
1431  * Preprocess targetlist. Note that much of the remaining planning
1432  * work will be done with the PathTarget representation of tlists, but
1433  * we must also maintain the full representation of the final tlist so
1434  * that we can transfer its decoration (resnames etc) to the topmost
1435  * tlist of the finished Plan. This is kept in processed_tlist.
1436  */
1437  preprocess_targetlist(root);
1438 
1439  /*
1440  * Mark all the aggregates with resolved aggtranstypes, and detect
1441  * aggregates that are duplicates or can share transition state. We
1442  * must do this before slicing and dicing the tlist into various
1443  * pathtargets, else some copies of the Aggref nodes might escape
1444  * being marked.
1445  */
1446  if (parse->hasAggs)
1447  {
1448  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1449  preprocess_aggrefs(root, (Node *) parse->havingQual);
1450  }
1451 
1452  /*
1453  * Locate any window functions in the tlist. (We don't need to look
1454  * anywhere else, since expressions used in ORDER BY will be in there
1455  * too.) Note that they could all have been eliminated by constant
1456  * folding, in which case we don't need to do any more work.
1457  */
1458  if (parse->hasWindowFuncs)
1459  {
1460  wflists = find_window_functions((Node *) root->processed_tlist,
1461  list_length(parse->windowClause));
1462  if (wflists->numWindowFuncs > 0)
1463  {
1464  /*
1465  * See if any modifications can be made to each WindowClause
1466  * to allow the executor to execute the WindowFuncs more
1467  * quickly.
1468  */
1469  optimize_window_clauses(root, wflists);
1470 
1471  activeWindows = select_active_windows(root, wflists);
1472  }
1473  else
1474  parse->hasWindowFuncs = false;
1475  }
1476 
1477  /*
1478  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1479  * adding logic between here and the query_planner() call. Anything
1480  * that is needed in MIN/MAX-optimizable cases will have to be
1481  * duplicated in planagg.c.
1482  */
1483  if (parse->hasAggs)
1484  preprocess_minmax_aggregates(root);
1485 
1486  /*
1487  * Figure out whether there's a hard limit on the number of rows that
1488  * query_planner's result subplan needs to return. Even if we know a
1489  * hard limit overall, it doesn't apply if the query has any
1490  * grouping/aggregation operations, or SRFs in the tlist.
1491  */
1492  if (parse->groupClause ||
1493  parse->groupingSets ||
1494  parse->distinctClause ||
1495  parse->hasAggs ||
1496  parse->hasWindowFuncs ||
1497  parse->hasTargetSRFs ||
1498  root->hasHavingQual)
1499  root->limit_tuples = -1.0;
1500  else
1501  root->limit_tuples = limit_tuples;
1502 
1503  /* Set up data needed by standard_qp_callback */
1504  qp_extra.activeWindows = activeWindows;
1505  qp_extra.gset_data = gset_data;
1506 
1507  /*
1508  * If we're a subquery for a set operation, store the SetOperationStmt
1509  * in qp_extra.
1510  */
1511  qp_extra.setop = setops;
1512 
1513  /*
1514  * Generate the best unsorted and presorted paths for the scan/join
1515  * portion of this Query, ie the processing represented by the
1516  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1517  * We also generate (in standard_qp_callback) pathkey representations
1518  * of the query's sort clause, distinct clause, etc.
1519  */
1520  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1521 
1522  /*
1523  * Convert the query's result tlist into PathTarget format.
1524  *
1525  * Note: this cannot be done before query_planner() has performed
1526  * appendrel expansion, because that might add resjunk entries to
1527  * root->processed_tlist. Waiting till afterwards is also helpful
1528  * because the target width estimates can use per-Var width numbers
1529  * that were obtained within query_planner().
1530  */
1531  final_target = create_pathtarget(root, root->processed_tlist);
1532  final_target_parallel_safe =
1533  is_parallel_safe(root, (Node *) final_target->exprs);
1534 
1535  /*
1536  * If ORDER BY was given, consider whether we should use a post-sort
1537  * projection, and compute the adjusted target for preceding steps if
1538  * so.
1539  */
1540  if (parse->sortClause)
1541  {
1542  sort_input_target = make_sort_input_target(root,
1543  final_target,
1544  &have_postponed_srfs);
1545  sort_input_target_parallel_safe =
1546  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1547  }
1548  else
1549  {
1550  sort_input_target = final_target;
1551  sort_input_target_parallel_safe = final_target_parallel_safe;
1552  }
1553 
1554  /*
1555  * If we have window functions to deal with, the output from any
1556  * grouping step needs to be what the window functions want;
1557  * otherwise, it should be sort_input_target.
1558  */
1559  if (activeWindows)
1560  {
1561  grouping_target = make_window_input_target(root,
1562  final_target,
1563  activeWindows);
1564  grouping_target_parallel_safe =
1565  is_parallel_safe(root, (Node *) grouping_target->exprs);
1566  }
1567  else
1568  {
1569  grouping_target = sort_input_target;
1570  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1571  }
1572 
1573  /*
1574  * If we have grouping or aggregation to do, the topmost scan/join
1575  * plan node must emit what the grouping step wants; otherwise, it
1576  * should emit grouping_target.
1577  */
1578  have_grouping = (parse->groupClause || parse->groupingSets ||
1579  parse->hasAggs || root->hasHavingQual);
1580  if (have_grouping)
1581  {
1582  scanjoin_target = make_group_input_target(root, final_target);
1583  scanjoin_target_parallel_safe =
1584  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1585  }
1586  else
1587  {
1588  scanjoin_target = grouping_target;
1589  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1590  }
1591 
1592  /*
1593  * If there are any SRFs in the targetlist, we must separate each of
1594  * these PathTargets into SRF-computing and SRF-free targets. Replace
1595  * each of the named targets with a SRF-free version, and remember the
1596  * list of additional projection steps we need to add afterwards.
1597  */
1598  if (parse->hasTargetSRFs)
1599  {
1600  /* final_target doesn't recompute any SRFs in sort_input_target */
1601  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1602  &final_targets,
1603  &final_targets_contain_srfs);
1604  final_target = linitial_node(PathTarget, final_targets);
1605  Assert(!linitial_int(final_targets_contain_srfs));
1606  /* likewise for sort_input_target vs. grouping_target */
1607  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1608  &sort_input_targets,
1609  &sort_input_targets_contain_srfs);
1610  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1611  Assert(!linitial_int(sort_input_targets_contain_srfs));
1612  /* likewise for grouping_target vs. scanjoin_target */
1613  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1614  &grouping_targets,
1615  &grouping_targets_contain_srfs);
1616  grouping_target = linitial_node(PathTarget, grouping_targets);
1617  Assert(!linitial_int(grouping_targets_contain_srfs));
1618  /* scanjoin_target will not have any SRFs precomputed for it */
1619  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1620  &scanjoin_targets,
1621  &scanjoin_targets_contain_srfs);
1622  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1623  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1624  }
1625  else
1626  {
1627  /* initialize lists; for most of these, dummy values are OK */
1628  final_targets = final_targets_contain_srfs = NIL;
1629  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1630  grouping_targets = grouping_targets_contain_srfs = NIL;
1631  scanjoin_targets = list_make1(scanjoin_target);
1632  scanjoin_targets_contain_srfs = NIL;
1633  }
1634 
1635  /* Apply scan/join target. */
1636  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1637  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1638  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1639  scanjoin_targets_contain_srfs,
1640  scanjoin_target_parallel_safe,
1641  scanjoin_target_same_exprs);
1642 
1643  /*
1644  * Save the various upper-rel PathTargets we just computed into
1645  * root->upper_targets[]. The core code doesn't use this, but it
1646  * provides a convenient place for extensions to get at the info. For
1647  * consistency, we save all the intermediate targets, even though some
1648  * of the corresponding upperrels might not be needed for this query.
1649  */
1650  root->upper_targets[UPPERREL_FINAL] = final_target;
1651  root->upper_targets[UPPERREL_ORDERED] = final_target;
1652  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1653  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1654  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1655  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1656 
1657  /*
1658  * If we have grouping and/or aggregation, consider ways to implement
1659  * that. We build a new upperrel representing the output of this
1660  * phase.
1661  */
1662  if (have_grouping)
1663  {
1664  current_rel = create_grouping_paths(root,
1665  current_rel,
1666  grouping_target,
1667  grouping_target_parallel_safe,
1668  gset_data);
1669  /* Fix things up if grouping_target contains SRFs */
1670  if (parse->hasTargetSRFs)
1671  adjust_paths_for_srfs(root, current_rel,
1672  grouping_targets,
1673  grouping_targets_contain_srfs);
1674  }
1675 
1676  /*
1677  * If we have window functions, consider ways to implement those. We
1678  * build a new upperrel representing the output of this phase.
1679  */
1680  if (activeWindows)
1681  {
1682  current_rel = create_window_paths(root,
1683  current_rel,
1684  grouping_target,
1685  sort_input_target,
1686  sort_input_target_parallel_safe,
1687  wflists,
1688  activeWindows);
1689  /* Fix things up if sort_input_target contains SRFs */
1690  if (parse->hasTargetSRFs)
1691  adjust_paths_for_srfs(root, current_rel,
1692  sort_input_targets,
1693  sort_input_targets_contain_srfs);
1694  }
1695 
1696  /*
1697  * If there is a DISTINCT clause, consider ways to implement that. We
1698  * build a new upperrel representing the output of this phase.
1699  */
1700  if (parse->distinctClause)
1701  {
1702  current_rel = create_distinct_paths(root,
1703  current_rel,
1704  sort_input_target);
1705  }
1706  } /* end of if (setOperations) */
1707 
1708  /*
1709  * If ORDER BY was given, consider ways to implement that, and generate a
1710  * new upperrel containing only paths that emit the correct ordering and
1711  * project the correct final_target. We can apply the original
1712  * limit_tuples limit in sort costing here, but only if there are no
1713  * postponed SRFs.
1714  */
1715  if (parse->sortClause)
1716  {
1717  current_rel = create_ordered_paths(root,
1718  current_rel,
1719  final_target,
1720  final_target_parallel_safe,
1721  have_postponed_srfs ? -1.0 :
1722  limit_tuples);
1723  /* Fix things up if final_target contains SRFs */
1724  if (parse->hasTargetSRFs)
1725  adjust_paths_for_srfs(root, current_rel,
1726  final_targets,
1727  final_targets_contain_srfs);
1728  }
1729 
1730  /*
1731  * Now we are prepared to build the final-output upperrel.
1732  */
1733  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1734 
1735  /*
1736  * If the input rel is marked consider_parallel and there's nothing that's
1737  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1738  * consider_parallel as well. Note that if the query has rowMarks or is
1739  * not a SELECT, consider_parallel will be false for every relation in the
1740  * query.
1741  */
1742  if (current_rel->consider_parallel &&
1743  is_parallel_safe(root, parse->limitOffset) &&
1744  is_parallel_safe(root, parse->limitCount))
1745  final_rel->consider_parallel = true;
1746 
1747  /*
1748  * If the current_rel belongs to a single FDW, so does the final_rel.
1749  */
1750  final_rel->serverid = current_rel->serverid;
1751  final_rel->userid = current_rel->userid;
1752  final_rel->useridiscurrent = current_rel->useridiscurrent;
1753  final_rel->fdwroutine = current_rel->fdwroutine;
1754 
1755  /*
1756  * Generate paths for the final_rel. Insert all surviving paths, with
1757  * LockRows, Limit, and/or ModifyTable steps added if needed.
1758  */
1759  foreach(lc, current_rel->pathlist)
1760  {
1761  Path *path = (Path *) lfirst(lc);
1762 
1763  /*
1764  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1765  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1766  * here. If there are only non-locking rowmarks, they should be
1767  * handled by the ModifyTable node instead. However, root->rowMarks
1768  * is what goes into the LockRows node.)
1769  */
1770  if (parse->rowMarks)
1771  {
1772  path = (Path *) create_lockrows_path(root, final_rel, path,
 1773  root->rowMarks,
 1774  assign_special_exec_param(root));
1775  }
1776 
1777  /*
1778  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1779  */
1780  if (limit_needed(parse))
1781  {
1782  path = (Path *) create_limit_path(root, final_rel, path,
1783  parse->limitOffset,
1784  parse->limitCount,
1785  parse->limitOption,
1786  offset_est, count_est);
1787  }
1788 
1789  /*
1790  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1791  */
1792  if (parse->commandType != CMD_SELECT)
1793  {
1794  Index rootRelation;
1795  List *resultRelations = NIL;
1796  List *updateColnosLists = NIL;
1797  List *withCheckOptionLists = NIL;
1798  List *returningLists = NIL;
1799  List *mergeActionLists = NIL;
1800  List *mergeJoinConditions = NIL;
1801  List *rowMarks;
1802 
1803  if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1804  {
1805  /* Inherited UPDATE/DELETE/MERGE */
1806  RelOptInfo *top_result_rel = find_base_rel(root,
1807  parse->resultRelation);
1808  int resultRelation = -1;
1809 
1810  /* Pass the root result rel forward to the executor. */
1811  rootRelation = parse->resultRelation;
1812 
1813  /* Add only leaf children to ModifyTable. */
1814  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1815  resultRelation)) >= 0)
1816  {
1817  RelOptInfo *this_result_rel = find_base_rel(root,
1818  resultRelation);
1819 
1820  /*
1821  * Also exclude any leaf rels that have turned dummy since
1822  * being added to the list, for example, by being excluded
1823  * by constraint exclusion.
1824  */
1825  if (IS_DUMMY_REL(this_result_rel))
1826  continue;
1827 
1828  /* Build per-target-rel lists needed by ModifyTable */
1829  resultRelations = lappend_int(resultRelations,
1830  resultRelation);
1831  if (parse->commandType == CMD_UPDATE)
1832  {
1833  List *update_colnos = root->update_colnos;
1834 
1835  if (this_result_rel != top_result_rel)
 1836  update_colnos =
 1837  adjust_inherited_attnums_multilevel(root,
1838  update_colnos,
1839  this_result_rel->relid,
1840  top_result_rel->relid);
1841  updateColnosLists = lappend(updateColnosLists,
1842  update_colnos);
1843  }
1844  if (parse->withCheckOptions)
1845  {
1846  List *withCheckOptions = parse->withCheckOptions;
1847 
1848  if (this_result_rel != top_result_rel)
 1849  withCheckOptions = (List *)
 1850  adjust_appendrel_attrs_multilevel(root,
1851  (Node *) withCheckOptions,
1852  this_result_rel,
1853  top_result_rel);
1854  withCheckOptionLists = lappend(withCheckOptionLists,
1855  withCheckOptions);
1856  }
1857  if (parse->returningList)
1858  {
1859  List *returningList = parse->returningList;
1860 
1861  if (this_result_rel != top_result_rel)
 1862  returningList = (List *)
 1863  adjust_appendrel_attrs_multilevel(root,
1864  (Node *) returningList,
1865  this_result_rel,
1866  top_result_rel);
1867  returningLists = lappend(returningLists,
1868  returningList);
1869  }
1870  if (parse->mergeActionList)
1871  {
1872  ListCell *l;
1873  List *mergeActionList = NIL;
1874 
1875  /*
1876  * Copy MergeActions and translate stuff that
1877  * references attribute numbers.
1878  */
1879  foreach(l, parse->mergeActionList)
1880  {
1881  MergeAction *action = lfirst(l),
1882  *leaf_action = copyObject(action);
1883 
 1884  leaf_action->qual =
 1885  adjust_appendrel_attrs_multilevel(root,
1886  (Node *) action->qual,
1887  this_result_rel,
1888  top_result_rel);
 1889  leaf_action->targetList = (List *)
 1890  adjust_appendrel_attrs_multilevel(root,
1891  (Node *) action->targetList,
1892  this_result_rel,
1893  top_result_rel);
1894  if (leaf_action->commandType == CMD_UPDATE)
 1895  leaf_action->updateColnos =
 1896  adjust_inherited_attnums_multilevel(root,
1897  action->updateColnos,
1898  this_result_rel->relid,
1899  top_result_rel->relid);
1900  mergeActionList = lappend(mergeActionList,
1901  leaf_action);
1902  }
1903 
1904  mergeActionLists = lappend(mergeActionLists,
1905  mergeActionList);
1906  }
1907  if (parse->commandType == CMD_MERGE)
1908  {
1909  Node *mergeJoinCondition = parse->mergeJoinCondition;
1910 
1911  if (this_result_rel != top_result_rel)
 1912  mergeJoinCondition =
 1913  adjust_appendrel_attrs_multilevel(root,
1914  mergeJoinCondition,
1915  this_result_rel,
1916  top_result_rel);
1917  mergeJoinConditions = lappend(mergeJoinConditions,
1918  mergeJoinCondition);
1919  }
1920  }
1921 
1922  if (resultRelations == NIL)
1923  {
1924  /*
1925  * We managed to exclude every child rel, so generate a
1926  * dummy one-relation plan using info for the top target
1927  * rel (even though that may not be a leaf target).
1928  * Although it's clear that no data will be updated or
1929  * deleted, we still need to have a ModifyTable node so
1930  * that any statement triggers will be executed. (This
1931  * could be cleaner if we fixed nodeModifyTable.c to allow
1932  * zero target relations, but that probably wouldn't be a
1933  * net win.)
1934  */
1935  resultRelations = list_make1_int(parse->resultRelation);
1936  if (parse->commandType == CMD_UPDATE)
1937  updateColnosLists = list_make1(root->update_colnos);
1938  if (parse->withCheckOptions)
1939  withCheckOptionLists = list_make1(parse->withCheckOptions);
1940  if (parse->returningList)
1941  returningLists = list_make1(parse->returningList);
1942  if (parse->mergeActionList)
1943  mergeActionLists = list_make1(parse->mergeActionList);
1944  if (parse->commandType == CMD_MERGE)
1945  mergeJoinConditions = list_make1(parse->mergeJoinCondition);
1946  }
1947  }
1948  else
1949  {
1950  /* Single-relation INSERT/UPDATE/DELETE/MERGE. */
1951  rootRelation = 0; /* there's no separate root rel */
1952  resultRelations = list_make1_int(parse->resultRelation);
1953  if (parse->commandType == CMD_UPDATE)
1954  updateColnosLists = list_make1(root->update_colnos);
1955  if (parse->withCheckOptions)
1956  withCheckOptionLists = list_make1(parse->withCheckOptions);
1957  if (parse->returningList)
1958  returningLists = list_make1(parse->returningList);
1959  if (parse->mergeActionList)
1960  mergeActionLists = list_make1(parse->mergeActionList);
1961  if (parse->commandType == CMD_MERGE)
1962  mergeJoinConditions = list_make1(parse->mergeJoinCondition);
1963  }
1964 
1965  /*
1966  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1967  * will have dealt with fetching non-locked marked rows, else we
1968  * need to have ModifyTable do that.
1969  */
1970  if (parse->rowMarks)
1971  rowMarks = NIL;
1972  else
1973  rowMarks = root->rowMarks;
1974 
1975  path = (Path *)
1976  create_modifytable_path(root, final_rel,
1977  path,
1978  parse->commandType,
1979  parse->canSetTag,
1980  parse->resultRelation,
1981  rootRelation,
1982  root->partColsUpdated,
1983  resultRelations,
1984  updateColnosLists,
1985  withCheckOptionLists,
1986  returningLists,
1987  rowMarks,
1988  parse->onConflict,
1989  mergeActionLists,
 1990  mergeJoinConditions,
 1991  assign_special_exec_param(root));
1992  }
1993 
1994  /* And shove it into final_rel */
1995  add_path(final_rel, path);
1996  }
1997 
1998  /*
1999  * Generate partial paths for final_rel, too, if outer query levels might
2000  * be able to make use of them.
2001  */
2002  if (final_rel->consider_parallel && root->query_level > 1 &&
2003  !limit_needed(parse))
2004  {
2005  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2006  foreach(lc, current_rel->partial_pathlist)
2007  {
2008  Path *partial_path = (Path *) lfirst(lc);
2009 
2010  add_partial_path(final_rel, partial_path);
2011  }
2012  }
2013 
2014  extra.limit_needed = limit_needed(parse);
2015  extra.limit_tuples = limit_tuples;
2016  extra.count_est = count_est;
2017  extra.offset_est = offset_est;
2018 
2019  /*
2020  * If there is an FDW that's responsible for all baserels of the query,
2021  * let it consider adding ForeignPaths.
2022  */
2023  if (final_rel->fdwroutine &&
2024  final_rel->fdwroutine->GetForeignUpperPaths)
2025  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2026  current_rel, final_rel,
2027  &extra);
2028 
 2029  /* Let extensions possibly add some more paths */
 2030  if (create_upper_paths_hook)
2031  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2032  current_rel, final_rel, &extra);
2033 
2034  /* Note: currently, we leave it to callers to do set_cheapest() */
2035 }
2036 
2037 /*
2038  * Do preprocessing for groupingSets clause and related data. This handles the
2039  * preliminary steps of expanding the grouping sets, organizing them into lists
2040  * of rollups, and preparing annotations which will later be filled in with
2041  * size estimates.
2042  */
 2043 static grouping_sets_data *
 2044 preprocess_grouping_sets(PlannerInfo *root)
2045 {
2046  Query *parse = root->parse;
2047  List *sets;
2048  int maxref = 0;
 2049  ListCell *lc_set;
 2050  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2051 
2052  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
2053 
2054  gd->any_hashable = false;
2055  gd->unhashable_refs = NULL;
2056  gd->unsortable_refs = NULL;
2057  gd->unsortable_sets = NIL;
2058 
2059  /*
2060  * We don't currently make any attempt to optimize the groupClause when
2061  * there are grouping sets, so just duplicate it in processed_groupClause.
2062  */
2063  root->processed_groupClause = parse->groupClause;
2064 
2065  if (parse->groupClause)
2066  {
2067  ListCell *lc;
2068 
2069  foreach(lc, parse->groupClause)
 2070  {
 2071  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2072  Index ref = gc->tleSortGroupRef;
2073 
2074  if (ref > maxref)
2075  maxref = ref;
2076 
 2077  if (!gc->hashable)
 2078  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2079 
 2080  if (!OidIsValid(gc->sortop))
 2081  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2082  }
2083  }
2084 
2085  /* Allocate workspace array for remapping */
2086  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2087 
2088  /*
2089  * If we have any unsortable sets, we must extract them before trying to
2090  * prepare rollups. Unsortable sets don't go through
2091  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2092  * here.
2093  */
2094  if (!bms_is_empty(gd->unsortable_refs))
2095  {
2096  List *sortable_sets = NIL;
2097  ListCell *lc;
2098 
2099  foreach(lc, parse->groupingSets)
2100  {
2101  List *gset = (List *) lfirst(lc);
2102 
2103  if (bms_overlap_list(gd->unsortable_refs, gset))
 2104  {
 2105  GroupingSetData *gs = makeNode(GroupingSetData);
2106 
2107  gs->set = gset;
2108  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2109 
2110  /*
2111  * We must enforce here that an unsortable set is hashable;
2112  * later code assumes this. Parse analysis only checks that
2113  * every individual column is either hashable or sortable.
2114  *
2115  * Note that passing this test doesn't guarantee we can
2116  * generate a plan; there might be other showstoppers.
2117  */
2118  if (bms_overlap_list(gd->unhashable_refs, gset))
2119  ereport(ERROR,
2120  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2121  errmsg("could not implement GROUP BY"),
2122  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2123  }
2124  else
2125  sortable_sets = lappend(sortable_sets, gset);
2126  }
2127 
2128  if (sortable_sets)
2129  sets = extract_rollup_sets(sortable_sets);
2130  else
2131  sets = NIL;
2132  }
2133  else
2134  sets = extract_rollup_sets(parse->groupingSets);
2135 
2136  foreach(lc_set, sets)
2137  {
2138  List *current_sets = (List *) lfirst(lc_set);
2139  RollupData *rollup = makeNode(RollupData);
2140  GroupingSetData *gs;
2141 
2142  /*
2143  * Reorder the current list of grouping sets into correct prefix
2144  * order. If only one aggregation pass is needed, try to make the
2145  * list match the ORDER BY clause; if more than one pass is needed, we
2146  * don't bother with that.
2147  *
2148  * Note that this reorders the sets from smallest-member-first to
2149  * largest-member-first, and applies the GroupingSetData annotations,
2150  * though the data will be filled in later.
2151  */
2152  current_sets = reorder_grouping_sets(current_sets,
2153  (list_length(sets) == 1
2154  ? parse->sortClause
2155  : NIL));
2156 
2157  /*
2158  * Get the initial (and therefore largest) grouping set.
2159  */
2160  gs = linitial_node(GroupingSetData, current_sets);
2161 
2162  /*
2163  * Order the groupClause appropriately. If the first grouping set is
2164  * empty, then the groupClause must also be empty; otherwise we have
2165  * to force the groupClause to match that grouping set's order.
2166  *
2167  * (The first grouping set can be empty even though parse->groupClause
2168  * is not empty only if all non-empty grouping sets are unsortable.
2169  * The groupClauses for hashed grouping sets are built later on.)
2170  */
 2171  if (gs->set)
 2172  rollup->groupClause = groupclause_apply_groupingset(root, gs->set);
2173  else
2174  rollup->groupClause = NIL;
2175 
2176  /*
2177  * Is it hashable? We pretend empty sets are hashable even though we
2178  * actually force them not to be hashed later. But don't bother if
2179  * there's nothing but empty sets (since in that case we can't hash
2180  * anything).
2181  */
 2182  if (gs->set &&
 2183  grouping_is_hashable(rollup->groupClause))
2184  {
2185  rollup->hashable = true;
2186  gd->any_hashable = true;
2187  }
2188 
2189  /*
2190  * Now that we've pinned down an order for the groupClause for this
2191  * list of grouping sets, we need to remap the entries in the grouping
2192  * sets from sortgrouprefs to plain indices (0-based) into the
2193  * groupClause for this collection of grouping sets. We keep the
2194  * original form for later use, though.
2195  */
2196  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2197  current_sets,
2198  gd->tleref_to_colnum_map);
2199  rollup->gsets_data = current_sets;
2200 
2201  gd->rollups = lappend(gd->rollups, rollup);
2202  }
2203 
2204  if (gd->unsortable_sets)
2205  {
2206  /*
2207  * We have not yet pinned down a groupclause for this, but we will
2208  * need index-based lists for estimation purposes. Construct
2209  * hash_sets_idx based on the entire original groupclause for now.
2210  */
2211  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2212  gd->unsortable_sets,
2213  gd->tleref_to_colnum_map);
2214  gd->any_hashable = true;
2215  }
2216 
2217  return gd;
2218 }
2219 
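/*
 * [Editor's illustrative example -- not part of the upstream source.]
 * For GROUP BY CUBE (a, b), expand_grouping_sets() produces the sets
 * (a,b), (a), (b) and (), and the code above typically packages them into
 * two rollups, e.g. one containing (a,b) -> (a) -> () and another containing
 * just (b), since each rollup must be a chain of prefixes that a single
 * sorted aggregation pass can compute.
 */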
2220 /*
2221  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2222  * (without annotation) mapped to indexes into the given groupclause.
2223  */
 2224 static List *
 2225 remap_to_groupclause_idx(List *groupClause,
2226  List *gsets,
2227  int *tleref_to_colnum_map)
2228 {
2229  int ref = 0;
2230  List *result = NIL;
2231  ListCell *lc;
2232 
2233  foreach(lc, groupClause)
 2234  {
 2235  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2236 
2237  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2238  }
2239 
2240  foreach(lc, gsets)
2241  {
2242  List *set = NIL;
 2243  ListCell *lc2;
 2244  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2245 
2246  foreach(lc2, gs->set)
2247  {
2248  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2249  }
2250 
2251  result = lappend(result, set);
2252  }
2253 
2254  return result;
2255 }
2256 
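/*
 * [Editor's illustrative example -- not part of the upstream source.]
 * If the groupClause entries carry sortgrouprefs 7, 3, 5 (in that order),
 * the map built above is {7->0, 3->1, 5->2}, so a grouping set listed as
 * (3, 7) comes back as the index list (1, 0).
 */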
2257 
2258 /*
2259  * preprocess_rowmarks - set up PlanRowMarks if needed
2260  */
 2261 static void
 2262 preprocess_rowmarks(PlannerInfo *root)
2263 {
2264  Query *parse = root->parse;
2265  Bitmapset *rels;
2266  List *prowmarks;
2267  ListCell *l;
2268  int i;
2269 
2270  if (parse->rowMarks)
2271  {
2272  /*
2273  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2274  * grouping, since grouping renders a reference to individual tuple
2275  * CTIDs invalid. This is also checked at parse time, but that's
2276  * insufficient because of rule substitution, query pullup, etc.
 2277  */
 2278  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2279  parse->rowMarks)->strength);
2280  }
2281  else
2282  {
2283  /*
2284  * We only need rowmarks for UPDATE, DELETE, MERGE, or FOR [KEY]
2285  * UPDATE/SHARE.
2286  */
2287  if (parse->commandType != CMD_UPDATE &&
2288  parse->commandType != CMD_DELETE &&
2289  parse->commandType != CMD_MERGE)
2290  return;
2291  }
2292 
2293  /*
2294  * We need to have rowmarks for all base relations except the target. We
2295  * make a bitmapset of all base rels and then remove the items we don't
2296  * need or have FOR [KEY] UPDATE/SHARE marks for.
2297  */
2298  rels = get_relids_in_jointree((Node *) parse->jointree, false, false);
2299  if (parse->resultRelation)
2300  rels = bms_del_member(rels, parse->resultRelation);
2301 
2302  /*
2303  * Convert RowMarkClauses to PlanRowMark representation.
2304  */
2305  prowmarks = NIL;
2306  foreach(l, parse->rowMarks)
 2307  {
 2308  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2309  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2310  PlanRowMark *newrc;
2311 
2312  /*
2313  * Currently, it is syntactically impossible to have FOR UPDATE et al
2314  * applied to an update/delete target rel. If that ever becomes
2315  * possible, we should drop the target from the PlanRowMark list.
2316  */
2317  Assert(rc->rti != parse->resultRelation);
2318 
2319  /*
2320  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2321  * can't support true locking. Subqueries that got flattened into the
2322  * main query should be ignored completely. Any that didn't will get
2323  * ROW_MARK_COPY items in the next loop.
2324  */
2325  if (rte->rtekind != RTE_RELATION)
2326  continue;
2327 
2328  rels = bms_del_member(rels, rc->rti);
2329 
2330  newrc = makeNode(PlanRowMark);
2331  newrc->rti = newrc->prti = rc->rti;
2332  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2333  newrc->markType = select_rowmark_type(rte, rc->strength);
2334  newrc->allMarkTypes = (1 << newrc->markType);
2335  newrc->strength = rc->strength;
2336  newrc->waitPolicy = rc->waitPolicy;
2337  newrc->isParent = false;
2338 
2339  prowmarks = lappend(prowmarks, newrc);
2340  }
2341 
2342  /*
2343  * Now, add rowmarks for any non-target, non-locked base relations.
2344  */
2345  i = 0;
2346  foreach(l, parse->rtable)
 2347  {
 2348  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2349  PlanRowMark *newrc;
2350 
2351  i++;
2352  if (!bms_is_member(i, rels))
2353  continue;
2354 
2355  newrc = makeNode(PlanRowMark);
2356  newrc->rti = newrc->prti = i;
2357  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2358  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2359  newrc->allMarkTypes = (1 << newrc->markType);
2360  newrc->strength = LCS_NONE;
2361  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2362  newrc->isParent = false;
2363 
2364  prowmarks = lappend(prowmarks, newrc);
2365  }
2366 
2367  root->rowMarks = prowmarks;
2368 }
2369 
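/*
 * [Editor's illustrative example -- not part of the upstream source; table
 * names are hypothetical.]  For "SELECT ... FROM a JOIN b ON ... FOR UPDATE
 * OF a", the first loop above creates a PlanRowMark with ROW_MARK_EXCLUSIVE
 * for a (it has an explicit locking clause), while the second loop gives b a
 * non-locking mark (typically ROW_MARK_REFERENCE for a regular table) so its
 * row can be re-fetched during EvalPlanQual rechecks.
 */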
2370 /*
2371  * Select RowMarkType to use for a given table
 2372  */
 2373 RowMarkType
 2374 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2375 {
2376  if (rte->rtekind != RTE_RELATION)
2377  {
2378  /* If it's not a table at all, use ROW_MARK_COPY */
2379  return ROW_MARK_COPY;
2380  }
2381  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2382  {
2383  /* Let the FDW select the rowmark type, if it wants to */
2384  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2385 
2386  if (fdwroutine->GetForeignRowMarkType != NULL)
2387  return fdwroutine->GetForeignRowMarkType(rte, strength);
2388  /* Otherwise, use ROW_MARK_COPY by default */
2389  return ROW_MARK_COPY;
2390  }
2391  else
2392  {
2393  /* Regular table, apply the appropriate lock type */
2394  switch (strength)
2395  {
2396  case LCS_NONE:
2397 
2398  /*
2399  * We don't need a tuple lock, only the ability to re-fetch
2400  * the row.
2401  */
2402  return ROW_MARK_REFERENCE;
2403  break;
2404  case LCS_FORKEYSHARE:
2405  return ROW_MARK_KEYSHARE;
2406  break;
2407  case LCS_FORSHARE:
2408  return ROW_MARK_SHARE;
2409  break;
2410  case LCS_FORNOKEYUPDATE:
2411  return ROW_MARK_NOKEYEXCLUSIVE;
2412  break;
2413  case LCS_FORUPDATE:
2414  return ROW_MARK_EXCLUSIVE;
2415  break;
2416  }
2417  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2418  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2419  }
2420 }
2421 
2422 /*
2423  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2424  *
2425  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2426  * results back in *count_est and *offset_est. These variables are set to
2427  * 0 if the corresponding clause is not present, and -1 if it's present
2428  * but we couldn't estimate the value for it. (The "0" convention is OK
2429  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2430  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2431  * usual practice of never estimating less than one row.) These values will
2432  * be passed to create_limit_path, which see if you change this code.
2433  *
2434  * The return value is the suitably adjusted tuple_fraction to use for
2435  * planning the query. This adjustment is not overridable, since it reflects
2436  * plan actions that grouping_planner() will certainly take, not assumptions
2437  * about context.
2438  */
2439 static double
2440 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2441  int64 *offset_est, int64 *count_est)
2442 {
2443  Query *parse = root->parse;
2444  Node *est;
2445  double limit_fraction;
2446 
2447  /* Should not be called unless LIMIT or OFFSET */
2448  Assert(parse->limitCount || parse->limitOffset);
2449 
2450  /*
2451  * Try to obtain the clause values. We use estimate_expression_value
2452  * primarily because it can sometimes do something useful with Params.
2453  */
2454  if (parse->limitCount)
2455  {
2456  est = estimate_expression_value(root, parse->limitCount);
2457  if (est && IsA(est, Const))
2458  {
2459  if (((Const *) est)->constisnull)
2460  {
2461  /* NULL indicates LIMIT ALL, ie, no limit */
2462  *count_est = 0; /* treat as not present */
2463  }
2464  else
2465  {
2466  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2467  if (*count_est <= 0)
2468  *count_est = 1; /* force to at least 1 */
2469  }
2470  }
2471  else
2472  *count_est = -1; /* can't estimate */
2473  }
2474  else
2475  *count_est = 0; /* not present */
2476 
2477  if (parse->limitOffset)
2478  {
2479  est = estimate_expression_value(root, parse->limitOffset);
2480  if (est && IsA(est, Const))
2481  {
2482  if (((Const *) est)->constisnull)
2483  {
2484  /* Treat NULL as no offset; the executor will too */
2485  *offset_est = 0; /* treat as not present */
2486  }
2487  else
2488  {
2489  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2490  if (*offset_est < 0)
2491  *offset_est = 0; /* treat as not present */
2492  }
2493  }
2494  else
2495  *offset_est = -1; /* can't estimate */
2496  }
2497  else
2498  *offset_est = 0; /* not present */
2499 
2500  if (*count_est != 0)
2501  {
2502  /*
2503  * A LIMIT clause limits the absolute number of tuples returned.
2504  * However, if it's not a constant LIMIT then we have to guess; for
2505  * lack of a better idea, assume 10% of the plan's result is wanted.
2506  */
2507  if (*count_est < 0 || *offset_est < 0)
2508  {
2509  /* LIMIT or OFFSET is an expression ... punt ... */
2510  limit_fraction = 0.10;
2511  }
2512  else
2513  {
2514  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2515  limit_fraction = (double) *count_est + (double) *offset_est;
2516  }
2517 
2518  /*
2519  * If we have absolute limits from both caller and LIMIT, use the
2520  * smaller value; likewise if they are both fractional. If one is
2521  * fractional and the other absolute, we can't easily determine which
2522  * is smaller, but we use the heuristic that the absolute will usually
2523  * be smaller.
2524  */
2525  if (tuple_fraction >= 1.0)
2526  {
2527  if (limit_fraction >= 1.0)
2528  {
2529  /* both absolute */
2530  tuple_fraction = Min(tuple_fraction, limit_fraction);
2531  }
2532  else
2533  {
2534  /* caller absolute, limit fractional; use caller's value */
2535  }
2536  }
2537  else if (tuple_fraction > 0.0)
2538  {
2539  if (limit_fraction >= 1.0)
2540  {
2541  /* caller fractional, limit absolute; use limit */
2542  tuple_fraction = limit_fraction;
2543  }
2544  else
2545  {
2546  /* both fractional */
2547  tuple_fraction = Min(tuple_fraction, limit_fraction);
2548  }
2549  }
2550  else
2551  {
2552  /* no info from caller, just use limit */
2553  tuple_fraction = limit_fraction;
2554  }
2555  }
2556  else if (*offset_est != 0 && tuple_fraction > 0.0)
2557  {
2558  /*
2559  * We have an OFFSET but no LIMIT. This acts entirely differently
2560  * from the LIMIT case: here, we need to increase rather than decrease
2561  * the caller's tuple_fraction, because the OFFSET acts to cause more
2562  * tuples to be fetched instead of fewer. This only matters if we got
2563  * a tuple_fraction > 0, however.
2564  *
2565  * As above, use 10% if OFFSET is present but unestimatable.
2566  */
2567  if (*offset_est < 0)
2568  limit_fraction = 0.10;
2569  else
2570  limit_fraction = (double) *offset_est;
2571 
2572  /*
2573  * If we have absolute counts from both caller and OFFSET, add them
2574  * together; likewise if they are both fractional. If one is
2575  * fractional and the other absolute, we want to take the larger, and
2576  * we heuristically assume that's the fractional one.
2577  */
2578  if (tuple_fraction >= 1.0)
2579  {
2580  if (limit_fraction >= 1.0)
2581  {
2582  /* both absolute, so add them together */
2583  tuple_fraction += limit_fraction;
2584  }
2585  else
2586  {
2587  /* caller absolute, limit fractional; use limit */
2588  tuple_fraction = limit_fraction;
2589  }
2590  }
2591  else
2592  {
2593  if (limit_fraction >= 1.0)
2594  {
2595  /* caller fractional, limit absolute; use caller's value */
2596  }
2597  else
2598  {
2599  /* both fractional, so add them together */
2600  tuple_fraction += limit_fraction;
2601  if (tuple_fraction >= 1.0)
2602  tuple_fraction = 0.0; /* assume fetch all */
2603  }
2604  }
2605  }
2606 
2607  return tuple_fraction;
2608 }
2609 
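/*
 * [Editor's illustrative worked example -- not part of the upstream source.]
 * For "... LIMIT 10 OFFSET 20" with no guidance from the caller
 * (tuple_fraction == 0), the code above sets *count_est = 10 and
 * *offset_est = 20, computes limit_fraction = 30, and returns 30, i.e. an
 * absolute row count.  If the LIMIT were instead a Param that
 * estimate_expression_value() cannot reduce to a Const, *count_est would be
 * -1 and the 10% fallback fraction would be used.
 */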
2610 /*
2611  * limit_needed - do we actually need a Limit plan node?
2612  *
2613  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2614  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2615  * locution for an optimization fence. (Because other places in the planner
2616  * merely check whether parse->limitOffset isn't NULL, it will still work as
2617  * an optimization fence --- we're just suppressing unnecessary run-time
2618  * overhead.)
2619  *
2620  * This might look like it could be merged into preprocess_limit, but there's
2621  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2622  * in preprocess_limit it's good enough to consider estimated values.
2623  */
 2624 bool
 2625 limit_needed(Query *parse)
2626 {
2627  Node *node;
2628 
2629  node = parse->limitCount;
2630  if (node)
2631  {
2632  if (IsA(node, Const))
2633  {
2634  /* NULL indicates LIMIT ALL, ie, no limit */
2635  if (!((Const *) node)->constisnull)
2636  return true; /* LIMIT with a constant value */
2637  }
2638  else
2639  return true; /* non-constant LIMIT */
2640  }
2641 
2642  node = parse->limitOffset;
2643  if (node)
2644  {
2645  if (IsA(node, Const))
2646  {
2647  /* Treat NULL as no offset; the executor would too */
2648  if (!((Const *) node)->constisnull)
2649  {
2650  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2651 
2652  if (offset != 0)
2653  return true; /* OFFSET with a nonzero value */
2654  }
2655  }
2656  else
2657  return true; /* non-constant OFFSET */
2658  }
2659 
2660  return false; /* don't need a Limit plan node */
2661 }
2662 
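/*
 * [Editor's illustrative example -- not part of the upstream source.]
 * "SELECT ... OFFSET 0" and "SELECT ... LIMIT ALL" both make limit_needed()
 * return false, so no Limit node is added, yet the mere presence of the
 * limitOffset expression still acts as an optimization fence elsewhere in
 * the planner.  "OFFSET 1" or "LIMIT $1" would each require a Limit node.
 */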
2663 
2664 /*
2665  * remove_useless_groupby_columns
2666  * Remove any columns in the GROUP BY clause that are redundant due to
2667  * being functionally dependent on other GROUP BY columns.
2668  *
2669  * Since some other DBMSes do not allow references to ungrouped columns, it's
2670  * not unusual to find all columns listed in GROUP BY even though listing the
2671  * primary-key columns would be sufficient. Deleting such excess columns
2672  * avoids redundant sorting work, so it's worth doing.
2673  *
2674  * Relcache invalidations will ensure that cached plans become invalidated
2675  * when the underlying index of the pkey constraint is dropped.
2676  *
 2677  * Currently, we only make use of pkey constraints for this; however, we may
2678  * wish to take this further in the future and also use unique constraints
2679  * which have NOT NULL columns. In that case, plan invalidation will still
2680  * work since relations will receive a relcache invalidation when a NOT NULL
2681  * constraint is dropped.
2682  */
 2683 static void
 2684 remove_useless_groupby_columns(PlannerInfo *root)
2685 {
2686  Query *parse = root->parse;
2687  Bitmapset **groupbyattnos;
2688  Bitmapset **surplusvars;
2689  ListCell *lc;
2690  int relid;
2691 
 2692  /* No chance to do anything if there are fewer than two GROUP BY items */
2693  if (list_length(root->processed_groupClause) < 2)
2694  return;
2695 
2696  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2697  if (parse->groupingSets)
2698  return;
2699 
2700  /*
2701  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2702  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2703  * that are GROUP BY items.
2704  */
2705  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2706  (list_length(parse->rtable) + 1));
2707  foreach(lc, root->processed_groupClause)
 2708  {
 2709  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2710  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2711  Var *var = (Var *) tle->expr;
2712 
2713  /*
2714  * Ignore non-Vars and Vars from other query levels.
2715  *
2716  * XXX in principle, stable expressions containing Vars could also be
2717  * removed, if all the Vars are functionally dependent on other GROUP
2718  * BY items. But it's not clear that such cases occur often enough to
2719  * be worth troubling over.
2720  */
2721  if (!IsA(var, Var) ||
2722  var->varlevelsup > 0)
2723  continue;
2724 
2725  /* OK, remember we have this Var */
2726  relid = var->varno;
2727  Assert(relid <= list_length(parse->rtable));
 2728  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
 2729  var->varattno - FirstLowInvalidHeapAttributeNumber);
2730  }
2731 
2732  /*
2733  * Consider each relation and see if it is possible to remove some of its
2734  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2735  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2736  * of the column attnos of RTE k that are removable GROUP BY items.
2737  */
2738  surplusvars = NULL; /* don't allocate array unless required */
2739  relid = 0;
2740  foreach(lc, parse->rtable)
 2741  {
 2742  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2743  Bitmapset *relattnos;
2744  Bitmapset *pkattnos;
2745  Oid constraintOid;
2746 
2747  relid++;
2748 
2749  /* Only plain relations could have primary-key constraints */
2750  if (rte->rtekind != RTE_RELATION)
2751  continue;
2752 
2753  /*
2754  * We must skip inheritance parent tables as some of the child rels
2755  * may cause duplicate rows. This cannot happen with partitioned
2756  * tables, however.
2757  */
2758  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2759  continue;
2760 
2761  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2762  relattnos = groupbyattnos[relid];
2763  if (bms_membership(relattnos) != BMS_MULTIPLE)
2764  continue;
2765 
2766  /*
2767  * Can't remove any columns for this rel if there is no suitable
2768  * (i.e., nondeferrable) primary key constraint.
2769  */
2770  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2771  if (pkattnos == NULL)
2772  continue;
2773 
2774  /*
2775  * If the primary key is a proper subset of relattnos then we have
2776  * some items in the GROUP BY that can be removed.
2777  */
2778  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2779  {
2780  /*
2781  * To easily remember whether we've found anything to do, we don't
2782  * allocate the surplusvars[] array until we find something.
2783  */
2784  if (surplusvars == NULL)
2785  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2786  (list_length(parse->rtable) + 1));
2787 
2788  /* Remember the attnos of the removable columns */
2789  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2790  }
2791  }
2792 
2793  /*
2794  * If we found any surplus Vars, build a new GROUP BY clause without them.
2795  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2796  * markings, but that's harmless.)
2797  */
2798  if (surplusvars != NULL)
2799  {
2800  List *new_groupby = NIL;
2801 
2802  foreach(lc, root->processed_groupClause)
 2803  {
 2804  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2805  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2806  Var *var = (Var *) tle->expr;
2807 
2808  /*
2809  * New list must include non-Vars, outer Vars, and anything not
2810  * marked as surplus.
2811  */
2812  if (!IsA(var, Var) ||
 2813  var->varlevelsup > 0 ||
 2814  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2815  surplusvars[var->varno]))
2816  new_groupby = lappend(new_groupby, sgc);
2817  }
2818 
2819  root->processed_groupClause = new_groupby;
2820  }
2821 }
2822 
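/*
 * [Editor's illustrative example -- not part of the upstream source; the
 * table is hypothetical.]  Given "CREATE TABLE t (id int PRIMARY KEY,
 * name text)" and "SELECT id, name, count(*) FROM t GROUP BY id, name",
 * the code above detects that the pkey {id} is a proper subset of the
 * grouped columns {id, name} and drops "name" from processed_groupClause,
 * so the grouping step only needs to sort or hash on id.
 */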
2823 /*
2824  * groupclause_apply_groupingset
2825  * Apply the order of GROUP BY clauses defined by grouping sets. Items
2826  * not in the grouping set are skipped.
2827  */
 2828 static List *
 2829 groupclause_apply_groupingset(PlannerInfo *root, List *gset)
2830 {
2831  Query *parse = root->parse;
2832  List *new_groupclause = NIL;
2833  ListCell *sl;
2834 
2835  foreach(sl, gset)
2836  {
2837  Index ref = lfirst_int(sl);
2838  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2839 
2840  new_groupclause = lappend(new_groupclause, cl);
2841  }
2842  return new_groupclause;
2843 }
2844 
2845 /*
2846  * Extract lists of grouping sets that can be implemented using a single
2847  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2848  *
2849  * Input must be sorted with smallest sets first. Result has each sublist
2850  * sorted with smallest sets first.
2851  *
2852  * We want to produce the absolute minimum possible number of lists here to
2853  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2854  * of finding the minimal partition of a partially-ordered set into chains
2855  * (which is what we need, taking the list of grouping sets as a poset ordered
2856  * by set inclusion) can be mapped to the problem of finding the maximum
2857  * cardinality matching on a bipartite graph, which is solvable in polynomial
 2858  * time, with a worst case of O(n^2.5) and usually much
2859  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2860  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2861  * half a second on my modest system even with optimization off and assertions
2862  * on.)
2863  */
 2864 static List *
 2865 extract_rollup_sets(List *groupingSets)
2866 {
2867  int num_sets_raw = list_length(groupingSets);
2868  int num_empty = 0;
2869  int num_sets = 0; /* distinct sets */
2870  int num_chains = 0;
2871  List *result = NIL;
2872  List **results;
2873  List **orig_sets;
2874  Bitmapset **set_masks;
2875  int *chains;
2876  short **adjacency;
 2877  short *adjacency_buf;
 2878  BipartiteMatchState *state;
2879  int i;
2880  int j;
2881  int j_size;
2882  ListCell *lc1 = list_head(groupingSets);
2883  ListCell *lc;
2884 
2885  /*
2886  * Start by stripping out empty sets. The algorithm doesn't require this,
2887  * but the planner currently needs all empty sets to be returned in the
2888  * first list, so we strip them here and add them back after.
2889  */
2890  while (lc1 && lfirst(lc1) == NIL)
2891  {
2892  ++num_empty;
2893  lc1 = lnext(groupingSets, lc1);
2894  }
2895 
2896  /* bail out now if it turns out that all we had were empty sets. */
2897  if (!lc1)
2898  return list_make1(groupingSets);
2899 
2900  /*----------
2901  * We don't strictly need to remove duplicate sets here, but if we don't,
2902  * they tend to become scattered through the result, which is a bit
2903  * confusing (and irritating if we ever decide to optimize them out).
2904  * So we remove them here and add them back after.
2905  *
2906  * For each non-duplicate set, we fill in the following:
2907  *
2908  * orig_sets[i] = list of the original set lists
2909  * set_masks[i] = bitmapset for testing inclusion
2910  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2911  *
2912  * chains[i] will be the result group this set is assigned to.
2913  *
2914  * We index all of these from 1 rather than 0 because it is convenient
2915  * to leave 0 free for the NIL node in the graph algorithm.
2916  *----------
2917  */
2918  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2919  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2920  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2921  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2922 
2923  j_size = 0;
2924  j = 0;
2925  i = 1;
2926 
2927  for_each_cell(lc, groupingSets, lc1)
2928  {
2929  List *candidate = (List *) lfirst(lc);
2930  Bitmapset *candidate_set = NULL;
2931  ListCell *lc2;
2932  int dup_of = 0;
2933 
2934  foreach(lc2, candidate)
2935  {
2936  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2937  }
2938 
2939  /* we can only be a dup if we're the same length as a previous set */
2940  if (j_size == list_length(candidate))
2941  {
2942  int k;
2943 
2944  for (k = j; k < i; ++k)
2945  {
2946  if (bms_equal(set_masks[k], candidate_set))
2947  {
2948  dup_of = k;
2949  break;
2950  }
2951  }
2952  }
2953  else if (j_size < list_length(candidate))
2954  {
2955  j_size = list_length(candidate);
2956  j = i;
2957  }
2958 
2959  if (dup_of > 0)
2960  {
2961  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2962  bms_free(candidate_set);
2963  }
2964  else
2965  {
2966  int k;
2967  int n_adj = 0;
2968 
2969  orig_sets[i] = list_make1(candidate);
2970  set_masks[i] = candidate_set;
2971 
2972  /* fill in adjacency list; no need to compare equal-size sets */
2973 
2974  for (k = j - 1; k > 0; --k)
2975  {
2976  if (bms_is_subset(set_masks[k], candidate_set))
2977  adjacency_buf[++n_adj] = k;
2978  }
2979 
2980  if (n_adj > 0)
2981  {
2982  adjacency_buf[0] = n_adj;
2983  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2984  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2985  }
2986  else
2987  adjacency[i] = NULL;
2988 
2989  ++i;
2990  }
2991  }
2992 
2993  num_sets = i - 1;
2994 
2995  /*
2996  * Apply the graph matching algorithm to do the work.
2997  */
2998  state = BipartiteMatch(num_sets, num_sets, adjacency);
2999 
3000  /*
3001  * Now, the state->pair* fields have the info we need to assign sets to
3002  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3003  * pair_vu[v] = u (both will be true, but we check both so that we can do
3004  * it in one pass)
3005  */
3006  chains = palloc0((num_sets + 1) * sizeof(int));
3007 
3008  for (i = 1; i <= num_sets; ++i)
3009  {
3010  int u = state->pair_vu[i];
3011  int v = state->pair_uv[i];
3012 
3013  if (u > 0 && u < i)
3014  chains[i] = chains[u];
3015  else if (v > 0 && v < i)
3016  chains[i] = chains[v];
3017  else
3018  chains[i] = ++num_chains;
3019  }
3020 
3021  /* build result lists. */
3022  results = palloc0((num_chains + 1) * sizeof(List *));
3023 
3024  for (i = 1; i <= num_sets; ++i)
3025  {
3026  int c = chains[i];
3027 
3028  Assert(c > 0);
3029 
3030  results[c] = list_concat(results[c], orig_sets[i]);
3031  }
3032 
3033  /* push any empty sets back on the first list. */
3034  while (num_empty-- > 0)
3035  results[1] = lcons(NIL, results[1]);
3036 
3037  /* make result list */
3038  for (i = 1; i <= num_chains; ++i)
3039  result = lappend(result, results[i]);
3040 
3041  /*
3042  * Free all the things.
3043  *
3044  * (This is over-fussy for small sets but for large sets we could have
3045  * tied up a nontrivial amount of memory.)
 3046  */
 3047  BipartiteMatchFree(state);
3048  pfree(results);
3049  pfree(chains);
3050  for (i = 1; i <= num_sets; ++i)
3051  if (adjacency[i])
3052  pfree(adjacency[i]);
3053  pfree(adjacency);
3054  pfree(adjacency_buf);
3055  pfree(orig_sets);
3056  for (i = 1; i <= num_sets; ++i)
3057  bms_free(set_masks[i]);
3058  pfree(set_masks);
3059 
3060  return result;
3061 }
3062 
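/*
 * [Editor's illustrative worked example -- not part of the upstream source.]
 * For GROUPING SETS ((a,b), (b,c), (a), (c)), the sets form a poset under
 * inclusion with (a) < (a,b) and (c) < (b,c); the bipartite matching pairs
 * each smaller set with a superset, yielding the two chains [(a), (a,b)] and
 * [(c), (b,c)], so two sorted aggregation passes suffice instead of four.
 */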
3063 /*
3064  * Reorder the elements of a list of grouping sets such that they have correct
3065  * prefix relationships. Also inserts the GroupingSetData annotations.
3066  *
3067  * The input must be ordered with smallest sets first; the result is returned
3068  * with largest sets first. Note that the result shares no list substructure
3069  * with the input, so it's safe for the caller to modify it later.
3070  *
3071  * If we're passed in a sortclause, we follow its order of columns to the
3072  * extent possible, to minimize the chance that we add unnecessary sorts.
3073  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3074  * gets implemented in one pass.)
3075  */
3076 static List *
3077 reorder_grouping_sets(List *groupingSets, List *sortclause)
3078 {
3079  ListCell *lc;
3080  List *previous = NIL;
3081  List *result = NIL;
3082 
3083  foreach(lc, groupingSets)
3084  {
3085  List *candidate = (List *) lfirst(lc);
 3086  List *new_elems = list_difference_int(candidate, previous);
 3087  GroupingSetData *gs = makeNode(GroupingSetData);
3088 
3089  while (list_length(sortclause) > list_length(previous) &&
3090  new_elems != NIL)
3091  {
3092  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3093  int ref = sc->tleSortGroupRef;
3094 
3095  if (list_member_int(new_elems, ref))
3096  {
3097  previous = lappend_int(previous, ref);
3098  new_elems = list_delete_int(new_elems, ref);
3099  }
3100  else
3101  {
3102  /* diverged from the sortclause; give up on it */
3103  sortclause = NIL;
3104  break;
3105  }
3106  }
3107 
3108  previous = list_concat(previous, new_elems);
3109 
3110  gs->set = list_copy(previous);
3111  result = lcons(gs, result);
3112  }
3113 
3114  list_free(previous);
3115 
3116  return result;
3117 }
3118 
3119 /*
3120  * has_volatile_pathkey
3121  * Returns true if any PathKey in 'keys' has an EquivalenceClass
3122  * containing a volatile function. Otherwise returns false.
3123  */
 3124 static bool
 3125 has_volatile_pathkey(List *keys)
3126 {
3127  ListCell *lc;
3128 
3129  foreach(lc, keys)
3130  {
3131  PathKey *pathkey = lfirst_node(PathKey, lc);
3132 
3133  if (pathkey->pk_eclass->ec_has_volatile)
3134  return true;
3135  }
3136 
3137  return false;
3138 }
3139 
3140 /*
3141  * adjust_group_pathkeys_for_groupagg
3142  * Add pathkeys to root->group_pathkeys to reflect the best set of
3143  * pre-ordered input for ordered aggregates.
3144  *
3145  * We define "best" as the pathkeys that suit the largest number of
3146  * aggregate functions. We find these by looking at the first ORDER BY /
3147  * DISTINCT aggregate and take the pathkeys for that before searching for
3148  * other aggregates that require the same or a more strict variation of the
3149  * same pathkeys. We then repeat that process for any remaining aggregates
3150  * with different pathkeys and if we find another set of pathkeys that suits a
3151  * larger number of aggregates then we select those pathkeys instead.
3152  *
3153  * When the best pathkeys are found we also mark each Aggref that can use
3154  * those pathkeys as aggpresorted = true.
3155  *
3156  * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
3157  * volatile functions, we never make use of these pathkeys. We want to ensure
3158  * that sorts using volatile functions are done independently in each Aggref
3159  * rather than once at the query level. If we were to allow this then Aggrefs
3160  * with compatible sort orders would all transition their rows in the same
3161  * order if those pathkeys were deemed to be the best pathkeys to sort on.
3162  * Whereas, if some other set of Aggref's pathkeys happened to be deemed
3163  * better pathkeys to sort on, then the volatile function Aggrefs would be
3164  * left to perform their sorts individually. To avoid this inconsistent
3165  * behavior which could make Aggref results depend on what other Aggrefs the
3166  * query contains, we always force Aggrefs with volatile functions to perform
3167  * their own sorts.
3168  */
 3169 static void
 3170 adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
3171 {
3172  List *grouppathkeys = root->group_pathkeys;
3173  List *bestpathkeys;
3174  Bitmapset *bestaggs;
3175  Bitmapset *unprocessed_aggs;
3176  ListCell *lc;
3177  int i;
3178 
3179  /* Shouldn't be here if there are grouping sets */
3180  Assert(root->parse->groupingSets == NIL);
3181  /* Shouldn't be here unless there are some ordered aggregates */
3182  Assert(root->numOrderedAggs > 0);
3183 
 3184  /* Do nothing if disabled */
 3185  if (!enable_presorted_aggregate)
3186  return;
3187 
3188  /*
3189  * Make a first pass over all AggInfos to collect a Bitmapset containing
3190  * the indexes of all AggInfos to be processed below.
3191  */
3192  unprocessed_aggs = NULL;
3193  foreach(lc, root->agginfos)
3194  {
3195  AggInfo *agginfo = lfirst_node(AggInfo, lc);
3196  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3197 
3198  if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
3199  continue;
3200 
3201  /* only add aggregates with a DISTINCT or ORDER BY */
3202  if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
3203  unprocessed_aggs = bms_add_member(unprocessed_aggs,
3204  foreach_current_index(lc));
3205  }
3206 
3207  /*
3208  * Now process all the unprocessed_aggs to find the best set of pathkeys
3209  * for the given set of aggregates.
3210  *
3211  * On the first outer loop here 'bestaggs' will be empty. We'll populate
3212  * this during the first loop using the pathkeys for the very first
3213  * AggInfo then taking any stronger pathkeys from any other AggInfos with
3214  * a more strict set of compatible pathkeys. Once the outer loop is
3215  * complete, we mark off all the aggregates with compatible pathkeys then
3216  * remove those from the unprocessed_aggs and repeat the process to try to
3217  * find another set of pathkeys that are suitable for a larger number of
3218  * aggregates. The outer loop will stop when there are not enough
3219  * unprocessed aggregates for it to be possible to find a set of pathkeys
3220  * to suit a larger number of aggregates.
3221  */
3222  bestpathkeys = NIL;
3223  bestaggs = NULL;
3224  while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
3225  {
3226  Bitmapset *aggindexes = NULL;
3227  List *currpathkeys = NIL;
3228 
3229  i = -1;
3230  while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
3231  {
3232  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3233  Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
3234  List *sortlist;
3235  List *pathkeys;
3236 
3237  if (aggref->aggdistinct != NIL)
3238  sortlist = aggref->aggdistinct;
3239  else
3240  sortlist = aggref->aggorder;
3241 
3242  pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
3243  aggref->args);
3244 
3245  /*
3246  * Ignore Aggrefs which have volatile functions in their ORDER BY
3247  * or DISTINCT clause.
3248  */
3249  if (has_volatile_pathkey(pathkeys))
3250  {
3251  unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
3252  continue;
3253  }
3254 
3255  /*
3256  * When not set yet, take the pathkeys from the first unprocessed
3257  * aggregate.
3258  */
3259  if (currpathkeys == NIL)
3260  {
3261  currpathkeys = pathkeys;
3262 
3263  /* include the GROUP BY pathkeys, if they exist */
3264  if (grouppathkeys != NIL)
3265  currpathkeys = append_pathkeys(list_copy(grouppathkeys),
3266  currpathkeys);
3267 
3268  /* record that we found pathkeys for this aggregate */
3269  aggindexes = bms_add_member(aggindexes, i);
3270  }
3271  else
3272  {
3273  /* now look for a stronger set of matching pathkeys */
3274 
3275  /* include the GROUP BY pathkeys, if they exist */
3276  if (grouppathkeys != NIL)
3277  pathkeys = append_pathkeys(list_copy(grouppathkeys),
3278  pathkeys);
3279 
3280  /* are 'pathkeys' compatible or better than 'currpathkeys'? */
3281  switch (compare_pathkeys(currpathkeys, pathkeys))
3282  {
3283  case PATHKEYS_BETTER2:
3284  /* 'pathkeys' are stronger, use these ones instead */
3285  currpathkeys = pathkeys;
3286  /* FALLTHROUGH */
3287 
3288  case PATHKEYS_BETTER1:
3289  /* 'pathkeys' are less strict */
3290  /* FALLTHROUGH */
3291 
3292  case PATHKEYS_EQUAL:
3293  /* mark this aggregate as covered by 'currpathkeys' */
3294  aggindexes = bms_add_member(aggindexes, i);
3295  break;
3296 
3297  case PATHKEYS_DIFFERENT:
3298  break;
3299  }
3300  }
3301  }
3302 
3303  /* remove the aggregates that we've just processed */
3304  unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
3305 
3306  /*
3307  * If this pass included more aggregates than the previous best then
3308  * use these ones as the best set.
3309  */
3310  if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
3311  {
3312  bestaggs = aggindexes;
3313  bestpathkeys = currpathkeys;
3314  }
3315  }
3316 
3317  /*
3318  * If we found any ordered aggregates, update root->group_pathkeys to add
3319  * the best set of aggregate pathkeys. Note that bestpathkeys includes
3320  * the original GROUP BY pathkeys already.
3321  */
3322  if (bestpathkeys != NIL)
3323  root->group_pathkeys = bestpathkeys;
3324 
3325  /*
3326  * Now that we've found the best set of aggregates we can set the
3327  * presorted flag to indicate to the executor that it needn't bother
3328  * performing a sort for these Aggrefs. We're able to do this now as
3329  * there's no chance of a Hash Aggregate plan as create_grouping_paths
3330  * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
3331  * of ordered aggregates.
3332  */
3333  i = -1;
3334  while ((i = bms_next_member(bestaggs, i)) >= 0)
3335  {
3336  AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i);
3337 
3338  foreach(lc, agginfo->aggrefs)
3339  {
3340  Aggref *aggref = lfirst_node(Aggref, lc);
3341 
3342  aggref->aggpresorted = true;
3343  }
3344  }
3345 }
3346 
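/*
 * [Editor's illustrative example -- not part of the upstream source; the
 * table and columns are hypothetical.]  For
 * "SELECT string_agg(x, ',' ORDER BY x), count(*) FROM t GROUP BY g",
 * the code above extends group_pathkeys from (g) to (g, x) and marks the
 * string_agg Aggref as aggpresorted, so a single sort on (g, x) feeds both
 * the grouping and the ordered aggregate and the executor can skip the
 * per-Aggref sort.
 */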
3347 /*
3348  * Compute query_pathkeys and other pathkeys during plan generation
3349  */
 3350 static void
 3351 standard_qp_callback(PlannerInfo *root, void *extra)
3352 {
3353  Query *parse = root->parse;
3354  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3355  List *tlist = root->processed_tlist;
3356  List *activeWindows = qp_extra->activeWindows;
3357 
3358  /*
3359  * Calculate pathkeys that represent grouping/ordering and/or ordered
3360  * aggregate requirements.
3361  */
3362  if (qp_extra->gset_data)
3363  {
3364  /*
3365  * With grouping sets, just use the first RollupData's groupClause. We
3366  * don't make any effort to optimize grouping clauses when there are
3367  * grouping sets, nor can we combine aggregate ordering keys with
3368  * grouping.
3369  */
3370  List *rollups = qp_extra->gset_data->rollups;
3371  List *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
3372 
3373  if (grouping_is_sortable(groupClause))
3374  {
3375  root->group_pathkeys = make_pathkeys_for_sortclauses(root,
3376  groupClause,
3377  tlist);
3378  root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3379  }
3380  else
3381  {
3382  root->group_pathkeys = NIL;
3383  root->num_groupby_pathkeys = 0;
3384  }
3385  }
3386  else if (parse->groupClause || root->numOrderedAggs > 0)
3387  {
3388  /*
3389  * With a plain GROUP BY list, we can remove any grouping items that
3390  * are proven redundant by EquivalenceClass processing. For example,
3391  * we can remove y given "WHERE x = y GROUP BY x, y". These aren't
3392  * especially common cases, but they're nearly free to detect. Note
3393  * that we remove redundant items from processed_groupClause but not
3394  * the original parse->groupClause.
3395  */
3396  bool sortable;
3397 
 3398  root->group_pathkeys =
 3399  make_pathkeys_for_sortclauses_extended(root,
3400  &root->processed_groupClause,
3401  tlist,
3402  true,
3403  &sortable);
3404  if (!sortable)
3405  {
3406  /* Can't sort; no point in considering aggregate ordering either */
3407  root->group_pathkeys = NIL;
3408  root->num_groupby_pathkeys = 0;
3409  }
3410  else
3411  {
3412  root->num_groupby_pathkeys = list_length(root->group_pathkeys);
3413  /* If we have ordered aggs, consider adding onto group_pathkeys */
 3414  if (root->numOrderedAggs > 0)
 3415  adjust_group_pathkeys_for_groupagg(root);
3416  }
3417  }
3418  else
3419  {
3420  root->group_pathkeys = NIL;
3421  root->num_groupby_pathkeys = 0;
3422  }
3423 
3424  /* We consider only the first (bottom) window in pathkeys logic */
3425  if (activeWindows != NIL)
3426  {
3427  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3428 
3429  root->window_pathkeys = make_pathkeys_for_window(root,
3430  wc,
3431  tlist);
3432  }
3433  else
3434  root->window_pathkeys = NIL;
3435 
3436  /*
3437  * As with GROUP BY, we can discard any DISTINCT items that are proven
3438  * redundant by EquivalenceClass processing. The non-redundant list is
3439  * kept in root->processed_distinctClause, leaving the original
3440  * parse->distinctClause alone.
3441  */
3442  if (parse->distinctClause)
3443  {
3444  bool sortable;
3445 
3446  /* Make a copy since pathkey processing can modify the list */
3447  root->processed_distinctClause = list_copy(parse->distinctClause);
3448  root->distinct_pathkeys =
3449  make_pathkeys_for_sortclauses_extended(root,
3450  &root->processed_distinctClause,
3451  tlist,
3452  true,
3453  &sortable);
3454  if (!sortable)
3455  root->distinct_pathkeys = NIL;
3456  }
3457  else
3458  root->distinct_pathkeys = NIL;
3459 
3460  root->sort_pathkeys =
3461  make_pathkeys_for_sortclauses(root,
3462  parse->sortClause,
3463  tlist);
3464 
3465  /* setting setop_pathkeys might be useful to the union planner */
3466  if (qp_extra->setop != NULL &&
3467  set_operation_ordered_results_useful(qp_extra->setop))
3468  {
3469  List *groupClauses;
3470  bool sortable;
3471 
3472  groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
3473 
3474  root->setop_pathkeys =
3475  make_pathkeys_for_sortclauses_extended(root,
3476  &groupClauses,
3477  tlist,
3478  false,
3479  &sortable);
3480  if (!sortable)
3481  root->setop_pathkeys = NIL;
3482  }
3483  else
3484  root->setop_pathkeys = NIL;
3485 
3486  /*
3487  * Figure out whether we want a sorted result from query_planner.
3488  *
3489  * If we have a sortable GROUP BY clause, then we want a result sorted
3490  * properly for grouping. Otherwise, if we have window functions to
3491  * evaluate, we try to sort for the first window. Otherwise, if there's a
3492  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3493  * we try to produce output that's sufficiently well sorted for the
3494  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3495  * by the ORDER BY clause. Otherwise, if we're a subquery being planned
3496  * for a set operation which can benefit from presorted results and have a
3497  * sortable targetlist, we want to sort by the target list.
3498  *
3499  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3500  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3501  * that might just leave us failing to exploit an available sort order at
3502  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3503  * much easier, since we know that the parser ensured that one is a
3504  * superset of the other.
3505  */
3506  if (root->group_pathkeys)
3507  root->query_pathkeys = root->group_pathkeys;
3508  else if (root->window_pathkeys)
3509  root->query_pathkeys = root->window_pathkeys;
3510  else if (list_length(root->distinct_pathkeys) >
3511  list_length(root->sort_pathkeys))
3512  root->query_pathkeys = root->distinct_pathkeys;
3513  else if (root->sort_pathkeys)
3514  root->query_pathkeys = root->sort_pathkeys;
3515  else if (root->setop_pathkeys != NIL)
3516  root->query_pathkeys = root->setop_pathkeys;
3517  else
3518  root->query_pathkeys = NIL;
3519 }
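/*
 * Illustrative note (hypothetical table/column names): given the precedence
 * above, a query like
 *     SELECT a, count(*) FROM t GROUP BY a ORDER BY count(*) DESC;
 * gets query_pathkeys = group_pathkeys (sorted on "a"), the ORDER BY being
 * handled later; while
 *     SELECT DISTINCT a, b FROM t ORDER BY a;
 * prefers distinct_pathkeys (a, b), since it is more rigorous than the
 * ORDER BY.
 */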
3520 
3521 /*
3522  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3523  *
3524  * path_rows: number of output rows from scan/join step
3525  * gd: grouping sets data including list of grouping sets and their clauses
3526  * target_list: target list containing group clause references
3527  *
3528  * If doing grouping sets, we also annotate the gsets data with the estimates
3529  * for each set and each individual rollup list, with a view to later
3530  * determining whether some combination of them could be hashed instead.
3531  */
3532 static double
3533 get_number_of_groups(PlannerInfo *root,
3534  double path_rows,
3535  grouping_sets_data *gd,
3536  List *target_list)
3537 {
3538  Query *parse = root->parse;
3539  double dNumGroups;
3540 
3541  if (parse->groupClause)
3542  {
3543  List *groupExprs;
3544 
3545  if (parse->groupingSets)
3546  {
3547  /* Add up the estimates for each grouping set */
3548  ListCell *lc;
3549 
3550  Assert(gd); /* keep Coverity happy */
3551 
3552  dNumGroups = 0;
3553 
3554  foreach(lc, gd->rollups)
3555  {
3556  RollupData *rollup = lfirst_node(RollupData, lc);
3557  ListCell *lc2;
3558  ListCell *lc3;
3559 
3560  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3561  target_list);
3562 
3563  rollup->numGroups = 0.0;
3564 
3565  forboth(lc2, rollup->gsets, lc3, rollup->gsets_data)
3566  {
3567  List *gset = (List *) lfirst(lc2);
3568  GroupingSetData *gs = lfirst_node(GroupingSetData, lc3);
3569  double numGroups = estimate_num_groups(root,
3570  groupExprs,
3571  path_rows,
3572  &gset,
3573  NULL);
3574 
3575  gs->numGroups = numGroups;
3576  rollup->numGroups += numGroups;
3577  }
3578 
3579  dNumGroups += rollup->numGroups;
3580  }
3581 
3582  if (gd->hash_sets_idx)
3583  {
3584  ListCell *lc2;
3585 
3586  gd->dNumHashGroups = 0;
3587 
3588  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3589  target_list);
3590 
3591  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3592  {
3593  List *gset = (List *) lfirst(lc);
3594  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3595  double numGroups = estimate_num_groups(root,
3596  groupExprs,
3597  path_rows,
3598  &gset,
3599  NULL);
3600 
3601  gs->numGroups = numGroups;
3602  gd->dNumHashGroups += numGroups;
3603  }
3604 
3605  dNumGroups += gd->dNumHashGroups;
3606  }
3607  }
3608  else
3609  {
3610  /* Plain GROUP BY -- estimate based on optimized groupClause */
3611  groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
3612  target_list);
3613 
3614  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3615  NULL, NULL);
3616  }
3617  }
3618  else if (parse->groupingSets)
3619  {
3620  /* Empty grouping sets ... one result row for each one */
3621  dNumGroups = list_length(parse->groupingSets);
3622  }
3623  else if (parse->hasAggs || root->hasHavingQual)
3624  {
3625  /* Plain aggregation, one result row */
3626  dNumGroups = 1;
3627  }
3628  else
3629  {
3630  /* Not grouping */
3631  dNumGroups = 1;
3632  }
3633 
3634  return dNumGroups;
3635 }
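/*
 * For illustration, with made-up statistics: given
 *     GROUP BY GROUPING SETS ((a), (b), ())
 * each set is estimated separately and the results summed, so if "a" is
 * expected to produce 100 groups and "b" 20 groups, dNumGroups comes out as
 * 100 + 20 + 1, the empty set contributing one group.
 */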
3636 
3637 /*
3638  * create_grouping_paths
3639  *
3640  * Build a new upperrel containing Paths for grouping and/or aggregation.
3641  * Along the way, we also build an upperrel for Paths which are partially
3642  * grouped and/or aggregated. A partially grouped and/or aggregated path
3643  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3644  * the only partially grouped paths we build are also partial paths; that
3645  * is, they need a Gather and then a FinalizeAggregate.
3646  *
3647  * input_rel: contains the source-data Paths
3648  * target: the pathtarget for the result Paths to compute
3649  * gd: grouping sets data including list of grouping sets and their clauses
3650  *
3651  * Note: all Paths in input_rel are expected to return the target computed
3652  * by make_group_input_target.
3653  */
3654 static RelOptInfo *
3655 create_grouping_paths(PlannerInfo *root,
3656  RelOptInfo *input_rel,
3657  PathTarget *target,
3658  bool target_parallel_safe,
3659  grouping_sets_data *gd)
3660 {
3661  Query *parse = root->parse;
3662  RelOptInfo *grouped_rel;
3663  RelOptInfo *partially_grouped_rel;
3664  AggClauseCosts agg_costs;
3665 
3666  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3667  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3668 
3669  /*
3670  * Create grouping relation to hold fully aggregated grouping and/or
3671  * aggregation paths.
3672  */
3673  grouped_rel = make_grouping_rel(root, input_rel, target,
3674  target_parallel_safe, parse->havingQual);
3675 
3676  /*
3677  * Create either paths for a degenerate grouping or paths for ordinary
3678  * grouping, as appropriate.
3679  */
3680  if (is_degenerate_grouping(root))
3681  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3682  else
3683  {
3684  int flags = 0;
3685  GroupPathExtraData extra;
3686 
3687  /*
3688  * Determine whether it's possible to perform sort-based
3689  * implementations of grouping. (Note that if processed_groupClause
3690  * is empty, grouping_is_sortable() is trivially true, and all the
3691  * pathkeys_contained_in() tests will succeed too, so that we'll
3692  * consider every surviving input path.)
3693  *
3694  * If we have grouping sets, we might be able to sort some but not all
3695  * of them; in this case, we need can_sort to be true as long as we
3696  * must consider any sorted-input plan.
3697  */
3698  if ((gd && gd->rollups != NIL)
3699  || grouping_is_sortable(root->processed_groupClause))
3700  flags |= GROUPING_CAN_USE_SORT;
3701 
3702  /*
3703  * Determine whether we should consider hash-based implementations of
3704  * grouping.
3705  *
3706  * Hashed aggregation only applies if we're grouping. If we have
3707  * grouping sets, some groups might be hashable but others not; in
3708  * this case we set can_hash true as long as there is nothing globally
3709  * preventing us from hashing (and we should therefore consider plans
3710  * with hashes).
3711  *
3712  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3713  * BY aggregates. (Doing so would imply storing *all* the input
3714  * values in the hash table, and/or running many sorts in parallel,
3715  * either of which seems like a certain loser.) We similarly don't
3716  * support ordered-set aggregates in hashed aggregation, but that case
3717  * is also included in the numOrderedAggs count.
3718  *
3719  * Note: grouping_is_hashable() is much more expensive to check than
3720  * the other gating conditions, so we want to do it last.
3721  */
3722  if ((parse->groupClause != NIL &&
3723  root->numOrderedAggs == 0 &&
3724  (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
3725  flags |= GROUPING_CAN_USE_HASH;
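/*
 * Example (hypothetical query): something like
 *     SELECT a, array_agg(b ORDER BY b) FROM t GROUP BY a;
 * has numOrderedAggs > 0, so GROUPING_CAN_USE_HASH is not set above and
 * only sort-based grouping gets considered.
 */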
3726 
3727  /*
3728  * Determine whether partial aggregation is possible.
3729  */
3730  if (can_partial_agg(root))
3731  flags |= GROUPING_CAN_PARTIAL_AGG;
3732 
3733  extra.flags = flags;
3734  extra.target_parallel_safe = target_parallel_safe;
3735  extra.havingQual = parse->havingQual;
3736  extra.targetList = parse->targetList;
3737  extra.partial_costs_set = false;
3738 
3739  /*
3740  * Determine whether partitionwise aggregation is in theory possible.
3741  * It can be disabled by the user, and for now, we don't try to
3742  * support grouping sets. create_ordinary_grouping_paths() will check
3743  * additional conditions, such as whether input_rel is partitioned.
3744  */
3745  if (enable_partitionwise_aggregate && !parse->groupingSets)
3746  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3747  else
3748  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3749 
3750  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3751  &agg_costs, gd, &extra,
3752  &partially_grouped_rel);
3753  }
3754 
3755  set_cheapest(grouped_rel);
3756  return grouped_rel;
3757 }
3758 
3759 /*
3760  * make_grouping_rel
3761  *
3762  * Create a new grouping rel and set basic properties.
3763  *
3764  * input_rel represents the underlying scan/join relation.
3765  * target is the output expected from the grouping relation.
3766  */
3767 static RelOptInfo *
3768 make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3769  PathTarget *target, bool target_parallel_safe,
3770  Node *havingQual)
3771 {
3772  RelOptInfo *grouped_rel;
3773 
3774  if (IS_OTHER_REL(input_rel))
3775  {
3776  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3777  input_rel->relids);
3778  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3779  }
3780  else
3781  {
3782  /*
3783  * By tradition, the relids set for the main grouping relation is
3784  * NULL. (This could be changed, but might require adjustments
3785  * elsewhere.)
3786  */
3787  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3788  }
3789 
3790  /* Set target. */
3791  grouped_rel->reltarget = target;
3792 
3793  /*
3794  * If the input relation is not parallel-safe, then the grouped relation
3795  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3796  * target list and HAVING quals are parallel-safe.
3797  */
3798  if (input_rel->consider_parallel && target_parallel_safe &&
3799  is_parallel_safe(root, (Node *) havingQual))
3800  grouped_rel->consider_parallel = true;
3801 
3802  /*
3803  * If the input rel belongs to a single FDW, so does the grouped rel.
3804  */
3805  grouped_rel->serverid = input_rel->serverid;
3806  grouped_rel->userid = input_rel->userid;
3807  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3808  grouped_rel->fdwroutine = input_rel->fdwroutine;
3809 
3810  return grouped_rel;
3811 }
3812 
3813 /*
3814  * is_degenerate_grouping
3815  *
3816  * A degenerate grouping is one in which the query has a HAVING qual and/or
3817  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3818  * grouping sets are all empty).
3819  */
3820 static bool
3821 is_degenerate_grouping(PlannerInfo *root)
3822 {
3823  Query *parse = root->parse;
3824 
3825  return (root->hasHavingQual || parse->groupingSets) &&
3826  !parse->hasAggs && parse->groupClause == NIL;
3827 }
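/*
 * Hypothetical example: "SELECT 1 HAVING false" is a degenerate grouping;
 * it has a HAVING qual but no aggregates and no GROUP BY, so it yields
 * either zero or one row depending solely on the qual.
 */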
3828 
3829 /*
3830  * create_degenerate_grouping_paths
3831  *
3832  * When the grouping is degenerate (see is_degenerate_grouping), we are
3833  * supposed to emit either zero or one row for each grouping set depending on
3834  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3835  * either HAVING or the targetlist, so we actually do not need the FROM table
3836  * at all! We can just throw away the plan-so-far and generate a Result node.
3837  * This is a sufficiently unusual corner case that it's not worth contorting
3838  * the structure of this module to avoid having to generate the earlier paths
3839  * in the first place.
3840  */
3841 static void
3842 create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3843  RelOptInfo *grouped_rel)
3844 {
3845  Query *parse = root->parse;
3846  int nrows;
3847  Path *path;
3848 
3849  nrows = list_length(parse->groupingSets);
3850  if (nrows > 1)
3851  {
3852  /*
3853  * Doesn't seem worthwhile writing code to cons up a generate_series
3854  * or a values scan to emit multiple rows. Instead just make N clones
3855  * and append them. (With a volatile HAVING clause, this means you
3856  * might get between 0 and N output rows. Offhand I think that's
3857  * desired.)
3858  */
3859  List *paths = NIL;
3860 
3861  while (--nrows >= 0)
3862  {
3863  path = (Path *)
3864  create_group_result_path(root, grouped_rel,
3865  grouped_rel->reltarget,
3866  (List *) parse->havingQual);
3867  paths = lappend(paths, path);
3868  }
3869  path = (Path *)
3870  create_append_path(root,
3871  grouped_rel,
3872  paths,
3873  NIL,
3874  NIL,
3875  NULL,
3876  0,
3877  false,
3878  -1);
3879  }
3880  else
3881  {
3882  /* No grouping sets, or just one, so one output row */
3883  path = (Path *)
3884  create_group_result_path(root, grouped_rel,
3885  grouped_rel->reltarget,
3886  (List *) parse->havingQual);
3887  }
3888 
3889  add_path(grouped_rel, path);
3890 }
3891 
3892 /*
3893  * create_ordinary_grouping_paths
3894  *
3895  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3896  *
3897  * We need to consider sorted and hashed aggregation in the same function,
3898  * because otherwise (1) it would be harder to throw an appropriate error
3899  * message if neither way works, and (2) we should not allow hashtable size
3900  * considerations to dissuade us from using hashing if sorting is not possible.
3901  *
3902  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3903  * function creates, or to NULL if it doesn't create one.
3904  */
3905 static void
3906 create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3907  RelOptInfo *grouped_rel,
3908  const AggClauseCosts *agg_costs,
3909  grouping_sets_data *gd,
3910  GroupPathExtraData *extra,
3911  RelOptInfo **partially_grouped_rel_p)
3912 {
3913  Path *cheapest_path = input_rel->cheapest_total_path;
3914  RelOptInfo *partially_grouped_rel = NULL;
3915  double dNumGroups;
3916  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3917 
3918  /*
3919  * If this is the topmost grouping relation or if the parent relation is
3920  * doing some form of partitionwise aggregation, then we may be able to do
3921  * it at this level also. However, if the input relation is not
3922  * partitioned, partitionwise aggregate is impossible.
3923  */
3924  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3925  IS_PARTITIONED_REL(input_rel))
3926  {
3927  /*
3928  * If this is the topmost relation or if the parent relation is doing
3929  * full partitionwise aggregation, then we can do full partitionwise
3930  * aggregation provided that the GROUP BY clause contains all of the
3931  * partitioning columns at this level. Otherwise, we can do at most
3932  * partial partitionwise aggregation. But if partial aggregation is
3933  * not supported in general then we can't use it for partitionwise
3934  * aggregation either.
3935  *
3936  * Check parse->groupClause not processed_groupClause, because it's
3937  * okay if some of the partitioning columns were proved redundant.
3938  */
3939  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3940  group_by_has_partkey(input_rel, extra->targetList,
3941  root->parse->groupClause))
3942  patype = PARTITIONWISE_AGGREGATE_FULL;
3943  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3944  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3945  else
3946  patype = PARTITIONWISE_AGGREGATE_NONE;
3947  }
3948 
3949  /*
3950  * Before generating paths for grouped_rel, we first generate any possible
3951  * partially grouped paths; that way, later code can easily consider both
3952  * parallel and non-parallel approaches to grouping.
3953  */
3954  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3955  {
3956  bool force_rel_creation;
3957 
3958  /*
3959  * If we're doing partitionwise aggregation at this level, force
3960  * creation of a partially_grouped_rel so we can add partitionwise
3961  * paths to it.
3962  */
3963  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3964 
3965  partially_grouped_rel =
3966  create_partial_grouping_paths(root,
3967  grouped_rel,
3968  input_rel,
3969  gd,
3970  extra,
3971  force_rel_creation);
3972  }
3973 
3974  /* Set out parameter. */
3975  *partially_grouped_rel_p = partially_grouped_rel;
3976 
3977  /* Apply partitionwise aggregation technique, if possible. */
3978  if (patype != PARTITIONWISE_AGGREGATE_NONE)
3979  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3980  partially_grouped_rel, agg_costs,
3981  gd, patype, extra);
3982 
3983  /* If we are doing partial aggregation only, return. */
3984  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3985  {
3986  Assert(partially_grouped_rel);
3987 
3988  if (partially_grouped_rel->pathlist)
3989  set_cheapest(partially_grouped_rel);
3990 
3991  return;
3992  }
3993 
3994  /* Gather any partially grouped partial paths. */
3995  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
3996  {
3997  gather_grouping_paths(root, partially_grouped_rel);
3998  set_cheapest(partially_grouped_rel);
3999  }
4000 
4001  /*
4002  * Estimate number of groups.
4003  */
4004  dNumGroups = get_number_of_groups(root,
4005  cheapest_path->rows,
4006  gd,
4007  extra->targetList);
4008 
4009  /* Build final grouping paths */
4010  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4011  partially_grouped_rel, agg_costs, gd,
4012  dNumGroups, extra);
4013 
4014  /* Give a helpful error if we failed to find any implementation */
4015  if (grouped_rel->pathlist == NIL)
4016  ereport(ERROR,
4017  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4018  errmsg("could not implement GROUP BY"),
4019  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4020 
4021  /*
4022  * If there is an FDW that's responsible for all baserels of the query,
4023  * let it consider adding ForeignPaths.
4024  */
4025  if (grouped_rel->fdwroutine &&
4026  grouped_rel->fdwroutine->GetForeignUpperPaths)
4027  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4028  input_rel, grouped_rel,
4029  extra);
4030 
4031  /* Let extensions possibly add some more paths */
4032  if (create_upper_paths_hook)
4033  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4034  input_rel, grouped_rel,
4035  extra);
4036 }
4037 
4038 /*
4039  * For a given input path, consider the possible ways of doing grouping sets on
4040  * it, by combinations of hashing and sorting. This can be called multiple
4041  * times, so it's important that it not scribble on input. No result is
4042  * returned, but any generated paths are added to grouped_rel.
4043  */
4044 static void
4045 consider_groupingsets_paths(PlannerInfo *root,
4046  RelOptInfo *grouped_rel,
4047  Path *path,
4048  bool is_sorted,
4049  bool can_hash,
4050  grouping_sets_data *gd,
4051  const AggClauseCosts *agg_costs,
4052  double dNumGroups)
4053 {
4054  Query *parse = root->parse;
4055  Size hash_mem_limit = get_hash_memory_limit();
4056 
4057  /*
4058  * If we're not being offered sorted input, then only consider plans that
4059  * can be done entirely by hashing.
4060  *
4061  * We can hash everything if it looks like it'll fit in hash_mem. But if
4062  * the input is actually sorted despite not being advertised as such, we
4063  * prefer to make use of that in order to use less memory.
4064  *
4065  * If none of the grouping sets are sortable, then ignore the hash_mem
4066  * limit and generate a path anyway, since otherwise we'll just fail.
4067  */
4068  if (!is_sorted)
4069  {
4070  List *new_rollups = NIL;
4071  RollupData *unhashed_rollup = NULL;
4072  List *sets_data;
4073  List *empty_sets_data = NIL;
4074  List *empty_sets = NIL;
4075  ListCell *lc;
4076  ListCell *l_start = list_head(gd->rollups);
4077  AggStrategy strat = AGG_HASHED;
4078  double hashsize;
4079  double exclude_groups = 0.0;
4080 
4081  Assert(can_hash);
4082 
4083  /*
4084  * If the input is coincidentally sorted usefully (which can happen
4085  * even if is_sorted is false, since that only means that our caller
4086  * has set up the sorting for us), then save some hashtable space by
4087  * making use of that. But we need to watch out for degenerate cases:
4088  *
4089  * 1) If there are any empty grouping sets, then group_pathkeys might
4090  * be NIL if all non-empty grouping sets are unsortable. In this case,
4091  * there will be a rollup containing only empty groups, and the
4092  * pathkeys_contained_in test is vacuously true; this is ok.
4093  *
4094  * XXX: the above relies on the fact that group_pathkeys is generated
4095  * from the first rollup. If we add the ability to consider multiple
4096  * sort orders for grouping input, this assumption might fail.
4097  *
4098  * 2) If there are no empty sets and only unsortable sets, then the
4099  * rollups list will be empty (and thus l_start == NULL), and
4100  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4101  * pathkeys_contained_in test doesn't cause us to crash.
4102  */
4103  if (l_start != NULL &&
4104  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4105  {
4106  unhashed_rollup = lfirst_node(RollupData, l_start);
4107  exclude_groups = unhashed_rollup->numGroups;
4108  l_start = lnext(gd->rollups, l_start);
4109  }
4110 
4111  hashsize = estimate_hashagg_tablesize(root,
4112  path,
4113  agg_costs,
4114  dNumGroups - exclude_groups);
4115 
4116  /*
4117  * gd->rollups is empty if we have only unsortable columns to work
4118  * with. Override hash_mem in that case; otherwise, we'll rely on the
4119  * sorted-input case to generate usable mixed paths.
4120  */
4121  if (hashsize > hash_mem_limit && gd->rollups)
4122  return; /* nope, won't fit */
4123 
4124  /*
4125  * We need to burst the existing rollups list into individual grouping
4126  * sets and recompute a groupClause for each set.
4127  */
4128  sets_data = list_copy(gd->unsortable_sets);
4129 
4130  for_each_cell(lc, gd->rollups, l_start)
4131  {
4132  RollupData *rollup = lfirst_node(RollupData, lc);
4133 
4134  /*
4135  * If we find an unhashable rollup that's not been skipped by the
4136  * "actually sorted" check above, we can't cope; we'd need sorted
4137  * input (with a different sort order) but we can't get that here.
4138  * So bail out; we'll get a valid path from the is_sorted case
4139  * instead.
4140  *
4141  * The mere presence of empty grouping sets doesn't make a rollup
4142  * unhashable (see preprocess_grouping_sets), we handle those
4143  * specially below.
4144  */
4145  if (!rollup->hashable)
4146  return;
4147 
4148  sets_data = list_concat(sets_data, rollup->gsets_data);
4149  }
4150  foreach(lc, sets_data)
4151  {
4152  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4153  List *gset = gs->set;
4154  RollupData *rollup;
4155 
4156  if (gset == NIL)
4157  {
4158  /* Empty grouping sets can't be hashed. */
4159  empty_sets_data = lappend(empty_sets_data, gs);
4160  empty_sets = lappend(empty_sets, NIL);
4161  }
4162  else
4163  {
4164  rollup = makeNode(RollupData);
4165 
4166  rollup->groupClause = preprocess_groupclause(root, gset);
4167  rollup->gsets_data = list_make1(gs);
4168  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4169  rollup->gsets_data,
4170  gd->tleref_to_colnum_map);
4171  rollup->numGroups = gs->numGroups;
4172  rollup->hashable = true;
4173  rollup->is_hashed = true;
4174  new_rollups = lappend(new_rollups, rollup);
4175  }
4176  }
4177 
4178  /*
4179  * If we didn't find anything nonempty to hash, then bail. We'll
4180  * generate a path from the is_sorted case.
4181  */
4182  if (new_rollups == NIL)
4183  return;
4184 
4185  /*
4186  * If there were empty grouping sets they should have been in the
4187  * first rollup.
4188  */
4189  Assert(!unhashed_rollup || !empty_sets);
4190 
4191  if (unhashed_rollup)
4192  {
4193  new_rollups = lappend(new_rollups, unhashed_rollup);
4194  strat = AGG_MIXED;
4195  }
4196  else if (empty_sets)
4197  {
4198  RollupData *rollup = makeNode(RollupData);
4199 
4200  rollup->groupClause = NIL;
4201  rollup->gsets_data = empty_sets_data;
4202  rollup->gsets = empty_sets;
4203  rollup->numGroups = list_length(empty_sets);
4204  rollup->hashable = false;
4205  rollup->is_hashed = false;
4206  new_rollups = lappend(new_rollups, rollup);
4207  strat = AGG_MIXED;
4208  }
4209 
4210  add_path(grouped_rel, (Path *)
4211  create_groupingsets_path(root,
4212  grouped_rel,
4213  path,
4214  (List *) parse->havingQual,
4215  strat,
4216  new_rollups,
4217  agg_costs));
4218  return;
4219  }
4220 
4221  /*
4222  * If we have sorted input but nothing we can do with it, bail.
4223  */
4224  if (gd->rollups == NIL)
4225  return;
4226 
4227  /*
4228  * Given sorted input, we try and make two paths: one sorted and one mixed
4229  * sort/hash. (We need to try both because hashagg might be disabled, or
4230  * some columns might not be sortable.)
4231  *
4232  * can_hash is passed in as false if some obstacle elsewhere (such as
4233  * ordered aggs) means that we shouldn't consider hashing at all.
4234  */
4235  if (can_hash && gd->any_hashable)
4236  {
4237  List *rollups = NIL;
4238  List *hash_sets = list_copy(gd->unsortable_sets);
4239  double availspace = hash_mem_limit;
4240  ListCell *lc;
4241 
4242  /*
4243  * Account first for space needed for groups we can't sort at all.
4244  */
4245  availspace -= estimate_hashagg_tablesize(root,
4246  path,
4247  agg_costs,
4248  gd->dNumHashGroups);
4249 
4250  if (availspace > 0 && list_length(gd->rollups) > 1)
4251  {
4252  double scale;
4253  int num_rollups = list_length(gd->rollups);
4254  int k_capacity;
4255  int *k_weights = palloc(num_rollups * sizeof(int));
4256  Bitmapset *hash_items = NULL;
4257  int i;
4258 
4259  /*
4260  * We treat this as a knapsack problem: the knapsack capacity
4261  * represents hash_mem, the item weights are the estimated memory
4262  * usage of the hashtables needed to implement a single rollup,
4263  * and we really ought to use the cost saving as the item value;
4264  * however, currently the costs assigned to sort nodes don't
4265  * reflect the comparison costs well, and so we treat all items as
4266  * of equal value (each rollup we hash instead saves us one sort).
4267  *
4268  * To use the discrete knapsack, we need to scale the values to a
4269  * reasonably small bounded range. We choose to allow a 5% error
4270  * margin; we have no more than 4096 rollups in the worst possible
4271  * case, which with a 5% error margin will require a bit over 42MB
4272  * of workspace. (Anyone wanting to plan queries that complex had
4273  * better have the memory for it. In more reasonable cases, with
4274  * no more than a couple of dozen rollups, the memory usage will
4275  * be negligible.)
4276  *
4277  * k_capacity is naturally bounded, but we clamp the values for
4278  * scale and weight (below) to avoid overflows or underflows (or
4279  * uselessly trying to use a scale factor less than 1 byte).
4280  */
4281  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4282  k_capacity = (int) floor(availspace / scale);
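/*
 * Worked example with assumed numbers: if availspace is 8MB (8388608
 * bytes) and there are 4 rollups, scale = 8388608 / 80 = 104857.6 and
 * k_capacity = 80; each item weight is thus rounded to a multiple of
 * 1/(20 * num_rollups) of the available space, bounding the total
 * rounding error at roughly 5%.
 */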
4283 
4284  /*
4285  * We leave the first rollup out of consideration since it's the
4286  * one that matches the input sort order. We assign indexes "i"
4287  * to only those entries considered for hashing; the second loop,
4288  * below, must use the same condition.
4289  */
4290  i = 0;
4291  for_each_from(lc, gd->rollups, 1)
4292  {
4293  RollupData *rollup = lfirst_node(RollupData, lc);
4294 
4295  if (rollup->hashable)
4296  {
4297  double sz = estimate_hashagg_tablesize(root,
4298  path,
4299  agg_costs,
4300  rollup->numGroups);
4301 
4302  /*
4303  * If sz is enormous, but hash_mem (and hence scale) is
4304  * small, avoid integer overflow here.
4305  */
4306  k_weights[i] = (int) Min(floor(sz / scale),
4307  k_capacity + 1.0);
4308  ++i;
4309  }
4310  }
4311 
4312  /*
4313  * Apply knapsack algorithm; compute the set of items which
4314  * maximizes the value stored (in this case the number of sorts
4315  * saved) while keeping the total size (approximately) within
4316  * capacity.
4317  */
4318  if (i > 0)
4319  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4320 
4321  if (!bms_is_empty(hash_items))
4322  {
4323  rollups = list_make1(linitial(gd->rollups));
4324 
4325  i = 0;
4326  for_each_from(lc, gd->rollups, 1)
4327  {
4328  RollupData *rollup = lfirst_node(RollupData, lc);
4329 
4330  if (rollup->hashable)
4331  {
4332  if (bms_is_member(i, hash_items))
4333  hash_sets = list_concat(hash_sets,
4334  rollup->gsets_data);
4335  else
4336  rollups = lappend(rollups, rollup);
4337  ++i;
4338  }
4339  else
4340  rollups = lappend(rollups, rollup);
4341  }
4342  }
4343  }
4344 
4345  if (!rollups && hash_sets)
4346  rollups = list_copy(gd->rollups);
4347 
4348  foreach(lc, hash_sets)
4349  {
4350  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4351  RollupData *rollup = makeNode(RollupData);
4352 
4353  Assert(gs->set != NIL);
4354 
4355  rollup->groupClause = preprocess_groupclause(root, gs->set);
4356  rollup->gsets_data = list_make1(gs);
4357  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4358  rollup->gsets_data,
4359  gd->tleref_to_colnum_map);
4360  rollup->numGroups = gs->numGroups;
4361  rollup->hashable = true;
4362  rollup->is_hashed = true;
4363  rollups = lcons(rollup, rollups);
4364  }
4365 
4366  if (rollups)
4367  {
4368  add_path(grouped_rel, (Path *)
4369  create_groupingsets_path(root,
4370  grouped_rel,
4371  path,
4372  (List *) parse->havingQual,
4373  AGG_MIXED,
4374  rollups,
4375  agg_costs));
4376  }
4377  }
4378 
4379  /*
4380  * Now try the simple sorted case.
4381  */
4382  if (!gd->unsortable_sets)
4383  add_path(grouped_rel, (Path *)
4384  create_groupingsets_path(root,
4385  grouped_rel,
4386  path,
4387  (List *) parse->havingQual,
4388  AGG_SORTED,
4389  gd->rollups,
4390  agg_costs));
4391 }
4392 
4393 /*
4394  * create_window_paths
4395  *
4396  * Build a new upperrel containing Paths for window-function evaluation.
4397  *
4398  * input_rel: contains the source-data Paths
4399  * input_target: result of make_window_input_target
4400  * output_target: what the topmost WindowAggPath should return
4401  * wflists: result of find_window_functions
4402  * activeWindows: result of select_active_windows
4403  *
4404  * Note: all Paths in input_rel are expected to return input_target.
4405  */
4406 static RelOptInfo *
4407 create_window_paths(PlannerInfo *root,
4408  RelOptInfo *input_rel,
4409  PathTarget *input_target,
4410  PathTarget *output_target,
4411  bool output_target_parallel_safe,
4412  WindowFuncLists *wflists,
4413  List *activeWindows)
4414 {
4415  RelOptInfo *window_rel;
4416  ListCell *lc;
4417 
4418  /* For now, do all work in the (WINDOW, NULL) upperrel */
4419  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4420 
4421  /*
4422  * If the input relation is not parallel-safe, then the window relation
4423  * can't be parallel-safe, either. Otherwise, we need to examine the
4424  * target list and active windows for non-parallel-safe constructs.
4425  */
4426  if (input_rel->consider_parallel && output_target_parallel_safe &&
4427  is_parallel_safe(root, (Node *) activeWindows))
4428  window_rel->consider_parallel = true;
4429 
4430  /*
4431  * If the input rel belongs to a single FDW, so does the window rel.
4432  */
4433  window_rel->serverid = input_rel->serverid;
4434  window_rel->userid = input_rel->userid;
4435  window_rel->useridiscurrent = input_rel->useridiscurrent;
4436  window_rel->fdwroutine = input_rel->fdwroutine;
4437 
4438  /*
4439  * Consider computing window functions starting from the existing
4440  * cheapest-total path (which will likely require a sort) as well as any
4441  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4442  */
4443  foreach(lc, input_rel->pathlist)
4444  {
4445  Path *path = (Path *) lfirst(lc);
4446  int presorted_keys;
4447 
4448  if (path == input_rel->cheapest_total_path ||
4449  pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4450  &presorted_keys) ||
4451  presorted_keys > 0)
4452  create_one_window_path(root,
4453  window_rel,
4454  path,
4455  input_target,
4456  output_target,
4457  wflists,
4458  activeWindows);
4459  }
4460 
4461  /*
4462  * If there is an FDW that's responsible for all baserels of the query,
4463  * let it consider adding ForeignPaths.
4464  */
4465  if (window_rel->fdwroutine &&
4466  window_rel->fdwroutine->GetForeignUpperPaths)
4467  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4468  input_rel, window_rel,
4469  NULL);
4470 
4471  /* Let extensions possibly add some more paths */
4472  if (create_upper_paths_hook)
4473  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4474  input_rel, window_rel, NULL);
4475 
4476  /* Now choose the best path(s) */
4477  set_cheapest(window_rel);
4478 
4479  return window_rel;
4480 }
4481 
4482 /*
4483  * Stack window-function implementation steps atop the given Path, and
4484  * add the result to window_rel.
4485  *
4486  * window_rel: upperrel to contain result
4487  * path: input Path to use (must return input_target)
4488  * input_target: result of make_window_input_target
4489  * output_target: what the topmost WindowAggPath should return
4490  * wflists: result of find_window_functions
4491  * activeWindows: result of select_active_windows
4492  */
4493 static void
4494 create_one_window_path(PlannerInfo *root,
4495  RelOptInfo *window_rel,
4496  Path *path,
4497  PathTarget *input_target,
4498  PathTarget *output_target,
4499  WindowFuncLists *wflists,
4500  List *activeWindows)
4501 {
4502  PathTarget *window_target;
4503  ListCell *l;
4504  List *topqual = NIL;
4505 
4506  /*
4507  * Since each window clause could require a different sort order, we stack
4508  * up a WindowAgg node for each clause, with sort steps between them as
4509  * needed. (We assume that select_active_windows chose a good order for
4510  * executing the clauses in.)
4511  *
4512  * input_target should contain all Vars and Aggs needed for the result.
4513  * (In some cases we wouldn't need to propagate all of these all the way
4514  * to the top, since they might only be needed as inputs to WindowFuncs.
4515  * It's probably not worth trying to optimize that though.) It must also
4516  * contain all window partitioning and sorting expressions, to ensure
4517  * they're computed only once at the bottom of the stack (that's critical
4518  * for volatile functions). As we climb up the stack, we'll add outputs
4519  * for the WindowFuncs computed at each level.
4520  */
4521  window_target = input_target;
4522 
4523  foreach(l, activeWindows)
4524  {
4525  WindowClause *wc = lfirst_node(WindowClause, l);
4526  List *window_pathkeys;
4527  List *runcondition = NIL;
4528  int presorted_keys;
4529  bool is_sorted;
4530  bool topwindow;
4531  ListCell *lc2;
4532 
4533  window_pathkeys = make_pathkeys_for_window(root,
4534  wc,
4535  root->processed_tlist);
4536 
4537  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4538  path->pathkeys,
4539  &presorted_keys);
4540 
4541  /* Sort if necessary */
4542  if (!is_sorted)
4543  {
4544  /*
4545  * No presorted keys or incremental sort disabled, just perform a
4546  * complete sort.
4547  */
4548  if (presorted_keys == 0 || !enable_incremental_sort)
4549  path = (Path *) create_sort_path(root, window_rel,
4550  path,
4551  window_pathkeys,
4552  -1.0);
4553  else
4554  {
4555  /*
4556  * Since we have presorted keys and incremental sort is
4557  * enabled, just use incremental sort.
4558  */
4559  path = (Path *) create_incremental_sort_path(root,
4560  window_rel,
4561  path,
4562  window_pathkeys,
4563  presorted_keys,
4564  -1.0);
4565  }
4566  }
4567 
4568  if (lnext(activeWindows, l))
4569  {
4570  /*
4571  * Add the current WindowFuncs to the output target for this
4572  * intermediate WindowAggPath. We must copy window_target to
4573  * avoid changing the previous path's target.
4574  *
4575  * Note: a WindowFunc adds nothing to the target's eval costs; but
4576  * we do need to account for the increase in tlist width.
4577  */
4578  int64 tuple_width = window_target->width;
4579 
4580  window_target = copy_pathtarget(window_target);
4581  foreach(lc2, wflists->windowFuncs[wc->winref])
4582  {
4583  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4584 
4585  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4586  tuple_width += get_typavgwidth(wfunc->wintype, -1);
4587  }
4588  window_target->width = clamp_width_est(tuple_width);
4589  }
4590  else
4591  {
4592  /* Install the goal target in the topmost WindowAgg */
4593  window_target = output_target;
4594  }
4595 
4596  /* mark the final item in the list as the top-level window */
4597  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4598 
4599  /*
4600  * Collect the WindowFuncRunConditions from each WindowFunc and
4601  * convert them into OpExprs
4602  */
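/*
 * For instance (illustration only, hypothetical table/column names), in
 *     SELECT * FROM (SELECT a, row_number() OVER (ORDER BY a) AS rn
 *                    FROM t) s
 *     WHERE rn <= 10;
 * the qual "rn <= 10" arrives here as a WindowFuncRunCondition and becomes
 * an OpExpr run condition that lets the WindowAgg stop emitting rows early.
 */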
4603  foreach(lc2, wflists->windowFuncs[wc->winref])
4604  {
4605  ListCell *lc3;
4606  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4607 
4608  foreach(lc3, wfunc->runCondition)
4609  {
4610  WindowFuncRunCondition *wfuncrc =
4611  lfirst_node(WindowFuncRunCondition, lc3);
4612  Expr *opexpr;
4613  Expr *leftop;
4614  Expr *rightop;
4615 
4616  if (wfuncrc->wfunc_left)
4617  {
4618  leftop = (Expr *) copyObject(wfunc);
4619  rightop = copyObject(wfuncrc->arg);
4620  }
4621  else
4622  {
4623  leftop = copyObject(wfuncrc->arg);
4624  rightop = (Expr *) copyObject(wfunc);
4625  }
4626 
4627  opexpr = make_opclause(wfuncrc->opno,
4628  BOOLOID,
4629  false,
4630  leftop,
4631  rightop,
4632  InvalidOid,
4633  wfuncrc->inputcollid);
4634 
4635  runcondition = lappend(runcondition, opexpr);
4636 
4637  if (!topwindow)
4638  topqual = lappend(topqual, opexpr);
4639  }
4640  }
4641 
4642  path = (Path *)
4643  create_windowagg_path(root, window_rel, path, window_target,
4644  wflists->windowFuncs[wc->winref],
4645  runcondition, wc,
4646  topwindow ? topqual : NIL, topwindow);
4647  }
4648 
4649  add_path(window_rel, path);
4650 }
4651 
4652 /*
4653  * create_distinct_paths
4654  *
4655  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4656  *
4657  * input_rel: contains the source-data Paths
4658  * target: the pathtarget for the result Paths to compute
4659  *
4660  * Note: input paths should already compute the desired pathtarget, since
4661  * Sort/Unique won't project anything.
4662  */
4663 static RelOptInfo *
4664 create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4665  PathTarget *target)
4666 {
4667  RelOptInfo *distinct_rel;
4668 
4669  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4670  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4671 
4672  /*
4673  * We don't compute anything at this level, so distinct_rel will be
4674  * parallel-safe if the input rel is parallel-safe. In particular, if
4675  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4676  * output those expressions, and will not be parallel-safe unless those
4677  * expressions are parallel-safe.
4678  */
4679  distinct_rel->consider_parallel = input_rel->consider_parallel;
4680 
4681  /*
4682  * If the input rel belongs to a single FDW, so does the distinct_rel.
4683  */
4684  distinct_rel->serverid = input_rel->serverid;
4685  distinct_rel->userid = input_rel->userid;
4686  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4687  distinct_rel->fdwroutine = input_rel->fdwroutine;
4688 
4689  /* build distinct paths based on input_rel's pathlist */
4690  create_final_distinct_paths(root, input_rel, distinct_rel);
4691 
4692  /* now build distinct paths based on input_rel's partial_pathlist */
4693  create_partial_distinct_paths(root, input_rel, distinct_rel, target);
4694 
4695  /* Give a helpful error if we failed to create any paths */
4696  if (distinct_rel->pathlist == NIL)
4697  ereport(ERROR,
4698  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4699  errmsg("could not implement DISTINCT"),
4700  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4701 
4702  /*
4703  * If there is an FDW that's responsible for all baserels of the query,
4704  * let it consider adding ForeignPaths.
4705  */
4706  if (distinct_rel->fdwroutine &&
4707  distinct_rel->fdwroutine->GetForeignUpperPaths)
4708  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4709  UPPERREL_DISTINCT,
4710  input_rel,
4711  distinct_rel,
4712  NULL);
4713 
4714  /* Let extensions possibly add some more paths */
4715  if (create_upper_paths_hook)
4716  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4717  distinct_rel, NULL);
4718 
4719  /* Now choose the best path(s) */
4720  set_cheapest(distinct_rel);
4721 
4722  return distinct_rel;
4723 }
4724 
4725 /*
4726  * create_partial_distinct_paths
4727  *
4728  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4729  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4730  * paths on top and add a final unique/aggregate path to remove any duplicate
4731  * produced from combining rows from parallel workers.
4732  */
4733 static void
4734 create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4735  RelOptInfo *final_distinct_rel,
4736  PathTarget *target)
4737 {
4738  RelOptInfo *partial_distinct_rel;
4739  Query *parse;
4740  List *distinctExprs;
4741  double numDistinctRows;
4742  Path *cheapest_partial_path;
4743  ListCell *lc;
4744 
4745  /* nothing to do when there are no partial paths in the input rel */
4746  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4747  return;
4748 
4749  parse = root->parse;
4750 
4751  /* can't do parallel DISTINCT ON */
4752  if (parse->hasDistinctOn)
4753  return;
4754 
4755  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4756  NULL);
4757  partial_distinct_rel->reltarget = target;
4758  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4759 
4760  /*
4761  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4762  */
4763  partial_distinct_rel->serverid = input_rel->serverid;
4764  partial_distinct_rel->userid = input_rel->userid;
4765  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4766  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4767 
4768  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4769 
4770  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4771  parse->targetList);
4772 
4773  /* estimate how many distinct rows we'll get from each worker */
4774  numDistinctRows = estimate_num_groups(root, distinctExprs,
4775  cheapest_partial_path->rows,
4776  NULL, NULL);
4777 
4778  /*
4779  * Try sorting the cheapest path and incrementally sorting any paths with
4780  * presorted keys and put unique paths atop of those.
4781  */
4782  if (grouping_is_sortable(root->processed_distinctClause))
4783  {
4784  foreach(lc, input_rel->partial_pathlist)
4785  {
4786  Path *input_path = (Path *) lfirst(lc);
4787  Path *sorted_path;
4788  bool is_sorted;
4789  int presorted_keys;
4790 
4791  is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
4792  input_path->pathkeys,
4793  &presorted_keys);
4794 
4795  if (is_sorted)
4796  sorted_path = input_path;
4797  else
4798  {
4799  /*
4800  * Try at least sorting the cheapest path and also try
4801  * incrementally sorting any path which is partially sorted
4802  * already (no need to deal with paths which have presorted
4803  * keys when incremental sort is disabled unless it's the
4804  * cheapest partial path).
4805  */
4806  if (input_path != cheapest_partial_path &&
4807  (presorted_keys == 0 || !enable_incremental_sort))
4808  continue;
4809 
4810  /*
4811  * We've no need to consider both a sort and incremental sort.
4812  * We'll just do a sort if there are no presorted keys and an
4813  * incremental sort when there are presorted keys.
4814  */
4815  if (presorted_keys == 0 || !enable_incremental_sort)
4816  sorted_path = (Path *) create_sort_path(root,
4817  partial_distinct_rel,
4818  input_path,
4819  root->distinct_pathkeys,
4820  -1.0);
4821  else
4822  sorted_path = (Path *) create_incremental_sort_path(root,
4823  partial_distinct_rel,
4824  input_path,
4825  root->distinct_pathkeys,
4826  presorted_keys,
4827  -1.0);
4828  }
4829 
4830  /*
4831  * An empty distinct_pathkeys means all tuples have the same value
4832  * for the DISTINCT clause. See create_final_distinct_paths()
4833  */
4834  if (root->distinct_pathkeys == NIL)
4835  {
4836  Node *limitCount;
4837 
4838  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
4839  sizeof(int64),
4840  Int64GetDatum(1), false,
4841  FLOAT8PASSBYVAL);
4842 
4843  /*
4844  * Apply a LimitPath onto the partial path to restrict the
4845  * tuples from each worker to 1. create_final_distinct_paths
4846  * will need to apply an additional LimitPath to restrict this
4847  * to a single row after the Gather node. If the query
4848  * already has a LIMIT clause, then we could end up with three
4849  * Limit nodes in the final plan. Consolidating the top two
4850  * of these could be done, but does not seem worth troubling
4851  * over.
4852  */
4853  add_partial_path(partial_distinct_rel, (Path *)
4854  create_limit_path(root, partial_distinct_rel,
4855  sorted_path,
4856  NULL,
4857  limitCount,
4858  LIMIT_OPTION_COUNT,
4859  0, 1));
4860  }
4861  else
4862  {
4863  add_partial_path(partial_distinct_rel, (Path *)
4864  create_upper_unique_path(root, partial_distinct_rel,
4865  sorted_path,
4866  list_length(root->distinct_pathkeys),
4867  numDistinctRows));
4868  }
4869  }
4870  }
4871 
4872  /*
4873  * Now try hash aggregate paths, if enabled and hashing is possible. Since
4874  * we're not on the hook to ensure we do our best to create at least one
4875  * path here, we treat enable_hashagg as a hard off-switch rather than the
4876  * slightly softer variant in create_final_distinct_paths.
4877  */
4878  if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
4879  {
4880  add_partial_path(partial_distinct_rel, (Path *)
4881  create_agg_path(root,
4882  partial_distinct_rel,
4883  cheapest_partial_path,
4884  cheapest_partial_path->pathtarget,
4885  AGG_HASHED,
4886  AGGSPLIT_SIMPLE,
4887  root->processed_distinctClause,
4888  NIL,
4889  NULL,
4890  numDistinctRows));
4891  }
4892 
4893  /*
4894  * If there is an FDW that's responsible for all baserels of the query,
4895  * let it consider adding ForeignPaths.
4896  */
4897  if (partial_distinct_rel->fdwroutine &&
4898  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4899  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4900  UPPERREL_PARTIAL_DISTINCT,
4901  input_rel,
4902  partial_distinct_rel,
4903  NULL);
4904 
4905  /* Let extensions possibly add some more partial paths */
4906  if (create_upper_paths_hook)
4907  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4908  input_rel, partial_distinct_rel, NULL);
4909 
4910  if (partial_distinct_rel->partial_pathlist != NIL)
4911  {
4912  generate_useful_gather_paths(root, partial_distinct_rel, true);
4913  set_cheapest(partial_distinct_rel);
4914 
4915  /*
4916  * Finally, create paths to distinctify the final result. This step
4917  * is needed to remove any duplicates due to combining rows from
4918  * parallel workers.
4919  */
4920  create_final_distinct_paths(root, partial_distinct_rel,
4921  final_distinct_rel);
4922  }
4923 }
4924 
4925 /*
4926  * create_final_distinct_paths
4927  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4928  *
4929  * input_rel: contains the source-data paths
4930  * distinct_rel: destination relation for storing created paths
4931  */
4932 static RelOptInfo *
4933 create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4934  RelOptInfo *distinct_rel)
4935 {
4936  Query *parse = root->parse;
4937  Path *cheapest_input_path = input_rel->cheapest_total_path;
4938  double numDistinctRows;
4939  bool allow_hash;
4940 
4941  /* Estimate number of distinct rows there will be */
4942  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4943  root->hasHavingQual)
4944  {
4945  /*
4946  * If there was grouping or aggregation, use the number of input rows
4947  * as the estimated number of DISTINCT rows (ie, assume the input is
4948  * already mostly unique).
4949  */
4950  numDistinctRows = cheapest_input_path->rows;
4951  }
4952  else
4953  {
4954  /*
4955  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4956  */
4957  List *distinctExprs;
4958 
4959  distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
4960  parse->targetList);
4961  numDistinctRows = estimate_num_groups(root, distinctExprs,
4962  cheapest_input_path->rows,
4963  NULL, NULL);
4964  }
4965 
4966  /*
4967  * Consider sort-based implementations of DISTINCT, if possible.
4968  */
4969  if (grouping_is_sortable(root->processed_distinctClause))
4970  {
4971  /*
4972  * Firstly, if we have any adequately-presorted paths, just stick a
4973  * Unique node on those. We also consider doing an explicit sort of
4974  * the cheapest input path and Unique'ing that. If any paths have
4975  * presorted keys then we'll create an incremental sort atop of those
4976  * before adding a unique node on the top.
4977  *
4978  * When we have DISTINCT ON, we must sort by the more rigorous of
4979  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4980  * Also, if we do have to do an explicit sort, we might as well use
4981  * the more rigorous ordering to avoid a second sort later. (Note
4982  * that the parser will have ensured that one clause is a prefix of
4983  * the other.)
4984  */
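/*
 * Hypothetical example: for
 *     SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b;
 * distinct_pathkeys covers only "a" while sort_pathkeys covers (a, b), so
 * the more rigorous (a, b) ordering is used here.
 */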
4985  List *needed_pathkeys;
4986  ListCell *lc;
4987  double limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
4988 
4989  if (parse->hasDistinctOn &&
4990  list_length(root->distinct_pathkeys) <
4991  list_length(root->sort_pathkeys))
4992  needed_pathkeys = root->sort_pathkeys;
4993  else
4994  needed_pathkeys = root->distinct_pathkeys;
4995 
4996  foreach(lc, input_rel->pathlist)
4997  {
4998  Path *input_path = (Path *) lfirst(lc);
4999  Path *sorted_path;
5000  bool is_sorted;
5001  int presorted_keys;
5002 
5003  is_sorted = pathkeys_count_contained_in(needed_pathkeys,
5004  input_path->pathkeys,
5005  &presorted_keys);
5006 
5007  if (is_sorted)
5008  sorted_path = input_path;
5009  else
5010  {
5011  /*
5012  * Try at least sorting the cheapest path and also try
5013  * incrementally sorting any path which is partially sorted
5014  * already (no need to deal with paths which have presorted
5015  * keys when incremental sort is disabled unless it's the
5016  * cheapest input path).
5017  */
5018  if (input_path != cheapest_input_path &&
5019  (presorted_keys == 0 || !enable_incremental_sort))
5020  continue;
5021 
5022  /*
5023  * We've no need to consider both a sort and incremental sort.
5024  * We'll just do a sort if there are no presorted keys and an
5025  * incremental sort when there are presorted keys.
5026  */
5027  if (presorted_keys == 0 || !enable_incremental_sort)
5028  sorted_path = (Path *) create_sort_path(root,
5029  distinct_rel,
5030  input_path,
5031  needed_pathkeys,
5032  limittuples);
5033  else
5034  sorted_path = (Path *) create_incremental_sort_path(root,
5035  distinct_rel,
5036  input_path,
5037  needed_pathkeys,
5038  presorted_keys,
5039  limittuples);
5040  }
5041 
5042  /*
5043  * distinct_pathkeys may have become empty if all of the pathkeys
5044  * were determined to be redundant. If all of the pathkeys are
5045  * redundant then each DISTINCT target must only allow a single
5046  * value, therefore all resulting tuples must be identical (or at
5047  * least indistinguishable by an equality check). We can uniquify
5048  * these tuples simply by just taking the first tuple. All we do
5049  * here is add a path to do "LIMIT 1" atop of 'sorted_path'. When
5050  * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
5051  * list, so we must still only do this with paths which are
5052  * correctly sorted by sort_pathkeys.
5053  */
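/*
 * Illustration (assumed query): in
 *     SELECT DISTINCT a FROM t WHERE a = 1;
 * the WHERE clause makes "a" redundant, distinct_pathkeys ends up NIL, and
 * a LIMIT 1 atop the sorted path suffices to return the single distinct
 * row.
 */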
5054  if (root->distinct_pathkeys == NIL)
5055  {
5056  Node *limitCount;
5057 
5058  limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
5059  sizeof(int64),
5060  Int64GetDatum(1), false,
5061  FLOAT8PASSBYVAL);
5062 
5063  /*
5064  * If the query already has a LIMIT clause, then we could end
5065  * up with a duplicate LimitPath in the final plan. That does
5066  * not seem worth troubling over too much.
5067  */
5068  add_path(distinct_rel, (Path *)
5069  create_limit_path(root, distinct_rel, sorted_path,
5070  NULL, limitCount,
5071  LIMIT_OPTION_COUNT, 0, 1));
5072  }
5073  else
5074  {
5075  add_path(distinct_rel, (Path *)
5076  create_upper_unique_path(root, distinct_rel,
5077  sorted_path,
5078  list_length(root->distinct_pathkeys),
5079  numDistinctRows));
5080  }
5081  }
5082  }
5083 
5084  /*
5085  * Consider hash-based implementations of DISTINCT, if possible.
5086  *
5087  * If we were not able to make any other types of path, we *must* hash or
5088  * die trying. If we do have other choices, there are two things that
5089  * should prevent selection of hashing: if the query uses DISTINCT ON
5090  * (because it won't really have the expected behavior if we hash), or if
5091  * enable_hashagg is off.
5092  *
5093  * Note: grouping_is_hashable() is much more expensive to check than the
5094  * other gating conditions, so we want to do it last.
5095  */
5096  if (distinct_rel->pathlist == NIL)
5097  allow_hash = true; /* we have no alternatives */
5098  else if (parse->hasDistinctOn || !enable_hashagg)
5099  allow_hash = false; /* policy-based decision not to hash */
5100  else
5101  allow_hash = true; /* default */
5102 
5103  if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
5104  {
5105  /* Generate hashed aggregate path --- no sort needed */
5106  add_path(distinct_rel, (Path *)
5107  create_agg_path(root,
5108  distinct_rel,
5109  cheapest_input_path,
5110  cheapest_input_path->pathtarget,
5111  AGG_HASHED,
5112  AGGSPLIT_SIMPLE,
5113  root->processed_distinctClause,
5114  NIL,
5115  NULL,
5116  numDistinctRows));
5117  }
5118 
5119  return distinct_rel;
5120 }
5121 
5122 /*
5123  * create_ordered_paths
5124  *
5125  * Build a new upperrel containing Paths for ORDER BY evaluation.
5126  *
5127  * All paths in the result must satisfy the ORDER BY ordering.
5128  * The only new paths we need consider are an explicit full sort
5129  * and incremental sort on the cheapest-total existing path.
5130  *
5131  * input_rel: contains the source-data Paths
5132  * target: the output tlist the result Paths must emit
5133  * limit_tuples: estimated bound on the number of output tuples,
5134  * or -1 if no LIMIT or couldn't estimate
5135  *
5136  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
5137  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
5138  */
5139 static RelOptInfo *
5140 create_ordered_paths(PlannerInfo *root,
5141  RelOptInfo *input_rel,
5142  PathTarget *target,
5143  bool target_parallel_safe,
5144  double limit_tuples)
5145 {
5146  Path *cheapest_input_path = input_rel->cheapest_total_path;
5147  RelOptInfo *ordered_rel;
5148  ListCell *lc;
5149 
5150  /* For now, do all work in the (ORDERED, NULL) upperrel */
5151  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
5152 
5153  /*
5154  * If the input relation is not parallel-safe, then the ordered relation
5155  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
5156  * target list is parallel-safe.
5157  */
5158  if (input_rel->consider_parallel && target_parallel_safe)
5159  ordered_rel->consider_parallel = true;
5160 
5161  /*
5162  * If the input rel belongs to a single FDW, so does the ordered_rel.
5163  */
5164  ordered_rel->serverid = input_rel->serverid;
5165  ordered_rel->userid = input_rel->userid;
5166  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
5167  ordered_rel->fdwroutine = input_rel->fdwroutine;
5168 
5169  foreach(lc, input_rel->pathlist)
5170  {
5171  Path *input_path = (Path *) lfirst(lc);
5172  Path *sorted_path;
5173  bool is_sorted;
5174  int presorted_keys;
5175 
5176  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5177  input_path->pathkeys, &presorted_keys);
5178 
5179  if (is_sorted)
5180  sorted_path = input_path;
5181  else
5182  {
5183  /*
5184  * Try at least sorting the cheapest path and also try
5185  * incrementally sorting any path which is partially sorted
5186  * already (no need to deal with paths which have presorted keys
5187  * when incremental sort is disabled unless it's the cheapest
5188  * input path).
5189  */
5190  if (input_path != cheapest_input_path &&
5191  (presorted_keys == 0 || !enable_incremental_sort))
5192  continue;
5193 
5194  /*
5195  * We've no need to consider both a sort and incremental sort.
5196  * We'll just do a sort if there are no presorted keys and an
5197  * incremental sort when there are presorted keys.
5198  */
5199  if (presorted_keys == 0 || !enable_incremental_sort)
5200  sorted_path = (Path *) create_sort_path(root,
5201  ordered_rel,
5202  input_path,
5203  root->sort_pathkeys,
5204  limit_tuples);
5205  else
5206  sorted_path = (Path *) create_incremental_sort_path(root,
5207  ordered_rel,
5208  input_path,
5209  root->sort_pathkeys,
5210  presorted_keys,
5211  limit_tuples);
5212  }
5213 
5214  /* Add projection step if needed */
5215  if (sorted_path->pathtarget != target)
5216  sorted_path = apply_projection_to_path(root, ordered_rel,
5217  sorted_path, target);
5218 
5219  add_path(ordered_rel, sorted_path);
5220  }
5221 
5222  /*
5223  * generate_gather_paths() will have already generated a simple Gather
5224  * path for the best parallel path, if any, and the loop above will have
5225  * considered sorting it. Similarly, generate_gather_paths() will also
5226  * have generated order-preserving Gather Merge plans which can be used
5227  * without sorting if they happen to match the sort_pathkeys, and the loop
5228  * above will have handled those as well. However, there's one more
5229  * possibility: it may make sense to sort the cheapest partial path or
5230  * incrementally sort any partial path that is partially sorted according
5231  * to the required output order and then use Gather Merge.
5232  */
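 /*
  * A sketch of the plan shape this aims for (illustrative only):
  *
  *		Gather Merge
  *		  ->  Sort  (or Incremental Sort)
  *		        Sort Key: a, b
  *		        ->  Parallel Seq Scan on tab
  *
  * each worker sorts its own share of the rows, and the leader merges the
  * pre-sorted streams rather than performing one large serial sort.
  */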
5233  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5234  input_rel->partial_pathlist != NIL)
5235  {
5236  Path *cheapest_partial_path;
5237 
5238  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5239 
5240  foreach(lc, input_rel->partial_pathlist)
5241  {
5242  Path *input_path = (Path *) lfirst(lc);
5243  Path *sorted_path;
5244  bool is_sorted;
5245  int presorted_keys;
5246  double total_groups;
5247 
5248  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
5249  input_path->pathkeys,
5250  &presorted_keys);
5251 
5252  if (is_sorted)
5253  continue;
5254 
5255  /*
5256  * Try at least sorting the cheapest path and also try
5257  * incrementally sorting any path which is partially sorted
5258  * already (no need to deal with paths which have presorted keys
5259  * when incremental sort is disabled unless it's the cheapest
5260  * partial path).
5261  */
5262  if (input_path != cheapest_partial_path &&
5263  (presorted_keys == 0 || !enable_incremental_sort))
5264  continue;
5265 
5266  /*
5267  * We've no need to consider both a sort and incremental sort.
5268  * We'll just do a sort if there are no presorted keys and an
5269  * incremental sort when there are presorted keys.
5270  */
5271  if (presorted_keys == 0 || !enable_incremental_sort)
5272  sorted_path = (Path *) create_sort_path(root,
5273  ordered_rel,
5274  input_path,
5275  root->sort_pathkeys,
5276  limit_tuples);
5277  else
5278  sorted_path = (Path *) create_incremental_sort_path(root,
5279  ordered_rel,
5280  input_path,
5281  root->sort_pathkeys,
5282  presorted_keys,
5283  limit_tuples);
5284  total_groups = input_path->rows *
5285  input_path->parallel_workers;
5286  sorted_path = (Path *)
5287  create_gather_merge_path(root, ordered_rel,
5288  sorted_path,
5289  sorted_path->pathtarget,
5290  root->sort_pathkeys, NULL,
5291  &total_groups);
5292 
5293  /* Add projection step if needed */
5294  if (sorted_path->pathtarget != target)
5295  sorted_path = apply_projection_to_path(root, ordered_rel,
5296  sorted_path, target);
5297 
5298  add_path(ordered_rel, sorted_path);
5299  }
5300  }
5301 
5302  /*
5303  * If there is an FDW that's responsible for all baserels of the query,
5304  * let it consider adding ForeignPaths.
5305  */
5306  if (ordered_rel->fdwroutine &&
5307  ordered_rel->fdwroutine->GetForeignUpperPaths)
5308  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5309  input_rel, ordered_rel,
5310  NULL);
5311 
5312  /* Let extensions possibly add some more paths */
5313  if (create_upper_paths_hook)
5314  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5315  input_rel, ordered_rel, NULL);
5316 
5317  /*
5318  * No need to bother with set_cheapest here; grouping_planner does not
5319  * need us to do it.
5320  */
5321  Assert(ordered_rel->pathlist != NIL);
5322 
5323  return ordered_rel;
5324 }
5325 
5326 
5327 /*
5328  * make_group_input_target
5329  * Generate appropriate PathTarget for initial input to grouping nodes.
5330  *
5331  * If there is grouping or aggregation, the scan/join subplan cannot emit
5332  * the query's final targetlist; for example, it certainly can't emit any
5333  * aggregate function calls. This routine generates the correct target
5334  * for the scan/join subplan.
5335  *
5336  * The query target list passed from the parser already contains entries
5337  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5338  * for variables used only in HAVING clauses; so we need to add those
5339  * variables to the subplan target list. Also, we flatten all expressions
5340  * except GROUP BY items into their component variables; other expressions
5341  * will be computed by the upper plan nodes rather than by the subplan.
5342  * For example, given a query like
5343  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5344  * we want to pass this targetlist to the subplan:
5345  * a+b,c,d
5346  * where the a+b target will be used by the Sort/Group steps, and the
5347  * other targets will be used for computing the final results.
5348  *
5349  * 'final_target' is the query's final target list (in PathTarget form)
5350  *
5351  * The result is the PathTarget to be computed by the Paths returned from
5352  * query_planner().
5353  */
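 /*
  * A further illustrative example (not part of the original comment): in
  *		SELECT a FROM tab GROUP BY a HAVING sum(b) > 10;
  * column b appears only inside the HAVING aggregate, so it is absent from
  * the final target list; the code below pulls it out of havingQual and
  * adds it, making the subplan emit "a, b".
  */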
5354 static PathTarget *
5355 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5356 {
5357  Query *parse = root->parse;
5358  PathTarget *input_target;
5359  List *non_group_cols;
5360  List *non_group_vars;
5361  int i;
5362  ListCell *lc;
5363 
5364  /*
5365  * We must build a target containing all grouping columns, plus any other
5366  * Vars mentioned in the query's targetlist and HAVING qual.
5367  */
5368  input_target = create_empty_pathtarget();
5369  non_group_cols = NIL;
5370 
5371  i = 0;
5372  foreach(lc, final_target->exprs)
5373  {
5374  Expr *expr = (Expr *) lfirst(lc);
5375  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5376 
5377  if (sgref && root->processed_groupClause &&
5378  get_sortgroupref_clause_noerr(sgref,
5379  root->processed_groupClause) != NULL)
5380  {
5381  /*
5382  * It's a grouping column, so add it to the input target as-is.
5383  */
5384  add_column_to_pathtarget(input_target, expr, sgref);
5385  }
5386  else
5387  {
5388  /*
5389  * Non-grouping column, so just remember the expression for later
5390  * call to pull_var_clause.
5391  */
5392  non_group_cols = lappend(non_group_cols, expr);
5393  }
5394 
5395  i++;
5396  }
5397 
5398  /*
5399  * If there's a HAVING clause, we'll need the Vars it uses, too.
5400  */
5401  if (parse->havingQual)
5402  non_group_cols = lappend(non_group_cols, parse->havingQual);
5403 
5404  /*
5405  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5406  * add them to the input target if not already present. (A Var used
5407  * directly as a GROUP BY item will be present already.) Note this
5408  * includes Vars used in resjunk items, so we are covering the needs of
5409  * ORDER BY and window specifications. Vars used within Aggrefs and
5410  * WindowFuncs will be pulled out here, too.
5411  */
5412  non_group_vars = pull_var_clause((Node *) non_group_cols,
5413  PVC_RECURSE_AGGREGATES |
5414  PVC_RECURSE_WINDOWFUNCS |
5415  PVC_INCLUDE_PLACEHOLDERS);
5416  add_new_columns_to_pathtarget(input_target, non_group_vars);
5417 
5418  /* clean up cruft */
5419  list_free(non_group_vars);
5420  list_free(non_group_cols);
5421 
5422  /* XXX this causes some redundant cost calculation ... */
5423  return set_pathtarget_cost_width(root, input_target);
5424 }
5425 
5426 /*
5427  * make_partial_grouping_target
5428  * Generate appropriate PathTarget for output of partial aggregate
5429  * (or partial grouping, if there are no aggregates) nodes.
5430  *
5431  * A partial aggregation node needs to emit all the same aggregates that
5432  * a regular aggregation node would, plus any aggregates used in HAVING;
5433  * except that the Aggref nodes should be marked as partial aggregates.
5434  *
5435  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5436  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5437  * these would be Vars that are grouped by or used in grouping expressions.)
5438  *
5439  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5440  * havingQual represents the HAVING clause.
5441  */
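 /*
  * For example (an illustrative sketch): for
  *		SELECT a, sum(b) FROM tab GROUP BY a;
  * the partial target built below is roughly "a, PARTIAL sum(b)": the
  * sum(b) Aggref is flat-copied and marked AGGSPLIT_INITIAL_SERIAL, so the
  * partial-aggregate step emits transition (or serialized) state that a
  * later Finalize Aggregate step combines into the final sum.
  */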
5442 static PathTarget *
5443 make_partial_grouping_target(PlannerInfo *root,
5444  PathTarget *grouping_target,
5445  Node *havingQual)
5446 {
5447  PathTarget *partial_target;
5448  List *non_group_cols;
5449  List *non_group_exprs;
5450  int i;
5451  ListCell *lc;
5452 
5453  partial_target = create_empty_pathtarget();
5454  non_group_cols = NIL;
5455 
5456  i = 0;
5457  foreach(lc, grouping_target->exprs)
5458  {
5459  Expr *expr = (Expr *) lfirst(lc);
5460  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5461 
5462  if (sgref && root->processed_groupClause &&
5463  get_sortgroupref_clause_noerr(sgref,
5464  root->processed_groupClause) != NULL)
5465  {
5466  /*
5467  * It's a grouping column, so add it to the partial_target as-is.
5468  * (This allows the upper agg step to repeat the grouping calcs.)
5469  */
5470  add_column_to_pathtarget(partial_target, expr, sgref);
5471  }
5472  else
5473  {
5474  /*
5475  * Non-grouping column, so just remember the expression for later
5476  * call to pull_var_clause.
5477  */
5478  non_group_cols = lappend(non_group_cols, expr);
5479  }
5480 
5481  i++;
5482  }
5483 
5484  /*
5485  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5486  */
5487  if (havingQual)
5488  non_group_cols = lappend(non_group_cols, havingQual);
5489 
5490  /*
5491  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5492  * non-group cols (plus HAVING), and add them to the partial_target if not
5493  * already present. (An expression used directly as a GROUP BY item will
5494  * be present already.) Note this includes Vars used in resjunk items, so
5495  * we are covering the needs of ORDER BY and window specifications.
5496  */
5497  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5498  PVC_INCLUDE_AGGREGATES |
5499  PVC_RECURSE_WINDOWFUNCS |
5500  PVC_INCLUDE_PLACEHOLDERS);
5501 
5502  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5503 
5504  /*
5505  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5506  * are at the top level of the target list, so we can just scan the list
5507  * rather than recursing through the expression trees.
5508  */
5509  foreach(lc, partial_target->exprs)
5510  {
5511  Aggref *aggref = (Aggref *) lfirst(lc);
5512 
5513  if (IsA(aggref, Aggref))
5514  {
5515  Aggref *newaggref;
5516 
5517  /*
5518  * We shouldn't need to copy the substructure of the Aggref node,
5519  * but flat-copy the node itself to avoid damaging other trees.
5520  */
5521  newaggref = makeNode(Aggref);
5522  memcpy(newaggref, aggref, sizeof(Aggref));
5523 
5524  /* For now, assume serialization is required */
5525  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5526 
5527  lfirst(lc) = newaggref;
5528  }
5529  }
5530 
5531  /* clean up cruft */
5532  list_free(non_group_exprs);
5533  list_free(non_group_cols);
5534 
5535  /* XXX this causes some redundant cost calculation ... */
5536  return set_pathtarget_cost_width(root, partial_target);
5537 }
5538 
5539 /*
5540  * mark_partial_aggref
5541  * Adjust an Aggref to make it represent a partial-aggregation step.
5542  *
5543  * The Aggref node is modified in-place; caller must do any copying required.
5544  */
5545 void
5546 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5547 {
5548  /* aggtranstype should be computed by this point */
5549  Assert(OidIsValid(agg->aggtranstype));
5550  /* ... but aggsplit should still be as the parser left it */
5551  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5552 
5553  /* Mark the Aggref with the intended partial-aggregation mode */
5554  agg->aggsplit = aggsplit;
5555 
5556  /*
5557  * Adjust result type if needed. Normally, a partial aggregate returns
5558  * the aggregate's transition type; but if that's INTERNAL and we're
5559  * serializing, it returns BYTEA instead.
5560  */
5561  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5562  {
5563  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5564  agg->aggtype = BYTEAOID;
5565  else
5566  agg->aggtype = agg->aggtranstype;
5567  }
5568 }
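 /*
  * A usage sketch (mirroring what make_partial_grouping_target() does
  * above): given an Aggref whose aggtranstype has been resolved, a
  * partial-aggregation step does roughly
  *
  *		newaggref = makeNode(Aggref);
  *		memcpy(newaggref, aggref, sizeof(Aggref));
  *		mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
  *
  * after which newaggref->aggsplit is AGGSPLIT_INITIAL_SERIAL and its
  * result type is the transition type, or BYTEA when that type is INTERNAL
  * and serialization is requested.
  */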
5569 
5570 /*
5571  * postprocess_setop_tlist
5572  * Fix up targetlist returned by plan_set_operations().
5573  *
5574  * We need to transpose sort key info from the orig_tlist into new_tlist.
5575  * NOTE: this would not be good enough if we supported resjunk sort keys
5576  * for results of set operations --- then, we'd need to project a whole
5577  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5578  * find any resjunk columns in orig_tlist.
5579  */
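 /*
  * Illustrative example (not part of the original comment; names are made
  * up): for
  *		SELECT a FROM t1 UNION SELECT a FROM t2 ORDER BY 1;
  * the original targetlist entry for "a" carries the ressortgroupref that
  * the ORDER BY step will look for; the loop below copies that marker onto
  * the matching (same-position, non-resjunk) entry of the tlist returned
  * by plan_set_operations().
  */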
5580 static List *
5581 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5582 {
5583  ListCell *l;
5584  ListCell *orig_tlist_item = list_head(orig_tlist);
5585 
5586  foreach(l, new_tlist)
5587  {
5588  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5589  TargetEntry *orig_tle;
5590 
5591  /* ignore resjunk columns in setop result */
5592  if (new_tle->resjunk)
5593  continue;
5594 
5595  Assert(orig_tlist_item != NULL);
5596  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5597  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5598  if (orig_tle->resjunk) /* should not happen */
5599  elog(ERROR, "resjunk output columns are not implemented");
5600  Assert(new_tle->resno == orig_tle->resno);
5601  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5602  }
5603  if (orig_tlist_item != NULL)
5604  elog(ERROR, "resjunk output columns are not implemented");
5605  return new_tlist;
5606 }
5607 
5608 /*
5609  * optimize_window_clauses
5610  * Call each WindowFunc's prosupport function to see if we're able to
5611  * make any adjustments to any of the WindowClauses so that the executor
5612  * can execute the window functions in a more optimal way.
5613  *
5614  * Currently we only allow adjustments to the WindowClause's frameOptions. We
5615  * may allow more things to be done here in the future.
5616  */
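 /*
  * For example (illustrative): in
  *		SELECT row_number() OVER (ORDER BY a) FROM tab;
  * row_number() never examines its window frame, so its support function
  * can report a cheaper frame (e.g. ROWS BETWEEN UNBOUNDED PRECEDING AND
  * CURRENT ROW) than the SQL-standard default RANGE frame, letting the
  * executor avoid tracking frame boundaries it does not need.
  */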
5617 static void
5618 optimize_window_clauses(PlannerInfo *root, WindowFuncLists *wflists)
5619 {
5620  List *windowClause = root->parse->windowClause;
5621  ListCell *lc;
5622 
5623  foreach(lc, windowClause)
5624  {
5625  WindowClause *wc = lfirst_node(WindowClause, lc);
5626  ListCell *lc2;
5627  int optimizedFrameOptions = 0;
5628 
5629  Assert(wc->winref <= wflists->maxWinRef);
5630 
5631  /* skip any WindowClauses that have no WindowFuncs */
5632  if (wflists->windowFuncs[wc->winref] == NIL)
5633  continue;
5634 
5635  foreach(lc2, wflists->windowFuncs[wc->winref])
5636  {
5637  SupportRequestOptimizeWindowClause req;
5638  SupportRequestOptimizeWindowClause *res;
5639  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
5640  Oid prosupport;
5641 
5642  prosupport = get_func_support(wfunc->winfnoid);
5643 
5644  /* Check if there's a support function for 'wfunc' */
5645  if (!OidIsValid(prosupport))
5646  break; /* can't optimize this WindowClause */
5647 
5648  req.type = T_SupportRequestOptimizeWindowClause;
5649  req.window_clause = wc;
5650  req.window_func = wfunc;
5651  req.frameOptions = wc->frameOptions;
5652 
5653  /* call the support function */
5654  res = (SupportRequestOptimizeWindowClause *)
5655  DatumGetPointer(OidFunctionCall1(prosupport,
5656  PointerGetDatum(&req)));
5657 
5658  /*
5659  * Skip to next WindowClause if the support function does not
5660  * support this request type.
5661  */
5662  if (res == NULL)
5663  break;
5664 
5665  /*
5666  * Save these frameOptions for the first WindowFunc for this
5667  * WindowClause.
5668  */
5669  if (foreach_current_index(lc2) == 0)
5670  optimizedFrameOptions = res->frameOptions;
5671 
5672  /*
5673  * On subsequent WindowFuncs, if the frameOptions are not the same
5674  * then we're unable to optimize the frameOptions for this
5675  * WindowClause.
5676  */
5677  else if (optimizedFrameOptions != res->frameOptions)
5678  break; /* skip to the next WindowClause, if any */
5679  }
5680 
5681  /* adjust the frameOptions if all WindowFuncs agree that it's OK */
5682  if (lc2 == NULL && wc->frameOptions != optimizedFrameOptions)
5683  {
5684  ListCell *lc3;
5685 
5686  /* apply the new frame options */
5687  wc->frameOptions = optimizedFrameOptions;
5688 
5689  /*
5690  * We now check to see if changing the frameOptions has caused
5691  * this WindowClause to be a duplicate of some other WindowClause.
5692  * This can only happen if we have multiple WindowClauses, so
5693  * don't bother if there's only 1.
5694  */
5695  if (list_length(windowClause) == 1)
5696  continue;
5697 
5698  /*
5699  * Do the duplicate check and reuse the existing WindowClause if
5700  * we find a duplicate.
5701  */
5702  foreach(lc3, windowClause)
5703  {
5704  WindowClause *existing_wc = lfirst_node(WindowClause, lc3);
5705 
5706  /* skip over the WindowClause we're currently editing */
5707  if (existing_wc == wc)
5708  continue;
5709 
5710  /*
5711  * Perform the same duplicate check that is done in
5712  * transformWindowFuncCall.
5713  */
5714  if (equal(wc->partitionClause, existing_wc->partitionClause) &&
5715  equal(wc->orderClause, existing_wc->orderClause) &&
5716  wc->frameOptions == existing_wc->frameOptions &&
5717  equal(wc->startOffset, existing_wc->startOffset) &&
5718  equal(wc->endOffset, existing_wc->endOffset))
5719  {
5720  ListCell *lc4;