PostgreSQL Source Code  git master
planner.c
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_constraint.h"
28 #include "catalog/pg_inherits.h"
29 #include "catalog/pg_proc.h"
30 #include "catalog/pg_type.h"
31 #include "executor/executor.h"
32 #include "executor/nodeAgg.h"
33 #include "foreign/fdwapi.h"
34 #include "jit/jit.h"
35 #include "lib/bipartite_match.h"
36 #include "lib/knapsack.h"
37 #include "miscadmin.h"
38 #include "nodes/makefuncs.h"
39 #include "nodes/nodeFuncs.h"
40 #ifdef OPTIMIZER_DEBUG
41 #include "nodes/print.h"
42 #endif
43 #include "optimizer/appendinfo.h"
44 #include "optimizer/clauses.h"
45 #include "optimizer/cost.h"
46 #include "optimizer/inherit.h"
47 #include "optimizer/optimizer.h"
48 #include "optimizer/paramassign.h"
49 #include "optimizer/pathnode.h"
50 #include "optimizer/paths.h"
51 #include "optimizer/plancat.h"
52 #include "optimizer/planmain.h"
53 #include "optimizer/planner.h"
54 #include "optimizer/prep.h"
55 #include "optimizer/subselect.h"
56 #include "optimizer/tlist.h"
57 #include "parser/analyze.h"
58 #include "parser/parse_agg.h"
59 #include "parser/parsetree.h"
60 #include "partitioning/partdesc.h"
61 #include "rewrite/rewriteManip.h"
62 #include "storage/dsm_impl.h"
63 #include "utils/lsyscache.h"
64 #include "utils/rel.h"
65 #include "utils/selfuncs.h"
66 #include "utils/syscache.h"
67 
68 /* GUC parameters */
69 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
70 int force_parallel_mode = FORCE_PARALLEL_OFF;
71 bool parallel_leader_participation = true;
72 
73 /* Hook for plugins to get control in planner() */
74 planner_hook_type planner_hook = NULL;
75 
76 /* Hook for plugins to get control when grouping_planner() plans upper rels */
77 create_upper_paths_hook_type create_upper_paths_hook = NULL;
78 
79 
80 /* Expression kind codes for preprocess_expression */
81 #define EXPRKIND_QUAL 0
82 #define EXPRKIND_TARGET 1
83 #define EXPRKIND_RTFUNC 2
84 #define EXPRKIND_RTFUNC_LATERAL 3
85 #define EXPRKIND_VALUES 4
86 #define EXPRKIND_VALUES_LATERAL 5
87 #define EXPRKIND_LIMIT 6
88 #define EXPRKIND_APPINFO 7
89 #define EXPRKIND_PHV 8
90 #define EXPRKIND_TABLESAMPLE 9
91 #define EXPRKIND_ARBITER_ELEM 10
92 #define EXPRKIND_TABLEFUNC 11
93 #define EXPRKIND_TABLEFUNC_LATERAL 12
94 
95 /* Passthrough data for standard_qp_callback */
96 typedef struct
97 {
98  List *activeWindows; /* active windows, if any */
99  List *groupClause; /* overrides parse->groupClause */
100 } standard_qp_extra;
101 
102 /*
103  * Data specific to grouping sets
104  */
105 
106 typedef struct
107 {
108  List *rollups;
109  List *hash_sets_idx;
110  double dNumHashGroups;
111  bool any_hashable;
112  Bitmapset *unsortable_refs;
113  Bitmapset *unhashable_refs;
114  List *unsortable_sets;
115  int *tleref_to_colnum_map;
116 } grouping_sets_data;
117 
118 /*
119  * Temporary structure for use during WindowClause reordering in order to be
120  * able to sort WindowClauses on partitioning/ordering prefix.
121  */
122 typedef struct
123 {
124  WindowClause *wc; /* The WindowClause being sorted */
125  List *uniqueOrder; /* A List of unique ordering/partitioning
126  * clauses per Window */
127 } WindowClauseSortData;
128 
129 /* Local functions */
130 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
131 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
132 static void grouping_planner(PlannerInfo *root, double tuple_fraction);
133 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
134 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
135  int *tleref_to_colnum_map);
136 static void preprocess_rowmarks(PlannerInfo *root);
137 static double preprocess_limit(PlannerInfo *root,
138  double tuple_fraction,
139  int64 *offset_est, int64 *count_est);
140 static void remove_useless_groupby_columns(PlannerInfo *root);
141 static List *preprocess_groupclause(PlannerInfo *root, List *force);
142 static List *extract_rollup_sets(List *groupingSets);
143 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
144 static void standard_qp_callback(PlannerInfo *root, void *extra);
145 static double get_number_of_groups(PlannerInfo *root,
146  double path_rows,
147  grouping_sets_data *gd,
148  List *target_list);
149 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
150  RelOptInfo *input_rel,
151  PathTarget *target,
152  bool target_parallel_safe,
153  grouping_sets_data *gd);
154 static bool is_degenerate_grouping(PlannerInfo *root);
155 static void create_degenerate_grouping_paths(PlannerInfo *root,
156  RelOptInfo *input_rel,
157  RelOptInfo *grouped_rel);
158 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
159  PathTarget *target, bool target_parallel_safe,
160  Node *havingQual);
161 static void create_ordinary_grouping_paths(PlannerInfo *root,
162  RelOptInfo *input_rel,
163  RelOptInfo *grouped_rel,
164  const AggClauseCosts *agg_costs,
165  grouping_sets_data *gd,
166  GroupPathExtraData *extra,
167  RelOptInfo **partially_grouped_rel_p);
168 static void consider_groupingsets_paths(PlannerInfo *root,
169  RelOptInfo *grouped_rel,
170  Path *path,
171  bool is_sorted,
172  bool can_hash,
173  grouping_sets_data *gd,
174  const AggClauseCosts *agg_costs,
175  double dNumGroups);
176 static RelOptInfo *create_window_paths(PlannerInfo *root,
177  RelOptInfo *input_rel,
178  PathTarget *input_target,
179  PathTarget *output_target,
180  bool output_target_parallel_safe,
181  WindowFuncLists *wflists,
182  List *activeWindows);
183 static void create_one_window_path(PlannerInfo *root,
184  RelOptInfo *window_rel,
185  Path *path,
186  PathTarget *input_target,
187  PathTarget *output_target,
188  WindowFuncLists *wflists,
189  List *activeWindows);
190 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
191  RelOptInfo *input_rel);
192 static void create_partial_distinct_paths(PlannerInfo *root,
193  RelOptInfo *input_rel,
194  RelOptInfo *final_distinct_rel);
195 static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
196  RelOptInfo *input_rel,
197  RelOptInfo *distinct_rel);
198 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
199  RelOptInfo *input_rel,
200  PathTarget *target,
201  bool target_parallel_safe,
202  double limit_tuples);
203 static PathTarget *make_group_input_target(PlannerInfo *root,
204  PathTarget *final_target);
205 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
206  PathTarget *grouping_target,
207  Node *havingQual);
208 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
209 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
210 static PathTarget *make_window_input_target(PlannerInfo *root,
211  PathTarget *final_target,
212  List *activeWindows);
213 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
214  List *tlist);
215 static PathTarget *make_sort_input_target(PlannerInfo *root,
216  PathTarget *final_target,
217  bool *have_postponed_srfs);
218 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
219  List *targets, List *targets_contain_srfs);
220 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
221  RelOptInfo *grouped_rel,
222  RelOptInfo *partially_grouped_rel,
223  const AggClauseCosts *agg_costs,
224  grouping_sets_data *gd,
225  double dNumGroups,
226  GroupPathExtraData *extra);
227 static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
228  RelOptInfo *grouped_rel,
229  RelOptInfo *input_rel,
230  grouping_sets_data *gd,
231  GroupPathExtraData *extra,
232  bool force_rel_creation);
233 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
234 static bool can_partial_agg(PlannerInfo *root);
235 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
236  RelOptInfo *rel,
237  List *scanjoin_targets,
238  List *scanjoin_targets_contain_srfs,
239  bool scanjoin_target_parallel_safe,
240  bool tlist_same_exprs);
241 static void create_partitionwise_grouping_paths(PlannerInfo *root,
242  RelOptInfo *input_rel,
243  RelOptInfo *grouped_rel,
244  RelOptInfo *partially_grouped_rel,
245  const AggClauseCosts *agg_costs,
246  grouping_sets_data *gd,
247  PartitionwiseAggregateType patype,
248  GroupPathExtraData *extra);
249 static bool group_by_has_partkey(RelOptInfo *input_rel,
250  List *targetList,
251  List *groupClause);
252 static int common_prefix_cmp(const void *a, const void *b);
253 
254 
255 /*****************************************************************************
256  *
257  * Query optimizer entry point
258  *
259  * To support loadable plugins that monitor or modify planner behavior,
260  * we provide a hook variable that lets a plugin get control before and
261  * after the standard planning process. The plugin would normally call
262  * standard_planner().
263  *
264  * Note to plugin authors: standard_planner() scribbles on its Query input,
265  * so you'd better copy that data structure if you want to plan more than once.
266  *
267  *****************************************************************************/
268 PlannedStmt *
269 planner(Query *parse, const char *query_string, int cursorOptions,
270  ParamListInfo boundParams)
271 {
272  PlannedStmt *result;
273 
274  if (planner_hook)
275  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
276  else
277  result = standard_planner(parse, query_string, cursorOptions, boundParams);
278  return result;
279 }
280 
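/*
 * A minimal sketch of how a plugin might use planner_hook, per the comment
 * above.  Everything below is hypothetical extension code (my_planner,
 * prev_planner, PLANNER_HOOK_EXAMPLE); only planner_hook, planner_hook_type,
 * and standard_planner() are the actual API.  Note the copyObject() call:
 * standard_planner scribbles on its Query input, so a plugin that wants to
 * plan more than once must work on a copy.
 */
#ifdef PLANNER_HOOK_EXAMPLE
static planner_hook_type prev_planner = NULL;

static PlannedStmt *
my_planner(Query *parse, const char *query_string, int cursorOptions,
           ParamListInfo boundParams)
{
    Query      *copy = copyObject(parse);   /* pristine copy, in case we re-plan */
    PlannedStmt *stmt;

    /* chain to any previously installed hook, else the standard planner */
    if (prev_planner)
        stmt = prev_planner(parse, query_string, cursorOptions, boundParams);
    else
        stmt = standard_planner(parse, query_string, cursorOptions, boundParams);

    (void) copy;                /* a real plugin could plan "copy" again here */
    return stmt;
}

void
_PG_init(void)
{
    prev_planner = planner_hook;
    planner_hook = my_planner;
}
#endif                          /* PLANNER_HOOK_EXAMPLE */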
281 PlannedStmt *
282 standard_planner(Query *parse, const char *query_string, int cursorOptions,
283  ParamListInfo boundParams)
284 {
285  PlannedStmt *result;
286  PlannerGlobal *glob;
287  double tuple_fraction;
288  PlannerInfo *root;
289  RelOptInfo *final_rel;
290  Path *best_path;
291  Plan *top_plan;
292  ListCell *lp,
293  *lr;
294 
295  /*
296  * Set up global state for this planner invocation. This data is needed
297  * across all levels of sub-Query that might exist in the given command,
298  * so we keep it in a separate struct that's linked to by each per-Query
299  * PlannerInfo.
300  */
301  glob = makeNode(PlannerGlobal);
302 
303  glob->boundParams = boundParams;
304  glob->subplans = NIL;
305  glob->subroots = NIL;
306  glob->rewindPlanIDs = NULL;
307  glob->finalrtable = NIL;
308  glob->finalrowmarks = NIL;
309  glob->resultRelations = NIL;
310  glob->appendRelations = NIL;
311  glob->relationOids = NIL;
312  glob->invalItems = NIL;
313  glob->paramExecTypes = NIL;
314  glob->lastPHId = 0;
315  glob->lastRowMarkId = 0;
316  glob->lastPlanNodeId = 0;
317  glob->transientPlan = false;
318  glob->dependsOnRole = false;
319 
320  /*
321  * Assess whether it's feasible to use parallel mode for this query. We
322  * can't do this in a standalone backend, or if the command will try to
323  * modify any data, or if this is a cursor operation, or if GUCs are set
324  * to values that don't permit parallelism, or if parallel-unsafe
325  * functions are present in the query tree.
326  *
327  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
328  * MATERIALIZED VIEW to use parallel plans, but as of now, only the leader
329  * backend writes into a completely new table. In the future, we can
330  * extend it to allow workers to write into the table. However, to allow
331  * parallel updates and deletes, we have to solve other problems,
332  * especially around combo CIDs.)
333  *
334  * For now, we don't try to use parallel mode if we're running inside a
335  * parallel worker. We might eventually be able to relax this
336  * restriction, but for now it seems best not to have parallel workers
337  * trying to create their own parallel workers.
338  */
339  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
340  IsUnderPostmaster &&
341  parse->commandType == CMD_SELECT &&
342  !parse->hasModifyingCTE &&
343  max_parallel_workers_per_gather > 0 &&
344  !IsParallelWorker())
345  {
346  /* all the cheap tests pass, so scan the query tree */
347  glob->maxParallelHazard = max_parallel_hazard(parse);
348  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
349  }
350  else
351  {
352  /* skip the query tree scan, just assume it's unsafe */
353  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
354  glob->parallelModeOK = false;
355  }
356 
357  /*
358  * glob->parallelModeNeeded is normally set to false here and changed to
359  * true during plan creation if a Gather or Gather Merge plan is actually
360  * created (cf. create_gather_plan, create_gather_merge_plan).
361  *
362  * However, if force_parallel_mode = on or force_parallel_mode = regress,
363  * then we impose parallel mode whenever it's safe to do so, even if the
364  * final plan doesn't use parallelism. It's not safe to do so if the
365  * query contains anything parallel-unsafe; parallelModeOK will be false
366  * in that case. Note that parallelModeOK can't change after this point.
367  * Otherwise, everything in the query is either parallel-safe or
368  * parallel-restricted, and in either case it should be OK to impose
369  * parallel-mode restrictions. If that ends up breaking something, then
370  * either some function the user included in the query is incorrectly
371  * labeled as parallel-safe or parallel-restricted when in reality it's
372  * parallel-unsafe, or else the query planner itself has a bug.
373  */
374  glob->parallelModeNeeded = glob->parallelModeOK &&
375  (force_parallel_mode != FORCE_PARALLEL_OFF);
376 
377  /* Determine what fraction of the plan is likely to be scanned */
378  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
379  {
380  /*
381  * We have no real idea how many tuples the user will ultimately FETCH
382  * from a cursor, but it is often the case that he doesn't want 'em
383  * all, or would prefer a fast-start plan anyway so that he can
384  * process some of the tuples sooner. Use a GUC parameter to decide
385  * what fraction to optimize for.
386  */
387  tuple_fraction = cursor_tuple_fraction;
388 
389  /*
390  * We document cursor_tuple_fraction as simply being a fraction, which
391  * means the edge cases 0 and 1 have to be treated specially here. We
392  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
393  */
394  if (tuple_fraction >= 1.0)
395  tuple_fraction = 0.0;
396  else if (tuple_fraction <= 0.0)
397  tuple_fraction = 1e-10;
398  }
399  else
400  {
401  /* Default assumption is we need all the tuples */
402  tuple_fraction = 0.0;
403  }
404 
405  /* primary planning entry point (may recurse for subqueries) */
406  root = subquery_planner(glob, parse, NULL,
407  false, tuple_fraction);
408 
409  /* Select best Path and turn it into a Plan */
410  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
411  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
412 
413  top_plan = create_plan(root, best_path);
414 
415  /*
416  * If creating a plan for a scrollable cursor, make sure it can run
417  * backwards on demand. Add a Material node at the top at need.
418  */
419  if (cursorOptions & CURSOR_OPT_SCROLL)
420  {
421  if (!ExecSupportsBackwardScan(top_plan))
422  top_plan = materialize_finished_plan(top_plan);
423  }
424 
425  /*
426  * Optionally add a Gather node for testing purposes, provided this is
427  * actually a safe thing to do.
428  */
429  if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
430  {
431  Gather *gather = makeNode(Gather);
432 
433  /*
434  * If there are any initPlans attached to the formerly-top plan node,
435  * move them up to the Gather node; same as we do for Material node in
436  * materialize_finished_plan.
437  */
438  gather->plan.initPlan = top_plan->initPlan;
439  top_plan->initPlan = NIL;
440 
441  gather->plan.targetlist = top_plan->targetlist;
442  gather->plan.qual = NIL;
443  gather->plan.lefttree = top_plan;
444  gather->plan.righttree = NULL;
445  gather->num_workers = 1;
446  gather->single_copy = true;
447  gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
448 
449  /*
450  * Since this Gather has no parallel-aware descendants to signal to,
451  * we don't need a rescan Param.
452  */
453  gather->rescan_param = -1;
454 
455  /*
456  * Ideally we'd use cost_gather here, but setting up dummy path data
457  * to satisfy it doesn't seem much cleaner than knowing what it does.
458  */
459  gather->plan.startup_cost = top_plan->startup_cost +
460  parallel_setup_cost;
461  gather->plan.total_cost = top_plan->total_cost +
462  parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
463  gather->plan.plan_rows = top_plan->plan_rows;
464  gather->plan.plan_width = top_plan->plan_width;
465  gather->plan.parallel_aware = false;
466  gather->plan.parallel_safe = false;
467 
468  /* use parallel mode for parallel plans. */
469  root->glob->parallelModeNeeded = true;
470 
471  top_plan = &gather->plan;
472  }
473 
474  /*
475  * If any Params were generated, run through the plan tree and compute
476  * each plan node's extParam/allParam sets. Ideally we'd merge this into
477  * set_plan_references' tree traversal, but for now it has to be separate
478  * because we need to visit subplans before not after main plan.
479  */
480  if (glob->paramExecTypes != NIL)
481  {
482  Assert(list_length(glob->subplans) == list_length(glob->subroots));
483  forboth(lp, glob->subplans, lr, glob->subroots)
484  {
485  Plan *subplan = (Plan *) lfirst(lp);
486  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
487 
488  SS_finalize_plan(subroot, subplan);
489  }
490  SS_finalize_plan(root, top_plan);
491  }
492 
493  /* final cleanup of the plan */
494  Assert(glob->finalrtable == NIL);
495  Assert(glob->finalrowmarks == NIL);
496  Assert(glob->resultRelations == NIL);
497  Assert(glob->appendRelations == NIL);
498  top_plan = set_plan_references(root, top_plan);
499  /* ... and the subplans (both regular subplans and initplans) */
500  Assert(list_length(glob->subplans) == list_length(glob->subroots));
501  forboth(lp, glob->subplans, lr, glob->subroots)
502  {
503  Plan *subplan = (Plan *) lfirst(lp);
504  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
505 
506  lfirst(lp) = set_plan_references(subroot, subplan);
507  }
508 
509  /* build the PlannedStmt result */
510  result = makeNode(PlannedStmt);
511 
512  result->commandType = parse->commandType;
513  result->queryId = parse->queryId;
514  result->hasReturning = (parse->returningList != NIL);
515  result->hasModifyingCTE = parse->hasModifyingCTE;
516  result->canSetTag = parse->canSetTag;
517  result->transientPlan = glob->transientPlan;
518  result->dependsOnRole = glob->dependsOnRole;
519  result->parallelModeNeeded = glob->parallelModeNeeded;
520  result->planTree = top_plan;
521  result->rtable = glob->finalrtable;
522  result->resultRelations = glob->resultRelations;
523  result->appendRelations = glob->appendRelations;
524  result->subplans = glob->subplans;
525  result->rewindPlanIDs = glob->rewindPlanIDs;
526  result->rowMarks = glob->finalrowmarks;
527  result->relationOids = glob->relationOids;
528  result->invalItems = glob->invalItems;
529  result->paramExecTypes = glob->paramExecTypes;
530  /* utilityStmt should be null, but we might as well copy it */
531  result->utilityStmt = parse->utilityStmt;
532  result->stmt_location = parse->stmt_location;
533  result->stmt_len = parse->stmt_len;
534 
535  result->jitFlags = PGJIT_NONE;
536  if (jit_enabled && jit_above_cost >= 0 &&
537  top_plan->total_cost > jit_above_cost)
538  {
539  result->jitFlags |= PGJIT_PERFORM;
540 
541  /*
542  * Decide how much effort should be put into generating better code.
543  */
544  if (jit_optimize_above_cost >= 0 &&
545  top_plan->total_cost > jit_optimize_above_cost)
546  result->jitFlags |= PGJIT_OPT3;
547  if (jit_inline_above_cost >= 0 &&
548  top_plan->total_cost > jit_inline_above_cost)
549  result->jitFlags |= PGJIT_INLINE;
550 
551  /*
552  * Decide which operations should be JITed.
553  */
554  if (jit_expressions)
555  result->jitFlags |= PGJIT_EXPR;
556  if (jit_tuple_deforming)
557  result->jitFlags |= PGJIT_DEFORM;
558  }
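/*
 * Worked example of the jitFlags decision above, assuming the default GUC
 * settings (jit_above_cost = 100000, jit_optimize_above_cost = 500000,
 * jit_inline_above_cost = 500000, jit_expressions = on,
 * jit_tuple_deforming = on): a plan with total_cost = 200000 ends up with
 * PGJIT_PERFORM | PGJIT_EXPR | PGJIT_DEFORM, while one with
 * total_cost = 600000 additionally gets PGJIT_OPT3 and PGJIT_INLINE.
 */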
559 
560  if (glob->partition_directory != NULL)
561  DestroyPartitionDirectory(glob->partition_directory);
562 
563  return result;
564 }
565 
566 
567 /*--------------------
568  * subquery_planner
569  * Invokes the planner on a subquery. We recurse to here for each
570  * sub-SELECT found in the query tree.
571  *
572  * glob is the global state for the current planner run.
573  * parse is the querytree produced by the parser & rewriter.
574  * parent_root is the immediate parent Query's info (NULL at the top level).
575  * hasRecursion is true if this is a recursive WITH query.
576  * tuple_fraction is the fraction of tuples we expect will be retrieved.
577  * tuple_fraction is interpreted as explained for grouping_planner, below.
578  *
579  * Basically, this routine does the stuff that should only be done once
580  * per Query object. It then calls grouping_planner. At one time,
581  * grouping_planner could be invoked recursively on the same Query object;
582  * that's not currently true, but we keep the separation between the two
583  * routines anyway, in case we need it again someday.
584  *
585  * subquery_planner will be called recursively to handle sub-Query nodes
586  * found within the query's expressions and rangetable.
587  *
588  * Returns the PlannerInfo struct ("root") that contains all data generated
589  * while planning the subquery. In particular, the Path(s) attached to
590  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
591  * cheapest way(s) to implement the query. The top level will select the
592  * best Path and pass it through createplan.c to produce a finished Plan.
593  *--------------------
594  */
595 PlannerInfo *
596 subquery_planner(PlannerGlobal *glob, Query *parse,
597  PlannerInfo *parent_root,
598  bool hasRecursion, double tuple_fraction)
599 {
600  PlannerInfo *root;
601  List *newWithCheckOptions;
602  List *newHaving;
603  bool hasOuterJoins;
604  bool hasResultRTEs;
605  RelOptInfo *final_rel;
606  ListCell *l;
607 
608  /* Create a PlannerInfo data structure for this subquery */
609  root = makeNode(PlannerInfo);
610  root->parse = parse;
611  root->glob = glob;
612  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
613  root->parent_root = parent_root;
614  root->plan_params = NIL;
615  root->outer_params = NULL;
616  root->planner_cxt = CurrentMemoryContext;
617  root->init_plans = NIL;
618  root->cte_plan_ids = NIL;
619  root->multiexpr_params = NIL;
620  root->eq_classes = NIL;
621  root->ec_merging_done = false;
622  root->all_result_relids =
623  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
624  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
625  root->append_rel_list = NIL;
626  root->row_identity_vars = NIL;
627  root->rowMarks = NIL;
628  memset(root->upper_rels, 0, sizeof(root->upper_rels));
629  memset(root->upper_targets, 0, sizeof(root->upper_targets));
630  root->processed_tlist = NIL;
631  root->update_colnos = NIL;
632  root->grouping_map = NULL;
633  root->minmax_aggs = NIL;
634  root->qual_security_level = 0;
635  root->hasPseudoConstantQuals = false;
636  root->hasAlternativeSubPlans = false;
637  root->hasRecursion = hasRecursion;
638  if (hasRecursion)
639  root->wt_param_id = assign_special_exec_param(root);
640  else
641  root->wt_param_id = -1;
642  root->non_recursive_path = NULL;
643  root->partColsUpdated = false;
644 
645  /*
646  * If there is a WITH list, process each WITH query and either convert it
647  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
648  */
649  if (parse->cteList)
650  SS_process_ctes(root);
651 
652  /*
653  * If it's a MERGE command, transform the joinlist as appropriate.
654  */
655  transform_MERGE_to_join(parse);
656 
657  /*
658  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
659  * that we don't need so many special cases to deal with that situation.
660  */
661  replace_empty_jointree(parse);
662 
663  /*
664  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
665  * to transform them into joins. Note that this step does not descend
666  * into subqueries; if we pull up any subqueries below, their SubLinks are
667  * processed just before pulling them up.
668  */
669  if (parse->hasSubLinks)
670  pull_up_sublinks(root);
671 
672  /*
673  * Scan the rangetable for function RTEs, do const-simplification on them,
674  * and then inline them if possible (producing subqueries that might get
675  * pulled up next). Recursion issues here are handled in the same way as
676  * for SubLinks.
677  */
678  preprocess_function_rtes(root);
679 
680  /*
681  * Check to see if any subqueries in the jointree can be merged into this
682  * query.
683  */
684  pull_up_subqueries(root);
685 
686  /*
687  * If this is a simple UNION ALL query, flatten it into an appendrel. We
688  * do this now because it requires applying pull_up_subqueries to the leaf
689  * queries of the UNION ALL, which weren't touched above because they
690  * weren't referenced by the jointree (they will be after we do this).
691  */
692  if (parse->setOperations)
693  flatten_simple_union_all(root);
694 
695  /*
696  * Survey the rangetable to see what kinds of entries are present. We can
697  * skip some later processing if relevant SQL features are not used; for
698  * example if there are no JOIN RTEs we can avoid the expense of doing
699  * flatten_join_alias_vars(). This must be done after we have finished
700  * adding rangetable entries, of course. (Note: actually, processing of
701  * inherited or partitioned rels can cause RTEs for their child tables to
702  * get added later; but those must all be RTE_RELATION entries, so they
703  * don't invalidate the conclusions drawn here.)
704  */
705  root->hasJoinRTEs = false;
706  root->hasLateralRTEs = false;
707  hasOuterJoins = false;
708  hasResultRTEs = false;
709  foreach(l, parse->rtable)
710  {
711  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
712 
713  switch (rte->rtekind)
714  {
715  case RTE_RELATION:
716  if (rte->inh)
717  {
718  /*
719  * Check to see if the relation actually has any children;
720  * if not, clear the inh flag so we can treat it as a
721  * plain base relation.
722  *
723  * Note: this could give a false-positive result, if the
724  * rel once had children but no longer does. We used to
725  * be able to clear rte->inh later on when we discovered
726  * that, but no more; we have to handle such cases as
727  * full-fledged inheritance.
728  */
729  rte->inh = has_subclass(rte->relid);
730  }
731  break;
732  case RTE_JOIN:
733  root->hasJoinRTEs = true;
734  if (IS_OUTER_JOIN(rte->jointype))
735  hasOuterJoins = true;
736  break;
737  case RTE_RESULT:
738  hasResultRTEs = true;
739  break;
740  default:
741  /* No work here for other RTE types */
742  break;
743  }
744 
745  if (rte->lateral)
746  root->hasLateralRTEs = true;
747 
748  /*
749  * We can also determine the maximum security level required for any
750  * securityQuals now. Addition of inheritance-child RTEs won't affect
751  * this, because child tables don't have their own securityQuals; see
752  * expand_single_inheritance_child().
753  */
754  if (rte->securityQuals)
755  root->qual_security_level = Max(root->qual_security_level,
756  list_length(rte->securityQuals));
757  }
758 
759  /*
760  * If we have now verified that the query target relation is
761  * non-inheriting, mark it as a leaf target.
762  */
763  if (parse->resultRelation)
764  {
765  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
766 
767  if (!rte->inh)
768  root->leaf_result_relids =
769  bms_make_singleton(parse->resultRelation);
770  }
771 
772  /*
773  * Preprocess RowMark information. We need to do this after subquery
774  * pullup, so that all base relations are present.
775  */
776  preprocess_rowmarks(root);
777 
778  /*
779  * Set hasHavingQual to remember if HAVING clause is present. Needed
780  * because preprocess_expression will reduce a constant-true condition to
781  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
782  */
783  root->hasHavingQual = (parse->havingQual != NULL);
784 
785  /*
786  * Do expression preprocessing on targetlist and quals, as well as other
787  * random expressions in the querytree. Note that we do not need to
788  * handle sort/group expressions explicitly, because they are actually
789  * part of the targetlist.
790  */
791  parse->targetList = (List *)
792  preprocess_expression(root, (Node *) parse->targetList,
793  EXPRKIND_TARGET);
794 
795  /* Constant-folding might have removed all set-returning functions */
796  if (parse->hasTargetSRFs)
797  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
798 
799  newWithCheckOptions = NIL;
800  foreach(l, parse->withCheckOptions)
801  {
802  WithCheckOption *wco = lfirst_node(WithCheckOption, l);
803 
804  wco->qual = preprocess_expression(root, wco->qual,
805  EXPRKIND_QUAL);
806  if (wco->qual != NULL)
807  newWithCheckOptions = lappend(newWithCheckOptions, wco);
808  }
809  parse->withCheckOptions = newWithCheckOptions;
810 
811  parse->returningList = (List *)
812  preprocess_expression(root, (Node *) parse->returningList,
813  EXPRKIND_TARGET);
814 
815  preprocess_qual_conditions(root, (Node *) parse->jointree);
816 
817  parse->havingQual = preprocess_expression(root, parse->havingQual,
818  EXPRKIND_QUAL);
819 
820  foreach(l, parse->windowClause)
821  {
822  WindowClause *wc = lfirst_node(WindowClause, l);
823 
824  /* partitionClause/orderClause are sort/group expressions */
825  wc->startOffset = preprocess_expression(root, wc->startOffset,
826  EXPRKIND_LIMIT);
827  wc->endOffset = preprocess_expression(root, wc->endOffset,
828  EXPRKIND_LIMIT);
829  }
830 
831  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
832  EXPRKIND_LIMIT);
833  parse->limitCount = preprocess_expression(root, parse->limitCount,
834  EXPRKIND_LIMIT);
835 
836  if (parse->onConflict)
837  {
838  parse->onConflict->arbiterElems = (List *)
839  preprocess_expression(root,
840  (Node *) parse->onConflict->arbiterElems,
841  EXPRKIND_ARBITER_ELEM);
842  parse->onConflict->arbiterWhere =
843  preprocess_expression(root,
844  parse->onConflict->arbiterWhere,
845  EXPRKIND_QUAL);
846  parse->onConflict->onConflictSet = (List *)
847  preprocess_expression(root,
848  (Node *) parse->onConflict->onConflictSet,
849  EXPRKIND_TARGET);
850  parse->onConflict->onConflictWhere =
851  preprocess_expression(root,
852  parse->onConflict->onConflictWhere,
853  EXPRKIND_QUAL);
854  /* exclRelTlist contains only Vars, so no preprocessing needed */
855  }
856 
857  foreach(l, parse->mergeActionList)
858  {
859  MergeAction *action = (MergeAction *) lfirst(l);
860 
861  action->targetList = (List *)
862  preprocess_expression(root,
863  (Node *) action->targetList,
864  EXPRKIND_TARGET);
865  action->qual =
866  preprocess_expression(root,
867  (Node *) action->qual,
868  EXPRKIND_QUAL);
869  }
870 
871  root->append_rel_list = (List *)
872  preprocess_expression(root, (Node *) root->append_rel_list,
873  EXPRKIND_APPINFO);
874 
875  /* Also need to preprocess expressions within RTEs */
876  foreach(l, parse->rtable)
877  {
878  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
879  int kind;
880  ListCell *lcsq;
881 
882  if (rte->rtekind == RTE_RELATION)
883  {
884  if (rte->tablesample)
885  rte->tablesample = (TableSampleClause *)
886  preprocess_expression(root,
887  (Node *) rte->tablesample,
888  EXPRKIND_TABLESAMPLE);
889 
890  else if (rte->rtekind == RTE_SUBQUERY)
891  {
892  /*
893  * We don't want to do all preprocessing yet on the subquery's
894  * expressions, since that will happen when we plan it. But if it
895  * contains any join aliases of our level, those have to get
896  * expanded now, because planning of the subquery won't do it.
897  * That's only possible if the subquery is LATERAL.
898  */
899  if (rte->lateral && root->hasJoinRTEs)
900  rte->subquery = (Query *)
901  flatten_join_alias_vars(root->parse,
902  (Node *) rte->subquery);
903  }
904  else if (rte->rtekind == RTE_FUNCTION)
905  {
906  /* Preprocess the function expression(s) fully */
907  kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
908  rte->functions = (List *)
909  preprocess_expression(root, (Node *) rte->functions, kind);
910  }
911  else if (rte->rtekind == RTE_TABLEFUNC)
912  {
913  /* Preprocess the function expression(s) fully */
914  kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
915  rte->tablefunc = (TableFunc *)
916  preprocess_expression(root, (Node *) rte->tablefunc, kind);
917  }
918  else if (rte->rtekind == RTE_VALUES)
919  {
920  /* Preprocess the values lists fully */
921  kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
922  rte->values_lists = (List *)
923  preprocess_expression(root, (Node *) rte->values_lists, kind);
924  }
925 
926  /*
927  * Process each element of the securityQuals list as if it were a
928  * separate qual expression (as indeed it is). We need to do it this
929  * way to get proper canonicalization of AND/OR structure. Note that
930  * this converts each element into an implicit-AND sublist.
931  */
932  foreach(lcsq, rte->securityQuals)
933  {
934  lfirst(lcsq) = preprocess_expression(root,
935  (Node *) lfirst(lcsq),
936  EXPRKIND_QUAL);
937  }
938  }
939 
940  /*
941  * Now that we are done preprocessing expressions, and in particular done
942  * flattening join alias variables, get rid of the joinaliasvars lists.
943  * They no longer match what expressions in the rest of the tree look
944  * like, because we have not preprocessed expressions in those lists (and
945  * do not want to; for example, expanding a SubLink there would result in
946  * a useless unreferenced subplan). Leaving them in place simply creates
947  * a hazard for later scans of the tree. We could try to prevent that by
948  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
949  * but that doesn't sound very reliable.
950  */
951  if (root->hasJoinRTEs)
952  {
953  foreach(l, parse->rtable)
954  {
955  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
956 
957  rte->joinaliasvars = NIL;
958  }
959  }
960 
961  /*
962  * In some cases we may want to transfer a HAVING clause into WHERE. We
963  * cannot do so if the HAVING clause contains aggregates (obviously) or
964  * volatile functions (since a HAVING clause is supposed to be executed
965  * only once per group). We also can't do this if there are any nonempty
966  * grouping sets; moving such a clause into WHERE would potentially change
967  * the results, if any referenced column isn't present in all the grouping
968  * sets. (If there are only empty grouping sets, then the HAVING clause
969  * must be degenerate as discussed below.)
970  *
971  * Also, it may be that the clause is so expensive to execute that we're
972  * better off doing it only once per group, despite the loss of
973  * selectivity. This is hard to estimate short of doing the entire
974  * planning process twice, so we use a heuristic: clauses containing
975  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
976  * clause into WHERE, in hopes of eliminating tuples before aggregation
977  * instead of after.
978  *
979  * If the query has explicit grouping then we can simply move such a
980  * clause into WHERE; any group that fails the clause will not be in the
981  * output because none of its tuples will reach the grouping or
982  * aggregation stage. Otherwise we must have a degenerate (variable-free)
983  * HAVING clause, which we put in WHERE so that query_planner() can use it
984  * in a gating Result node, but also keep in HAVING to ensure that we
985  * don't emit a bogus aggregated row. (This could be done better, but it
986  * seems not worth optimizing.)
987  *
988  * Note that both havingQual and parse->jointree->quals are in
989  * implicitly-ANDed-list form at this point, even though they are declared
990  * as Node *.
991  */
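/*
 * For example, given the hypothetical query
 *   SELECT a, count(*) FROM t GROUP BY a HAVING a > 10 AND count(*) > 1;
 * the "a > 10" element contains no aggregates, volatile functions, or
 * subplans and the query has a plain GROUP BY, so it is moved into WHERE
 * and filters rows before aggregation, while "count(*) > 1" stays in HAVING.
 */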
992  newHaving = NIL;
993  foreach(l, (List *) parse->havingQual)
994  {
995  Node *havingclause = (Node *) lfirst(l);
996 
997  if ((parse->groupClause && parse->groupingSets) ||
998  contain_agg_clause(havingclause) ||
999  contain_volatile_functions(havingclause) ||
1000  contain_subplans(havingclause))
1001  {
1002  /* keep it in HAVING */
1003  newHaving = lappend(newHaving, havingclause);
1004  }
1005  else if (parse->groupClause && !parse->groupingSets)
1006  {
1007  /* move it to WHERE */
1008  parse->jointree->quals = (Node *)
1009  lappend((List *) parse->jointree->quals, havingclause);
1010  }
1011  else
1012  {
1013  /* put a copy in WHERE, keep it in HAVING */
1014  parse->jointree->quals = (Node *)
1015  lappend((List *) parse->jointree->quals,
1016  copyObject(havingclause));
1017  newHaving = lappend(newHaving, havingclause);
1018  }
1019  }
1020  parse->havingQual = (Node *) newHaving;
1021 
1022  /* Remove any redundant GROUP BY columns */
1023  remove_useless_groupby_columns(root);
1024 
1025  /*
1026  * If we have any outer joins, try to reduce them to plain inner joins.
1027  * This step is most easily done after we've done expression
1028  * preprocessing.
1029  */
1030  if (hasOuterJoins)
1031  reduce_outer_joins(root);
1032 
1033  /*
1034  * If we have any RTE_RESULT relations, see if they can be deleted from
1035  * the jointree. This step is most effectively done after we've done
1036  * expression preprocessing and outer join reduction.
1037  */
1038  if (hasResultRTEs)
1039  remove_useless_result_rtes(root);
1040 
1041  /*
1042  * Do the main planning.
1043  */
1044  grouping_planner(root, tuple_fraction);
1045 
1046  /*
1047  * Capture the set of outer-level param IDs we have access to, for use in
1048  * extParam/allParam calculations later.
1049  */
1050  SS_identify_outer_params(root);
1051 
1052  /*
1053  * If any initPlans were created in this query level, adjust the surviving
1054  * Paths' costs and parallel-safety flags to account for them. The
1055  * initPlans won't actually get attached to the plan tree till
1056  * create_plan() runs, but we must include their effects now.
1057  */
1058  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1059  SS_charge_for_initplans(root, final_rel);
1060 
1061  /*
1062  * Make sure we've identified the cheapest Path for the final rel. (By
1063  * doing this here not in grouping_planner, we include initPlan costs in
1064  * the decision, though it's unlikely that will change anything.)
1065  */
1066  set_cheapest(final_rel);
1067 
1068  return root;
1069 }
1070 
1071 /*
1072  * preprocess_expression
1073  * Do subquery_planner's preprocessing work for an expression,
1074  * which can be a targetlist, a WHERE clause (including JOIN/ON
1075  * conditions), a HAVING clause, or a few other things.
1076  */
1077 static Node *
1078 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1079 {
1080  /*
1081  * Fall out quickly if expression is empty. This occurs often enough to
1082  * be worth checking. Note that null->null is the correct conversion for
1083  * implicit-AND result format, too.
1084  */
1085  if (expr == NULL)
1086  return NULL;
1087 
1088  /*
1089  * If the query has any join RTEs, replace join alias variables with
1090  * base-relation variables. We must do this first, since any expressions
1091  * we may extract from the joinaliasvars lists have not been preprocessed.
1092  * For example, if we did this after sublink processing, sublinks expanded
1093  * out from join aliases would not get processed. But we can skip this in
1094  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1095  * they can't contain any Vars of the current query level.
1096  */
1097  if (root->hasJoinRTEs &&
1098  !(kind == EXPRKIND_RTFUNC ||
1099  kind == EXPRKIND_VALUES ||
1100  kind == EXPRKIND_TABLESAMPLE ||
1101  kind == EXPRKIND_TABLEFUNC))
1102  expr = flatten_join_alias_vars(root->parse, expr);
1103 
1104  /*
1105  * Simplify constant expressions. For function RTEs, this was already
1106  * done by preprocess_function_rtes. (But note we must do it again for
1107  * EXPRKIND_RTFUNC_LATERAL, because those might by now contain
1108  * un-simplified subexpressions inserted by flattening of subqueries or
1109  * join alias variables.)
1110  *
1111  * Note: an essential effect of this is to convert named-argument function
1112  * calls to positional notation and insert the current actual values of
1113  * any default arguments for functions. To ensure that happens, we *must*
1114  * process all expressions here. Previous PG versions sometimes skipped
1115  * const-simplification if it didn't seem worth the trouble, but we can't
1116  * do that anymore.
1117  *
1118  * Note: this also flattens nested AND and OR expressions into N-argument
1119  * form. All processing of a qual expression after this point must be
1120  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1121  * with AND directly under AND, nor OR directly under OR.
1122  */
1123  if (kind != EXPRKIND_RTFUNC)
1124  expr = eval_const_expressions(root, expr);
1125 
1126  /*
1127  * If it's a qual or havingQual, canonicalize it.
1128  */
1129  if (kind == EXPRKIND_QUAL)
1130  {
1131  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1132 
1133 #ifdef OPTIMIZER_DEBUG
1134  printf("After canonicalize_qual()\n");
1135  pprint(expr);
1136 #endif
1137  }
1138 
1139  /*
1140  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1141  * hashfuncid of any that might execute more quickly by using hash lookups
1142  * instead of a linear search.
1143  */
1144  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1145  {
1146  convert_saop_to_hashed_saop(expr);
1147  }
1148 
1149  /* Expand SubLinks to SubPlans */
1150  if (root->parse->hasSubLinks)
1151  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1152 
1153  /*
1154  * XXX do not insert anything here unless you have grokked the comments in
1155  * SS_replace_correlation_vars ...
1156  */
1157 
1158  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1159  if (root->query_level > 1)
1160  expr = SS_replace_correlation_vars(root, expr);
1161 
1162  /*
1163  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1164  * don't want to do this before eval_const_expressions, since the latter
1165  * would be unable to simplify a top-level AND correctly. Also,
1166  * SS_process_sublinks expects explicit-AND format.)
1167  */
1168  if (kind == EXPRKIND_QUAL)
1169  expr = (Node *) make_ands_implicit((Expr *) expr);
1170 
1171  return expr;
1172 }
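/*
 * As an illustration of the qual transformations above (hypothetical input):
 * a WHERE clause parsed as "a = 1 AND (b = 2 AND (c = 3 OR d = 4))" is first
 * flattened by eval_const_expressions() into a single N-argument AND, and
 * make_ands_implicit() then turns it into the implicit-AND List
 * {a = 1, b = 2, (c = 3 OR d = 4)}, which is the form the rest of the
 * planner expects for EXPRKIND_QUAL expressions.
 */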
1173 
1174 /*
1175  * preprocess_qual_conditions
1176  * Recursively scan the query's jointree and do subquery_planner's
1177  * preprocessing work on each qual condition found therein.
1178  */
1179 static void
1180 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1181 {
1182  if (jtnode == NULL)
1183  return;
1184  if (IsA(jtnode, RangeTblRef))
1185  {
1186  /* nothing to do here */
1187  }
1188  else if (IsA(jtnode, FromExpr))
1189  {
1190  FromExpr *f = (FromExpr *) jtnode;
1191  ListCell *l;
1192 
1193  foreach(l, f->fromlist)
1194  preprocess_qual_conditions(root, lfirst(l));
1195 
1196  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1197  }
1198  else if (IsA(jtnode, JoinExpr))
1199  {
1200  JoinExpr *j = (JoinExpr *) jtnode;
1201 
1202  preprocess_qual_conditions(root, j->larg);
1203  preprocess_qual_conditions(root, j->rarg);
1204 
1205  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1206  }
1207  else
1208  elog(ERROR, "unrecognized node type: %d",
1209  (int) nodeTag(jtnode));
1210 }
1211 
1212 /*
1213  * preprocess_phv_expression
1214  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1215  *
1216  * If a LATERAL subquery references an output of another subquery, and that
1217  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1218  * join, then we'll push the PlaceHolderVar expression down into the subquery
1219  * and later pull it back up during find_lateral_references, which runs after
1220  * subquery_planner has preprocessed all the expressions that were in the
1221  * current query level to start with. So we need to preprocess it then.
1222  */
1223 Expr *
1224 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1225 {
1226  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1227 }
1228 
1229 /*--------------------
1230  * grouping_planner
1231  * Perform planning steps related to grouping, aggregation, etc.
1232  *
1233  * This function adds all required top-level processing to the scan/join
1234  * Path(s) produced by query_planner.
1235  *
1236  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1237  * tuple_fraction is interpreted as follows:
1238  * 0: expect all tuples to be retrieved (normal case)
1239  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1240  * from the plan to be retrieved
1241  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1242  * expected to be retrieved (ie, a LIMIT specification)
1243  *
1244  * Returns nothing; the useful output is in the Paths we attach to the
1245  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1246  * root->processed_tlist contains the final processed targetlist.
1247  *
1248  * Note that we have not done set_cheapest() on the final rel; it's convenient
1249  * to leave this to the caller.
1250  *--------------------
1251  */
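/*
 * Concretely: tuple_fraction = 0 means cost the plan for retrieving the
 * whole result, tuple_fraction = 0.1 means the caller expects to fetch
 * roughly the first 10% of the rows (the typical cursor case), and
 * tuple_fraction = 100 means roughly the first 100 rows (the LIMIT case).
 */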
1252 static void
1253 grouping_planner(PlannerInfo *root, double tuple_fraction)
1254 {
1255  Query *parse = root->parse;
1256  int64 offset_est = 0;
1257  int64 count_est = 0;
1258  double limit_tuples = -1.0;
1259  bool have_postponed_srfs = false;
1260  PathTarget *final_target;
1261  List *final_targets;
1262  List *final_targets_contain_srfs;
1263  bool final_target_parallel_safe;
1264  RelOptInfo *current_rel;
1265  RelOptInfo *final_rel;
1266  FinalPathExtraData extra;
1267  ListCell *lc;
1268 
1269  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1270  if (parse->limitCount || parse->limitOffset)
1271  {
1272  tuple_fraction = preprocess_limit(root, tuple_fraction,
1273  &offset_est, &count_est);
1274 
1275  /*
1276  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1277  * estimate the effects of using a bounded sort.
1278  */
1279  if (count_est > 0 && offset_est >= 0)
1280  limit_tuples = (double) count_est + (double) offset_est;
1281  }
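	/*
	 * Worked example (hypothetical query): for "... LIMIT 20 OFFSET 10" with
	 * constant arguments, preprocess_limit() reports count_est = 20 and
	 * offset_est = 10, so limit_tuples becomes 30.0 and tuple_fraction is
	 * raised to an absolute 30; later sort costing can then assume a bounded
	 * sort producing only about 30 rows.
	 */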
1282 
1283  /* Make tuple_fraction accessible to lower-level routines */
1284  root->tuple_fraction = tuple_fraction;
1285 
1286  if (parse->setOperations)
1287  {
1288  /*
1289  * If there's a top-level ORDER BY, assume we have to fetch all the
1290  * tuples. This might be too simplistic given all the hackery below
1291  * to possibly avoid the sort; but the odds of accurate estimates here
1292  * are pretty low anyway. XXX try to get rid of this in favor of
1293  * letting plan_set_operations generate both fast-start and
1294  * cheapest-total paths.
1295  */
1296  if (parse->sortClause)
1297  root->tuple_fraction = 0.0;
1298 
1299  /*
1300  * Construct Paths for set operations. The results will not need any
1301  * work except perhaps a top-level sort and/or LIMIT. Note that any
1302  * special work for recursive unions is the responsibility of
1303  * plan_set_operations.
1304  */
1305  current_rel = plan_set_operations(root);
1306 
1307  /*
1308  * We should not need to call preprocess_targetlist, since we must be
1309  * in a SELECT query node. Instead, use the processed_tlist returned
1310  * by plan_set_operations (since this tells whether it returned any
1311  * resjunk columns!), and transfer any sort key information from the
1312  * original tlist.
1313  */
1314  Assert(parse->commandType == CMD_SELECT);
1315 
1316  /* for safety, copy processed_tlist instead of modifying in-place */
1317  root->processed_tlist =
1318  postprocess_setop_tlist(copyObject(root->processed_tlist),
1319  parse->targetList);
1320 
1321  /* Also extract the PathTarget form of the setop result tlist */
1322  final_target = current_rel->cheapest_total_path->pathtarget;
1323 
1324  /* And check whether it's parallel safe */
1325  final_target_parallel_safe =
1326  is_parallel_safe(root, (Node *) final_target->exprs);
1327 
1328  /* The setop result tlist couldn't contain any SRFs */
1329  Assert(!parse->hasTargetSRFs);
1330  final_targets = final_targets_contain_srfs = NIL;
1331 
1332  /*
1333  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1334  * checked already, but let's make sure).
1335  */
1336  if (parse->rowMarks)
1337  ereport(ERROR,
1338  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1339  /*------
1340  translator: %s is a SQL row locking clause such as FOR UPDATE */
1341  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1342  LCS_asString(linitial_node(RowMarkClause,
1343  parse->rowMarks)->strength))));
1344 
1345  /*
1346  * Calculate pathkeys that represent result ordering requirements
1347  */
1348  Assert(parse->distinctClause == NIL);
1349  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1350  parse->sortClause,
1351  root->processed_tlist);
1352  }
1353  else
1354  {
1355  /* No set operations, do regular planning */
1356  PathTarget *sort_input_target;
1357  List *sort_input_targets;
1358  List *sort_input_targets_contain_srfs;
1359  bool sort_input_target_parallel_safe;
1360  PathTarget *grouping_target;
1361  List *grouping_targets;
1362  List *grouping_targets_contain_srfs;
1363  bool grouping_target_parallel_safe;
1364  PathTarget *scanjoin_target;
1365  List *scanjoin_targets;
1366  List *scanjoin_targets_contain_srfs;
1367  bool scanjoin_target_parallel_safe;
1368  bool scanjoin_target_same_exprs;
1369  bool have_grouping;
1370  WindowFuncLists *wflists = NULL;
1371  List *activeWindows = NIL;
1372  grouping_sets_data *gset_data = NULL;
1373  standard_qp_extra qp_extra;
1374 
1375  /* A recursive query should always have setOperations */
1376  Assert(!root->hasRecursion);
1377 
1378  /* Preprocess grouping sets and GROUP BY clause, if any */
1379  if (parse->groupingSets)
1380  {
1381  gset_data = preprocess_grouping_sets(root);
1382  }
1383  else
1384  {
1385  /* Preprocess regular GROUP BY clause, if any */
1386  if (parse->groupClause)
1387  parse->groupClause = preprocess_groupclause(root, NIL);
1388  }
1389 
1390  /*
1391  * Preprocess targetlist. Note that much of the remaining planning
1392  * work will be done with the PathTarget representation of tlists, but
1393  * we must also maintain the full representation of the final tlist so
1394  * that we can transfer its decoration (resnames etc) to the topmost
1395  * tlist of the finished Plan. This is kept in processed_tlist.
1396  */
1397  preprocess_targetlist(root);
1398 
1399  /*
1400  * Mark all the aggregates with resolved aggtranstypes, and detect
1401  * aggregates that are duplicates or can share transition state. We
1402  * must do this before slicing and dicing the tlist into various
1403  * pathtargets, else some copies of the Aggref nodes might escape
1404  * being marked.
1405  */
1406  if (parse->hasAggs)
1407  {
1408  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1409  preprocess_aggrefs(root, (Node *) parse->havingQual);
1410  }
1411 
1412  /*
1413  * Locate any window functions in the tlist. (We don't need to look
1414  * anywhere else, since expressions used in ORDER BY will be in there
1415  * too.) Note that they could all have been eliminated by constant
1416  * folding, in which case we don't need to do any more work.
1417  */
1418  if (parse->hasWindowFuncs)
1419  {
1420  wflists = find_window_functions((Node *) root->processed_tlist,
1421  list_length(parse->windowClause));
1422  if (wflists->numWindowFuncs > 0)
1423  activeWindows = select_active_windows(root, wflists);
1424  else
1425  parse->hasWindowFuncs = false;
1426  }
1427 
1428  /*
1429  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1430  * adding logic between here and the query_planner() call. Anything
1431  * that is needed in MIN/MAX-optimizable cases will have to be
1432  * duplicated in planagg.c.
1433  */
1434  if (parse->hasAggs)
1435  preprocess_minmax_aggregates(root);
1436 
1437  /*
1438  * Figure out whether there's a hard limit on the number of rows that
1439  * query_planner's result subplan needs to return. Even if we know a
1440  * hard limit overall, it doesn't apply if the query has any
1441  * grouping/aggregation operations, or SRFs in the tlist.
1442  */
1443  if (parse->groupClause ||
1444  parse->groupingSets ||
1445  parse->distinctClause ||
1446  parse->hasAggs ||
1447  parse->hasWindowFuncs ||
1448  parse->hasTargetSRFs ||
1449  root->hasHavingQual)
1450  root->limit_tuples = -1.0;
1451  else
1452  root->limit_tuples = limit_tuples;
1453 
1454  /* Set up data needed by standard_qp_callback */
1455  qp_extra.activeWindows = activeWindows;
1456  qp_extra.groupClause = (gset_data
1457  ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
1458  : parse->groupClause);
1459 
1460  /*
1461  * Generate the best unsorted and presorted paths for the scan/join
1462  * portion of this Query, ie the processing represented by the
1463  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1464  * We also generate (in standard_qp_callback) pathkey representations
1465  * of the query's sort clause, distinct clause, etc.
1466  */
1467  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1468 
1469  /*
1470  * Convert the query's result tlist into PathTarget format.
1471  *
1472  * Note: this cannot be done before query_planner() has performed
1473  * appendrel expansion, because that might add resjunk entries to
1474  * root->processed_tlist. Waiting till afterwards is also helpful
1475  * because the target width estimates can use per-Var width numbers
1476  * that were obtained within query_planner().
1477  */
1478  final_target = create_pathtarget(root, root->processed_tlist);
1479  final_target_parallel_safe =
1480  is_parallel_safe(root, (Node *) final_target->exprs);
1481 
1482  /*
1483  * If ORDER BY was given, consider whether we should use a post-sort
1484  * projection, and compute the adjusted target for preceding steps if
1485  * so.
1486  */
1487  if (parse->sortClause)
1488  {
1489  sort_input_target = make_sort_input_target(root,
1490  final_target,
1491  &have_postponed_srfs);
1492  sort_input_target_parallel_safe =
1493  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1494  }
1495  else
1496  {
1497  sort_input_target = final_target;
1498  sort_input_target_parallel_safe = final_target_parallel_safe;
1499  }
1500 
1501  /*
1502  * If we have window functions to deal with, the output from any
1503  * grouping step needs to be what the window functions want;
1504  * otherwise, it should be sort_input_target.
1505  */
1506  if (activeWindows)
1507  {
1508  grouping_target = make_window_input_target(root,
1509  final_target,
1510  activeWindows);
1511  grouping_target_parallel_safe =
1512  is_parallel_safe(root, (Node *) grouping_target->exprs);
1513  }
1514  else
1515  {
1516  grouping_target = sort_input_target;
1517  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1518  }
1519 
1520  /*
1521  * If we have grouping or aggregation to do, the topmost scan/join
1522  * plan node must emit what the grouping step wants; otherwise, it
1523  * should emit grouping_target.
1524  */
1525  have_grouping = (parse->groupClause || parse->groupingSets ||
1526  parse->hasAggs || root->hasHavingQual);
1527  if (have_grouping)
1528  {
1529  scanjoin_target = make_group_input_target(root, final_target);
1530  scanjoin_target_parallel_safe =
1531  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1532  }
1533  else
1534  {
1535  scanjoin_target = grouping_target;
1536  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1537  }
1538 
1539  /*
1540  * If there are any SRFs in the targetlist, we must separate each of
1541  * these PathTargets into SRF-computing and SRF-free targets. Replace
1542  * each of the named targets with a SRF-free version, and remember the
1543  * list of additional projection steps we need to add afterwards.
1544  */
1545  if (parse->hasTargetSRFs)
1546  {
1547  /* final_target doesn't recompute any SRFs in sort_input_target */
1548  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1549  &final_targets,
1550  &final_targets_contain_srfs);
1551  final_target = linitial_node(PathTarget, final_targets);
1552  Assert(!linitial_int(final_targets_contain_srfs));
1553  /* likewise for sort_input_target vs. grouping_target */
1554  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1555  &sort_input_targets,
1556  &sort_input_targets_contain_srfs);
1557  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1558  Assert(!linitial_int(sort_input_targets_contain_srfs));
1559  /* likewise for grouping_target vs. scanjoin_target */
1560  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1561  &grouping_targets,
1562  &grouping_targets_contain_srfs);
1563  grouping_target = linitial_node(PathTarget, grouping_targets);
1564  Assert(!linitial_int(grouping_targets_contain_srfs));
1565  /* scanjoin_target will not have any SRFs precomputed for it */
1566  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1567  &scanjoin_targets,
1568  &scanjoin_targets_contain_srfs);
1569  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1570  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1571  }
1572  else
1573  {
1574  /* initialize lists; for most of these, dummy values are OK */
1575  final_targets = final_targets_contain_srfs = NIL;
1576  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1577  grouping_targets = grouping_targets_contain_srfs = NIL;
1578  scanjoin_targets = list_make1(scanjoin_target);
1579  scanjoin_targets_contain_srfs = NIL;
1580  }
1581 
1582  /* Apply scan/join target. */
1583  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1584  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1585  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1586  scanjoin_targets_contain_srfs,
1587  scanjoin_target_parallel_safe,
1588  scanjoin_target_same_exprs);
1589 
1590  /*
1591  * Save the various upper-rel PathTargets we just computed into
1592  * root->upper_targets[]. The core code doesn't use this, but it
1593  * provides a convenient place for extensions to get at the info. For
1594  * consistency, we save all the intermediate targets, even though some
1595  * of the corresponding upperrels might not be needed for this query.
1596  */
1597  root->upper_targets[UPPERREL_FINAL] = final_target;
1598  root->upper_targets[UPPERREL_ORDERED] = final_target;
1599  root->upper_targets[UPPERREL_PARTIAL_DISTINCT] = sort_input_target;
1600  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1601  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1602  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1603 
1604  /*
1605  * If we have grouping and/or aggregation, consider ways to implement
1606  * that. We build a new upperrel representing the output of this
1607  * phase.
1608  */
1609  if (have_grouping)
1610  {
1611  current_rel = create_grouping_paths(root,
1612  current_rel,
1613  grouping_target,
1614  grouping_target_parallel_safe,
1615  gset_data);
1616  /* Fix things up if grouping_target contains SRFs */
1617  if (parse->hasTargetSRFs)
1618  adjust_paths_for_srfs(root, current_rel,
1619  grouping_targets,
1620  grouping_targets_contain_srfs);
1621  }
1622 
1623  /*
1624  * If we have window functions, consider ways to implement those. We
1625  * build a new upperrel representing the output of this phase.
1626  */
1627  if (activeWindows)
1628  {
1629  current_rel = create_window_paths(root,
1630  current_rel,
1631  grouping_target,
1632  sort_input_target,
1633  sort_input_target_parallel_safe,
1634  wflists,
1635  activeWindows);
1636  /* Fix things up if sort_input_target contains SRFs */
1637  if (parse->hasTargetSRFs)
1638  adjust_paths_for_srfs(root, current_rel,
1639  sort_input_targets,
1640  sort_input_targets_contain_srfs);
1641  }
1642 
1643  /*
1644  * If there is a DISTINCT clause, consider ways to implement that. We
1645  * build a new upperrel representing the output of this phase.
1646  */
1647  if (parse->distinctClause)
1648  {
1649  current_rel = create_distinct_paths(root,
1650  current_rel);
1651  }
1652  } /* end of if (setOperations) */
1653 
1654  /*
1655  * If ORDER BY was given, consider ways to implement that, and generate a
1656  * new upperrel containing only paths that emit the correct ordering and
1657  * project the correct final_target. We can apply the original
1658  * limit_tuples limit in sort costing here, but only if there are no
1659  * postponed SRFs.
1660  */
1661  if (parse->sortClause)
1662  {
1663  current_rel = create_ordered_paths(root,
1664  current_rel,
1665  final_target,
1666  final_target_parallel_safe,
1667  have_postponed_srfs ? -1.0 :
1668  limit_tuples);
1669  /* Fix things up if final_target contains SRFs */
1670  if (parse->hasTargetSRFs)
1671  adjust_paths_for_srfs(root, current_rel,
1672  final_targets,
1673  final_targets_contain_srfs);
1674  }
1675 
1676  /*
1677  * Now we are prepared to build the final-output upperrel.
1678  */
1679  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1680 
1681  /*
1682  * If the input rel is marked consider_parallel and there's nothing that's
1683  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1684  * consider_parallel as well. Note that if the query has rowMarks or is
1685  * not a SELECT, consider_parallel will be false for every relation in the
1686  * query.
1687  */
1688  if (current_rel->consider_parallel &&
1689  is_parallel_safe(root, parse->limitOffset) &&
1690  is_parallel_safe(root, parse->limitCount))
1691  final_rel->consider_parallel = true;
1692 
1693  /*
1694  * If the current_rel belongs to a single FDW, so does the final_rel.
1695  */
1696  final_rel->serverid = current_rel->serverid;
1697  final_rel->userid = current_rel->userid;
1698  final_rel->useridiscurrent = current_rel->useridiscurrent;
1699  final_rel->fdwroutine = current_rel->fdwroutine;
1700 
1701  /*
1702  * Generate paths for the final_rel. Insert all surviving paths, with
1703  * LockRows, Limit, and/or ModifyTable steps added if needed.
1704  */
1705  foreach(lc, current_rel->pathlist)
1706  {
1707  Path *path = (Path *) lfirst(lc);
1708 
1709  /*
1710  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1711  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1712  * here. If there are only non-locking rowmarks, they should be
1713  * handled by the ModifyTable node instead. However, root->rowMarks
1714  * is what goes into the LockRows node.)
1715  */
1716  if (parse->rowMarks)
1717  {
1718  path = (Path *) create_lockrows_path(root, final_rel, path,
1719  root->rowMarks,
1720  assign_special_exec_param(root));
1721  }
1722 
1723  /*
1724  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1725  */
1726  if (limit_needed(parse))
1727  {
1728  path = (Path *) create_limit_path(root, final_rel, path,
1729  parse->limitOffset,
1730  parse->limitCount,
1731  parse->limitOption,
1732  offset_est, count_est);
1733  }
1734 
1735  /*
1736  * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node.
1737  */
1738  if (parse->commandType != CMD_SELECT)
1739  {
1740  Index rootRelation;
1741  List *resultRelations = NIL;
1742  List *updateColnosLists = NIL;
1743  List *withCheckOptionLists = NIL;
1744  List *returningLists = NIL;
1745  List *mergeActionLists = NIL;
1746  List *rowMarks;
1747 
1748  if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1749  {
1750  /* Inherited UPDATE/DELETE */
1751  RelOptInfo *top_result_rel = find_base_rel(root,
1752  parse->resultRelation);
1753  int resultRelation = -1;
1754 
1755  /* Add only leaf children to ModifyTable. */
1756  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1757  resultRelation)) >= 0)
1758  {
1759  RelOptInfo *this_result_rel = find_base_rel(root,
1760  resultRelation);
1761 
1762  /*
1763  * Also exclude any leaf rels that have turned dummy since
1764  * being added to the list, for example, by being excluded
1765  * by constraint exclusion.
1766  */
1767  if (IS_DUMMY_REL(this_result_rel))
1768  continue;
1769 
1770  /* Build per-target-rel lists needed by ModifyTable */
1771  resultRelations = lappend_int(resultRelations,
1772  resultRelation);
1773  if (parse->commandType == CMD_UPDATE)
1774  {
1775  List *update_colnos = root->update_colnos;
1776 
1777  if (this_result_rel != top_result_rel)
1778  update_colnos =
1779  adjust_inherited_attnums_multilevel(root,
1780  update_colnos,
1781  this_result_rel->relid,
1782  top_result_rel->relid);
1783  updateColnosLists = lappend(updateColnosLists,
1784  update_colnos);
1785  }
1786  if (parse->withCheckOptions)
1787  {
1788  List *withCheckOptions = parse->withCheckOptions;
1789 
1790  if (this_result_rel != top_result_rel)
1791  withCheckOptions = (List *)
1792  adjust_appendrel_attrs_multilevel(root,
1793  (Node *) withCheckOptions,
1794  this_result_rel->relids,
1795  top_result_rel->relids);
1796  withCheckOptionLists = lappend(withCheckOptionLists,
1797  withCheckOptions);
1798  }
1799  if (parse->returningList)
1800  {
1801  List *returningList = parse->returningList;
1802 
1803  if (this_result_rel != top_result_rel)
1804  returningList = (List *)
1805  adjust_appendrel_attrs_multilevel(root,
1806  (Node *) returningList,
1807  this_result_rel->relids,
1808  top_result_rel->relids);
1809  returningLists = lappend(returningLists,
1810  returningList);
1811  }
1812  if (parse->mergeActionList)
1813  {
1814  ListCell *l;
1815  List *mergeActionList = NIL;
1816 
1817  /*
1818  * Copy MergeActions and translate stuff that
1819  * references attribute numbers.
1820  */
1821  foreach(l, parse->mergeActionList)
1822  {
1823  MergeAction *action = lfirst(l),
1824  *leaf_action = copyObject(action);
1825 
1826  leaf_action->qual =
1827  adjust_appendrel_attrs_multilevel(root,
1828  (Node *) action->qual,
1829  this_result_rel->relids,
1830  top_result_rel->relids);
1831  leaf_action->targetList = (List *)
1832  adjust_appendrel_attrs_multilevel(root,
1833  (Node *) action->targetList,
1834  this_result_rel->relids,
1835  top_result_rel->relids);
1836  if (leaf_action->commandType == CMD_UPDATE)
1837  leaf_action->updateColnos =
1838  adjust_inherited_attnums_multilevel(root,
1839  action->updateColnos,
1840  this_result_rel->relid,
1841  top_result_rel->relid);
1842  mergeActionList = lappend(mergeActionList,
1843  leaf_action);
1844  }
1845 
1846  mergeActionLists = lappend(mergeActionLists,
1847  mergeActionList);
1848  }
1849  }
1850 
1851  if (resultRelations == NIL)
1852  {
1853  /*
1854  * We managed to exclude every child rel, so generate a
1855  * dummy one-relation plan using info for the top target
1856  * rel (even though that may not be a leaf target).
1857  * Although it's clear that no data will be updated or
1858  * deleted, we still need to have a ModifyTable node so
1859  * that any statement triggers will be executed. (This
1860  * could be cleaner if we fixed nodeModifyTable.c to allow
1861  * zero target relations, but that probably wouldn't be a
1862  * net win.)
1863  */
1864  resultRelations = list_make1_int(parse->resultRelation);
1865  if (parse->commandType == CMD_UPDATE)
1866  updateColnosLists = list_make1(root->update_colnos);
1867  if (parse->withCheckOptions)
1868  withCheckOptionLists = list_make1(parse->withCheckOptions);
1869  if (parse->returningList)
1870  returningLists = list_make1(parse->returningList);
1871  if (parse->mergeActionList)
1872  mergeActionLists = list_make1(parse->mergeActionList);
1873  }
1874  }
1875  else
1876  {
1877  /* Single-relation INSERT/UPDATE/DELETE. */
1878  resultRelations = list_make1_int(parse->resultRelation);
1879  if (parse->commandType == CMD_UPDATE)
1880  updateColnosLists = list_make1(root->update_colnos);
1881  if (parse->withCheckOptions)
1882  withCheckOptionLists = list_make1(parse->withCheckOptions);
1883  if (parse->returningList)
1884  returningLists = list_make1(parse->returningList);
1885  if (parse->mergeActionList)
1886  mergeActionLists = list_make1(parse->mergeActionList);
1887  }
1888 
1889  /*
1890  * If target is a partition root table, we need to mark the
1891  * ModifyTable node appropriately for that.
1892  */
1893  if (rt_fetch(parse->resultRelation, parse->rtable)->relkind ==
1894  RELKIND_PARTITIONED_TABLE)
1895  rootRelation = parse->resultRelation;
1896  else
1897  rootRelation = 0;
1898 
1899  /*
1900  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1901  * will have dealt with fetching non-locked marked rows, else we
1902  * need to have ModifyTable do that.
1903  */
1904  if (parse->rowMarks)
1905  rowMarks = NIL;
1906  else
1907  rowMarks = root->rowMarks;
1908 
1909  path = (Path *)
1910  create_modifytable_path(root, final_rel,
1911  path,
1912  parse->commandType,
1913  parse->canSetTag,
1914  parse->resultRelation,
1915  rootRelation,
1916  root->partColsUpdated,
1917  resultRelations,
1918  updateColnosLists,
1919  withCheckOptionLists,
1920  returningLists,
1921  rowMarks,
1922  parse->onConflict,
1923  mergeActionLists,
1924  assign_special_exec_param(root));
1925  }
1926 
1927  /* And shove it into final_rel */
1928  add_path(final_rel, path);
1929  }
1930 
1931  /*
1932  * Generate partial paths for final_rel, too, if outer query levels might
1933  * be able to make use of them.
1934  */
1935  if (final_rel->consider_parallel && root->query_level > 1 &&
1936  !limit_needed(parse))
1937  {
1938  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
1939  foreach(lc, current_rel->partial_pathlist)
1940  {
1941  Path *partial_path = (Path *) lfirst(lc);
1942 
1943  add_partial_path(final_rel, partial_path);
1944  }
1945  }
1946 
1947  extra.limit_needed = limit_needed(parse);
1948  extra.limit_tuples = limit_tuples;
1949  extra.count_est = count_est;
1950  extra.offset_est = offset_est;
1951 
1952  /*
1953  * If there is an FDW that's responsible for all baserels of the query,
1954  * let it consider adding ForeignPaths.
1955  */
1956  if (final_rel->fdwroutine &&
1957  final_rel->fdwroutine->GetForeignUpperPaths)
1958  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
1959  current_rel, final_rel,
1960  &extra);
1961 
1962  /* Let extensions possibly add some more paths */
1963  if (create_upper_paths_hook)
1964  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
1965  current_rel, final_rel, &extra);
1966 
1967  /* Note: currently, we leave it to callers to do set_cheapest() */
1968 }
1969 
1970 /*
1971  * Do preprocessing for groupingSets clause and related data. This handles the
1972  * preliminary steps of expanding the grouping sets, organizing them into lists
1973  * of rollups, and preparing annotations which will later be filled in with
1974  * size estimates.
1975  */
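/*
 * Illustrative example (editor's note, not part of planner.c): a query such
 * as
 *
 *     SELECT a, b, count(*) FROM t GROUP BY CUBE (a, b);
 *
 * reaches this function with the CUBE still unexpanded; expand_grouping_sets()
 * turns it into the four explicit sets (a,b), (a), (b), (), which the code
 * below then hands to extract_rollup_sets() and reorder_grouping_sets() so
 * they can be covered by as few sort passes as possible.
 */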
1976 static grouping_sets_data *
1977 preprocess_grouping_sets(PlannerInfo *root)
1978 {
1979  Query *parse = root->parse;
1980  List *sets;
1981  int maxref = 0;
1982  ListCell *lc;
1983  ListCell *lc_set;
1984  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
1985 
1986  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
1987 
1988  gd->any_hashable = false;
1989  gd->unhashable_refs = NULL;
1990  gd->unsortable_refs = NULL;
1991  gd->unsortable_sets = NIL;
1992 
1993  if (parse->groupClause)
1994  {
1995  ListCell *lc;
1996 
1997  foreach(lc, parse->groupClause)
1998  {
1999  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2000  Index ref = gc->tleSortGroupRef;
2001 
2002  if (ref > maxref)
2003  maxref = ref;
2004 
2005  if (!gc->hashable)
2006  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2007 
2008  if (!OidIsValid(gc->sortop))
2009  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2010  }
2011  }
2012 
2013  /* Allocate workspace array for remapping */
2014  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2015 
2016  /*
2017  * If we have any unsortable sets, we must extract them before trying to
2018  * prepare rollups. Unsortable sets don't go through
2019  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2020  * here.
2021  */
2022  if (!bms_is_empty(gd->unsortable_refs))
2023  {
2024  List *sortable_sets = NIL;
2025 
2026  foreach(lc, parse->groupingSets)
2027  {
2028  List *gset = (List *) lfirst(lc);
2029 
2030  if (bms_overlap_list(gd->unsortable_refs, gset))
2031  {
2032  GroupingSetData *gs = makeNode(GroupingSetData);
2033 
2034  gs->set = gset;
2035  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2036 
2037  /*
2038  * We must enforce here that an unsortable set is hashable;
2039  * later code assumes this. Parse analysis only checks that
2040  * every individual column is either hashable or sortable.
2041  *
2042  * Note that passing this test doesn't guarantee we can
2043  * generate a plan; there might be other showstoppers.
2044  */
2045  if (bms_overlap_list(gd->unhashable_refs, gset))
2046  ereport(ERROR,
2047  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2048  errmsg("could not implement GROUP BY"),
2049  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2050  }
2051  else
2052  sortable_sets = lappend(sortable_sets, gset);
2053  }
2054 
2055  if (sortable_sets)
2056  sets = extract_rollup_sets(sortable_sets);
2057  else
2058  sets = NIL;
2059  }
2060  else
2061  sets = extract_rollup_sets(parse->groupingSets);
2062 
2063  foreach(lc_set, sets)
2064  {
2065  List *current_sets = (List *) lfirst(lc_set);
2066  RollupData *rollup = makeNode(RollupData);
2067  GroupingSetData *gs;
2068 
2069  /*
2070  * Reorder the current list of grouping sets into correct prefix
2071  * order. If only one aggregation pass is needed, try to make the
2072  * list match the ORDER BY clause; if more than one pass is needed, we
2073  * don't bother with that.
2074  *
2075  * Note that this reorders the sets from smallest-member-first to
2076  * largest-member-first, and applies the GroupingSetData annotations,
2077  * though the data will be filled in later.
2078  */
2079  current_sets = reorder_grouping_sets(current_sets,
2080  (list_length(sets) == 1
2081  ? parse->sortClause
2082  : NIL));
2083 
2084  /*
2085  * Get the initial (and therefore largest) grouping set.
2086  */
2087  gs = linitial_node(GroupingSetData, current_sets);
2088 
2089  /*
2090  * Order the groupClause appropriately. If the first grouping set is
2091  * empty, then the groupClause must also be empty; otherwise we have
2092  * to force the groupClause to match that grouping set's order.
2093  *
2094  * (The first grouping set can be empty even though parse->groupClause
2095  * is not empty only if all non-empty grouping sets are unsortable.
2096  * The groupClauses for hashed grouping sets are built later on.)
2097  */
2098  if (gs->set)
2099  rollup->groupClause = preprocess_groupclause(root, gs->set);
2100  else
2101  rollup->groupClause = NIL;
2102 
2103  /*
2104  * Is it hashable? We pretend empty sets are hashable even though we
2105  * actually force them not to be hashed later. But don't bother if
2106  * there's nothing but empty sets (since in that case we can't hash
2107  * anything).
2108  */
2109  if (gs->set &&
2110  !bms_overlap_list(gd->unhashable_refs, gs->set))
2111  {
2112  rollup->hashable = true;
2113  gd->any_hashable = true;
2114  }
2115 
2116  /*
2117  * Now that we've pinned down an order for the groupClause for this
2118  * list of grouping sets, we need to remap the entries in the grouping
2119  * sets from sortgrouprefs to plain indices (0-based) into the
2120  * groupClause for this collection of grouping sets. We keep the
2121  * original form for later use, though.
2122  */
2123  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2124  current_sets,
2125  gd->tleref_to_colnum_map);
2126  rollup->gsets_data = current_sets;
2127 
2128  gd->rollups = lappend(gd->rollups, rollup);
2129  }
2130 
2131  if (gd->unsortable_sets)
2132  {
2133  /*
2134  * We have not yet pinned down a groupclause for this, but we will
2135  * need index-based lists for estimation purposes. Construct
2136  * hash_sets_idx based on the entire original groupclause for now.
2137  */
2138  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2139  gd->unsortable_sets,
2140  gd->tleref_to_colnum_map);
2141  gd->any_hashable = true;
2142  }
2143 
2144  return gd;
2145 }
2146 
2147 /*
2148  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2149  * (without annotation) mapped to indexes into the given groupclause.
2150  */
2151 static List *
2152 remap_to_groupclause_idx(List *groupClause,
2153  List *gsets,
2154  int *tleref_to_colnum_map)
2155 {
2156  int ref = 0;
2157  List *result = NIL;
2158  ListCell *lc;
2159 
2160  foreach(lc, groupClause)
2161  {
2162  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2163 
2164  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2165  }
2166 
2167  foreach(lc, gsets)
2168  {
2169  List *set = NIL;
2170  ListCell *lc2;
2171  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2172 
2173  foreach(lc2, gs->set)
2174  {
2175  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2176  }
2177 
2178  result = lappend(result, set);
2179  }
2180 
2181  return result;
2182 }
2183 
2184 
2185 /*
2186  * preprocess_rowmarks - set up PlanRowMarks if needed
2187  */
2188 static void
2189 preprocess_rowmarks(PlannerInfo *root)
2190 {
2191  Query *parse = root->parse;
2192  Bitmapset *rels;
2193  List *prowmarks;
2194  ListCell *l;
2195  int i;
2196 
2197  if (parse->rowMarks)
2198  {
2199  /*
2200  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2201  * grouping, since grouping renders a reference to individual tuple
2202  * CTIDs invalid. This is also checked at parse time, but that's
2203  * insufficient because of rule substitution, query pullup, etc.
2204  */
2205  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2206  parse->rowMarks)->strength);
2207  }
2208  else
2209  {
2210  /*
2211  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2212  * UPDATE/SHARE.
2213  */
2214  if (parse->commandType != CMD_UPDATE &&
2215  parse->commandType != CMD_DELETE)
2216  return;
2217  }
2218 
2219  /*
2220  * We need to have rowmarks for all base relations except the target. We
2221  * make a bitmapset of all base rels and then remove the items we don't
2222  * need or have FOR [KEY] UPDATE/SHARE marks for.
2223  */
2224  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2225  if (parse->resultRelation)
2226  rels = bms_del_member(rels, parse->resultRelation);
2227 
2228  /*
2229  * Convert RowMarkClauses to PlanRowMark representation.
2230  */
2231  prowmarks = NIL;
2232  foreach(l, parse->rowMarks)
2233  {
2234  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2235  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2236  PlanRowMark *newrc;
2237 
2238  /*
2239  * Currently, it is syntactically impossible to have FOR UPDATE et al
2240  * applied to an update/delete target rel. If that ever becomes
2241  * possible, we should drop the target from the PlanRowMark list.
2242  */
2243  Assert(rc->rti != parse->resultRelation);
2244 
2245  /*
2246  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2247  * can't support true locking. Subqueries that got flattened into the
2248  * main query should be ignored completely. Any that didn't will get
2249  * ROW_MARK_COPY items in the next loop.
2250  */
2251  if (rte->rtekind != RTE_RELATION)
2252  continue;
2253 
2254  rels = bms_del_member(rels, rc->rti);
2255 
2256  newrc = makeNode(PlanRowMark);
2257  newrc->rti = newrc->prti = rc->rti;
2258  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2259  newrc->markType = select_rowmark_type(rte, rc->strength);
2260  newrc->allMarkTypes = (1 << newrc->markType);
2261  newrc->strength = rc->strength;
2262  newrc->waitPolicy = rc->waitPolicy;
2263  newrc->isParent = false;
2264 
2265  prowmarks = lappend(prowmarks, newrc);
2266  }
2267 
2268  /*
2269  * Now, add rowmarks for any non-target, non-locked base relations.
2270  */
2271  i = 0;
2272  foreach(l, parse->rtable)
2273  {
2274  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2275  PlanRowMark *newrc;
2276 
2277  i++;
2278  if (!bms_is_member(i, rels))
2279  continue;
2280 
2281  newrc = makeNode(PlanRowMark);
2282  newrc->rti = newrc->prti = i;
2283  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2284  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2285  newrc->allMarkTypes = (1 << newrc->markType);
2286  newrc->strength = LCS_NONE;
2287  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2288  newrc->isParent = false;
2289 
2290  prowmarks = lappend(prowmarks, newrc);
2291  }
2292 
2293  root->rowMarks = prowmarks;
2294 }
2295 
2296 /*
2297  * Select RowMarkType to use for a given table
2298  */
2299 RowMarkType
2300 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2301 {
2302  if (rte->rtekind != RTE_RELATION)
2303  {
2304  /* If it's not a table at all, use ROW_MARK_COPY */
2305  return ROW_MARK_COPY;
2306  }
2307  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2308  {
2309  /* Let the FDW select the rowmark type, if it wants to */
2310  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2311 
2312  if (fdwroutine->GetForeignRowMarkType != NULL)
2313  return fdwroutine->GetForeignRowMarkType(rte, strength);
2314  /* Otherwise, use ROW_MARK_COPY by default */
2315  return ROW_MARK_COPY;
2316  }
2317  else
2318  {
2319  /* Regular table, apply the appropriate lock type */
2320  switch (strength)
2321  {
2322  case LCS_NONE:
2323 
2324  /*
2325  * We don't need a tuple lock, only the ability to re-fetch
2326  * the row.
2327  */
2328  return ROW_MARK_REFERENCE;
2329  break;
2330  case LCS_FORKEYSHARE:
2331  return ROW_MARK_KEYSHARE;
2332  break;
2333  case LCS_FORSHARE:
2334  return ROW_MARK_SHARE;
2335  break;
2336  case LCS_FORNOKEYUPDATE:
2337  return ROW_MARK_NOKEYEXCLUSIVE;
2338  break;
2339  case LCS_FORUPDATE:
2340  return ROW_MARK_EXCLUSIVE;
2341  break;
2342  }
2343  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2344  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2345  }
2346 }
2347 
2348 /*
2349  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2350  *
2351  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2352  * results back in *count_est and *offset_est. These variables are set to
2353  * 0 if the corresponding clause is not present, and -1 if it's present
2354  * but we couldn't estimate the value for it. (The "0" convention is OK
2355  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2356  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2357  * usual practice of never estimating less than one row.) These values will
2358  * be passed to create_limit_path, which see if you change this code.
2359  *
2360  * The return value is the suitably adjusted tuple_fraction to use for
2361  * planning the query. This adjustment is not overridable, since it reflects
2362  * plan actions that grouping_planner() will certainly take, not assumptions
2363  * about context.
2364  */
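/*
 * Worked example (editor's note, not part of planner.c), assuming the caller
 * passed in tuple_fraction = 0 (the usual case):
 *
 *     SELECT * FROM t ORDER BY x LIMIT 20 OFFSET 10;
 *
 * Both clauses are constants, so *count_est = 20 and *offset_est = 10, and
 * the function returns 30, an absolute row-count target since it is >= 1.0.
 * With "LIMIT $1" instead, *count_est becomes -1 and the 10% fallback
 * fraction is used.
 */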
2365 static double
2366 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2367  int64 *offset_est, int64 *count_est)
2368 {
2369  Query *parse = root->parse;
2370  Node *est;
2371  double limit_fraction;
2372 
2373  /* Should not be called unless LIMIT or OFFSET */
2374  Assert(parse->limitCount || parse->limitOffset);
2375 
2376  /*
2377  * Try to obtain the clause values. We use estimate_expression_value
2378  * primarily because it can sometimes do something useful with Params.
2379  */
2380  if (parse->limitCount)
2381  {
2382  est = estimate_expression_value(root, parse->limitCount);
2383  if (est && IsA(est, Const))
2384  {
2385  if (((Const *) est)->constisnull)
2386  {
2387  /* NULL indicates LIMIT ALL, ie, no limit */
2388  *count_est = 0; /* treat as not present */
2389  }
2390  else
2391  {
2392  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2393  if (*count_est <= 0)
2394  *count_est = 1; /* force to at least 1 */
2395  }
2396  }
2397  else
2398  *count_est = -1; /* can't estimate */
2399  }
2400  else
2401  *count_est = 0; /* not present */
2402 
2403  if (parse->limitOffset)
2404  {
2405  est = estimate_expression_value(root, parse->limitOffset);
2406  if (est && IsA(est, Const))
2407  {
2408  if (((Const *) est)->constisnull)
2409  {
2410  /* Treat NULL as no offset; the executor will too */
2411  *offset_est = 0; /* treat as not present */
2412  }
2413  else
2414  {
2415  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2416  if (*offset_est < 0)
2417  *offset_est = 0; /* treat as not present */
2418  }
2419  }
2420  else
2421  *offset_est = -1; /* can't estimate */
2422  }
2423  else
2424  *offset_est = 0; /* not present */
2425 
2426  if (*count_est != 0)
2427  {
2428  /*
2429  * A LIMIT clause limits the absolute number of tuples returned.
2430  * However, if it's not a constant LIMIT then we have to guess; for
2431  * lack of a better idea, assume 10% of the plan's result is wanted.
2432  */
2433  if (*count_est < 0 || *offset_est < 0)
2434  {
2435  /* LIMIT or OFFSET is an expression ... punt ... */
2436  limit_fraction = 0.10;
2437  }
2438  else
2439  {
2440  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2441  limit_fraction = (double) *count_est + (double) *offset_est;
2442  }
2443 
2444  /*
2445  * If we have absolute limits from both caller and LIMIT, use the
2446  * smaller value; likewise if they are both fractional. If one is
2447  * fractional and the other absolute, we can't easily determine which
2448  * is smaller, but we use the heuristic that the absolute will usually
2449  * be smaller.
2450  */
2451  if (tuple_fraction >= 1.0)
2452  {
2453  if (limit_fraction >= 1.0)
2454  {
2455  /* both absolute */
2456  tuple_fraction = Min(tuple_fraction, limit_fraction);
2457  }
2458  else
2459  {
2460  /* caller absolute, limit fractional; use caller's value */
2461  }
2462  }
2463  else if (tuple_fraction > 0.0)
2464  {
2465  if (limit_fraction >= 1.0)
2466  {
2467  /* caller fractional, limit absolute; use limit */
2468  tuple_fraction = limit_fraction;
2469  }
2470  else
2471  {
2472  /* both fractional */
2473  tuple_fraction = Min(tuple_fraction, limit_fraction);
2474  }
2475  }
2476  else
2477  {
2478  /* no info from caller, just use limit */
2479  tuple_fraction = limit_fraction;
2480  }
2481  }
2482  else if (*offset_est != 0 && tuple_fraction > 0.0)
2483  {
2484  /*
2485  * We have an OFFSET but no LIMIT. This acts entirely differently
2486  * from the LIMIT case: here, we need to increase rather than decrease
2487  * the caller's tuple_fraction, because the OFFSET acts to cause more
2488  * tuples to be fetched instead of fewer. This only matters if we got
2489  * a tuple_fraction > 0, however.
2490  *
2491  * As above, use 10% if OFFSET is present but unestimatable.
2492  */
2493  if (*offset_est < 0)
2494  limit_fraction = 0.10;
2495  else
2496  limit_fraction = (double) *offset_est;
2497 
2498  /*
2499  * If we have absolute counts from both caller and OFFSET, add them
2500  * together; likewise if they are both fractional. If one is
2501  * fractional and the other absolute, we want to take the larger, and
2502  * we heuristically assume that's the fractional one.
2503  */
2504  if (tuple_fraction >= 1.0)
2505  {
2506  if (limit_fraction >= 1.0)
2507  {
2508  /* both absolute, so add them together */
2509  tuple_fraction += limit_fraction;
2510  }
2511  else
2512  {
2513  /* caller absolute, limit fractional; use limit */
2514  tuple_fraction = limit_fraction;
2515  }
2516  }
2517  else
2518  {
2519  if (limit_fraction >= 1.0)
2520  {
2521  /* caller fractional, limit absolute; use caller's value */
2522  }
2523  else
2524  {
2525  /* both fractional, so add them together */
2526  tuple_fraction += limit_fraction;
2527  if (tuple_fraction >= 1.0)
2528  tuple_fraction = 0.0; /* assume fetch all */
2529  }
2530  }
2531  }
2532 
2533  return tuple_fraction;
2534 }
2535 
2536 /*
2537  * limit_needed - do we actually need a Limit plan node?
2538  *
2539  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2540  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2541  * locution for an optimization fence. (Because other places in the planner
2542  * merely check whether parse->limitOffset isn't NULL, it will still work as
2543  * an optimization fence --- we're just suppressing unnecessary run-time
2544  * overhead.)
2545  *
2546  * This might look like it could be merged into preprocess_limit, but there's
2547  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2548  * in preprocess_limit it's good enough to consider estimated values.
2549  */
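/*
 * Example (editor's note, not part of planner.c):
 *
 *     SELECT * FROM (SELECT * FROM t OFFSET 0) AS sub WHERE ...;
 *
 * The constant OFFSET 0 still acts as an optimization fence for the subquery,
 * but limit_needed() returns false for it, so no Limit node is added at run
 * time.  "LIMIT 0", "OFFSET 1", or any non-constant expression would make it
 * return true.
 */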
2550 bool
2551 limit_needed(Query *parse)
2552 {
2553  Node *node;
2554 
2555  node = parse->limitCount;
2556  if (node)
2557  {
2558  if (IsA(node, Const))
2559  {
2560  /* NULL indicates LIMIT ALL, ie, no limit */
2561  if (!((Const *) node)->constisnull)
2562  return true; /* LIMIT with a constant value */
2563  }
2564  else
2565  return true; /* non-constant LIMIT */
2566  }
2567 
2568  node = parse->limitOffset;
2569  if (node)
2570  {
2571  if (IsA(node, Const))
2572  {
2573  /* Treat NULL as no offset; the executor would too */
2574  if (!((Const *) node)->constisnull)
2575  {
2576  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2577 
2578  if (offset != 0)
2579  return true; /* OFFSET with a nonzero value */
2580  }
2581  }
2582  else
2583  return true; /* non-constant OFFSET */
2584  }
2585 
2586  return false; /* don't need a Limit plan node */
2587 }
2588 
2589 
2590 /*
2591  * remove_useless_groupby_columns
2592  * Remove any columns in the GROUP BY clause that are redundant due to
2593  * being functionally dependent on other GROUP BY columns.
2594  *
2595  * Since some other DBMSes do not allow references to ungrouped columns, it's
2596  * not unusual to find all columns listed in GROUP BY even though listing the
2597  * primary-key columns would be sufficient. Deleting such excess columns
2598  * avoids redundant sorting work, so it's worth doing.
2599  *
2600  * Relcache invalidations will ensure that cached plans become invalidated
2601  * when the underlying index of the pkey constraint is dropped.
2602  *
2603  * Currently, we only make use of pkey constraints for this, however, we may
2604  * wish to take this further in the future and also use unique constraints
2605  * which have NOT NULL columns. In that case, plan invalidation will still
2606  * work since relations will receive a relcache invalidation when a NOT NULL
2607  * constraint is dropped.
2608  */
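/*
 * Example (editor's note, not part of planner.c): given
 *
 *     CREATE TABLE orders (id int PRIMARY KEY, customer text, total numeric);
 *     SELECT id, customer, sum(total) FROM orders GROUP BY id, customer;
 *
 * the "customer" item is functionally dependent on the primary key "id", so
 * it is removed here and the query is planned as if it said GROUP BY id.
 */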
2609 static void
2610 remove_useless_groupby_columns(PlannerInfo *root)
2611 {
2612  Query *parse = root->parse;
2613  Bitmapset **groupbyattnos;
2614  Bitmapset **surplusvars;
2615  ListCell *lc;
2616  int relid;
2617 
2618  /* No chance to do anything if there are less than two GROUP BY items */
2619  if (list_length(parse->groupClause) < 2)
2620  return;
2621 
2622  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2623  if (parse->groupingSets)
2624  return;
2625 
2626  /*
2627  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2628  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2629  * that are GROUP BY items.
2630  */
2631  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2632  (list_length(parse->rtable) + 1));
2633  foreach(lc, parse->groupClause)
2634  {
2635  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2636  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2637  Var *var = (Var *) tle->expr;
2638 
2639  /*
2640  * Ignore non-Vars and Vars from other query levels.
2641  *
2642  * XXX in principle, stable expressions containing Vars could also be
2643  * removed, if all the Vars are functionally dependent on other GROUP
2644  * BY items. But it's not clear that such cases occur often enough to
2645  * be worth troubling over.
2646  */
2647  if (!IsA(var, Var) ||
2648  var->varlevelsup > 0)
2649  continue;
2650 
2651  /* OK, remember we have this Var */
2652  relid = var->varno;
2653  Assert(relid <= list_length(parse->rtable));
2654  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2655  var->varattno - FirstLowInvalidHeapAttributeNumber);
2656  }
2657 
2658  /*
2659  * Consider each relation and see if it is possible to remove some of its
2660  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2661  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2662  * of the column attnos of RTE k that are removable GROUP BY items.
2663  */
2664  surplusvars = NULL; /* don't allocate array unless required */
2665  relid = 0;
2666  foreach(lc, parse->rtable)
2667  {
2668  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2669  Bitmapset *relattnos;
2670  Bitmapset *pkattnos;
2671  Oid constraintOid;
2672 
2673  relid++;
2674 
2675  /* Only plain relations could have primary-key constraints */
2676  if (rte->rtekind != RTE_RELATION)
2677  continue;
2678 
2679  /*
2680  * We must skip inheritance parent tables as some of the child rels
2681  * may cause duplicate rows. This cannot happen with partitioned
2682  * tables, however.
2683  */
2684  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2685  continue;
2686 
2687  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2688  relattnos = groupbyattnos[relid];
2689  if (bms_membership(relattnos) != BMS_MULTIPLE)
2690  continue;
2691 
2692  /*
2693  * Can't remove any columns for this rel if there is no suitable
2694  * (i.e., nondeferrable) primary key constraint.
2695  */
2696  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2697  if (pkattnos == NULL)
2698  continue;
2699 
2700  /*
2701  * If the primary key is a proper subset of relattnos then we have
2702  * some items in the GROUP BY that can be removed.
2703  */
2704  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2705  {
2706  /*
2707  * To easily remember whether we've found anything to do, we don't
2708  * allocate the surplusvars[] array until we find something.
2709  */
2710  if (surplusvars == NULL)
2711  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2712  (list_length(parse->rtable) + 1));
2713 
2714  /* Remember the attnos of the removable columns */
2715  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2716  }
2717  }
2718 
2719  /*
2720  * If we found any surplus Vars, build a new GROUP BY clause without them.
2721  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2722  * markings, but that's harmless.)
2723  */
2724  if (surplusvars != NULL)
2725  {
2726  List *new_groupby = NIL;
2727 
2728  foreach(lc, parse->groupClause)
2729  {
2730  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2731  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2732  Var *var = (Var *) tle->expr;
2733 
2734  /*
2735  * New list must include non-Vars, outer Vars, and anything not
2736  * marked as surplus.
2737  */
2738  if (!IsA(var, Var) ||
2739  var->varlevelsup > 0 ||
2740  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2741  surplusvars[var->varno]))
2742  new_groupby = lappend(new_groupby, sgc);
2743  }
2744 
2745  parse->groupClause = new_groupby;
2746  }
2747 }
2748 
2749 /*
2750  * preprocess_groupclause - do preparatory work on GROUP BY clause
2751  *
2752  * The idea here is to adjust the ordering of the GROUP BY elements
2753  * (which in itself is semantically insignificant) to match ORDER BY,
2754  * thereby allowing a single sort operation to both implement the ORDER BY
2755  * requirement and set up for a Unique step that implements GROUP BY.
2756  *
2757  * In principle it might be interesting to consider other orderings of the
2758  * GROUP BY elements, which could match the sort ordering of other
2759  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2760  * bother with that, though. Hashed grouping will frequently win anyway.
2761  *
2762  * Note: we need no comparable processing of the distinctClause because
2763  * the parser already enforced that that matches ORDER BY.
2764  *
2765  * For grouping sets, the order of items is instead forced to agree with that
2766  * of the grouping set (and items not in the grouping set are skipped). The
2767  * work of sorting the order of grouping set elements to match the ORDER BY if
2768  * possible is done elsewhere.
2769  */
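/*
 * Example (editor's note, not part of planner.c):
 *
 *     SELECT a, b, count(*) FROM t GROUP BY b, a ORDER BY a, b;
 *
 * The GROUP BY list is rearranged to (a, b) so that one sort on (a, b) can
 * feed both the grouping step and the ORDER BY.  If the two lists could not
 * be made prefixes of each other (say, GROUP BY b, c with the same ORDER BY),
 * the original GROUP BY order would be kept unchanged.
 */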
2770 static List *
2771 preprocess_groupclause(PlannerInfo *root, List *force)
2772 {
2773  Query *parse = root->parse;
2774  List *new_groupclause = NIL;
2775  bool partial_match;
2776  ListCell *sl;
2777  ListCell *gl;
2778 
2779  /* For grouping sets, we need to force the ordering */
2780  if (force)
2781  {
2782  foreach(sl, force)
2783  {
2784  Index ref = lfirst_int(sl);
2785  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2786 
2787  new_groupclause = lappend(new_groupclause, cl);
2788  }
2789 
2790  return new_groupclause;
2791  }
2792 
2793  /* If no ORDER BY, nothing useful to do here */
2794  if (parse->sortClause == NIL)
2795  return parse->groupClause;
2796 
2797  /*
2798  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2799  * items, but only as far as we can make a matching prefix.
2800  *
2801  * This code assumes that the sortClause contains no duplicate items.
2802  */
2803  foreach(sl, parse->sortClause)
2804  {
2805  SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2806 
2807  foreach(gl, parse->groupClause)
2808  {
2809  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2810 
2811  if (equal(gc, sc))
2812  {
2813  new_groupclause = lappend(new_groupclause, gc);
2814  break;
2815  }
2816  }
2817  if (gl == NULL)
2818  break; /* no match, so stop scanning */
2819  }
2820 
2821  /* Did we match all of the ORDER BY list, or just some of it? */
2822  partial_match = (sl != NULL);
2823 
2824  /* If no match at all, no point in reordering GROUP BY */
2825  if (new_groupclause == NIL)
2826  return parse->groupClause;
2827 
2828  /*
2829  * Add any remaining GROUP BY items to the new list, but only if we were
2830  * able to make a complete match. In other words, we only rearrange the
2831  * GROUP BY list if the result is that one list is a prefix of the other
2832  * --- otherwise there's no possibility of a common sort. Also, give up
2833  * if there are any non-sortable GROUP BY items, since then there's no
2834  * hope anyway.
2835  */
2836  foreach(gl, parse->groupClause)
2837  {
2838  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2839 
2840  if (list_member_ptr(new_groupclause, gc))
2841  continue; /* it matched an ORDER BY item */
2842  if (partial_match)
2843  return parse->groupClause; /* give up, no common sort possible */
2844  if (!OidIsValid(gc->sortop))
2845  return parse->groupClause; /* give up, GROUP BY can't be sorted */
2846  new_groupclause = lappend(new_groupclause, gc);
2847  }
2848 
2849  /* Success --- install the rearranged GROUP BY list */
2850  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2851  return new_groupclause;
2852 }
2853 
2854 /*
2855  * Extract lists of grouping sets that can be implemented using a single
2856  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2857  *
2858  * Input must be sorted with smallest sets first. Result has each sublist
2859  * sorted with smallest sets first.
2860  *
2861  * We want to produce the absolute minimum possible number of lists here to
2862  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2863  * of finding the minimal partition of a partially-ordered set into chains
2864  * (which is what we need, taking the list of grouping sets as a poset ordered
2865  * by set inclusion) can be mapped to the problem of finding the maximum
2866  * cardinality matching on a bipartite graph, which is solvable in polynomial
2867  * time with a worst case of no worse than O(n^2.5) and usually much
2868  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2869  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2870  * half a second on my modest system even with optimization off and assertions
2871  * on.)
2872  */
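/*
 * Example (editor's note, not part of planner.c): for
 *
 *     GROUP BY GROUPING SETS ((a), (c), (a,b), (a,b,c))
 *
 * the sets form a partial order under set inclusion whose largest antichain
 * is {(a), (c)}, so the minimum chain cover has two chains and the result is
 * roughly [ [(a), (a,b), (a,b,c)], [(c)] ]: one three-level rollup pass plus
 * one single-level pass.
 */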
2873 static List *
2874 extract_rollup_sets(List *groupingSets)
2875 {
2876  int num_sets_raw = list_length(groupingSets);
2877  int num_empty = 0;
2878  int num_sets = 0; /* distinct sets */
2879  int num_chains = 0;
2880  List *result = NIL;
2881  List **results;
2882  List **orig_sets;
2883  Bitmapset **set_masks;
2884  int *chains;
2885  short **adjacency;
2886  short *adjacency_buf;
2887  BipartiteMatchState *state;
2888  int i;
2889  int j;
2890  int j_size;
2891  ListCell *lc1 = list_head(groupingSets);
2892  ListCell *lc;
2893 
2894  /*
2895  * Start by stripping out empty sets. The algorithm doesn't require this,
2896  * but the planner currently needs all empty sets to be returned in the
2897  * first list, so we strip them here and add them back after.
2898  */
2899  while (lc1 && lfirst(lc1) == NIL)
2900  {
2901  ++num_empty;
2902  lc1 = lnext(groupingSets, lc1);
2903  }
2904 
2905  /* bail out now if it turns out that all we had were empty sets. */
2906  if (!lc1)
2907  return list_make1(groupingSets);
2908 
2909  /*----------
2910  * We don't strictly need to remove duplicate sets here, but if we don't,
2911  * they tend to become scattered through the result, which is a bit
2912  * confusing (and irritating if we ever decide to optimize them out).
2913  * So we remove them here and add them back after.
2914  *
2915  * For each non-duplicate set, we fill in the following:
2916  *
2917  * orig_sets[i] = list of the original set lists
2918  * set_masks[i] = bitmapset for testing inclusion
2919  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2920  *
2921  * chains[i] will be the result group this set is assigned to.
2922  *
2923  * We index all of these from 1 rather than 0 because it is convenient
2924  * to leave 0 free for the NIL node in the graph algorithm.
2925  *----------
2926  */
2927  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2928  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2929  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2930  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2931 
2932  j_size = 0;
2933  j = 0;
2934  i = 1;
2935 
2936  for_each_cell(lc, groupingSets, lc1)
2937  {
2938  List *candidate = (List *) lfirst(lc);
2939  Bitmapset *candidate_set = NULL;
2940  ListCell *lc2;
2941  int dup_of = 0;
2942 
2943  foreach(lc2, candidate)
2944  {
2945  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2946  }
2947 
2948  /* we can only be a dup if we're the same length as a previous set */
2949  if (j_size == list_length(candidate))
2950  {
2951  int k;
2952 
2953  for (k = j; k < i; ++k)
2954  {
2955  if (bms_equal(set_masks[k], candidate_set))
2956  {
2957  dup_of = k;
2958  break;
2959  }
2960  }
2961  }
2962  else if (j_size < list_length(candidate))
2963  {
2964  j_size = list_length(candidate);
2965  j = i;
2966  }
2967 
2968  if (dup_of > 0)
2969  {
2970  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2971  bms_free(candidate_set);
2972  }
2973  else
2974  {
2975  int k;
2976  int n_adj = 0;
2977 
2978  orig_sets[i] = list_make1(candidate);
2979  set_masks[i] = candidate_set;
2980 
2981  /* fill in adjacency list; no need to compare equal-size sets */
2982 
2983  for (k = j - 1; k > 0; --k)
2984  {
2985  if (bms_is_subset(set_masks[k], candidate_set))
2986  adjacency_buf[++n_adj] = k;
2987  }
2988 
2989  if (n_adj > 0)
2990  {
2991  adjacency_buf[0] = n_adj;
2992  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2993  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2994  }
2995  else
2996  adjacency[i] = NULL;
2997 
2998  ++i;
2999  }
3000  }
3001 
3002  num_sets = i - 1;
3003 
3004  /*
3005  * Apply the graph matching algorithm to do the work.
3006  */
3007  state = BipartiteMatch(num_sets, num_sets, adjacency);
3008 
3009  /*
3010  * Now, the state->pair* fields have the info we need to assign sets to
3011  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3012  * pair_vu[v] = u (both will be true, but we check both so that we can do
3013  * it in one pass)
3014  */
3015  chains = palloc0((num_sets + 1) * sizeof(int));
3016 
3017  for (i = 1; i <= num_sets; ++i)
3018  {
3019  int u = state->pair_vu[i];
3020  int v = state->pair_uv[i];
3021 
3022  if (u > 0 && u < i)
3023  chains[i] = chains[u];
3024  else if (v > 0 && v < i)
3025  chains[i] = chains[v];
3026  else
3027  chains[i] = ++num_chains;
3028  }
3029 
3030  /* build result lists. */
3031  results = palloc0((num_chains + 1) * sizeof(List *));
3032 
3033  for (i = 1; i <= num_sets; ++i)
3034  {
3035  int c = chains[i];
3036 
3037  Assert(c > 0);
3038 
3039  results[c] = list_concat(results[c], orig_sets[i]);
3040  }
3041 
3042  /* push any empty sets back on the first list. */
3043  while (num_empty-- > 0)
3044  results[1] = lcons(NIL, results[1]);
3045 
3046  /* make result list */
3047  for (i = 1; i <= num_chains; ++i)
3048  result = lappend(result, results[i]);
3049 
3050  /*
3051  * Free all the things.
3052  *
3053  * (This is over-fussy for small sets but for large sets we could have
3054  * tied up a nontrivial amount of memory.)
3055  */
3056  BipartiteMatchFree(state);
3057  pfree(results);
3058  pfree(chains);
3059  for (i = 1; i <= num_sets; ++i)
3060  if (adjacency[i])
3061  pfree(adjacency[i]);
3062  pfree(adjacency);
3063  pfree(adjacency_buf);
3064  pfree(orig_sets);
3065  for (i = 1; i <= num_sets; ++i)
3066  bms_free(set_masks[i]);
3067  pfree(set_masks);
3068 
3069  return result;
3070 }
3071 
3072 /*
3073  * Reorder the elements of a list of grouping sets such that they have correct
3074  * prefix relationships. Also inserts the GroupingSetData annotations.
3075  *
3076  * The input must be ordered with smallest sets first; the result is returned
3077  * with largest sets first. Note that the result shares no list substructure
3078  * with the input, so it's safe for the caller to modify it later.
3079  *
3080  * If we're passed in a sortclause, we follow its order of columns to the
3081  * extent possible, to minimize the chance that we add unnecessary sorts.
3082  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3083  * gets implemented in one pass.)
3084  */
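/*
 * Example (editor's note, not part of planner.c): with input sets (c) and
 * (a,b,c), smallest first, and ORDER BY c, b, a, each set's columns are laid
 * out following the sort clause, giving (c) and then (c,b,a); the result
 * comes back largest-first as [(c,b,a), (c)], so a single sort on (c, b, a)
 * can implement the whole rollup as well as the ORDER BY.
 */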
3085 static List *
3086 reorder_grouping_sets(List *groupingsets, List *sortclause)
3087 {
3088  ListCell *lc;
3089  List *previous = NIL;
3090  List *result = NIL;
3091 
3092  foreach(lc, groupingsets)
3093  {
3094  List *candidate = (List *) lfirst(lc);
3095  List *new_elems = list_difference_int(candidate, previous);
3096  GroupingSetData *gs = makeNode(GroupingSetData);
3097 
3098  while (list_length(sortclause) > list_length(previous) &&
3099  list_length(new_elems) > 0)
3100  {
3101  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3102  int ref = sc->tleSortGroupRef;
3103 
3104  if (list_member_int(new_elems, ref))
3105  {
3106  previous = lappend_int(previous, ref);
3107  new_elems = list_delete_int(new_elems, ref);
3108  }
3109  else
3110  {
3111  /* diverged from the sortclause; give up on it */
3112  sortclause = NIL;
3113  break;
3114  }
3115  }
3116 
3117  previous = list_concat(previous, new_elems);
3118 
3119  gs->set = list_copy(previous);
3120  result = lcons(gs, result);
3121  }
3122 
3123  list_free(previous);
3124 
3125  return result;
3126 }
3127 
3128 /*
3129  * Compute query_pathkeys and other pathkeys during plan generation
3130  */
3131 static void
3132 standard_qp_callback(PlannerInfo *root, void *extra)
3133 {
3134  Query *parse = root->parse;
3135  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3136  List *tlist = root->processed_tlist;
3137  List *activeWindows = qp_extra->activeWindows;
3138 
3139  /*
3140  * Calculate pathkeys that represent grouping/ordering requirements. The
3141  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3142  * be, in which case we just leave their pathkeys empty.
3143  */
3144  if (qp_extra->groupClause &&
3145  grouping_is_sortable(qp_extra->groupClause))
3146  root->group_pathkeys =
3147  make_pathkeys_for_sortclauses(root,
3148  qp_extra->groupClause,
3149  tlist);
3150  else
3151  root->group_pathkeys = NIL;
3152 
3153  /* We consider only the first (bottom) window in pathkeys logic */
3154  if (activeWindows != NIL)
3155  {
3156  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3157 
3158  root->window_pathkeys = make_pathkeys_for_window(root,
3159  wc,
3160  tlist);
3161  }
3162  else
3163  root->window_pathkeys = NIL;
3164 
3165  if (parse->distinctClause &&
3166  grouping_is_sortable(parse->distinctClause))
3167  root->distinct_pathkeys =
3168  make_pathkeys_for_sortclauses(root,
3169  parse->distinctClause,
3170  tlist);
3171  else
3172  root->distinct_pathkeys = NIL;
3173 
3174  root->sort_pathkeys =
3175  make_pathkeys_for_sortclauses(root,
3176  parse->sortClause,
3177  tlist);
3178 
3179  /*
3180  * Figure out whether we want a sorted result from query_planner.
3181  *
3182  * If we have a sortable GROUP BY clause, then we want a result sorted
3183  * properly for grouping. Otherwise, if we have window functions to
3184  * evaluate, we try to sort for the first window. Otherwise, if there's a
3185  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3186  * we try to produce output that's sufficiently well sorted for the
3187  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3188  * by the ORDER BY clause.
3189  *
3190  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3191  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3192  * that might just leave us failing to exploit an available sort order at
3193  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3194  * much easier, since we know that the parser ensured that one is a
3195  * superset of the other.
3196  */
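/*
 * Editor's note (illustrative, not part of planner.c): for instance, in
 *     SELECT DISTINCT a, b FROM t ORDER BY a;
 * distinct_pathkeys (a, b) is longer than sort_pathkeys (a), so query_planner
 * is asked for output sorted by (a, b), which satisfies both the DISTINCT and
 * the ORDER BY.
 */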
3197  if (root->group_pathkeys)
3198  root->query_pathkeys = root->group_pathkeys;
3199  else if (root->window_pathkeys)
3200  root->query_pathkeys = root->window_pathkeys;
3201  else if (list_length(root->distinct_pathkeys) >
3202  list_length(root->sort_pathkeys))
3203  root->query_pathkeys = root->distinct_pathkeys;
3204  else if (root->sort_pathkeys)
3205  root->query_pathkeys = root->sort_pathkeys;
3206  else
3207  root->query_pathkeys = NIL;
3208 }
3209 
3210 /*
3211  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3212  *
3213  * path_rows: number of output rows from scan/join step
3214  * gd: grouping sets data including list of grouping sets and their clauses
3215  * target_list: target list containing group clause references
3216  *
3217  * If doing grouping sets, we also annotate the gsets data with the estimates
3218  * for each set and each individual rollup list, with a view to later
3219  * determining whether some combination of them could be hashed instead.
3220  */
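/*
 * Worked example (editor's note, not part of planner.c): for
 *     GROUP BY GROUPING SETS ((a), (b))
 * each set gets its own estimate_num_groups() call; if a is estimated at
 * about 100 distinct values and b at about 20, those per-set numbers are
 * recorded in the rollup annotations and the function returns roughly 120 as
 * the total number of output groups.
 */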
3221 static double
3222 get_number_of_groups(PlannerInfo *root,
3223  double path_rows,
3224  grouping_sets_data *gd,
3225  List *target_list)
3226 {
3227  Query *parse = root->parse;
3228  double dNumGroups;
3229 
3230  if (parse->groupClause)
3231  {
3232  List *groupExprs;
3233 
3234  if (parse->groupingSets)
3235  {
3236  /* Add up the estimates for each grouping set */
3237  ListCell *lc;
3238  ListCell *lc2;
3239 
3240  Assert(gd); /* keep Coverity happy */
3241 
3242  dNumGroups = 0;
3243 
3244  foreach(lc, gd->rollups)
3245  {
3246  RollupData *rollup = lfirst_node(RollupData, lc);
3247  ListCell *lc;
3248 
3249  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3250  target_list);
3251 
3252  rollup->numGroups = 0.0;
3253 
3254  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3255  {
3256  List *gset = (List *) lfirst(lc);
3257  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3258  double numGroups = estimate_num_groups(root,
3259  groupExprs,
3260  path_rows,
3261  &gset,
3262  NULL);
3263 
3264  gs->numGroups = numGroups;
3265  rollup->numGroups += numGroups;
3266  }
3267 
3268  dNumGroups += rollup->numGroups;
3269  }
3270 
3271  if (gd->hash_sets_idx)
3272  {
3273  ListCell *lc;
3274 
3275  gd->dNumHashGroups = 0;
3276 
3277  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3278  target_list);
3279 
3280  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3281  {
3282  List *gset = (List *) lfirst(lc);
3283  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3284  double numGroups = estimate_num_groups(root,
3285  groupExprs,
3286  path_rows,
3287  &gset,
3288  NULL);
3289 
3290  gs->numGroups = numGroups;
3291  gd->dNumHashGroups += numGroups;
3292  }
3293 
3294  dNumGroups += gd->dNumHashGroups;
3295  }
3296  }
3297  else
3298  {
3299  /* Plain GROUP BY */
3300  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3301  target_list);
3302 
3303  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3304  NULL, NULL);
3305  }
3306  }
3307  else if (parse->groupingSets)
3308  {
3309  /* Empty grouping sets ... one result row for each one */
3310  dNumGroups = list_length(parse->groupingSets);
3311  }
3312  else if (parse->hasAggs || root->hasHavingQual)
3313  {
3314  /* Plain aggregation, one result row */
3315  dNumGroups = 1;
3316  }
3317  else
3318  {
3319  /* Not grouping */
3320  dNumGroups = 1;
3321  }
3322 
3323  return dNumGroups;
3324 }
3325 
3326 /*
3327  * create_grouping_paths
3328  *
3329  * Build a new upperrel containing Paths for grouping and/or aggregation.
3330  * Along the way, we also build an upperrel for Paths which are partially
3331  * grouped and/or aggregated. A partially grouped and/or aggregated path
3332  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3333  * the only partially grouped paths we build are also partial paths; that
3334  * is, they need a Gather and then a FinalizeAggregate.
3335  *
3336  * input_rel: contains the source-data Paths
3337  * target: the pathtarget for the result Paths to compute
3338  * gd: grouping sets data including list of grouping sets and their clauses
3339  *
3340  * Note: all Paths in input_rel are expected to return the target computed
3341  * by make_group_input_target.
3342  */
3343 static RelOptInfo *
3344 create_grouping_paths(PlannerInfo *root,
3345  RelOptInfo *input_rel,
3346  PathTarget *target,
3347  bool target_parallel_safe,
3348  grouping_sets_data *gd)
3349 {
3350  Query *parse = root->parse;
3351  RelOptInfo *grouped_rel;
3352  RelOptInfo *partially_grouped_rel;
3353  AggClauseCosts agg_costs;
3354 
3355  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3356  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3357 
3358  /*
3359  * Create grouping relation to hold fully aggregated grouping and/or
3360  * aggregation paths.
3361  */
3362  grouped_rel = make_grouping_rel(root, input_rel, target,
3363  target_parallel_safe, parse->havingQual);
3364 
3365  /*
3366  * Create either paths for a degenerate grouping or paths for ordinary
3367  * grouping, as appropriate.
3368  */
3369  if (is_degenerate_grouping(root))
3370  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3371  else
3372  {
3373  int flags = 0;
3374  GroupPathExtraData extra;
3375 
3376  /*
3377  * Determine whether it's possible to perform sort-based
3378  * implementations of grouping. (Note that if groupClause is empty,
3379  * grouping_is_sortable() is trivially true, and all the
3380  * pathkeys_contained_in() tests will succeed too, so that we'll
3381  * consider every surviving input path.)
3382  *
3383  * If we have grouping sets, we might be able to sort some but not all
3384  * of them; in this case, we need can_sort to be true as long as we
3385  * must consider any sorted-input plan.
3386  */
3387  if ((gd && gd->rollups != NIL)
3388  || grouping_is_sortable(parse->groupClause))
3389  flags |= GROUPING_CAN_USE_SORT;
3390 
3391  /*
3392  * Determine whether we should consider hash-based implementations of
3393  * grouping.
3394  *
3395  * Hashed aggregation only applies if we're grouping. If we have
3396  * grouping sets, some groups might be hashable but others not; in
3397  * this case we set can_hash true as long as there is nothing globally
3398  * preventing us from hashing (and we should therefore consider plans
3399  * with hashes).
3400  *
3401  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3402  * BY aggregates. (Doing so would imply storing *all* the input
3403  * values in the hash table, and/or running many sorts in parallel,
3404  * either of which seems like a certain loser.) We similarly don't
3405  * support ordered-set aggregates in hashed aggregation, but that case
3406  * is also included in the numOrderedAggs count.
3407  *
3408  * Note: grouping_is_hashable() is much more expensive to check than
3409  * the other gating conditions, so we want to do it last.
3410  */
3411  if ((parse->groupClause != NIL &&
3412  root->numOrderedAggs == 0 &&
3413  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
3414  flags |= GROUPING_CAN_USE_HASH;
3415 
3416  /*
3417  * Determine whether partial aggregation is possible.
3418  */
3419  if (can_partial_agg(root))
3420  flags |= GROUPING_CAN_PARTIAL_AGG;
3421 
3422  extra.flags = flags;
3423  extra.target_parallel_safe = target_parallel_safe;
3424  extra.havingQual = parse->havingQual;
3425  extra.targetList = parse->targetList;
3426  extra.partial_costs_set = false;
3427 
3428  /*
3429  * Determine whether partitionwise aggregation is in theory possible.
3430  * It can be disabled by the user, and for now, we don't try to
3431  * support grouping sets. create_ordinary_grouping_paths() will check
3432  * additional conditions, such as whether input_rel is partitioned.
3433  */
3434  if (enable_partitionwise_aggregate && !parse->groupingSets)
3435  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3436  else
3437  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3438 
3439  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3440  &agg_costs, gd, &extra,
3441  &partially_grouped_rel);
3442  }
3443 
3444  set_cheapest(grouped_rel);
3445  return grouped_rel;
3446 }
3447 
3448 /*
3449  * make_grouping_rel
3450  *
3451  * Create a new grouping rel and set basic properties.
3452  *
3453  * input_rel represents the underlying scan/join relation.
3454  * target is the output expected from the grouping relation.
3455  */
3456 static RelOptInfo *
3457 make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3458  PathTarget *target, bool target_parallel_safe,
3459  Node *havingQual)
3460 {
3461  RelOptInfo *grouped_rel;
3462 
3463  if (IS_OTHER_REL(input_rel))
3464  {
3465  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3466  input_rel->relids);
3467  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3468  }
3469  else
3470  {
3471  /*
3472  * By tradition, the relids set for the main grouping relation is
3473  * NULL. (This could be changed, but might require adjustments
3474  * elsewhere.)
3475  */
3476  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3477  }
3478 
3479  /* Set target. */
3480  grouped_rel->reltarget = target;
3481 
3482  /*
3483  * If the input relation is not parallel-safe, then the grouped relation
3484  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3485  * target list and HAVING quals are parallel-safe.
3486  */
3487  if (input_rel->consider_parallel && target_parallel_safe &&
3488  is_parallel_safe(root, (Node *) havingQual))
3489  grouped_rel->consider_parallel = true;
3490 
3491  /*
3492  * If the input rel belongs to a single FDW, so does the grouped rel.
3493  */
3494  grouped_rel->serverid = input_rel->serverid;
3495  grouped_rel->userid = input_rel->userid;
3496  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3497  grouped_rel->fdwroutine = input_rel->fdwroutine;
3498 
3499  return grouped_rel;
3500 }
3501 
3502 /*
3503  * is_degenerate_grouping
3504  *
3505  * A degenerate grouping is one in which the query has a HAVING qual and/or
3506  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3507  * grouping sets are all empty).
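 * For example, "SELECT 1 FROM tab HAVING 1 = 0" (tab being any hypothetical
 * table) has a HAVING qual but no aggregates and no GROUP BY, so the whole
 * table forms a single group and the query emits zero or one row depending
 * only on the HAVING result.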
3508  */
3509 static bool
3510 is_degenerate_grouping(PlannerInfo *root)
3511 {
3512  Query *parse = root->parse;
3513 
3514  return (root->hasHavingQual || parse->groupingSets) &&
3515  !parse->hasAggs && parse->groupClause == NIL;
3516 }
3517 
3518 /*
3519  * create_degenerate_grouping_paths
3520  *
3521  * When the grouping is degenerate (see is_degenerate_grouping), we are
3522  * supposed to emit either zero or one row for each grouping set depending on
3523  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3524  * either HAVING or the targetlist, so we actually do not need the FROM table
3525  * at all! We can just throw away the plan-so-far and generate a Result node.
3526  * This is a sufficiently unusual corner case that it's not worth contorting
3527  * the structure of this module to avoid having to generate the earlier paths
3528  * in the first place.
3529  */
3530 static void
3531 create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3532  RelOptInfo *grouped_rel)
3533 {
3534  Query *parse = root->parse;
3535  int nrows;
3536  Path *path;
3537 
3538  nrows = list_length(parse->groupingSets);
3539  if (nrows > 1)
3540  {
3541  /*
3542  * Doesn't seem worthwhile writing code to cons up a generate_series
3543  * or a values scan to emit multiple rows. Instead just make N clones
3544  * and append them. (With a volatile HAVING clause, this means you
3545  * might get between 0 and N output rows. Offhand I think that's
3546  * desired.)
3547  */
3548  List *paths = NIL;
3549 
3550  while (--nrows >= 0)
3551  {
3552  path = (Path *)
3553  create_group_result_path(root, grouped_rel,
3554  grouped_rel->reltarget,
3555  (List *) parse->havingQual);
3556  paths = lappend(paths, path);
3557  }
3558  path = (Path *)
3559  create_append_path(root,
3560  grouped_rel,
3561  paths,
3562  NIL,
3563  NIL,
3564  NULL,
3565  0,
3566  false,
3567  -1);
3568  }
3569  else
3570  {
3571  /* No grouping sets, or just one, so one output row */
3572  path = (Path *)
3573  create_group_result_path(root, grouped_rel,
3574  grouped_rel->reltarget,
3575  (List *) parse->havingQual);
3576  }
3577 
3578  add_path(grouped_rel, path);
3579 }
3580 
3581 /*
3582  * create_ordinary_grouping_paths
3583  *
3584  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3585  *
3586  * We need to consider sorted and hashed aggregation in the same function,
3587  * because otherwise (1) it would be harder to throw an appropriate error
3588  * message if neither way works, and (2) we should not allow hashtable size
3589  * considerations to dissuade us from using hashing if sorting is not possible.
3590  *
3591  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3592  * function creates, or to NULL if it doesn't create one.
3593  */
3594 static void
3595 create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3596  RelOptInfo *grouped_rel,
3597  const AggClauseCosts *agg_costs,
3598  grouping_sets_data *gd,
3599  GroupPathExtraData *extra,
3600  RelOptInfo **partially_grouped_rel_p)
3601 {
3602  Path *cheapest_path = input_rel->cheapest_total_path;
3603  RelOptInfo *partially_grouped_rel = NULL;
3604  double dNumGroups;
3605  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3606 
3607  /*
3608  * If this is the topmost grouping relation or if the parent relation is
3609  * doing some form of partitionwise aggregation, then we may be able to do
3610  * it at this level also. However, if the input relation is not
3611  * partitioned, partitionwise aggregate is impossible.
3612  */
3613  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3614  IS_PARTITIONED_REL(input_rel))
3615  {
3616  /*
3617  * If this is the topmost relation or if the parent relation is doing
3618  * full partitionwise aggregation, then we can do full partitionwise
3619  * aggregation provided that the GROUP BY clause contains all of the
3620  * partitioning columns at this level. Otherwise, we can do at most
3621  * partial partitionwise aggregation. But if partial aggregation is
3622  * not supported in general then we can't use it for partitionwise
3623  * aggregation either.
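 * For example, if input_rel is partitioned on column a, then GROUP BY a, b
 * can in principle use full partitionwise aggregation (each group falls
 * entirely within one partition), while GROUP BY b alone can use at most
 * partial partitionwise aggregation.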
3624  */
3625  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3626  group_by_has_partkey(input_rel, extra->targetList,
3627  root->parse->groupClause))
3628  patype = PARTITIONWISE_AGGREGATE_FULL;
3629  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3630  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3631  else
3632  patype = PARTITIONWISE_AGGREGATE_NONE;
3633  }
3634 
3635  /*
3636  * Before generating paths for grouped_rel, we first generate any possible
3637  * partially grouped paths; that way, later code can easily consider both
3638  * parallel and non-parallel approaches to grouping.
3639  */
3640  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3641  {
3642  bool force_rel_creation;
3643 
3644  /*
3645  * If we're doing partitionwise aggregation at this level, force
3646  * creation of a partially_grouped_rel so we can add partitionwise
3647  * paths to it.
3648  */
3649  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3650 
3651  partially_grouped_rel =
3652  create_partial_grouping_paths(root,
3653  grouped_rel,
3654  input_rel,
3655  gd,
3656  extra,
3657  force_rel_creation);
3658  }
3659 
3660  /* Set out parameter. */
3661  *partially_grouped_rel_p = partially_grouped_rel;
3662 
3663  /* Apply partitionwise aggregation technique, if possible. */
3664  if (patype != PARTITIONWISE_AGGREGATE_NONE)
3665  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3666  partially_grouped_rel, agg_costs,
3667  gd, patype, extra);
3668 
3669  /* If we are doing partial aggregation only, return. */
3670  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3671  {
3672  Assert(partially_grouped_rel);
3673 
3674  if (partially_grouped_rel->pathlist)
3675  set_cheapest(partially_grouped_rel);
3676 
3677  return;
3678  }
3679 
3680  /* Gather any partially grouped partial paths. */
3681  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
3682  {
3683  gather_grouping_paths(root, partially_grouped_rel);
3684  set_cheapest(partially_grouped_rel);
3685  }
3686 
3687  /*
3688  * Estimate number of groups.
3689  */
3690  dNumGroups = get_number_of_groups(root,
3691  cheapest_path->rows,
3692  gd,
3693  extra->targetList);
3694 
3695  /* Build final grouping paths */
3696  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
3697  partially_grouped_rel, agg_costs, gd,
3698  dNumGroups, extra);
3699 
3700  /* Give a helpful error if we failed to find any implementation */
3701  if (grouped_rel->pathlist == NIL)
3702  ereport(ERROR,
3703  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3704  errmsg("could not implement GROUP BY"),
3705  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
3706 
3707  /*
3708  * If there is an FDW that's responsible for all baserels of the query,
3709  * let it consider adding ForeignPaths.
3710  */
3711  if (grouped_rel->fdwroutine &&
3712  grouped_rel->fdwroutine->GetForeignUpperPaths)
3713  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
3714  input_rel, grouped_rel,
3715  extra);
3716 
3717  /* Let extensions possibly add some more paths */
3718  if (create_upper_paths_hook)
3719  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
3720  input_rel, grouped_rel,
3721  extra);
3722 }
3723 
3724 /*
3725  * For a given input path, consider the possible ways of doing grouping sets on
3726  * it, by combinations of hashing and sorting. This can be called multiple
3727  * times, so it's important that it not scribble on input. No result is
3728  * returned, but any generated paths are added to grouped_rel.
3729  */
3730 static void
3731 consider_groupingsets_paths(PlannerInfo *root,
3732  RelOptInfo *grouped_rel,
3733  Path *path,
3734  bool is_sorted,
3735  bool can_hash,
3736  grouping_sets_data *gd,
3737  const AggClauseCosts *agg_costs,
3738  double dNumGroups)
3739 {
3740  Query *parse = root->parse;
3741  Size hash_mem_limit = get_hash_memory_limit();
3742 
3743  /*
3744  * If we're not being offered sorted input, then only consider plans that
3745  * can be done entirely by hashing.
3746  *
3747  * We can hash everything if it looks like it'll fit in hash_mem. But if
3748  * the input is actually sorted despite not being advertised as such, we
3749  * prefer to make use of that in order to use less memory.
3750  *
3751  * If none of the grouping sets are sortable, then ignore the hash_mem
3752  * limit and generate a path anyway, since otherwise we'll just fail.
3753  */
3754  if (!is_sorted)
3755  {
3756  List *new_rollups = NIL;
3757  RollupData *unhashed_rollup = NULL;
3758  List *sets_data;
3759  List *empty_sets_data = NIL;
3760  List *empty_sets = NIL;
3761  ListCell *lc;
3762  ListCell *l_start = list_head(gd->rollups);
3763  AggStrategy strat = AGG_HASHED;
3764  double hashsize;
3765  double exclude_groups = 0.0;
3766 
3767  Assert(can_hash);
3768 
3769  /*
3770  * If the input is coincidentally sorted usefully (which can happen
3771  * even if is_sorted is false, since that only means that our caller
3772  * has set up the sorting for us), then save some hashtable space by
3773  * making use of that. But we need to watch out for degenerate cases:
3774  *
3775  * 1) If there are any empty grouping sets, then group_pathkeys might
3776  * be NIL if all non-empty grouping sets are unsortable. In this case,
3777  * there will be a rollup containing only empty groups, and the
3778  * pathkeys_contained_in test is vacuously true; this is ok.
3779  *
3780  * XXX: the above relies on the fact that group_pathkeys is generated
3781  * from the first rollup. If we add the ability to consider multiple
3782  * sort orders for grouping input, this assumption might fail.
3783  *
3784  * 2) If there are no empty sets and only unsortable sets, then the
3785  * rollups list will be empty (and thus l_start == NULL), and
3786  * group_pathkeys will be NIL; we must ensure that the vacuously-true
3787  * pathkeys_contained_in test doesn't cause us to crash.
3788  */
3789  if (l_start != NULL &&
3790  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
3791  {
3792  unhashed_rollup = lfirst_node(RollupData, l_start);
3793  exclude_groups = unhashed_rollup->numGroups;
3794  l_start = lnext(gd->rollups, l_start);
3795  }
3796 
3797  hashsize = estimate_hashagg_tablesize(root,
3798  path,
3799  agg_costs,
3800  dNumGroups - exclude_groups);
3801 
3802  /*
3803  * gd->rollups is empty if we have only unsortable columns to work
3804  * with. Override hash_mem in that case; otherwise, we'll rely on the
3805  * sorted-input case to generate usable mixed paths.
3806  */
3807  if (hashsize > hash_mem_limit && gd->rollups)
3808  return; /* nope, won't fit */
3809 
3810  /*
3811  * We need to burst the existing rollups list into individual grouping
3812  * sets and recompute a groupClause for each set.
3813  */
3814  sets_data = list_copy(gd->unsortable_sets);
3815 
3816  for_each_cell(lc, gd->rollups, l_start)
3817  {
3818  RollupData *rollup = lfirst_node(RollupData, lc);
3819 
3820  /*
3821  * If we find an unhashable rollup that's not been skipped by the
3822  * "actually sorted" check above, we can't cope; we'd need sorted
3823  * input (with a different sort order) but we can't get that here.
3824  * So bail out; we'll get a valid path from the is_sorted case
3825  * instead.
3826  *
3827  * The mere presence of empty grouping sets doesn't make a rollup
3828  * unhashable (see preprocess_grouping_sets); we handle those
3829  * specially below.
3830  */
3831  if (!rollup->hashable)
3832  return;
3833 
3834  sets_data = list_concat(sets_data, rollup->gsets_data);
3835  }
3836  foreach(lc, sets_data)
3837  {
3838  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
3839  List *gset = gs->set;
3840  RollupData *rollup;
3841 
3842  if (gset == NIL)
3843  {
3844  /* Empty grouping sets can't be hashed. */
3845  empty_sets_data = lappend(empty_sets_data, gs);
3846  empty_sets = lappend(empty_sets, NIL);
3847  }
3848  else
3849  {
3850  rollup = makeNode(RollupData);
3851 
3852  rollup->groupClause = preprocess_groupclause(root, gset);
3853  rollup->gsets_data = list_make1(gs);
3854  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
3855  rollup->gsets_data,
3856  gd->tleref_to_colnum_map);
3857  rollup->numGroups = gs->numGroups;
3858  rollup->hashable = true;
3859  rollup->is_hashed = true;
3860  new_rollups = lappend(new_rollups, rollup);
3861  }
3862  }
3863 
3864  /*
3865  * If we didn't find anything nonempty to hash, then bail. We'll
3866  * generate a path from the is_sorted case.
3867  */
3868  if (new_rollups == NIL)
3869  return;
3870 
3871  /*
3872  * If there were empty grouping sets they should have been in the
3873  * first rollup.
3874  */
3875  Assert(!unhashed_rollup || !empty_sets);
3876 
3877  if (unhashed_rollup)
3878  {
3879  new_rollups = lappend(new_rollups, unhashed_rollup);
3880  strat = AGG_MIXED;
3881  }
3882  else if (empty_sets)
3883  {
3884  RollupData *rollup = makeNode(RollupData);
3885 
3886  rollup->groupClause = NIL;
3887  rollup->gsets_data = empty_sets_data;
3888  rollup->gsets = empty_sets;
3889  rollup->numGroups = list_length(empty_sets);
3890  rollup->hashable = false;
3891  rollup->is_hashed = false;
3892  new_rollups = lappend(new_rollups, rollup);
3893  strat = AGG_MIXED;
3894  }
3895 
3896  add_path(grouped_rel, (Path *)
3897  create_groupingsets_path(root,
3898  grouped_rel,
3899  path,
3900  (List *) parse->havingQual,
3901  strat,
3902  new_rollups,
3903  agg_costs));
3904  return;
3905  }
3906 
3907  /*
3908  * If we have sorted input but nothing we can do with it, bail.
3909  */
3910  if (list_length(gd->rollups) == 0)
3911  return;
3912 
3913  /*
3914  * Given sorted input, we try to make two paths: one sorted and one mixed
3915  * sort/hash. (We need to try both because hashagg might be disabled, or
3916  * some columns might not be sortable.)
3917  *
3918  * can_hash is passed in as false if some obstacle elsewhere (such as
3919  * ordered aggs) means that we shouldn't consider hashing at all.
3920  */
3921  if (can_hash && gd->any_hashable)
3922  {
3923  List *rollups = NIL;
3924  List *hash_sets = list_copy(gd->unsortable_sets);
3925  double availspace = hash_mem_limit;
3926  ListCell *lc;
3927 
3928  /*
3929  * Account first for space needed for groups we can't sort at all.
3930  */
3931  availspace -= estimate_hashagg_tablesize(root,
3932  path,
3933  agg_costs,
3934  gd->dNumHashGroups);
3935 
3936  if (availspace > 0 && list_length(gd->rollups) > 1)
3937  {
3938  double scale;
3939  int num_rollups = list_length(gd->rollups);
3940  int k_capacity;
3941  int *k_weights = palloc(num_rollups * sizeof(int));
3942  Bitmapset *hash_items = NULL;
3943  int i;
3944 
3945  /*
3946  * We treat this as a knapsack problem: the knapsack capacity
3947  * represents hash_mem, the item weights are the estimated memory
3948  * usage of the hashtables needed to implement a single rollup,
3949  * and we really ought to use the cost saving as the item value;
3950  * however, currently the costs assigned to sort nodes don't
3951  * reflect the comparison costs well, and so we treat all items as
3952  * of equal value (each rollup we hash instead saves us one sort).
3953  *
3954  * To use the discrete knapsack, we need to scale the values to a
3955  * reasonably small bounded range. We choose to allow a 5% error
3956  * margin; we have no more than 4096 rollups in the worst possible
3957  * case, which with a 5% error margin will require a bit over 42MB
3958  * of workspace. (Anyone wanting to plan queries that complex had
3959  * better have the memory for it. In more reasonable cases, with
3960  * no more than a couple of dozen rollups, the memory usage will
3961  * be negligible.)
3962  *
3963  * k_capacity is naturally bounded, but we clamp the values for
3964  * scale and weight (below) to avoid overflows or underflows (or
3965  * uselessly trying to use a scale factor less than 1 byte).
3966  */
3967  scale = Max(availspace / (20.0 * num_rollups), 1.0);
3968  k_capacity = (int) floor(availspace / scale);
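 /*
  * For instance, with roughly 4MB of available hash_mem and 10 candidate
  * rollups, scale comes out near 21kB and k_capacity around 200, so each
  * rollup's estimated hashtable size is weighed in ~21kB units below.
  */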
3969 
3970  /*
3971  * We leave the first rollup out of consideration since it's the
3972  * one that matches the input sort order. We assign indexes "i"
3973  * to only those entries considered for hashing; the second loop,
3974  * below, must use the same condition.
3975  */
3976  i = 0;
3977  for_each_from(lc, gd->rollups, 1)
3978  {
3979  RollupData *rollup = lfirst_node(RollupData, lc);
3980 
3981  if (rollup->hashable)
3982  {
3983  double sz = estimate_hashagg_tablesize(root,
3984  path,
3985  agg_costs,
3986  rollup->numGroups);
3987 
3988  /*
3989  * If sz is enormous, but hash_mem (and hence scale) is
3990  * small, avoid integer overflow here.
3991  */
3992  k_weights[i] = (int) Min(floor(sz / scale),
3993  k_capacity + 1.0);
3994  ++i;
3995  }
3996  }
3997 
3998  /*
3999  * Apply knapsack algorithm; compute the set of items which
4000  * maximizes the value stored (in this case the number of sorts
4001  * saved) while keeping the total size (approximately) within
4002  * capacity.
4003  */
4004  if (i > 0)
4005  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4006 
4007  if (!bms_is_empty(hash_items))
4008  {
4009  rollups = list_make1(linitial(gd->rollups));
4010 
4011  i = 0;
4012  for_each_from(lc, gd->rollups, 1)
4013  {
4014  RollupData *rollup = lfirst_node(RollupData, lc);
4015 
4016  if (rollup->hashable)
4017  {
4018  if (bms_is_member(i, hash_items))
4019  hash_sets = list_concat(hash_sets,
4020  rollup->gsets_data);
4021  else
4022  rollups = lappend(rollups, rollup);
4023  ++i;
4024  }
4025  else
4026  rollups = lappend(rollups, rollup);
4027  }
4028  }
4029  }
4030 
4031  if (!rollups && hash_sets)
4032  rollups = list_copy(gd->rollups);
4033 
4034  foreach(lc, hash_sets)
4035  {
4036  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4037  RollupData *rollup = makeNode(RollupData);
4038 
4039  Assert(gs->set != NIL);
4040 
4041  rollup->groupClause = preprocess_groupclause(root, gs->set);
4042  rollup->gsets_data = list_make1(gs);
4043  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4044  rollup->gsets_data,
4045  gd->tleref_to_colnum_map);
4046  rollup->numGroups = gs->numGroups;
4047  rollup->hashable = true;
4048  rollup->is_hashed = true;
4049  rollups = lcons(rollup, rollups);
4050  }
4051 
4052  if (rollups)
4053  {
4054  add_path(grouped_rel, (Path *)
4055  create_groupingsets_path(root,
4056  grouped_rel,
4057  path,
4058  (List *) parse->havingQual,
4059  AGG_MIXED,
4060  rollups,
4061  agg_costs));
4062  }
4063  }
4064 
4065  /*
4066  * Now try the simple sorted case.
4067  */
4068  if (!gd->unsortable_sets)
4069  add_path(grouped_rel, (Path *)
4070  create_groupingsets_path(root,
4071  grouped_rel,
4072  path,
4073  (List *) parse->havingQual,
4074  AGG_SORTED,
4075  gd->rollups,
4076  agg_costs));
4077 }
4078 
4079 /*
4080  * create_window_paths
4081  *
4082  * Build a new upperrel containing Paths for window-function evaluation.
4083  *
4084  * input_rel: contains the source-data Paths
4085  * input_target: result of make_window_input_target
4086  * output_target: what the topmost WindowAggPath should return
4087  * wflists: result of find_window_functions
4088  * activeWindows: result of select_active_windows
4089  *
4090  * Note: all Paths in input_rel are expected to return input_target.
4091  */
4092 static RelOptInfo *
4093 create_window_paths(PlannerInfo *root,
4094  RelOptInfo *input_rel,
4095  PathTarget *input_target,
4096  PathTarget *output_target,
4097  bool output_target_parallel_safe,
4098  WindowFuncLists *wflists,
4099  List *activeWindows)
4100 {
4101  RelOptInfo *window_rel;
4102  ListCell *lc;
4103 
4104  /* For now, do all work in the (WINDOW, NULL) upperrel */
4105  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4106 
4107  /*
4108  * If the input relation is not parallel-safe, then the window relation
4109  * can't be parallel-safe, either. Otherwise, we need to examine the
4110  * target list and active windows for non-parallel-safe constructs.
4111  */
4112  if (input_rel->consider_parallel && output_target_parallel_safe &&
4113  is_parallel_safe(root, (Node *) activeWindows))
4114  window_rel->consider_parallel = true;
4115 
4116  /*
4117  * If the input rel belongs to a single FDW, so does the window rel.
4118  */
4119  window_rel->serverid = input_rel->serverid;
4120  window_rel->userid = input_rel->userid;
4121  window_rel->useridiscurrent = input_rel->useridiscurrent;
4122  window_rel->fdwroutine = input_rel->fdwroutine;
4123 
4124  /*
4125  * Consider computing window functions starting from the existing
4126  * cheapest-total path (which will likely require a sort) as well as any
4127  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4128  */
4129  foreach(lc, input_rel->pathlist)
4130  {
4131  Path *path = (Path *) lfirst(lc);
4132  int presorted_keys;
4133 
4134  if (path == input_rel->cheapest_total_path ||
4135  pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4136  &presorted_keys) ||
4137  presorted_keys > 0)
4138  create_one_window_path(root,
4139  window_rel,
4140  path,
4141  input_target,
4142  output_target,
4143  wflists,
4144  activeWindows);
4145  }
4146 
4147  /*
4148  * If there is an FDW that's responsible for all baserels of the query,
4149  * let it consider adding ForeignPaths.
4150  */
4151  if (window_rel->fdwroutine &&
4152  window_rel->fdwroutine->GetForeignUpperPaths)
4153  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4154  input_rel, window_rel,
4155  NULL);
4156 
4157  /* Let extensions possibly add some more paths */
4158  if (create_upper_paths_hook)
4159  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4160  input_rel, window_rel, NULL);
4161 
4162  /* Now choose the best path(s) */
4163  set_cheapest(window_rel);
4164 
4165  return window_rel;
4166 }
4167 
4168 /*
4169  * Stack window-function implementation steps atop the given Path, and
4170  * add the result to window_rel.
4171  *
4172  * window_rel: upperrel to contain result
4173  * path: input Path to use (must return input_target)
4174  * input_target: result of make_window_input_target
4175  * output_target: what the topmost WindowAggPath should return
4176  * wflists: result of find_window_functions
4177  * activeWindows: result of select_active_windows
4178  */
4179 static void
4180 create_one_window_path(PlannerInfo *root,
4181  RelOptInfo *window_rel,
4182  Path *path,
4183  PathTarget *input_target,
4184  PathTarget *output_target,
4185  WindowFuncLists *wflists,
4186  List *activeWindows)
4187 {
4188  PathTarget *window_target;
4189  ListCell *l;
4190  List *topqual = NIL;
4191 
4192  /*
4193  * Since each window clause could require a different sort order, we stack
4194  * up a WindowAgg node for each clause, with sort steps between them as
4195  * needed. (We assume that select_active_windows chose a good order for
4196  * executing the clauses in.)
4197  *
4198  * input_target should contain all Vars and Aggs needed for the result.
4199  * (In some cases we wouldn't need to propagate all of these all the way
4200  * to the top, since they might only be needed as inputs to WindowFuncs.
4201  * It's probably not worth trying to optimize that though.) It must also
4202  * contain all window partitioning and sorting expressions, to ensure
4203  * they're computed only once at the bottom of the stack (that's critical
4204  * for volatile functions). As we climb up the stack, we'll add outputs
4205  * for the WindowFuncs computed at each level.
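 * For example, given hypothetical windows w1 (ORDER BY a) and w2
 * (PARTITION BY b), the two clauses need different sort orders, so we would
 * typically end up with Sort -> WindowAgg(w1) -> Sort -> WindowAgg(w2).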
4206  */
4207  window_target = input_target;
4208 
4209  foreach(l, activeWindows)
4210  {
4211  WindowClause *wc = lfirst_node(WindowClause, l);
4212  List *window_pathkeys;
4213  int presorted_keys;
4214  bool is_sorted;
4215  bool topwindow;
4216 
4217  window_pathkeys = make_pathkeys_for_window(root,
4218  wc,
4219  root->processed_tlist);
4220 
4221  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4222  path->pathkeys,
4223  &presorted_keys);
4224 
4225  /* Sort if necessary */
4226  if (!is_sorted)
4227  {
4228  /*
4229  * No presorted keys or incremental sort disabled, just perform a
4230  * complete sort.
4231  */
4232  if (presorted_keys == 0 || !enable_incremental_sort)
4233  path = (Path *) create_sort_path(root, window_rel,
4234  path,
4235  window_pathkeys,
4236  -1.0);
4237  else
4238  {
4239  /*
4240  * Since we have presorted keys and incremental sort is
4241  * enabled, just use incremental sort.
4242  */
4243  path = (Path *) create_incremental_sort_path(root,
4244  window_rel,
4245  path,
4246  window_pathkeys,
4247  presorted_keys,
4248  -1.0);
4249  }
4250  }
4251 
4252  if (lnext(activeWindows, l))
4253  {
4254  /*
4255  * Add the current WindowFuncs to the output target for this
4256  * intermediate WindowAggPath. We must copy window_target to
4257  * avoid changing the previous path's target.
4258  *
4259  * Note: a WindowFunc adds nothing to the target's eval costs; but
4260  * we do need to account for the increase in tlist width.
4261  */
4262  ListCell *lc2;
4263 
4264  window_target = copy_pathtarget(window_target);
4265  foreach(lc2, wflists->windowFuncs[wc->winref])
4266  {
4267  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4268 
4269  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4270  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4271  }
4272  }
4273  else
4274  {
4275  /* Install the goal target in the topmost WindowAgg */
4276  window_target = output_target;
4277  }
4278 
4279  /* mark the final item in the list as the top-level window */
4280  topwindow = foreach_current_index(l) == list_length(activeWindows) - 1;
4281 
4282  /*
4283  * Accumulate all of the runConditions from each intermediate
4284  * WindowClause. The top-level WindowAgg must pass these as a qual so
4285  * that it filters out unwanted tuples correctly.
4286  */
4287  if (!topwindow)
4288  topqual = list_concat(topqual, wc->runCondition);
4289 
4290  path = (Path *)
4291  create_windowagg_path(root, window_rel, path, window_target,
4292  wflists->windowFuncs[wc->winref],
4293  wc, topwindow ? topqual : NIL, topwindow);
4294  }
4295 
4296  add_path(window_rel, path);
4297 }
4298 
4299 /*
4300  * create_distinct_paths
4301  *
4302  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4303  *
4304  * input_rel: contains the source-data Paths
4305  *
4306  * Note: input paths should already compute the desired pathtarget, since
4307  * Sort/Unique won't project anything.
4308  */
4309 static RelOptInfo *
4310 create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel)
4311 {
4312  RelOptInfo *distinct_rel;
4313 
4314  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4315  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4316 
4317  /*
4318  * We don't compute anything at this level, so distinct_rel will be
4319  * parallel-safe if the input rel is parallel-safe. In particular, if
4320  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4321  * output those expressions, and will not be parallel-safe unless those
4322  * expressions are parallel-safe.
4323  */
4324  distinct_rel->consider_parallel = input_rel->consider_parallel;
4325 
4326  /*
4327  * If the input rel belongs to a single FDW, so does the distinct_rel.
4328  */
4329  distinct_rel->serverid = input_rel->serverid;
4330  distinct_rel->userid = input_rel->userid;
4331  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4332  distinct_rel->fdwroutine = input_rel->fdwroutine;
4333 
4334  /* build distinct paths based on input_rel's pathlist */
4335  create_final_distinct_paths(root, input_rel, distinct_rel);
4336 
4337  /* now build distinct paths based on input_rel's partial_pathlist */
4338  create_partial_distinct_paths(root, input_rel, distinct_rel);
4339 
4340  /* Give a helpful error if we failed to create any paths */
4341  if (distinct_rel->pathlist == NIL)
4342  ereport(ERROR,
4343  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4344  errmsg("could not implement DISTINCT"),
4345  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4346 
4347  /*
4348  * If there is an FDW that's responsible for all baserels of the query,
4349  * let it consider adding ForeignPaths.
4350  */
4351  if (distinct_rel->fdwroutine &&
4352  distinct_rel->fdwroutine->GetForeignUpperPaths)
4353  distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4354  UPPERREL_DISTINCT,
4355  input_rel,
4356  distinct_rel,
4357  NULL);
4358 
4359  /* Let extensions possibly add some more paths */
4360  if (create_upper_paths_hook)
4361  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT, input_rel,
4362  distinct_rel, NULL);
4363 
4364  /* Now choose the best path(s) */
4365  set_cheapest(distinct_rel);
4366 
4367  return distinct_rel;
4368 }
4369 
4370 /*
4371  * create_partial_distinct_paths
4372  *
4373  * Process 'input_rel' partial paths and add unique/aggregate paths to the
4374  * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4375  * paths on top and add a final unique/aggregate path to remove any duplicates
4376  * produced by combining rows from parallel workers.
4377  */
4378 static void
4379 create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4380  RelOptInfo *final_distinct_rel)
4381 {
4382  RelOptInfo *partial_distinct_rel;
4383  Query *parse;
4384  List *distinctExprs;
4385  double numDistinctRows;
4386  Path *cheapest_partial_path;
4387  ListCell *lc;
4388 
4389  /* nothing to do when there are no partial paths in the input rel */
4390  if (!input_rel->consider_parallel || input_rel->partial_pathlist == NIL)
4391  return;
4392 
4393  parse = root->parse;
4394 
4395  /* can't do parallel DISTINCT ON */
4396  if (parse->hasDistinctOn)
4397  return;
4398 
4399  partial_distinct_rel = fetch_upper_rel(root, UPPERREL_PARTIAL_DISTINCT,
4400  NULL);
4401  partial_distinct_rel->reltarget = root->upper_targets[UPPERREL_PARTIAL_DISTINCT];
4402  partial_distinct_rel->consider_parallel = input_rel->consider_parallel;
4403 
4404  /*
4405  * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4406  */
4407  partial_distinct_rel->serverid = input_rel->serverid;
4408  partial_distinct_rel->userid = input_rel->userid;
4409  partial_distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4410  partial_distinct_rel->fdwroutine = input_rel->fdwroutine;
4411 
4412  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4413 
4414  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4415  parse->targetList);
4416 
4417  /* estimate how many distinct rows we'll get from each worker */
4418  numDistinctRows = estimate_num_groups(root, distinctExprs,
4419  cheapest_partial_path->rows,
4420  NULL, NULL);
4421 
4422  /* first try adding unique paths atop of sorted paths */
4423  if (grouping_is_sortable(parse->distinctClause))
4424  {
4425  foreach(lc, input_rel->partial_pathlist)
4426  {
4427  Path *path = (Path *) lfirst(lc);
4428 
4429  if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys))
4430  {
4431  add_partial_path(partial_distinct_rel, (Path *)
4432  create_upper_unique_path(root,
4433  partial_distinct_rel,
4434  path,
4435  list_length(root->distinct_pathkeys),
4436  numDistinctRows));
4437  }
4438  }
4439  }
4440 
4441  /*
4442  * Now try hash aggregate paths, if enabled and hashing is possible. Since
4443  * we're not on the hook to ensure we do our best to create at least one
4444  * path here, we treat enable_hashagg as a hard off-switch rather than the
4445  * slightly softer variant in create_final_distinct_paths.
4446  */
4447  if (enable_hashagg && grouping_is_hashable(parse->distinctClause))
4448  {
4449  add_partial_path(partial_distinct_rel, (Path *)
4450  create_agg_path(root,
4451  partial_distinct_rel,
4452  cheapest_partial_path,
4453  cheapest_partial_path->pathtarget,
4454  AGG_HASHED,
4455  AGGSPLIT_SIMPLE,
4456  parse->distinctClause,
4457  NIL,
4458  NULL,
4459  numDistinctRows));
4460  }
4461 
4462  /*
4463  * If there is an FDW that's responsible for all baserels of the query,
4464  * let it consider adding ForeignPaths.
4465  */
4466  if (partial_distinct_rel->fdwroutine &&
4467  partial_distinct_rel->fdwroutine->GetForeignUpperPaths)
4468  partial_distinct_rel->fdwroutine->GetForeignUpperPaths(root,
4469  UPPERREL_PARTIAL_DISTINCT,
4470  input_rel,
4471  partial_distinct_rel,
4472  NULL);
4473 
4474  /* Let extensions possibly add some more partial paths */
4475  if (create_upper_paths_hook)
4476  (*create_upper_paths_hook) (root, UPPERREL_PARTIAL_DISTINCT,
4477  input_rel, partial_distinct_rel, NULL);
4478 
4479  if (partial_distinct_rel->partial_pathlist != NIL)
4480  {
4481  generate_gather_paths(root, partial_distinct_rel, true);
4482  set_cheapest(partial_distinct_rel);
4483 
4484  /*
4485  * Finally, create paths to distinctify the final result. This step
4486  * is needed to remove any duplicates due to combining rows from
4487  * parallel workers.
4488  */
4489  create_final_distinct_paths(root, partial_distinct_rel,
4490  final_distinct_rel);
4491  }
4492 }
4493 
4494 /*
4495  * create_final_distinct_paths
4496  * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4497  *
4498  * input_rel: contains the source-data paths
4499  * distinct_rel: destination relation for storing created paths
4500  */
4501 static RelOptInfo *
4502 create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
4503  RelOptInfo *distinct_rel)
4504 {
4505  Query *parse = root->parse;
4506  Path *cheapest_input_path = input_rel->cheapest_total_path;
4507  double numDistinctRows;
4508  bool allow_hash;
4509  Path *path;
4510  ListCell *lc;
4511 
4512  /* Estimate number of distinct rows there will be */
4513  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4514  root->hasHavingQual)
4515  {
4516  /*
4517  * If there was grouping or aggregation, use the number of input rows
4518  * as the estimated number of DISTINCT rows (ie, assume the input is
4519  * already mostly unique).
4520  */
4521  numDistinctRows = cheapest_input_path->rows;
4522  }
4523  else
4524  {
4525  /*
4526  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4527  */
4528  List *distinctExprs;
4529 
4530  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4531  parse->targetList);
4532  numDistinctRows = estimate_num_groups(root, distinctExprs,
4533  cheapest_input_path->rows,
4534  NULL, NULL);
4535  }
4536 
4537  /*
4538  * Consider sort-based implementations of DISTINCT, if possible.
4539  */
4540  if (grouping_is_sortable(parse->distinctClause))
4541  {
4542  /*
4543  * First, if we have any adequately-presorted paths, just stick a
4544  * Unique node on those. Then consider doing an explicit sort of the
4545  * cheapest input path and Unique'ing that.
4546  *
4547  * When we have DISTINCT ON, we must sort by the more rigorous of
4548  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4549  * Also, if we do have to do an explicit sort, we might as well use
4550  * the more rigorous ordering to avoid a second sort later. (Note
4551  * that the parser will have ensured that one clause is a prefix of
4552  * the other.)
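 * For example, "SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b DESC"
 * must sort by (a, b DESC); sorting by the DISTINCT key a alone could pick
 * the wrong representative row within each group of duplicate a values.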
4553  */
4554  List *needed_pathkeys;
4555 
4556  if (parse->hasDistinctOn &&
4557  list_length(root->distinct_pathkeys) <
4558  list_length(root->sort_pathkeys))
4559  needed_pathkeys = root->sort_pathkeys;
4560  else
4561  needed_pathkeys = root->distinct_pathkeys;
4562 
4563  foreach(lc, input_rel->pathlist)
4564  {
4565  Path *path = (Path *) lfirst(lc);
4566 
4567  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4568  {
4569  add_path(distinct_rel, (Path *)
4570  create_upper_unique_path(root, distinct_rel,
4571  path,
4572  list_length(root->distinct_pathkeys),
4573  numDistinctRows));
4574  }
4575  }
4576 
4577  /* For explicit-sort case, always use the more rigorous clause */
4578  if (list_length(root->distinct_pathkeys) <
4579  list_length(root->sort_pathkeys))
4580  {
4581  needed_pathkeys = root->sort_pathkeys;
4582  /* Assert checks that parser didn't mess up... */
4583  Assert(pathkeys_contained_in(root->distinct_pathkeys,
4584  needed_pathkeys));
4585  }
4586  else
4587  needed_pathkeys = root->distinct_pathkeys;
4588 
4589  path = cheapest_input_path;
4590  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4591  path = (Path *) create_sort_path(root, distinct_rel,
4592  path,
4593  needed_pathkeys,
4594  -1.0);
4595 
4596  add_path(distinct_rel, (Path *)
4597  create_upper_unique_path(root, distinct_rel,
4598  path,
4599  list_length(root->distinct_pathkeys),
4600  numDistinctRows));
4601  }
4602 
4603  /*
4604  * Consider hash-based implementations of DISTINCT, if possible.
4605  *
4606  * If we were not able to make any other types of path, we *must* hash or
4607  * die trying. If we do have other choices, there are two things that
4608  * should prevent selection of hashing: if the query uses DISTINCT ON
4609  * (because it won't really have the expected behavior if we hash), or if
4610  * enable_hashagg is off.
4611  *
4612  * Note: grouping_is_hashable() is much more expensive to check than the
4613  * other gating conditions, so we want to do it last.
4614  */
4615  if (distinct_rel->pathlist == NIL)
4616  allow_hash = true; /* we have no alternatives */
4617  else if (parse->hasDistinctOn || !enable_hashagg)
4618  allow_hash = false; /* policy-based decision not to hash */
4619  else
4620  allow_hash = true; /* default */
4621 
4622  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4623  {
4624  /* Generate hashed aggregate path --- no sort needed */
4625  add_path(distinct_rel, (Path *)
4626  create_agg_path(root,
4627  distinct_rel,
4628  cheapest_input_path,
4629  cheapest_input_path->pathtarget,
4630  AGG_HASHED,
4631  AGGSPLIT_SIMPLE,
4632  parse->distinctClause,
4633  NIL,
4634  NULL,
4635  numDistinctRows));
4636  }
4637 
4638  return distinct_rel;
4639 }
4640 
4641 /*
4642  * create_ordered_paths
4643  *
4644  * Build a new upperrel containing Paths for ORDER BY evaluation.
4645  *
4646  * All paths in the result must satisfy the ORDER BY ordering.
4647  * The only new paths we need consider are an explicit full sort
4648  * and incremental sort on the cheapest-total existing path.
4649  *
4650  * input_rel: contains the source-data Paths
4651  * target: the output tlist the result Paths must emit
4652  * limit_tuples: estimated bound on the number of output tuples,
4653  * or -1 if no LIMIT or couldn't estimate
4654  *
4655  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
4656  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
4657  */
4658 static RelOptInfo *
4659 create_ordered_paths(PlannerInfo *root,
4660  RelOptInfo *input_rel,
4661  PathTarget *target,
4662  bool target_parallel_safe,
4663  double limit_tuples)
4664 {
4665  Path *cheapest_input_path = input_rel->cheapest_total_path;
4666  RelOptInfo *ordered_rel;
4667  ListCell *lc;
4668 
4669  /* For now, do all work in the (ORDERED, NULL) upperrel */
4670  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4671 
4672  /*
4673  * If the input relation is not parallel-safe, then the ordered relation
4674  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4675  * target list is parallel-safe.
4676  */
4677  if (input_rel->consider_parallel && target_parallel_safe)
4678  ordered_rel->consider_parallel = true;
4679 
4680  /*
4681  * If the input rel belongs to a single FDW, so does the ordered_rel.
4682  */
4683  ordered_rel->serverid = input_rel->serverid;
4684  ordered_rel->userid = input_rel->userid;
4685  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4686  ordered_rel->fdwroutine = input_rel->fdwroutine;
4687 
4688  foreach(lc, input_rel->pathlist)
4689  {
4690  Path *input_path = (Path *) lfirst(lc);
4691  Path *sorted_path = input_path;
4692  bool is_sorted;
4693  int presorted_keys;
4694 
4695  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
4696  input_path->pathkeys, &presorted_keys);
4697 
4698  if (is_sorted)
4699  {
4700  /* Use the input path as is, but add a projection step if needed */
4701  if (sorted_path->pathtarget != target)
4702  sorted_path = apply_projection_to_path(root, ordered_rel,
4703  sorted_path, target);
4704 
4705  add_path(ordered_rel, sorted_path);
4706  }
4707  else
4708  {
4709  /*
4710  * Try adding an explicit sort, but only to the cheapest total
4711  * path since a full sort should generally add the same cost to
4712  * all paths.
4713  */
4714  if (input_path == cheapest_input_path)
4715  {
4716  /*
4717  * Sort the cheapest input path. An explicit sort here can
4718  * take advantage of LIMIT.
4719  */
4720  sorted_path = (Path *) create_sort_path(root,
4721  ordered_rel,
4722  input_path,
4723  root->sort_pathkeys,
4724  limit_tuples);
4725  /* Add projection step if needed */
4726  if (sorted_path->pathtarget != target)
4727  sorted_path = apply_projection_to_path(root, ordered_rel,
4728  sorted_path, target);
4729 
4730  add_path(ordered_rel, sorted_path);
4731  }
4732 
4733  /*
4734  * If incremental sort is enabled, then try it as well. Unlike
4735  * with regular sorts, we can't just look at the cheapest path,
4736  * because the cost of incremental sort depends on how well
4737  * presorted the path is. Additionally, incremental sort may enable
4738  * a cheaper startup path to win out despite higher total cost.
4739  */
4740  if (!enable_incremental_sort)
4741  continue;
4742 
4743  /* Likewise, if the path can't be used for incremental sort. */
4744  if (!presorted_keys)
4745  continue;
4746 
4747  /* Also consider incremental sort. */
4748  sorted_path = (Path *) create_incremental_sort_path(root,
4749  ordered_rel,
4750  input_path,
4751  root->sort_pathkeys,
4752  presorted_keys,
4753  limit_tuples);
4754 
4755  /* Add projection step if needed */
4756  if (sorted_path->pathtarget != target)
4757  sorted_path = apply_projection_to_path(root, ordered_rel,
4758  sorted_path, target);
4759 
4760  add_path(ordered_rel, sorted_path);
4761  }
4762  }
4763 
4764  /*
4765  * generate_gather_paths() will have already generated a simple Gather
4766  * path for the best parallel path, if any, and the loop above will have
4767  * considered sorting it. Similarly, generate_gather_paths() will also
4768  * have generated order-preserving Gather Merge plans which can be used
4769  * without sorting if they happen to match the sort_pathkeys, and the loop
4770  * above will have handled those as well. However, there's one more
4771  * possibility: it may make sense to sort the cheapest partial path
4772  * according to the required output order and then use Gather Merge.
4773  */
4774  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
4775  input_rel->partial_pathlist != NIL)
4776  {
4777  Path *cheapest_partial_path;
4778 
4779  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4780 
4781  /*
4782  * If cheapest partial path doesn't need a sort, this is redundant
4783  * with what's already been tried.
4784  */
4785  if (!pathkeys_contained_in(root->sort_pathkeys,
4786  cheapest_partial_path->pathkeys))
4787  {
4788  Path *path;
4789  double total_groups;
4790 
4791  path = (Path *) create_sort_path(root,
4792  ordered_rel,
4793  cheapest_partial_path,
4794  root->sort_pathkeys,
4795  limit_tuples);
4796 
4797  total_groups = cheapest_partial_path->rows *
4798  cheapest_partial_path->parallel_workers;
4799  path = (Path *)
4800  create_gather_merge_path(root, ordered_rel,
4801  path,
4802  path->pathtarget,
4803  root->sort_pathkeys, NULL,
4804  &total_groups);
4805 
4806  /* Add projection step if needed */
4807  if (path->pathtarget != target)
4808  path = apply_projection_to_path(root, ordered_rel,
4809  path, target);
4810 
4811  add_path(ordered_rel, path);
4812  }
4813 
4814  /*
4815  * Consider incremental sort with a gather merge on partial paths.
4816  *
4817  * We can also skip the entire loop when we only have a single-item
4818  * sort_pathkeys because then we can't possibly have a presorted
4819  * prefix of the list without having the list be fully sorted.
4820  */
4821  if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1)
4822  {
4823  ListCell *lc;
4824 
4825  foreach(lc, input_rel->partial_pathlist)
4826  {
4827  Path *input_path = (Path *) lfirst(lc);
4828  Path *sorted_path;
4829  bool is_sorted;
4830  int presorted_keys;
4831  double total_groups;
4832 
4833  /*
4834  * We don't care if this is the cheapest partial path - we
4835  * can't simply skip it, because it may be partially sorted in
4836  * which case we want to consider adding incremental sort
4837  * (instead of full sort, which is what happens above).
4838  */
4839 
4840  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
4841  input_path->pathkeys,
4842  &presorted_keys);
4843 
4844  /* No point in adding incremental sort on fully sorted paths. */
4845  if (is_sorted)
4846  continue;
4847 
4848  if (presorted_keys == 0)
4849  continue;
4850 
4851  /* Since we have presorted keys, consider incremental sort. */
4852  sorted_path = (Path *) create_incremental_sort_path(root,
4853  ordered_rel,
4854  input_path,
4855  root->sort_pathkeys,
4856  presorted_keys,
4857  limit_tuples);
4858  total_groups = input_path->rows *
4859  input_path->parallel_workers;
4860  sorted_path = (Path *)
4861  create_gather_merge_path(root, ordered_rel,
4862  sorted_path,
4863  sorted_path->pathtarget,
4864  root->sort_pathkeys, NULL,
4865  &total_groups);
4866 
4867  /* Add projection step if needed */
4868  if (sorted_path->pathtarget != target)
4869  sorted_path = apply_projection_to_path(root, ordered_rel,
4870  sorted_path, target);
4871 
4872  add_path(ordered_rel, sorted_path);
4873  }
4874  }
4875  }
4876 
4877  /*
4878  * If there is an FDW that's responsible for all baserels of the query,
4879  * let it consider adding ForeignPaths.
4880  */
4881  if (ordered_rel->fdwroutine &&
4882  ordered_rel->fdwroutine->GetForeignUpperPaths)
4883  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
4884  input_rel, ordered_rel,
4885  NULL);
4886 
4887  /* Let extensions possibly add some more paths */
4888  if (create_upper_paths_hook)
4889  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
4890  input_rel, ordered_rel, NULL);
4891 
4892  /*
4893  * No need to bother with set_cheapest here; grouping_planner does not
4894  * need us to do it.
4895  */
4896  Assert(ordered_rel->pathlist != NIL);
4897 
4898  return ordered_rel;
4899 }
4900 
4901 
4902 /*
4903  * make_group_input_target
4904  * Generate appropriate PathTarget for initial input to grouping nodes.
4905  *
4906  * If there is grouping or aggregation, the scan/join subplan cannot emit
4907  * the query's final targetlist; for example, it certainly can't emit any
4908  * aggregate function calls. This routine generates the correct target
4909  * for the scan/join subplan.
4910  *
4911  * The query target list passed from the parser already contains entries
4912  * for all ORDER BY and GROUP BY expressions, but it will not have entries
4913  * for variables used only in HAVING clauses; so we need to add those
4914  * variables to the subplan target list. Also, we flatten all expressions
4915  * except GROUP BY items into their component variables; other expressions
4916  * will be computed by the upper plan nodes rather than by the subplan.
4917  * For example, given a query like
4918  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
4919  * we want to pass this targetlist to the subplan:
4920  * a+b,c,d
4921  * where the a+b target will be used by the Sort/Group steps, and the
4922  * other targets will be used for computing the final results.
4923  *
4924  * 'final_target' is the query's final target list (in PathTarget form)
4925  *
4926  * The result is the PathTarget to be computed by the Paths returned from
4927  * query_planner().
4928  */
4929 static PathTarget *
4930 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
4931 {
4932  Query *parse = root->parse;
4933  PathTarget *input_target;
4934  List *non_group_cols;
4935  List *non_group_vars;
4936  int i;
4937  ListCell *lc;
4938 
4939  /*
4940  * We must build a target containing all grouping columns, plus any other
4941  * Vars mentioned in the query's targetlist and HAVING qual.
4942  */
4943  input_target = create_empty_pathtarget();
4944  non_group_cols = NIL;
4945 
4946  i = 0;
4947  foreach(lc, final_target->exprs)
4948  {
4949  Expr *expr = (Expr *) lfirst(lc);
4950  Index sgref = get_pathtarget_sortgroupref(final_target, i);
4951 
4952  if (sgref && parse->groupClause &&
4953  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
4954  {
4955  /*
4956  * It's a grouping column, so add it to the input target as-is.
4957  */
4958  add_column_to_pathtarget(input_target, expr, sgref);
4959  }
4960  else
4961  {
4962  /*
4963  * Non-grouping column, so just remember the expression for later
4964  * call to pull_var_clause.
4965  */
4966  non_group_cols = lappend(non_group_cols, expr);
4967  }
4968 
4969  i++;
4970  }
4971 
4972  /*
4973  * If there's a HAVING clause, we'll need the Vars it uses, too.
4974  */
4975  if (parse->havingQual)
4976  non_group_cols = lappend(non_group_cols, parse->havingQual);
4977 
4978  /*
4979  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
4980  * add them to the input target if not already present. (A Var used
4981  * directly as a GROUP BY item will be present already.) Note this
4982  * includes Vars used in resjunk items, so we are covering the needs of
4983  * ORDER BY and window specifications. Vars used within Aggrefs and
4984  * WindowFuncs will be pulled out here, too.
4985  */
4986  non_group_vars = pull_var_clause((Node *) non_group_cols,
4987  PVC_RECURSE_AGGREGATES |
4988  PVC_RECURSE_WINDOWFUNCS |
4989  PVC_INCLUDE_PLACEHOLDERS);
4990  add_new_columns_to_pathtarget(input_target, non_group_vars);
4991 
4992  /* clean up cruft */
4993  list_free(non_group_vars);
4994  list_free(non_group_cols);
4995 
4996  /* XXX this causes some redundant cost calculation ... */
4997  return set_pathtarget_cost_width(root, input_target);
4998 }
4999 
5000 /*
5001  * make_partial_grouping_target
5002  * Generate appropriate PathTarget for output of partial aggregate
5003  * (or partial grouping, if there are no aggregates) nodes.
5004  *
5005  * A partial aggregation node needs to emit all the same aggregates that
5006  * a regular aggregation node would, plus any aggregates used in HAVING;
5007  * except that the Aggref nodes should be marked as partial aggregates.
5008  *
5009  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5010  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5011  * these would be Vars that are grouped by or used in grouping expressions.)
5012  *
5013  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5014  * havingQual represents the HAVING clause.
5015  */
5016 static PathTarget *
5017 make_partial_grouping_target(PlannerInfo *root,
5018  PathTarget *grouping_target,
5019  Node *havingQual)
5020 {
5021  Query *parse = root->parse;
5022  PathTarget *partial_target;
5023  List *non_group_cols;
5024  List *non_group_exprs;
5025  int i;
5026  ListCell *lc;
5027 
5028  partial_target = create_empty_pathtarget();
5029  non_group_cols = NIL;
5030 
5031  i = 0;
5032  foreach(lc, grouping_target->exprs)
5033  {
5034  Expr *expr = (Expr *) lfirst(lc);
5035  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5036 
5037  if (sgref && parse->groupClause &&
5038  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5039  {
5040  /*
5041  * It's a grouping column, so add it to the partial_target as-is.
5042  * (This allows the upper agg step to repeat the grouping calcs.)
5043  */
5044  add_column_to_pathtarget(partial_target, expr, sgref);
5045  }
5046  else
5047  {
5048  /*
5049  * Non-grouping column, so just remember the expression for later
5050  * call to pull_var_clause.
5051  */
5052  non_group_cols = lappend(non_group_cols, expr);
5053  }
5054 
5055  i++;
5056  }
5057 
5058  /*
5059  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5060  */
5061  if (havingQual)
5062  non_group_cols = lappend(non_group_cols, havingQual);
5063 
5064  /*
5065  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5066  * non-group cols (plus HAVING), and add them to the partial_target if not
5067  * already present. (An expression used directly as a GROUP BY item will
5068  * be present already.) Note this includes Vars used in resjunk items, so
5069  * we are covering the needs of ORDER BY and window specifications.
5070  */
5071  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5072  PVC_INCLUDE_AGGREGATES |
5073  PVC_RECURSE_WINDOWFUNCS |
5074  PVC_INCLUDE_PLACEHOLDERS);
5075 
5076  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5077 
5078  /*
5079  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5080  * are at the top level of the target list, so we can just scan the list
5081  * rather than recursing through the expression trees.
5082  */
5083  foreach(lc, partial_target->exprs)
5084  {
5085  Aggref *aggref = (Aggref *) lfirst(lc);
5086 
5087  if (IsA(aggref, Aggref))
5088  {
5089  Aggref *newaggref;
5090 
5091  /*
5092  * We shouldn't need to copy the substructure of the Aggref node,
5093  * but flat-copy the node itself to avoid damaging other trees.
5094  */
5095  newaggref = makeNode(Aggref);
5096  memcpy(newaggref, aggref, sizeof(Aggref));
5097 
5098  /* For now, assume serialization is required */
5099  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5100 
5101  lfirst(lc) = newaggref;
5102  }
5103  }
5104 
5105  /* clean up cruft */
5106  list_free(non_group_exprs);
5107  list_free(non_group_cols);
5108 
5109  /* XXX this causes some redundant cost calculation ... */
5110  return set_pathtarget_cost_width(root, partial_target);
5111 }
5112 
5113 /*
5114  * mark_partial_aggref
5115  * Adjust an Aggref to make it represent a partial-aggregation step.
5116  *
5117  * The Aggref node is modified in-place; caller must do any copying required.
5118  */
5119 void
5120 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5121 {
5122  /* aggtranstype should be computed by this point */
5123  Assert(OidIsValid(agg->aggtranstype));
5124  /* ... but aggsplit should still be as the parser left it */
5125  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5126 
5127  /* Mark the Aggref with the intended partial-aggregation mode */
5128  agg->aggsplit = aggsplit;
5129 
5130  /*
5131  * Adjust result type if needed. Normally, a partial aggregate returns
5132  * the aggregate's transition type; but if that's INTERNAL and we're
5133  * serializing, it returns BYTEA instead.
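 * For example, a partial avg(numeric), whose transition type is INTERNAL,
 * is given result type BYTEA under AGGSPLIT_INITIAL_SERIAL, since only the
 * serialized transition state can be passed up from a parallel worker.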
5134  */
5135  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5136  {
5137  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5138  agg->aggtype = BYTEAOID;
5139  else
5140  agg->aggtype = agg->aggtranstype;
5141  }
5142 }
5143 
5144 /*
5145  * postprocess_setop_tlist
5146  * Fix up targetlist returned by plan_set_operations().
5147  *
5148  * We need to transpose sort key info from the orig_tlist into new_tlist.
5149  * NOTE: this would not be good enough if we supported resjunk sort keys
5150  * for results of set operations --- then, we'd need to project a whole
5151  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5152  * find any resjunk columns in orig_tlist.
5153  */
5154 static List *
5155 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5156 {
5157  ListCell *l;
5158  ListCell *orig_tlist_item = list_head(orig_tlist);
5159 
5160  foreach(l, new_tlist)
5161  {
5162  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5163  TargetEntry *orig_tle;
5164 
5165  /* ignore resjunk columns in setop result */
5166  if (new_tle->resjunk)
5167  continue;
5168 
5169  Assert(orig_tlist_item != NULL);
5170  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5171  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5172  if (orig_tle->resjunk) /* should not happen */
5173  elog(ERROR, "resjunk output columns are not implemented");
5174  Assert(new_tle->resno == orig_tle->resno);
5175  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5176  }
5177  if (orig_tlist_item != NULL)
5178  elog(ERROR, "resjunk output columns are not implemented");
5179  return new_tlist;
5180 }
5181 
5182 /*
5183  * select_active_windows
5184  * Create a list of the "active" window clauses (ie, those referenced
5185  * by non-deleted WindowFuncs) in the order they are to be executed.
5186  */
5187 static List *
5188 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5189 {
5190  List *windowClause = root->parse->windowClause;
5191  List *result = NIL;
5192  ListCell *lc;
5193  int nActive = 0;
5194  WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5195  * list_length(windowClause));
5196 
5197  /* First, construct an array of the active windows */
5198  foreach(lc, windowClause)
5199  {
5200  WindowClause *wc = lfirst_node(WindowClause, lc);
5201 
5202  /* It's only active if wflists shows some related WindowFuncs */
5203  Assert(wc->winref <= wflists->maxWinRef);
5204  if (wflists->windowFuncs[wc->winref] == NIL)
5205  continue;
5206 
5207  actives[nActive].wc = wc; /* original clause */
5208 
5209  /*
5210  * For sorting, we want the list of partition keys followed by the
5211  * list of sort keys. But pathkeys construction will remove duplicates
5212  * between the two, so we can as well (even though we can't detect all
5213  * of the duplicates, since some may come from ECs - that might mean
5214  * we miss optimization chances here). We must, however, ensure that
5215  * the order of entries is preserved with respect to the ones we do
5216  * keep.
5217  *
5218  * partitionClause and orderClause had their own duplicates removed in
5219  * parse analysis, so we're only concerned here with removing
5220  * orderClause entries that also appear in partitionClause.
5221  */
5222  actives[nActive].uniqueOrder =
5223  list_concat_copy(wc->partitionClause,
5224  wc->orderClause);
5225  nActive++;
5226  }
5227 
5228  /*
5229  * Sort active windows by their partitioning/ordering clauses, ignoring
5230  * any framing clauses, so that the windows that need the same sorting are
5231  * adjacent in the list. When we come to generate paths, this will avoid
5232  * inserting additional Sort nodes.
5233  *
5234  * This is how we implement a specific requirement from the SQL standard,
5235  * which says that when two or more windows are order-equivalent (i.e.
5236  * have matching partition and order clauses, even if their names or
5237  * framing clauses differ), then all peer rows must be presented in the
5238  * same order in all of them. If we allowed multiple sort nodes for such
5239  * cases, we'd risk having the peer rows end up in different orders in
5240  * equivalent windows due to sort instability. (See General Rule 4 of
5241  * <window clause> in SQL2008 - SQL2016.)
5242  *
5243  * Additionally, if the entire list of clauses of one window is a prefix
5244  * of another, put first the window with stronger sorting requirements.
5245  * This way we will first sort for stronger window, and won't have to sort
5246  * again for the weaker one.
5247  */
5248  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5249 
5250  /* build ordered list of the original WindowClause nodes */
5251  for (int i = 0; i < nActive; i++)
5252  result = lappend(result, actives[i].wc);
5253 
5254  pfree(actives);
5255 
5256  return result;
5257 }
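/*
 * Illustrative example (editorial): given
 *
 *     SELECT sum(x) OVER (PARTITION BY a ORDER BY b),
 *            avg(y) OVER (PARTITION BY a ORDER BY b, c)
 *     FROM tab;
 *
 * both windows share the sort prefix (a, b).  Placing the window with the
 * longer clause list first means the input is sorted once on (a, b, c), and
 * the second WindowAgg can consume that ordering without another Sort.
 */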
5258 
5259 /*
5260  * common_prefix_cmp
5261  * QSort comparison function for WindowClauseSortData
5262  *
5263  * Sort the windows by the required sorting clauses. First, compare the sort
5264  * clauses themselves. Second, if one window's clauses are a prefix of another
5265  * one's clauses, put the window with more sort clauses first.
5266  */
5267 static int
5268 common_prefix_cmp(const void *a, const void *b)
5269 {
5270  const WindowClauseSortData *wcsa = a;
5271  const WindowClauseSortData *wcsb = b;
5272  ListCell *item_a;
5273  ListCell *item_b;
5274 
5275  forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5276  {
5277  SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5278  SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5279 
5280  if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5281  return -1;
5282  else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5283  return 1;
5284  else if (sca->sortop > scb->sortop)
5285  return -1;
5286  else if (sca->sortop < scb->sortop)
5287  return 1;
5288  else if (sca->nulls_first && !scb->nulls_first)
5289  return -1;
5290  else if (!sca->nulls_first && scb->nulls_first)
5291  return 1;
5292  /* no need to compare eqop, since it is fully determined by sortop */
5293  }
5294 
5295  if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5296  return -1;
5297  else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5298  return 1;
5299 
5300  return 0;
5301 }
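/*
 * Worked example (editorial): suppose wcsa->uniqueOrder references sortgroup
 * refs (1, 2, 3) and wcsb->uniqueOrder references (1, 2), with identical
 * sortop and nulls_first on the shared clauses.  The forboth loop finds no
 * difference over the common prefix, so the length comparison returns -1 and
 * wcsa sorts first: its stronger ordering is produced once, and the shorter
 * window reuses it as a prefix.
 */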
5302 
5303 /*
5304  * make_window_input_target
5305  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5306  *
5307  * When the query has window functions, this function computes the desired
5308  * target to be computed by the node just below the first WindowAgg.
5309  * This tlist must contain all values needed to evaluate the window functions,
5310  * compute the final target list, and perform any required final sort step.
5311  * If multiple WindowAggs are needed, each intermediate one adds its window
5312  * function results onto this base tlist; only the topmost WindowAgg computes
5313  * the actual desired target list.
5314  *
5315  * This function is much like make_group_input_target, though not quite enough
5316  * like it to share code. As in that function, we flatten most expressions
5317  * into their component variables. But we do not want to flatten window
5318  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5319  * evaluations of them, which would be bad (possibly even resulting in
5320  * inconsistent answers, if they contain volatile functions).
5321  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5322  * make_group_input_target, because we may no longer have access to the
5323  * individual Vars in them.
5324  *
5325  * Another key difference from make_group_input_target is that we don't
5326  * flatten Aggref expressions, since those are to be computed below the
5327  * window functions and just referenced like Vars above that.
5328  *
5329  * 'final_target' is the query's final target list (in PathTarget form)
5330  * 'activeWindows' is the list of active windows previously identified by
5331  * select_active_windows.
5332  *
5333  * The result is the PathTarget to be computed by the plan node immediately
5334  * below the first WindowAgg node.
5335  */
5336 static PathTarget *
5337 make_window_input_target(PlannerInfo *root,
5338  PathTarget *final_target,
5339  List *activeWindows)
5340 {
5341  Query *parse = root->parse;
5342  PathTarget *input_target;
5343  Bitmapset *sgrefs;
5344  List *flattenable_cols;
5345  List *flattenable_vars;
5346  int i;
5347  ListCell *lc;
5348 
5349  Assert(parse->hasWindowFuncs);
5350 
5351  /*
5352  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5353  * into a bitmapset for convenient reference below.
5354  */
5355  sgrefs = NULL;
5356  foreach(lc, activeWindows)
5357  {
5358  WindowClause *wc = lfirst_node(WindowClause, lc);
5359  ListCell *lc2;
5360 
5361  foreach(lc2, wc->partitionClause)
5362  {
5363  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5364 
5365  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5366  }
5367  foreach(lc2, wc->orderClause)
5368  {
5369  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5370 
5371  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5372  }
5373  }
5374 
5375  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5376  foreach(lc, parse->groupClause)
5377  {
5378  SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
5379 
5380  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5381  }
5382 
5383  /*
5384  * Construct a target containing all the non-flattenable targetlist items,
5385  * and save aside the others for a moment.
5386  */
5387  input_target = create_empty_pathtarget();
5388  flattenable_cols = NIL;
5389 
5390  i = 0;
5391  foreach(lc, final_target->exprs)
5392  {
5393  Expr *expr = (Expr *) lfirst(lc);
5394  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5395 
5396  /*
5397  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5398  * that such items can't contain window functions, so it's okay to
5399  * compute them below the WindowAgg nodes.)
5400  */
5401  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5402  {
5403  /*
5404  * Don't want to deconstruct this value, so add it to the input
5405  * target as-is.
5406  */
5407  add_column_to_pathtarget(input_target, expr, sgref);
5408  }
5409  else
5410  {
5411  /*
5412  * Column is to be flattened, so just remember the expression for
5413  * later call to pull_var_clause.
5414  */
5415  flattenable_cols = lappend(flattenable_cols, expr);
5416  }
5417 
5418  i++;
5419  }
5420 
5421  /*
5422  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5423  * add them to the input target if not already present. (Some might be
5424  * there already because they're used directly as window/group clauses.)
5425  *
5426  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5427  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5428  * at higher levels. On the other hand, we should recurse into
5429  * WindowFuncs to make sure their input expressions are available.
5430  */
5431  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5432  PVC_INCLUDE_AGGREGATES |
5433  PVC_RECURSE_WINDOWFUNCS |
5434  PVC_INCLUDE_PLACEHOLDERS);
5435  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5436 
5437  /* clean up cruft */
5438  list_free(flattenable_vars);
5439  list_free(flattenable_cols);
5440 
5441  /* XXX this causes some redundant cost calculation ... */
5442  return set_pathtarget_cost_width(root, input_target);
5443 }
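/*
 * Illustrative example (editorial): for
 *
 *     SELECT a + 1, rank() OVER (PARTITION BY b ORDER BY c) FROM tab;
 *
 * the PARTITION BY/ORDER BY columns b and c carry sortgrouprefs and are kept
 * in the input target as-is, while "a + 1" is flattened so only the Var "a"
 * is computed below the WindowAgg; the addition happens in the final target,
 * and the window function itself is computed by the WindowAgg node above.
 */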
5444 
5445 /*
5446  * make_pathkeys_for_window
5447  * Create a pathkeys list describing the required input ordering
5448  * for the given WindowClause.
5449  *
5450  * The required ordering is first the PARTITION keys, then the ORDER keys.
5451  * In the future we might try to implement windowing using hashing, in which
5452  * case the ordering could be relaxed, but for now we always sort.
5453  */
5454 static List *
5455 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
5456  List *tlist)
5457 {
5458  List *window_pathkeys;
5459  List *window_sortclauses;
5460 
5461  /* Throw error if can't sort */
5462  if (!grouping_is_sortable(wc->partitionClause))
5463  ereport(ERROR,
5464  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5465  errmsg("could not implement window PARTITION BY"),
5466  errdetail("Window partitioning columns must be of sortable datatypes.")));
5467  if (!grouping_is_sortable(wc->orderClause))
5468  ereport(ERROR,
5469  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5470  errmsg("could not implement window ORDER BY"),
5471  errdetail("Window ordering columns must be of sortable datatypes.")));
5472 
5473  /* Okay, make the combined pathkeys */
5474  window_sortclauses = list_concat_copy(wc->partitionClause, wc->orderClause);
5475  window_pathkeys = make_pathkeys_for_sortclauses(root,
5476  window_sortclauses,
5477  tlist);
5478  list_free(window_sortclauses);
5479  return window_pathkeys;
5480 }
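/*
 * Illustrative example (editorial): for OVER (PARTITION BY a ORDER BY b DESC)
 * the combined sort clause list is (a, b DESC), so the resulting pathkeys are
 * the same as those a query-level "ORDER BY a, b DESC" would produce against
 * the same tlist.
 */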
5481 
5482 /*
5483  * make_sort_input_target
5484  * Generate appropriate PathTarget for initial input to Sort step.
5485  *
5486  * If the query has ORDER BY, this function chooses the target to be computed
5487  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5488  * project) steps. This might or might not be identical to the query's final
5489  * output target.
5490  *
5491  * The main argument for keeping the sort-input tlist the same as the final
5492  * is that we avoid a separate projection node (which will be needed if
5493  * they're different, because Sort can't project). However, there are also
5494  * advantages to postponing tlist evaluation till after the Sort: it ensures
5495  * a consistent order of evaluation for any volatile functions in the tlist,
5496  * and if there's also a LIMIT, we can stop the query without ever computing
5497  * tlist functions for later rows, which is beneficial for both volatile and
5498  * expensive functions.
5499  *
5500  * Our current policy is to postpone volatile expressions till after the sort
5501  * unconditionally (assuming that that's possible, ie they are in plain tlist
5502  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5503  * postpone set-returning expressions, because running them beforehand would
5504  * bloat the sort dataset, and because it might cause unexpected output order
5505  * if the sort isn't stable. However there's a constraint on that: all SRFs
5506  * in the tlist should be evaluated at the same plan step, so that they can
5507  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5508  * mustn't postpone any SRFs. (Note that in principle that policy should
5509  * probably get applied to the group/window input targetlists too, but we
5510  * have not done that historically.) Lastly, expensive expressions are
5511  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5512  * partial evaluation of the query is possible (if neither is true, we expect
5513  * to have to evaluate the expressions for every row anyway), or if there are
5514  * any volatile or set-returning expressions (since once we've put in a
5515  * projection at all, it won't cost any more to postpone more stuff).
5516  *
5517  * Another issue that could potentially be considered here is that
5518  * evaluating tlist expressions could result in data that's either wider
5519  * or narrower than the input Vars, thus changing the volume of data that
5520  * has to go through the Sort. However, we usually have only a very bad
5521  * idea of the output width of any expression more complex than a Var,
5522  * so for now it seems too risky to try to optimize on that basis.
5523  *
5524  * Note that if we do produce a modified sort-input target, and then the
5525  * query ends up not using an explicit Sort, no particular harm is done:
5526  * we'll initially use the modified target for the preceding path nodes,
5527  * but then change them to the final target with apply_projection_to_path.
5528  * Moreover, in such a case the guarantees about evaluation order of
5529  * volatile functions still hold, since the rows are sorted already.
5530  *
5531  * This function has some things in common with make_group_input_target and
5532  * make_window_input_target, though the detailed rules for what to do are
5533  * different. We never flatten/postpone any grouping or ordering columns;
5534  * those are needed before the sort. If we do flatten a particular
5535  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5536  * computed earlier.
5537  *
5538  * 'final_target' is the query's final target list (in PathTarget form)
5539  * 'have_postponed_srfs' is an output argument, see below
5540  *
5541  * The result is the PathTarget to be computed by the plan node immediately
5542  * below the Sort step (and the Distinct step, if any). This will be
5543  * exactly final_target if we decide a projection step wouldn't be helpful.
5544  *
5545  * In addition, *have_postponed_srfs is set to true if we choose to postpone
5546  * any set-returning functions to after the Sort.
5547  */
5548 static PathTarget *
5549 make_sort_input_target(PlannerInfo *root,
5550  PathTarget *final_target,
5551  bool *have_postponed_srfs)
5552 {
5553  Query *parse = root->parse;
5554  PathTarget *input_target;
5555  int ncols;
5556  bool *col_is_srf;
5557  bool *postpone_col;
5558  bool have_srf;
5559  bool have_volatile;
5560  bool have_expensive;
5561  bool have_srf_sortcols;
5562  bool postpone_srfs;
5563  List *postponable_cols;
5564  List *postponable_vars;
5565  int i;
5566  ListCell *lc;
5567 
5568  /* Shouldn't get here unless query has ORDER BY */
5569  Assert(parse->sortClause);
5570 
5571  *have_postponed_srfs = false; /* default result */
5572 
5573  /* Inspect tlist and collect per-column information */
5574  ncols = list_length(final_target->exprs);
5575  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5576  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5577  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5578 
5579  i = 0;
5580  foreach(lc, final_target->exprs)
5581  {
5582  Expr *expr = (Expr *) lfirst(lc);
5583 
5584  /*
5585  * If the column has a sortgroupref, assume it has to be evaluated
5586  * before sorting. Generally such columns would be ORDER BY, GROUP
5587  * BY, etc targets. One exception is columns that were removed from
5588  * GROUP BY by remove_useless_groupby_columns() ... but those would
5589  * only be Vars anyway. There don't seem to be any cases where it
5590  * would be worth the trouble to double-check.
5591  */
5592  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5593  {
5594  /*
5595  * Check for SRF or volatile functions. Check the SRF case first
5596  * because we must know whether we have any postponed SRFs.
5597  */
5598  if (parse->hasTargetSRFs &&
5599  expression_returns_set((Node *) expr))
5600  {
5601  /* We'll decide below whether these are postponable */
5602  col_is_srf[i] = true;
5603  have_srf = true;
5604  }
5605  else if (contain_volatile_functions((Node *) expr))
5606  {
5607  /* Unconditionally postpone */
5608  postpone_col[i] = true;
5609  have_volatile = true;
5610  }
5611  else
5612  {
5613  /*
5614  * Else check the cost. XXX it's annoying to have to do this
5615  * when set_pathtarget_cost_width() just did it. Refactor to
5616  * allow sharing the work?
5617  */
5618  QualCost cost;
5619 
5620  cost_qual_eval_node(&cost, (Node *) expr, root);
5621 
5622  /*
5623  * We arbitrarily define "expensive" as "more than 10X
5624  * cpu_operator_cost". Note this will take in any PL function
5625  * with default cost.
5626  */
5627  if (cost.per_tuple > 10 * cpu_operator_cost)
5628  {
5629  postpone_col[i] = true;
5630  have_expensive = true;
5631  }
5632  }
5633  }
5634  else
5635  {
5636  /* For sortgroupref cols, just check if any contain SRFs */
5637  if (!have_srf_sortcols &&
5638  parse->hasTargetSRFs &&
5639  expression_returns_set((Node *) expr))
5640  have_srf_sortcols = true;
5641  }
5642 
5643  i++;
5644  }
5645 
5646  /*
5647  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5648  */
5649  postpone_srfs = (have_srf && !have_srf_sortcols);
5650 
5651  /*
5652  * If we don't need a post-sort projection, just return final_target.
5653  */
5654  if (!(postpone_srfs || have_volatile ||
5655  (have_expensive &&
5656  (parse->limitCount || root->tuple_fraction > 0))))
5657  return final_target;
5658 
5659  /*
5660  * Report whether the post-sort projection will contain set-returning
5661  * functions. This is important because it affects whether the Sort can
5662  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5663  * to return.
5664  */
5665  *have_postponed_srfs = postpone_srfs;
5666 
5667  /*
5668  * Construct the sort-input target, taking all non-postponable columns and
5669  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5670  * the postponable ones.
5671  */
5672  input_target = create_empty_pathtarget();
5673  postponable_cols = NIL;
5674 
5675  i = 0;
5676  foreach(lc, final_target->exprs)
5677  {
5678  Expr *expr = (Expr *) lfirst(lc);
5679 
5680  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5681  postponable_cols = lappend(postponable_cols, expr);
5682  else
5683  add_column_to_pathtarget(input_target, expr,
5684  get_pathtarget_sortgroupref(final_target, i));
5685 
5686  i++;
5687  }
5688 
5689  /*
5690  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5691  * postponable columns, and add them to the sort-input target if not
5692  * already present. (Some might be there already.) We mustn't
5693  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5694  * would be unable to recompute them.
5695  */
5696  postponable_vars = pull_var_clause((Node *) postponable_cols,
5697  PVC_INCLUDE_AGGREGATES |
5698  PVC_INCLUDE_WINDOWFUNCS |
5699  PVC_INCLUDE_PLACEHOLDERS);
5700  add_new_columns_to_pathtarget(input_target, postponable_vars);
5701 
5702  /* clean up cruft */
5703  list_free(postponable_vars);
5704  list_free(postponable_cols);
5705 
5706  /* XXX this represents even more redundant cost calculation ... */
5707  return set_pathtarget_cost_width(root, input_target);
5708 }
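/*
 * Illustrative example (editorial; slow_func() is a hypothetical user
 * function): in
 *
 *     SELECT id, slow_func(payload) FROM events ORDER BY id LIMIT 10;
 *
 * slow_func(payload) has no sortgroupref, so if it is volatile, or merely
 * expensive while a LIMIT is present, it is postponed: the sort-input target
 * carries just "id" and "payload", and slow_func() runs in a projection above
 * the Sort, for only the ten rows actually returned.
 */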
5709 
5710 /*
5711  * get_cheapest_fractional_path
5712  * Find the cheapest path for retrieving a specified fraction of all
5713  * the tuples expected to be returned by the given relation.
5714  *
5715  * We interpret tuple_fraction the same way as grouping_planner.
5716  *
5717  * We assume set_cheapest() has been run on the given rel.
5718  */
5719 Path *
5720 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5721 {
5722  Path *best_path = rel->cheapest_total_path;
5723  ListCell *l;
5724 
5725  /* If all tuples will be retrieved, just return the cheapest-total path */
5726  if (tuple_fraction <= 0.0)
5727  return best_path;
5728 
5729  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5730  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5731  tuple_fraction /= best_path->rows;
5732 
5733  foreach(l, rel->pathlist)
5734  {
5735  Path *path = (Path *) lfirst(l);
5736 
5737  if (path == rel->cheapest_total_path ||
5738  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5739  continue;
5740 
5741  best_path = path;
5742  }
5743 
5744  return best_path;
5745 }
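/*
 * Worked example (editorial): with tuple_fraction = 100.0 (an absolute row
 * count) and cheapest_total_path->rows = 10000, the fraction becomes 0.01;
 * compare_fractional_path_costs() then prefers whichever path is cheapest for
 * fetching roughly 1% of the output, e.g. an index path whose total cost is
 * high but whose startup cost is very low.
 */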
5746 
5747 /*
5748  * adjust_paths_for_srfs
5749  * Fix up the Paths of the given upperrel to handle tSRFs properly.
5750  *
5751  * The executor can only handle set-returning functions that appear at the
5752  * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5753  * that are not at top level, we need to split up the evaluation into multiple
5754  * plan levels in which each level satisfies this constraint. This function
5755  * modifies each Path of an upperrel that (might) compute any SRFs in its
5756  * output tlist to insert appropriate projection steps.
5757  *
5758  * The given targets and targets_contain_srfs lists are from
5759  * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5760  * target in targets.
5761  */
5762 static void
5763 adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
5764  List *targets, List *targets_contain_srfs)
5765 {
5766  ListCell *lc;
5767 
5768  Assert(list_length(targets) == list_length(targets_contain_srfs));
5769  Assert(!linitial_int(targets_contain_srfs));
5770 
5771  /* If no SRFs appear at this plan level, nothing to do */
5772  if (list_length(targets) == 1)
5773  return;
5774 
5775  /*
5776  * Stack SRF-evaluation nodes atop each path for the rel.
5777  *
5778  * In principle we should re-run set_cheapest() here to identify the
5779  * cheapest path, but it seems unlikely that adding the same tlist eval
5780  * costs to all the paths would change that, so we don't bother. Instead,
5781  * just assume that the cheapest-startup and cheapest-total paths remain
5782  * so. (There should be no parameterized paths anymore, so we needn't
5783  * worry about updating cheapest_parameterized_paths.)
5784  */
5785  foreach(lc, rel->pathlist)
5786  {
5787  Path *subpath = (Path *) lfirst(lc);
5788  Path *newpath = subpath;
5789  ListCell *lc1,
5790  *lc2;
5791 
5792  Assert(subpath->param_info == NULL);
5793  forboth(lc1, targets, lc2, targets_contain_srfs)
5794  {
5795  PathTarget *thistarget = lfirst_node(PathTarget, lc1);
5796  bool contains_srfs = (bool) lfirst_int(lc2);
5797 
5798  /* If this level doesn't contain SRFs, do regular projection */
5799  if (contains_srfs)
5800  newpath = (Path *) create_set_projection_path(root,
5801  rel,
5802  newpath,
5803  thistarget);
5804  else