planner.c
1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_constraint.h"
28 #include "catalog/pg_inherits.h"
29 #include "catalog/pg_proc.h"
30 #include "catalog/pg_type.h"
31 #include "executor/executor.h"
32 #include "executor/nodeAgg.h"
33 #include "foreign/fdwapi.h"
34 #include "jit/jit.h"
35 #include "lib/bipartite_match.h"
36 #include "lib/knapsack.h"
37 #include "miscadmin.h"
38 #include "nodes/makefuncs.h"
39 #include "nodes/nodeFuncs.h"
40 #ifdef OPTIMIZER_DEBUG
41 #include "nodes/print.h"
42 #endif
43 #include "optimizer/appendinfo.h"
44 #include "optimizer/clauses.h"
45 #include "optimizer/cost.h"
46 #include "optimizer/inherit.h"
47 #include "optimizer/optimizer.h"
48 #include "optimizer/paramassign.h"
49 #include "optimizer/pathnode.h"
50 #include "optimizer/paths.h"
51 #include "optimizer/plancat.h"
52 #include "optimizer/planmain.h"
53 #include "optimizer/planner.h"
54 #include "optimizer/prep.h"
55 #include "optimizer/subselect.h"
56 #include "optimizer/tlist.h"
57 #include "parser/analyze.h"
58 #include "parser/parse_agg.h"
59 #include "parser/parsetree.h"
60 #include "partitioning/partdesc.h"
61 #include "rewrite/rewriteManip.h"
62 #include "storage/dsm_impl.h"
63 #include "utils/lsyscache.h"
64 #include "utils/rel.h"
65 #include "utils/selfuncs.h"
66 #include "utils/syscache.h"
67 
68 /* GUC parameters */
69 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
70 int force_parallel_mode = FORCE_PARALLEL_OFF;
71 bool parallel_leader_participation = true;
72 
73 /* Hook for plugins to get control in planner() */
74 planner_hook_type planner_hook = NULL;
75 
76 /* Hook for plugins to get control when grouping_planner() plans upper rels */
77 create_upper_paths_hook_type create_upper_paths_hook = NULL;
78 
79 
80 /* Expression kind codes for preprocess_expression */
81 #define EXPRKIND_QUAL 0
82 #define EXPRKIND_TARGET 1
83 #define EXPRKIND_RTFUNC 2
84 #define EXPRKIND_RTFUNC_LATERAL 3
85 #define EXPRKIND_VALUES 4
86 #define EXPRKIND_VALUES_LATERAL 5
87 #define EXPRKIND_LIMIT 6
88 #define EXPRKIND_APPINFO 7
89 #define EXPRKIND_PHV 8
90 #define EXPRKIND_TABLESAMPLE 9
91 #define EXPRKIND_ARBITER_ELEM 10
92 #define EXPRKIND_TABLEFUNC 11
93 #define EXPRKIND_TABLEFUNC_LATERAL 12
94 
95 /* Passthrough data for standard_qp_callback */
96 typedef struct
97 {
98  List *activeWindows; /* active windows, if any */
99  List *groupClause; /* overrides parse->groupClause */
100 } standard_qp_extra;
101 
102 /*
103  * Data specific to grouping sets
104  */
105 
106 typedef struct
107 {
108  List *rollups;
109  List *hash_sets_idx;
110  double dNumHashGroups;
111  bool any_hashable;
112  Bitmapset *unsortable_refs;
113  Bitmapset *unhashable_refs;
114  List *unsortable_sets;
115  int *tleref_to_colnum_map;
116 } grouping_sets_data;
117 
118 /*
119  * Temporary structure for use during WindowClause reordering in order to be
120  * able to sort WindowClauses on partitioning/ordering prefix.
121  */
122 typedef struct
123 {
124  WindowClause *wc;
125  List *uniqueOrder; /* A List of unique ordering/partitioning
126  * clauses per Window */
127 } WindowClauseSortData;
128 
129 /* Local functions */
130 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
131 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
132 static void grouping_planner(PlannerInfo *root, double tuple_fraction);
133 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
134 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
135  int *tleref_to_colnum_map);
136 static void preprocess_rowmarks(PlannerInfo *root);
137 static double preprocess_limit(PlannerInfo *root,
138  double tuple_fraction,
139  int64 *offset_est, int64 *count_est);
140 static void remove_useless_groupby_columns(PlannerInfo *root);
141 static List *preprocess_groupclause(PlannerInfo *root, List *force);
142 static List *extract_rollup_sets(List *groupingSets);
143 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
144 static void standard_qp_callback(PlannerInfo *root, void *extra);
145 static double get_number_of_groups(PlannerInfo *root,
146  double path_rows,
147  grouping_sets_data *gd,
148  List *target_list);
149 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
150  RelOptInfo *input_rel,
151  PathTarget *target,
152  bool target_parallel_safe,
153  grouping_sets_data *gd);
154 static bool is_degenerate_grouping(PlannerInfo *root);
155 static void create_degenerate_grouping_paths(PlannerInfo *root,
156  RelOptInfo *input_rel,
157  RelOptInfo *grouped_rel);
158 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
159  PathTarget *target, bool target_parallel_safe,
160  Node *havingQual);
161 static void create_ordinary_grouping_paths(PlannerInfo *root,
162  RelOptInfo *input_rel,
163  RelOptInfo *grouped_rel,
164  const AggClauseCosts *agg_costs,
165  grouping_sets_data *gd,
166  GroupPathExtraData *extra,
167  RelOptInfo **partially_grouped_rel_p);
168 static void consider_groupingsets_paths(PlannerInfo *root,
169  RelOptInfo *grouped_rel,
170  Path *path,
171  bool is_sorted,
172  bool can_hash,
173  grouping_sets_data *gd,
174  const AggClauseCosts *agg_costs,
175  double dNumGroups);
176 static RelOptInfo *create_window_paths(PlannerInfo *root,
177  RelOptInfo *input_rel,
178  PathTarget *input_target,
179  PathTarget *output_target,
180  bool output_target_parallel_safe,
181  WindowFuncLists *wflists,
182  List *activeWindows);
183 static void create_one_window_path(PlannerInfo *root,
184  RelOptInfo *window_rel,
185  Path *path,
186  PathTarget *input_target,
187  PathTarget *output_target,
188  WindowFuncLists *wflists,
189  List *activeWindows);
190 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
191  RelOptInfo *input_rel);
192 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
193  RelOptInfo *input_rel,
194  PathTarget *target,
195  bool target_parallel_safe,
196  double limit_tuples);
197 static PathTarget *make_group_input_target(PlannerInfo *root,
198  PathTarget *final_target);
199 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
200  PathTarget *grouping_target,
201  Node *havingQual);
202 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
203 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
204 static PathTarget *make_window_input_target(PlannerInfo *root,
205  PathTarget *final_target,
206  List *activeWindows);
207 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
208  List *tlist);
209 static PathTarget *make_sort_input_target(PlannerInfo *root,
210  PathTarget *final_target,
211  bool *have_postponed_srfs);
212 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
213  List *targets, List *targets_contain_srfs);
214 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
215  RelOptInfo *grouped_rel,
216  RelOptInfo *partially_grouped_rel,
217  const AggClauseCosts *agg_costs,
218  grouping_sets_data *gd,
219  double dNumGroups,
220  GroupPathExtraData *extra);
221 static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
222  RelOptInfo *grouped_rel,
223  RelOptInfo *input_rel,
224  grouping_sets_data *gd,
225  GroupPathExtraData *extra,
226  bool force_rel_creation);
227 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
228 static bool can_partial_agg(PlannerInfo *root);
229 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
230  RelOptInfo *rel,
231  List *scanjoin_targets,
232  List *scanjoin_targets_contain_srfs,
233  bool scanjoin_target_parallel_safe,
234  bool tlist_same_exprs);
235 static void create_partitionwise_grouping_paths(PlannerInfo *root,
236  RelOptInfo *input_rel,
237  RelOptInfo *grouped_rel,
238  RelOptInfo *partially_grouped_rel,
239  const AggClauseCosts *agg_costs,
240  grouping_sets_data *gd,
241  PartitionwiseAggregateType patype,
242  GroupPathExtraData *extra);
243 static bool group_by_has_partkey(RelOptInfo *input_rel,
244  List *targetList,
245  List *groupClause);
246 static int common_prefix_cmp(const void *a, const void *b);
247 
248 
249 /*****************************************************************************
250  *
251  * Query optimizer entry point
252  *
253  * To support loadable plugins that monitor or modify planner behavior,
254  * we provide a hook variable that lets a plugin get control before and
255  * after the standard planning process. The plugin would normally call
256  * standard_planner().
257  *
258  * Note to plugin authors: standard_planner() scribbles on its Query input,
259  * so you'd better copy that data structure if you want to plan more than once.
260  *
261  *****************************************************************************/
262 PlannedStmt *
263 planner(Query *parse, const char *query_string, int cursorOptions,
264  ParamListInfo boundParams)
265 {
266  PlannedStmt *result;
267 
268  if (planner_hook)
269  result = (*planner_hook) (parse, query_string, cursorOptions, boundParams);
270  else
271  result = standard_planner(parse, query_string, cursorOptions, boundParams);
272  return result;
273 }
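For illustration, here is a minimal sketch of a loadable module that installs planner_hook as described above and chains to standard_planner(). The my_planner/_PG_init boilerplate is hypothetical; planner_hook_type, planner_hook, and standard_planner() are the symbols declared in optimizer/planner.h.

#include "postgres.h"
#include "fmgr.h"
#include "optimizer/planner.h"

PG_MODULE_MAGIC;

static planner_hook_type prev_planner_hook = NULL;

/* Hypothetical wrapper; remember that standard_planner() scribbles on parse */
static PlannedStmt *
my_planner(Query *parse, const char *query_string, int cursorOptions,
           ParamListInfo boundParams)
{
    /* a plugin could inspect parse, or copyObject() it, before planning */
    if (prev_planner_hook)
        return prev_planner_hook(parse, query_string, cursorOptions, boundParams);
    return standard_planner(parse, query_string, cursorOptions, boundParams);
}

void
_PG_init(void)
{
    prev_planner_hook = planner_hook;
    planner_hook = my_planner;
}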
274 
275 PlannedStmt *
276 standard_planner(Query *parse, const char *query_string, int cursorOptions,
277  ParamListInfo boundParams)
278 {
279  PlannedStmt *result;
280  PlannerGlobal *glob;
281  double tuple_fraction;
282  PlannerInfo *root;
283  RelOptInfo *final_rel;
284  Path *best_path;
285  Plan *top_plan;
286  ListCell *lp,
287  *lr;
288 
289  /*
290  * Set up global state for this planner invocation. This data is needed
291  * across all levels of sub-Query that might exist in the given command,
292  * so we keep it in a separate struct that's linked to by each per-Query
293  * PlannerInfo.
294  */
295  glob = makeNode(PlannerGlobal);
296 
297  glob->boundParams = boundParams;
298  glob->subplans = NIL;
299  glob->subroots = NIL;
300  glob->rewindPlanIDs = NULL;
301  glob->finalrtable = NIL;
302  glob->finalrowmarks = NIL;
303  glob->resultRelations = NIL;
304  glob->appendRelations = NIL;
305  glob->relationOids = NIL;
306  glob->invalItems = NIL;
307  glob->paramExecTypes = NIL;
308  glob->lastPHId = 0;
309  glob->lastRowMarkId = 0;
310  glob->lastPlanNodeId = 0;
311  glob->transientPlan = false;
312  glob->dependsOnRole = false;
313 
314  /*
315  * Assess whether it's feasible to use parallel mode for this query. We
316  * can't do this in a standalone backend, or if the command will try to
317  * modify any data, or if this is a cursor operation, or if GUCs are set
318  * to values that don't permit parallelism, or if parallel-unsafe
319  * functions are present in the query tree.
320  *
321  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
322  * MATERIALIZED VIEW to use parallel plans, but as of now, only the leader
323  * backend writes into a completely new table. In the future, we can
324  * extend it to allow workers to write into the table. However, to allow
325  * parallel updates and deletes, we have to solve other problems,
326  * especially around combo CIDs.)
327  *
328  * For now, we don't try to use parallel mode if we're running inside a
329  * parallel worker. We might eventually be able to relax this
330  * restriction, but for now it seems best not to have parallel workers
331  * trying to create their own parallel workers.
332  */
333  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
334  IsUnderPostmaster &&
335  parse->commandType == CMD_SELECT &&
336  !parse->hasModifyingCTE &&
337  max_parallel_workers_per_gather > 0 &&
338  !IsParallelWorker())
339  {
340  /* all the cheap tests pass, so scan the query tree */
341  glob->maxParallelHazard = max_parallel_hazard(parse);
342  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
343  }
344  else
345  {
346  /* skip the query tree scan, just assume it's unsafe */
347  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
348  glob->parallelModeOK = false;
349  }
350 
351  /*
352  * glob->parallelModeNeeded is normally set to false here and changed to
353  * true during plan creation if a Gather or Gather Merge plan is actually
354  * created (cf. create_gather_plan, create_gather_merge_plan).
355  *
356  * However, if force_parallel_mode = on or force_parallel_mode = regress,
357  * then we impose parallel mode whenever it's safe to do so, even if the
358  * final plan doesn't use parallelism. It's not safe to do so if the
359  * query contains anything parallel-unsafe; parallelModeOK will be false
360  * in that case. Note that parallelModeOK can't change after this point.
361  * Otherwise, everything in the query is either parallel-safe or
362  * parallel-restricted, and in either case it should be OK to impose
363  * parallel-mode restrictions. If that ends up breaking something, then
364  * either some function the user included in the query is incorrectly
365  * labeled as parallel-safe or parallel-restricted when in reality it's
366  * parallel-unsafe, or else the query planner itself has a bug.
367  */
368  glob->parallelModeNeeded = glob->parallelModeOK &&
369  (force_parallel_mode != FORCE_PARALLEL_OFF);
370 
371  /* Determine what fraction of the plan is likely to be scanned */
372  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
373  {
374  /*
375  * We have no real idea how many tuples the user will ultimately FETCH
376  * from a cursor, but it is often the case that he doesn't want 'em
377  * all, or would prefer a fast-start plan anyway so that he can
378  * process some of the tuples sooner. Use a GUC parameter to decide
379  * what fraction to optimize for.
380  */
381  tuple_fraction = cursor_tuple_fraction;
382 
383  /*
384  * We document cursor_tuple_fraction as simply being a fraction, which
385  * means the edge cases 0 and 1 have to be treated specially here. We
386  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
387  */
388  if (tuple_fraction >= 1.0)
389  tuple_fraction = 0.0;
390  else if (tuple_fraction <= 0.0)
391  tuple_fraction = 1e-10;
392  }
393  else
394  {
395  /* Default assumption is we need all the tuples */
396  tuple_fraction = 0.0;
397  }
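A brief worked illustration of the convention established above, assuming the stock default cursor_tuple_fraction of 0.1 (the values are examples, not taken from this file):

/*
 * DECLARE CURSOR (CURSOR_OPT_FAST_PLAN), cursor_tuple_fraction = 0.1
 *     -> tuple_fraction = 0.1    (optimize for fetching ~10% of the rows)
 * cursor with cursor_tuple_fraction = 1.0 -> tuple_fraction = 0.0    (all rows)
 * cursor with cursor_tuple_fraction = 0.0 -> tuple_fraction = 1e-10  (fast start)
 * ordinary non-cursor query               -> tuple_fraction = 0.0    (all rows)
 */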
398 
399  /* primary planning entry point (may recurse for subqueries) */
400  root = subquery_planner(glob, parse, NULL,
401  false, tuple_fraction);
402 
403  /* Select best Path and turn it into a Plan */
404  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
405  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
406 
407  top_plan = create_plan(root, best_path);
408 
409  /*
410  * If creating a plan for a scrollable cursor, make sure it can run
411  * backwards on demand. Add a Material node at the top at need.
412  */
413  if (cursorOptions & CURSOR_OPT_SCROLL)
414  {
415  if (!ExecSupportsBackwardScan(top_plan))
416  top_plan = materialize_finished_plan(top_plan);
417  }
418 
419  /*
420  * Optionally add a Gather node for testing purposes, provided this is
421  * actually a safe thing to do.
422  */
423  if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
424  {
425  Gather *gather = makeNode(Gather);
426 
427  /*
428  * If there are any initPlans attached to the formerly-top plan node,
429  * move them up to the Gather node; same as we do for Material node in
430  * materialize_finished_plan.
431  */
432  gather->plan.initPlan = top_plan->initPlan;
433  top_plan->initPlan = NIL;
434 
435  gather->plan.targetlist = top_plan->targetlist;
436  gather->plan.qual = NIL;
437  gather->plan.lefttree = top_plan;
438  gather->plan.righttree = NULL;
439  gather->num_workers = 1;
440  gather->single_copy = true;
441  gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
442 
443  /*
444  * Since this Gather has no parallel-aware descendants to signal to,
445  * we don't need a rescan Param.
446  */
447  gather->rescan_param = -1;
448 
449  /*
450  * Ideally we'd use cost_gather here, but setting up dummy path data
451  * to satisfy it doesn't seem much cleaner than knowing what it does.
452  */
453  gather->plan.startup_cost = top_plan->startup_cost +
454  parallel_setup_cost;
455  gather->plan.total_cost = top_plan->total_cost +
456  parallel_setup_cost + top_plan->plan_rows * parallel_tuple_cost;
457  gather->plan.plan_rows = top_plan->plan_rows;
458  gather->plan.plan_width = top_plan->plan_width;
459  gather->plan.parallel_aware = false;
460  gather->plan.parallel_safe = false;
461 
462  /* use parallel mode for parallel plans. */
463  root->glob->parallelModeNeeded = true;
464 
465  top_plan = &gather->plan;
466  }
467 
468  /*
469  * If any Params were generated, run through the plan tree and compute
470  * each plan node's extParam/allParam sets. Ideally we'd merge this into
471  * set_plan_references' tree traversal, but for now it has to be separate
472  * because we need to visit subplans before not after main plan.
473  */
474  if (glob->paramExecTypes != NIL)
475  {
476  Assert(list_length(glob->subplans) == list_length(glob->subroots));
477  forboth(lp, glob->subplans, lr, glob->subroots)
478  {
479  Plan *subplan = (Plan *) lfirst(lp);
480  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
481 
482  SS_finalize_plan(subroot, subplan);
483  }
484  SS_finalize_plan(root, top_plan);
485  }
486 
487  /* final cleanup of the plan */
488  Assert(glob->finalrtable == NIL);
489  Assert(glob->finalrowmarks == NIL);
490  Assert(glob->resultRelations == NIL);
491  Assert(glob->appendRelations == NIL);
492  top_plan = set_plan_references(root, top_plan);
493  /* ... and the subplans (both regular subplans and initplans) */
494  Assert(list_length(glob->subplans) == list_length(glob->subroots));
495  forboth(lp, glob->subplans, lr, glob->subroots)
496  {
497  Plan *subplan = (Plan *) lfirst(lp);
498  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
499 
500  lfirst(lp) = set_plan_references(subroot, subplan);
501  }
502 
503  /* build the PlannedStmt result */
504  result = makeNode(PlannedStmt);
505 
506  result->commandType = parse->commandType;
507  result->queryId = parse->queryId;
508  result->hasReturning = (parse->returningList != NIL);
509  result->hasModifyingCTE = parse->hasModifyingCTE;
510  result->canSetTag = parse->canSetTag;
511  result->transientPlan = glob->transientPlan;
512  result->dependsOnRole = glob->dependsOnRole;
513  result->parallelModeNeeded = glob->parallelModeNeeded;
514  result->planTree = top_plan;
515  result->rtable = glob->finalrtable;
516  result->resultRelations = glob->resultRelations;
517  result->appendRelations = glob->appendRelations;
518  result->subplans = glob->subplans;
519  result->rewindPlanIDs = glob->rewindPlanIDs;
520  result->rowMarks = glob->finalrowmarks;
521  result->relationOids = glob->relationOids;
522  result->invalItems = glob->invalItems;
523  result->paramExecTypes = glob->paramExecTypes;
524  /* utilityStmt should be null, but we might as well copy it */
525  result->utilityStmt = parse->utilityStmt;
526  result->stmt_location = parse->stmt_location;
527  result->stmt_len = parse->stmt_len;
528 
529  result->jitFlags = PGJIT_NONE;
530  if (jit_enabled && jit_above_cost >= 0 &&
531  top_plan->total_cost > jit_above_cost)
532  {
533  result->jitFlags |= PGJIT_PERFORM;
534 
535  /*
536  * Decide how much effort should be put into generating better code.
537  */
538  if (jit_optimize_above_cost >= 0 &&
539  top_plan->total_cost > jit_optimize_above_cost)
540  result->jitFlags |= PGJIT_OPT3;
541  if (jit_inline_above_cost >= 0 &&
542  top_plan->total_cost > jit_inline_above_cost)
543  result->jitFlags |= PGJIT_INLINE;
544 
545  /*
546  * Decide which operations should be JITed.
547  */
548  if (jit_expressions)
549  result->jitFlags |= PGJIT_EXPR;
550  if (jit_tuple_deforming)
551  result->jitFlags |= PGJIT_DEFORM;
552  }
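As a worked example of the jitFlags decision just made, assuming the stock defaults (jit_above_cost = 100000, jit_optimize_above_cost = 500000, jit_inline_above_cost = 500000, with jit_expressions and jit_tuple_deforming enabled):

/*
 * top_plan->total_cost =  50000 -> jitFlags = PGJIT_NONE
 * top_plan->total_cost = 150000 -> PGJIT_PERFORM | PGJIT_EXPR | PGJIT_DEFORM
 * top_plan->total_cost = 600000 -> additionally PGJIT_OPT3 | PGJIT_INLINE
 */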
553 
554  if (glob->partition_directory != NULL)
555  DestroyPartitionDirectory(glob->partition_directory);
556 
557  return result;
558 }
559 
560 
561 /*--------------------
562  * subquery_planner
563  * Invokes the planner on a subquery. We recurse to here for each
564  * sub-SELECT found in the query tree.
565  *
566  * glob is the global state for the current planner run.
567  * parse is the querytree produced by the parser & rewriter.
568  * parent_root is the immediate parent Query's info (NULL at the top level).
569  * hasRecursion is true if this is a recursive WITH query.
570  * tuple_fraction is the fraction of tuples we expect will be retrieved.
571  * tuple_fraction is interpreted as explained for grouping_planner, below.
572  *
573  * Basically, this routine does the stuff that should only be done once
574  * per Query object. It then calls grouping_planner. At one time,
575  * grouping_planner could be invoked recursively on the same Query object;
576  * that's not currently true, but we keep the separation between the two
577  * routines anyway, in case we need it again someday.
578  *
579  * subquery_planner will be called recursively to handle sub-Query nodes
580  * found within the query's expressions and rangetable.
581  *
582  * Returns the PlannerInfo struct ("root") that contains all data generated
583  * while planning the subquery. In particular, the Path(s) attached to
584  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
585  * cheapest way(s) to implement the query. The top level will select the
586  * best Path and pass it through createplan.c to produce a finished Plan.
587  *--------------------
588  */
589 PlannerInfo *
590 subquery_planner(PlannerGlobal *glob, Query *parse,
591  PlannerInfo *parent_root,
592  bool hasRecursion, double tuple_fraction)
593 {
594  PlannerInfo *root;
595  List *newWithCheckOptions;
596  List *newHaving;
597  bool hasOuterJoins;
598  bool hasResultRTEs;
599  RelOptInfo *final_rel;
600  ListCell *l;
601 
602  /* Create a PlannerInfo data structure for this subquery */
603  root = makeNode(PlannerInfo);
604  root->parse = parse;
605  root->glob = glob;
606  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
607  root->parent_root = parent_root;
608  root->plan_params = NIL;
609  root->outer_params = NULL;
610  root->planner_cxt = CurrentMemoryContext;
611  root->init_plans = NIL;
612  root->cte_plan_ids = NIL;
613  root->multiexpr_params = NIL;
614  root->eq_classes = NIL;
615  root->ec_merging_done = false;
616  root->all_result_relids =
617  parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
618  root->leaf_result_relids = NULL; /* we'll find out leaf-ness later */
619  root->append_rel_list = NIL;
620  root->row_identity_vars = NIL;
621  root->rowMarks = NIL;
622  memset(root->upper_rels, 0, sizeof(root->upper_rels));
623  memset(root->upper_targets, 0, sizeof(root->upper_targets));
624  root->processed_tlist = NIL;
625  root->update_colnos = NIL;
626  root->grouping_map = NULL;
627  root->minmax_aggs = NIL;
628  root->qual_security_level = 0;
629  root->hasPseudoConstantQuals = false;
630  root->hasAlternativeSubPlans = false;
631  root->hasRecursion = hasRecursion;
632  if (hasRecursion)
633  root->wt_param_id = assign_special_exec_param(root);
634  else
635  root->wt_param_id = -1;
636  root->non_recursive_path = NULL;
637  root->partColsUpdated = false;
638 
639  /*
640  * If there is a WITH list, process each WITH query and either convert it
641  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
642  */
643  if (parse->cteList)
644  SS_process_ctes(root);
645 
646  /*
647  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
648  * that we don't need so many special cases to deal with that situation.
649  */
650  replace_empty_jointree(parse);
651 
652  /*
653  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
654  * to transform them into joins. Note that this step does not descend
655  * into subqueries; if we pull up any subqueries below, their SubLinks are
656  * processed just before pulling them up.
657  */
658  if (parse->hasSubLinks)
659  pull_up_sublinks(root);
660 
661  /*
662  * Scan the rangetable for function RTEs, do const-simplification on them,
663  * and then inline them if possible (producing subqueries that might get
664  * pulled up next). Recursion issues here are handled in the same way as
665  * for SubLinks.
666  */
667  preprocess_function_rtes(root);
668 
669  /*
670  * Check to see if any subqueries in the jointree can be merged into this
671  * query.
672  */
673  pull_up_subqueries(root);
674 
675  /*
676  * If this is a simple UNION ALL query, flatten it into an appendrel. We
677  * do this now because it requires applying pull_up_subqueries to the leaf
678  * queries of the UNION ALL, which weren't touched above because they
679  * weren't referenced by the jointree (they will be after we do this).
680  */
681  if (parse->setOperations)
682  flatten_simple_union_all(root);
683 
684  /*
685  * Survey the rangetable to see what kinds of entries are present. We can
686  * skip some later processing if relevant SQL features are not used; for
687  * example if there are no JOIN RTEs we can avoid the expense of doing
688  * flatten_join_alias_vars(). This must be done after we have finished
689  * adding rangetable entries, of course. (Note: actually, processing of
690  * inherited or partitioned rels can cause RTEs for their child tables to
691  * get added later; but those must all be RTE_RELATION entries, so they
692  * don't invalidate the conclusions drawn here.)
693  */
694  root->hasJoinRTEs = false;
695  root->hasLateralRTEs = false;
696  hasOuterJoins = false;
697  hasResultRTEs = false;
698  foreach(l, parse->rtable)
699  {
700  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
701 
702  switch (rte->rtekind)
703  {
704  case RTE_RELATION:
705  if (rte->inh)
706  {
707  /*
708  * Check to see if the relation actually has any children;
709  * if not, clear the inh flag so we can treat it as a
710  * plain base relation.
711  *
712  * Note: this could give a false-positive result, if the
713  * rel once had children but no longer does. We used to
714  * be able to clear rte->inh later on when we discovered
715  * that, but no more; we have to handle such cases as
716  * full-fledged inheritance.
717  */
718  rte->inh = has_subclass(rte->relid);
719  }
720  break;
721  case RTE_JOIN:
722  root->hasJoinRTEs = true;
723  if (IS_OUTER_JOIN(rte->jointype))
724  hasOuterJoins = true;
725  break;
726  case RTE_RESULT:
727  hasResultRTEs = true;
728  break;
729  default:
730  /* No work here for other RTE types */
731  break;
732  }
733 
734  if (rte->lateral)
735  root->hasLateralRTEs = true;
736 
737  /*
738  * We can also determine the maximum security level required for any
739  * securityQuals now. Addition of inheritance-child RTEs won't affect
740  * this, because child tables don't have their own securityQuals; see
741  * expand_single_inheritance_child().
742  */
743  if (rte->securityQuals)
744  root->qual_security_level = Max(root->qual_security_level,
745  list_length(rte->securityQuals));
746  }
747 
748  /*
749  * If we have now verified that the query target relation is
750  * non-inheriting, mark it as a leaf target.
751  */
752  if (parse->resultRelation)
753  {
754  RangeTblEntry *rte = rt_fetch(parse->resultRelation, parse->rtable);
755 
756  if (!rte->inh)
757  root->leaf_result_relids =
758  bms_make_singleton(parse->resultRelation);
759  }
760 
761  /*
762  * Preprocess RowMark information. We need to do this after subquery
763  * pullup, so that all base relations are present.
764  */
765  preprocess_rowmarks(root);
766 
767  /*
768  * Set hasHavingQual to remember if HAVING clause is present. Needed
769  * because preprocess_expression will reduce a constant-true condition to
770  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
771  */
772  root->hasHavingQual = (parse->havingQual != NULL);
773 
774  /*
775  * Do expression preprocessing on targetlist and quals, as well as other
776  * random expressions in the querytree. Note that we do not need to
777  * handle sort/group expressions explicitly, because they are actually
778  * part of the targetlist.
779  */
780  parse->targetList = (List *)
781  preprocess_expression(root, (Node *) parse->targetList,
782  EXPRKIND_TARGET);
783 
784  /* Constant-folding might have removed all set-returning functions */
785  if (parse->hasTargetSRFs)
786  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
787 
788  newWithCheckOptions = NIL;
789  foreach(l, parse->withCheckOptions)
790  {
791  WithCheckOption *wco = lfirst_node(WithCheckOption, l);
792 
793  wco->qual = preprocess_expression(root, wco->qual,
794  EXPRKIND_QUAL);
795  if (wco->qual != NULL)
796  newWithCheckOptions = lappend(newWithCheckOptions, wco);
797  }
798  parse->withCheckOptions = newWithCheckOptions;
799 
800  parse->returningList = (List *)
801  preprocess_expression(root, (Node *) parse->returningList,
802  EXPRKIND_TARGET);
803 
804  preprocess_qual_conditions(root, (Node *) parse->jointree);
805 
806  parse->havingQual = preprocess_expression(root, parse->havingQual,
807  EXPRKIND_QUAL);
808 
809  foreach(l, parse->windowClause)
810  {
811  WindowClause *wc = lfirst_node(WindowClause, l);
812 
813  /* partitionClause/orderClause are sort/group expressions */
814  wc->startOffset = preprocess_expression(root, wc->startOffset,
815  EXPRKIND_LIMIT);
816  wc->endOffset = preprocess_expression(root, wc->endOffset,
817  EXPRKIND_LIMIT);
818  }
819 
820  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
821  EXPRKIND_LIMIT);
822  parse->limitCount = preprocess_expression(root, parse->limitCount,
823  EXPRKIND_LIMIT);
824 
825  if (parse->onConflict)
826  {
827  parse->onConflict->arbiterElems = (List *)
828  preprocess_expression(root,
829  (Node *) parse->onConflict->arbiterElems,
830  EXPRKIND_ARBITER_ELEM);
831  parse->onConflict->arbiterWhere =
832  preprocess_expression(root,
833  parse->onConflict->arbiterWhere,
834  EXPRKIND_QUAL);
835  parse->onConflict->onConflictSet = (List *)
836  preprocess_expression(root,
837  (Node *) parse->onConflict->onConflictSet,
838  EXPRKIND_TARGET);
839  parse->onConflict->onConflictWhere =
840  preprocess_expression(root,
841  parse->onConflict->onConflictWhere,
842  EXPRKIND_QUAL);
843  /* exclRelTlist contains only Vars, so no preprocessing needed */
844  }
845 
846  root->append_rel_list = (List *)
847  preprocess_expression(root, (Node *) root->append_rel_list,
848  EXPRKIND_APPINFO);
849 
850  /* Also need to preprocess expressions within RTEs */
851  foreach(l, parse->rtable)
852  {
853  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
854  int kind;
855  ListCell *lcsq;
856 
857  if (rte->rtekind == RTE_RELATION)
858  {
859  if (rte->tablesample)
860  rte->tablesample = (TableSampleClause *)
861  preprocess_expression(root,
862  (Node *) rte->tablesample,
863  EXPRKIND_TABLESAMPLE);
864  }
865  else if (rte->rtekind == RTE_SUBQUERY)
866  {
867  /*
868  * We don't want to do all preprocessing yet on the subquery's
869  * expressions, since that will happen when we plan it. But if it
870  * contains any join aliases of our level, those have to get
871  * expanded now, because planning of the subquery won't do it.
872  * That's only possible if the subquery is LATERAL.
873  */
874  if (rte->lateral && root->hasJoinRTEs)
875  rte->subquery = (Query *)
876  flatten_join_alias_vars(root->parse,
877  (Node *) rte->subquery);
878  }
879  else if (rte->rtekind == RTE_FUNCTION)
880  {
881  /* Preprocess the function expression(s) fully */
882  kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
883  rte->functions = (List *)
884  preprocess_expression(root, (Node *) rte->functions, kind);
885  }
886  else if (rte->rtekind == RTE_TABLEFUNC)
887  {
888  /* Preprocess the function expression(s) fully */
889  kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
890  rte->tablefunc = (TableFunc *)
891  preprocess_expression(root, (Node *) rte->tablefunc, kind);
892  }
893  else if (rte->rtekind == RTE_VALUES)
894  {
895  /* Preprocess the values lists fully */
896  kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
897  rte->values_lists = (List *)
898  preprocess_expression(root, (Node *) rte->values_lists, kind);
899  }
900 
901  /*
902  * Process each element of the securityQuals list as if it were a
903  * separate qual expression (as indeed it is). We need to do it this
904  * way to get proper canonicalization of AND/OR structure. Note that
905  * this converts each element into an implicit-AND sublist.
906  */
907  foreach(lcsq, rte->securityQuals)
908  {
909  lfirst(lcsq) = preprocess_expression(root,
910  (Node *) lfirst(lcsq),
911  EXPRKIND_QUAL);
912  }
913  }
914 
915  /*
916  * Now that we are done preprocessing expressions, and in particular done
917  * flattening join alias variables, get rid of the joinaliasvars lists.
918  * They no longer match what expressions in the rest of the tree look
919  * like, because we have not preprocessed expressions in those lists (and
920  * do not want to; for example, expanding a SubLink there would result in
921  * a useless unreferenced subplan). Leaving them in place simply creates
922  * a hazard for later scans of the tree. We could try to prevent that by
923  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
924  * but that doesn't sound very reliable.
925  */
926  if (root->hasJoinRTEs)
927  {
928  foreach(l, parse->rtable)
929  {
930  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
931 
932  rte->joinaliasvars = NIL;
933  }
934  }
935 
936  /*
937  * In some cases we may want to transfer a HAVING clause into WHERE. We
938  * cannot do so if the HAVING clause contains aggregates (obviously) or
939  * volatile functions (since a HAVING clause is supposed to be executed
940  * only once per group). We also can't do this if there are any nonempty
941  * grouping sets; moving such a clause into WHERE would potentially change
942  * the results, if any referenced column isn't present in all the grouping
943  * sets. (If there are only empty grouping sets, then the HAVING clause
944  * must be degenerate as discussed below.)
945  *
946  * Also, it may be that the clause is so expensive to execute that we're
947  * better off doing it only once per group, despite the loss of
948  * selectivity. This is hard to estimate short of doing the entire
949  * planning process twice, so we use a heuristic: clauses containing
950  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
951  * clause into WHERE, in hopes of eliminating tuples before aggregation
952  * instead of after.
953  *
954  * If the query has explicit grouping then we can simply move such a
955  * clause into WHERE; any group that fails the clause will not be in the
956  * output because none of its tuples will reach the grouping or
957  * aggregation stage. Otherwise we must have a degenerate (variable-free)
958  * HAVING clause, which we put in WHERE so that query_planner() can use it
959  * in a gating Result node, but also keep in HAVING to ensure that we
960  * don't emit a bogus aggregated row. (This could be done better, but it
961  * seems not worth optimizing.)
962  *
963  * Note that both havingQual and parse->jointree->quals are in
964  * implicitly-ANDed-list form at this point, even though they are declared
965  * as Node *.
966  */
967  newHaving = NIL;
968  foreach(l, (List *) parse->havingQual)
969  {
970  Node *havingclause = (Node *) lfirst(l);
971 
972  if ((parse->groupClause && parse->groupingSets) ||
973  contain_agg_clause(havingclause) ||
974  contain_volatile_functions(havingclause) ||
975  contain_subplans(havingclause))
976  {
977  /* keep it in HAVING */
978  newHaving = lappend(newHaving, havingclause);
979  }
980  else if (parse->groupClause && !parse->groupingSets)
981  {
982  /* move it to WHERE */
983  parse->jointree->quals = (Node *)
984  lappend((List *) parse->jointree->quals, havingclause);
985  }
986  else
987  {
988  /* put a copy in WHERE, keep it in HAVING */
989  parse->jointree->quals = (Node *)
990  lappend((List *) parse->jointree->quals,
991  copyObject(havingclause));
992  newHaving = lappend(newHaving, havingclause);
993  }
994  }
995  parse->havingQual = (Node *) newHaving;
996 
997  /* Remove any redundant GROUP BY columns */
998  remove_useless_groupby_columns(root);
999 
1000  /*
1001  * If we have any outer joins, try to reduce them to plain inner joins.
1002  * This step is most easily done after we've done expression
1003  * preprocessing.
1004  */
1005  if (hasOuterJoins)
1006  reduce_outer_joins(root);
1007 
1008  /*
1009  * If we have any RTE_RESULT relations, see if they can be deleted from
1010  * the jointree. This step is most effectively done after we've done
1011  * expression preprocessing and outer join reduction.
1012  */
1013  if (hasResultRTEs)
1014  remove_useless_result_rtes(root);
1015 
1016  /*
1017  * Do the main planning.
1018  */
1019  grouping_planner(root, tuple_fraction);
1020 
1021  /*
1022  * Capture the set of outer-level param IDs we have access to, for use in
1023  * extParam/allParam calculations later.
1024  */
1025  SS_identify_outer_params(root);
1026 
1027  /*
1028  * If any initPlans were created in this query level, adjust the surviving
1029  * Paths' costs and parallel-safety flags to account for them. The
1030  * initPlans won't actually get attached to the plan tree till
1031  * create_plan() runs, but we must include their effects now.
1032  */
1033  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1034  SS_charge_for_initplans(root, final_rel);
1035 
1036  /*
1037  * Make sure we've identified the cheapest Path for the final rel. (By
1038  * doing this here not in grouping_planner, we include initPlan costs in
1039  * the decision, though it's unlikely that will change anything.)
1040  */
1041  set_cheapest(final_rel);
1042 
1043  return root;
1044 }
1045 
1046 /*
1047  * preprocess_expression
1048  * Do subquery_planner's preprocessing work for an expression,
1049  * which can be a targetlist, a WHERE clause (including JOIN/ON
1050  * conditions), a HAVING clause, or a few other things.
1051  */
1052 static Node *
1053 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1054 {
1055  /*
1056  * Fall out quickly if expression is empty. This occurs often enough to
1057  * be worth checking. Note that null->null is the correct conversion for
1058  * implicit-AND result format, too.
1059  */
1060  if (expr == NULL)
1061  return NULL;
1062 
1063  /*
1064  * If the query has any join RTEs, replace join alias variables with
1065  * base-relation variables. We must do this first, since any expressions
1066  * we may extract from the joinaliasvars lists have not been preprocessed.
1067  * For example, if we did this after sublink processing, sublinks expanded
1068  * out from join aliases would not get processed. But we can skip this in
1069  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1070  * they can't contain any Vars of the current query level.
1071  */
1072  if (root->hasJoinRTEs &&
1073  !(kind == EXPRKIND_RTFUNC ||
1074  kind == EXPRKIND_VALUES ||
1075  kind == EXPRKIND_TABLESAMPLE ||
1076  kind == EXPRKIND_TABLEFUNC))
1077  expr = flatten_join_alias_vars(root->parse, expr);
1078 
1079  /*
1080  * Simplify constant expressions. For function RTEs, this was already
1081  * done by preprocess_function_rtes ... but we have to do it again if the
1082  * RTE is LATERAL and might have contained join alias variables.
1083  *
1084  * Note: an essential effect of this is to convert named-argument function
1085  * calls to positional notation and insert the current actual values of
1086  * any default arguments for functions. To ensure that happens, we *must*
1087  * process all expressions here. Previous PG versions sometimes skipped
1088  * const-simplification if it didn't seem worth the trouble, but we can't
1089  * do that anymore.
1090  *
1091  * Note: this also flattens nested AND and OR expressions into N-argument
1092  * form. All processing of a qual expression after this point must be
1093  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1094  * with AND directly under AND, nor OR directly under OR.
1095  */
1096  if (!(kind == EXPRKIND_RTFUNC ||
1097  (kind == EXPRKIND_RTFUNC_LATERAL && !root->hasJoinRTEs)))
1098  expr = eval_const_expressions(root, expr);
1099 
1100  /*
1101  * If it's a qual or havingQual, canonicalize it.
1102  */
1103  if (kind == EXPRKIND_QUAL)
1104  {
1105  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1106 
1107 #ifdef OPTIMIZER_DEBUG
1108  printf("After canonicalize_qual()\n");
1109  pprint(expr);
1110 #endif
1111  }
1112 
1113  /*
1114  * Check for ANY ScalarArrayOpExpr with Const arrays and set the
1115  * hashfuncid of any that might execute more quickly by using hash lookups
1116  * instead of a linear search.
1117  */
1118  if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET)
1119  {
1120  convert_saop_to_hashed_saop(expr);
1121  }
1122 
1123  /* Expand SubLinks to SubPlans */
1124  if (root->parse->hasSubLinks)
1125  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1126 
1127  /*
1128  * XXX do not insert anything here unless you have grokked the comments in
1129  * SS_replace_correlation_vars ...
1130  */
1131 
1132  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1133  if (root->query_level > 1)
1134  expr = SS_replace_correlation_vars(root, expr);
1135 
1136  /*
1137  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1138  * don't want to do this before eval_const_expressions, since the latter
1139  * would be unable to simplify a top-level AND correctly. Also,
1140  * SS_process_sublinks expects explicit-AND format.)
1141  */
1142  if (kind == EXPRKIND_QUAL)
1143  expr = (Node *) make_ands_implicit((Expr *) expr);
1144 
1145  return expr;
1146 }
1147 
1148 /*
1149  * preprocess_qual_conditions
1150  * Recursively scan the query's jointree and do subquery_planner's
1151  * preprocessing work on each qual condition found therein.
1152  */
1153 static void
1154 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1155 {
1156  if (jtnode == NULL)
1157  return;
1158  if (IsA(jtnode, RangeTblRef))
1159  {
1160  /* nothing to do here */
1161  }
1162  else if (IsA(jtnode, FromExpr))
1163  {
1164  FromExpr *f = (FromExpr *) jtnode;
1165  ListCell *l;
1166 
1167  foreach(l, f->fromlist)
1168  preprocess_qual_conditions(root, (Node *) lfirst(l));
1169 
1170  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1171  }
1172  else if (IsA(jtnode, JoinExpr))
1173  {
1174  JoinExpr *j = (JoinExpr *) jtnode;
1175 
1176  preprocess_qual_conditions(root, j->larg);
1177  preprocess_qual_conditions(root, j->rarg);
1178 
1179  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1180  }
1181  else
1182  elog(ERROR, "unrecognized node type: %d",
1183  (int) nodeTag(jtnode));
1184 }
1185 
1186 /*
1187  * preprocess_phv_expression
1188  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1189  *
1190  * If a LATERAL subquery references an output of another subquery, and that
1191  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1192  * join, then we'll push the PlaceHolderVar expression down into the subquery
1193  * and later pull it back up during find_lateral_references, which runs after
1194  * subquery_planner has preprocessed all the expressions that were in the
1195  * current query level to start with. So we need to preprocess it then.
1196  */
1197 Expr *
1198 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1199 {
1200  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1201 }
1202 
1203 /*--------------------
1204  * grouping_planner
1205  * Perform planning steps related to grouping, aggregation, etc.
1206  *
1207  * This function adds all required top-level processing to the scan/join
1208  * Path(s) produced by query_planner.
1209  *
1210  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1211  * tuple_fraction is interpreted as follows:
1212  * 0: expect all tuples to be retrieved (normal case)
1213  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1214  * from the plan to be retrieved
1215  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1216  * expected to be retrieved (ie, a LIMIT specification)
1217  *
1218  * Returns nothing; the useful output is in the Paths we attach to the
1219  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1220  * root->processed_tlist contains the final processed targetlist.
1221  *
1222  * Note that we have not done set_cheapest() on the final rel; it's convenient
1223  * to leave this to the caller.
1224  *--------------------
1225  */
1226 static void
1227 grouping_planner(PlannerInfo *root, double tuple_fraction)
1228 {
1229  Query *parse = root->parse;
1230  int64 offset_est = 0;
1231  int64 count_est = 0;
1232  double limit_tuples = -1.0;
1233  bool have_postponed_srfs = false;
1234  PathTarget *final_target;
1235  List *final_targets;
1236  List *final_targets_contain_srfs;
1237  bool final_target_parallel_safe;
1238  RelOptInfo *current_rel;
1239  RelOptInfo *final_rel;
1240  FinalPathExtraData extra;
1241  ListCell *lc;
1242 
1243  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1244  if (parse->limitCount || parse->limitOffset)
1245  {
1246  tuple_fraction = preprocess_limit(root, tuple_fraction,
1247  &offset_est, &count_est);
1248 
1249  /*
1250  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1251  * estimate the effects of using a bounded sort.
1252  */
1253  if (count_est > 0 && offset_est >= 0)
1254  limit_tuples = (double) count_est + (double) offset_est;
1255  }
1256 
1257  /* Make tuple_fraction accessible to lower-level routines */
1258  root->tuple_fraction = tuple_fraction;
1259 
1260  if (parse->setOperations)
1261  {
1262  /*
1263  * If there's a top-level ORDER BY, assume we have to fetch all the
1264  * tuples. This might be too simplistic given all the hackery below
1265  * to possibly avoid the sort; but the odds of accurate estimates here
1266  * are pretty low anyway. XXX try to get rid of this in favor of
1267  * letting plan_set_operations generate both fast-start and
1268  * cheapest-total paths.
1269  */
1270  if (parse->sortClause)
1271  root->tuple_fraction = 0.0;
1272 
1273  /*
1274  * Construct Paths for set operations. The results will not need any
1275  * work except perhaps a top-level sort and/or LIMIT. Note that any
1276  * special work for recursive unions is the responsibility of
1277  * plan_set_operations.
1278  */
1279  current_rel = plan_set_operations(root);
1280 
1281  /*
1282  * We should not need to call preprocess_targetlist, since we must be
1283  * in a SELECT query node. Instead, use the processed_tlist returned
1284  * by plan_set_operations (since this tells whether it returned any
1285  * resjunk columns!), and transfer any sort key information from the
1286  * original tlist.
1287  */
1288  Assert(parse->commandType == CMD_SELECT);
1289 
1290  /* for safety, copy processed_tlist instead of modifying in-place */
1291  root->processed_tlist =
1292  postprocess_setop_tlist(copyObject(root->processed_tlist),
1293  parse->targetList);
1294 
1295  /* Also extract the PathTarget form of the setop result tlist */
1296  final_target = current_rel->cheapest_total_path->pathtarget;
1297 
1298  /* And check whether it's parallel safe */
1299  final_target_parallel_safe =
1300  is_parallel_safe(root, (Node *) final_target->exprs);
1301 
1302  /* The setop result tlist couldn't contain any SRFs */
1303  Assert(!parse->hasTargetSRFs);
1304  final_targets = final_targets_contain_srfs = NIL;
1305 
1306  /*
1307  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1308  * checked already, but let's make sure).
1309  */
1310  if (parse->rowMarks)
1311  ereport(ERROR,
1312  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1313  /*------
1314  translator: %s is a SQL row locking clause such as FOR UPDATE */
1315  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1316  LCS_asString(linitial_node(RowMarkClause,
1317  parse->rowMarks)->strength))));
1318 
1319  /*
1320  * Calculate pathkeys that represent result ordering requirements
1321  */
1322  Assert(parse->distinctClause == NIL);
1323  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1324  parse->sortClause,
1325  root->processed_tlist);
1326  }
1327  else
1328  {
1329  /* No set operations, do regular planning */
1330  PathTarget *sort_input_target;
1331  List *sort_input_targets;
1332  List *sort_input_targets_contain_srfs;
1333  bool sort_input_target_parallel_safe;
1334  PathTarget *grouping_target;
1335  List *grouping_targets;
1336  List *grouping_targets_contain_srfs;
1337  bool grouping_target_parallel_safe;
1338  PathTarget *scanjoin_target;
1339  List *scanjoin_targets;
1340  List *scanjoin_targets_contain_srfs;
1341  bool scanjoin_target_parallel_safe;
1342  bool scanjoin_target_same_exprs;
1343  bool have_grouping;
1344  WindowFuncLists *wflists = NULL;
1345  List *activeWindows = NIL;
1346  grouping_sets_data *gset_data = NULL;
1347  standard_qp_extra qp_extra;
1348 
1349  /* A recursive query should always have setOperations */
1350  Assert(!root->hasRecursion);
1351 
1352  /* Preprocess grouping sets and GROUP BY clause, if any */
1353  if (parse->groupingSets)
1354  {
1355  gset_data = preprocess_grouping_sets(root);
1356  }
1357  else
1358  {
1359  /* Preprocess regular GROUP BY clause, if any */
1360  if (parse->groupClause)
1361  parse->groupClause = preprocess_groupclause(root, NIL);
1362  }
1363 
1364  /*
1365  * Preprocess targetlist. Note that much of the remaining planning
1366  * work will be done with the PathTarget representation of tlists, but
1367  * we must also maintain the full representation of the final tlist so
1368  * that we can transfer its decoration (resnames etc) to the topmost
1369  * tlist of the finished Plan. This is kept in processed_tlist.
1370  */
1371  preprocess_targetlist(root);
1372 
1373  /*
1374  * Mark all the aggregates with resolved aggtranstypes, and detect
1375  * aggregates that are duplicates or can share transition state. We
1376  * must do this before slicing and dicing the tlist into various
1377  * pathtargets, else some copies of the Aggref nodes might escape
1378  * being marked.
1379  */
1380  if (parse->hasAggs)
1381  {
1382  preprocess_aggrefs(root, (Node *) root->processed_tlist);
1383  preprocess_aggrefs(root, (Node *) parse->havingQual);
1384  }
1385 
1386  /*
1387  * Locate any window functions in the tlist. (We don't need to look
1388  * anywhere else, since expressions used in ORDER BY will be in there
1389  * too.) Note that they could all have been eliminated by constant
1390  * folding, in which case we don't need to do any more work.
1391  */
1392  if (parse->hasWindowFuncs)
1393  {
1394  wflists = find_window_functions((Node *) root->processed_tlist,
1395  list_length(parse->windowClause));
1396  if (wflists->numWindowFuncs > 0)
1397  activeWindows = select_active_windows(root, wflists);
1398  else
1399  parse->hasWindowFuncs = false;
1400  }
1401 
1402  /*
1403  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1404  * adding logic between here and the query_planner() call. Anything
1405  * that is needed in MIN/MAX-optimizable cases will have to be
1406  * duplicated in planagg.c.
1407  */
1408  if (parse->hasAggs)
1409  preprocess_minmax_aggregates(root);
1410 
1411  /*
1412  * Figure out whether there's a hard limit on the number of rows that
1413  * query_planner's result subplan needs to return. Even if we know a
1414  * hard limit overall, it doesn't apply if the query has any
1415  * grouping/aggregation operations, or SRFs in the tlist.
1416  */
1417  if (parse->groupClause ||
1418  parse->groupingSets ||
1419  parse->distinctClause ||
1420  parse->hasAggs ||
1421  parse->hasWindowFuncs ||
1422  parse->hasTargetSRFs ||
1423  root->hasHavingQual)
1424  root->limit_tuples = -1.0;
1425  else
1426  root->limit_tuples = limit_tuples;
1427 
1428  /* Set up data needed by standard_qp_callback */
1429  qp_extra.activeWindows = activeWindows;
1430  qp_extra.groupClause = (gset_data
1431  ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
1432  : parse->groupClause);
1433 
1434  /*
1435  * Generate the best unsorted and presorted paths for the scan/join
1436  * portion of this Query, ie the processing represented by the
1437  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1438  * We also generate (in standard_qp_callback) pathkey representations
1439  * of the query's sort clause, distinct clause, etc.
1440  */
1441  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
1442 
1443  /*
1444  * Convert the query's result tlist into PathTarget format.
1445  *
1446  * Note: this cannot be done before query_planner() has performed
1447  * appendrel expansion, because that might add resjunk entries to
1448  * root->processed_tlist. Waiting till afterwards is also helpful
1449  * because the target width estimates can use per-Var width numbers
1450  * that were obtained within query_planner().
1451  */
1452  final_target = create_pathtarget(root, root->processed_tlist);
1453  final_target_parallel_safe =
1454  is_parallel_safe(root, (Node *) final_target->exprs);
1455 
1456  /*
1457  * If ORDER BY was given, consider whether we should use a post-sort
1458  * projection, and compute the adjusted target for preceding steps if
1459  * so.
1460  */
1461  if (parse->sortClause)
1462  {
1463  sort_input_target = make_sort_input_target(root,
1464  final_target,
1465  &have_postponed_srfs);
1466  sort_input_target_parallel_safe =
1467  is_parallel_safe(root, (Node *) sort_input_target->exprs);
1468  }
1469  else
1470  {
1471  sort_input_target = final_target;
1472  sort_input_target_parallel_safe = final_target_parallel_safe;
1473  }
1474 
1475  /*
1476  * If we have window functions to deal with, the output from any
1477  * grouping step needs to be what the window functions want;
1478  * otherwise, it should be sort_input_target.
1479  */
1480  if (activeWindows)
1481  {
1482  grouping_target = make_window_input_target(root,
1483  final_target,
1484  activeWindows);
1485  grouping_target_parallel_safe =
1486  is_parallel_safe(root, (Node *) grouping_target->exprs);
1487  }
1488  else
1489  {
1490  grouping_target = sort_input_target;
1491  grouping_target_parallel_safe = sort_input_target_parallel_safe;
1492  }
1493 
1494  /*
1495  * If we have grouping or aggregation to do, the topmost scan/join
1496  * plan node must emit what the grouping step wants; otherwise, it
1497  * should emit grouping_target.
1498  */
1499  have_grouping = (parse->groupClause || parse->groupingSets ||
1500  parse->hasAggs || root->hasHavingQual);
1501  if (have_grouping)
1502  {
1503  scanjoin_target = make_group_input_target(root, final_target);
1504  scanjoin_target_parallel_safe =
1505  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
1506  }
1507  else
1508  {
1509  scanjoin_target = grouping_target;
1510  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
1511  }
1512 
1513  /*
1514  * If there are any SRFs in the targetlist, we must separate each of
1515  * these PathTargets into SRF-computing and SRF-free targets. Replace
1516  * each of the named targets with a SRF-free version, and remember the
1517  * list of additional projection steps we need to add afterwards.
1518  */
1519  if (parse->hasTargetSRFs)
1520  {
1521  /* final_target doesn't recompute any SRFs in sort_input_target */
1522  split_pathtarget_at_srfs(root, final_target, sort_input_target,
1523  &final_targets,
1524  &final_targets_contain_srfs);
1525  final_target = linitial_node(PathTarget, final_targets);
1526  Assert(!linitial_int(final_targets_contain_srfs));
1527  /* likewise for sort_input_target vs. grouping_target */
1528  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1529  &sort_input_targets,
1530  &sort_input_targets_contain_srfs);
1531  sort_input_target = linitial_node(PathTarget, sort_input_targets);
1532  Assert(!linitial_int(sort_input_targets_contain_srfs));
1533  /* likewise for grouping_target vs. scanjoin_target */
1534  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1535  &grouping_targets,
1536  &grouping_targets_contain_srfs);
1537  grouping_target = linitial_node(PathTarget, grouping_targets);
1538  Assert(!linitial_int(grouping_targets_contain_srfs));
1539  /* scanjoin_target will not have any SRFs precomputed for it */
1540  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1541  &scanjoin_targets,
1542  &scanjoin_targets_contain_srfs);
1543  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
1544  Assert(!linitial_int(scanjoin_targets_contain_srfs));
1545  }
1546  else
1547  {
1548  /* initialize lists; for most of these, dummy values are OK */
1549  final_targets = final_targets_contain_srfs = NIL;
1550  sort_input_targets = sort_input_targets_contain_srfs = NIL;
1551  grouping_targets = grouping_targets_contain_srfs = NIL;
1552  scanjoin_targets = list_make1(scanjoin_target);
1553  scanjoin_targets_contain_srfs = NIL;
1554  }
1555 
1556  /* Apply scan/join target. */
1557  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
1558  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
1559  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
1560  scanjoin_targets_contain_srfs,
1561  scanjoin_target_parallel_safe,
1562  scanjoin_target_same_exprs);
1563 
1564  /*
1565  * Save the various upper-rel PathTargets we just computed into
1566  * root->upper_targets[]. The core code doesn't use this, but it
1567  * provides a convenient place for extensions to get at the info. For
1568  * consistency, we save all the intermediate targets, even though some
1569  * of the corresponding upperrels might not be needed for this query.
1570  */
1571  root->upper_targets[UPPERREL_FINAL] = final_target;
1572  root->upper_targets[UPPERREL_ORDERED] = final_target;
1573  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
1574  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1575  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1576 
1577  /*
1578  * If we have grouping and/or aggregation, consider ways to implement
1579  * that. We build a new upperrel representing the output of this
1580  * phase.
1581  */
1582  if (have_grouping)
1583  {
1584  current_rel = create_grouping_paths(root,
1585  current_rel,
1586  grouping_target,
1587  grouping_target_parallel_safe,
1588  gset_data);
1589  /* Fix things up if grouping_target contains SRFs */
1590  if (parse->hasTargetSRFs)
1591  adjust_paths_for_srfs(root, current_rel,
1592  grouping_targets,
1593  grouping_targets_contain_srfs);
1594  }
1595 
1596  /*
1597  * If we have window functions, consider ways to implement those. We
1598  * build a new upperrel representing the output of this phase.
1599  */
1600  if (activeWindows)
1601  {
1602  current_rel = create_window_paths(root,
1603  current_rel,
1604  grouping_target,
1605  sort_input_target,
1606  sort_input_target_parallel_safe,
1607  wflists,
1608  activeWindows);
1609  /* Fix things up if sort_input_target contains SRFs */
1610  if (parse->hasTargetSRFs)
1611  adjust_paths_for_srfs(root, current_rel,
1612  sort_input_targets,
1613  sort_input_targets_contain_srfs);
1614  }
1615 
1616  /*
1617  * If there is a DISTINCT clause, consider ways to implement that. We
1618  * build a new upperrel representing the output of this phase.
1619  */
1620  if (parse->distinctClause)
1621  {
1622  current_rel = create_distinct_paths(root,
1623  current_rel);
1624  }
1625  } /* end of if (setOperations) */
1626 
1627  /*
1628  * If ORDER BY was given, consider ways to implement that, and generate a
1629  * new upperrel containing only paths that emit the correct ordering and
1630  * project the correct final_target. We can apply the original
1631  * limit_tuples limit in sort costing here, but only if there are no
1632  * postponed SRFs.
1633  */
1634  if (parse->sortClause)
1635  {
1636  current_rel = create_ordered_paths(root,
1637  current_rel,
1638  final_target,
1639  final_target_parallel_safe,
1640  have_postponed_srfs ? -1.0 :
1641  limit_tuples);
1642  /* Fix things up if final_target contains SRFs */
1643  if (parse->hasTargetSRFs)
1644  adjust_paths_for_srfs(root, current_rel,
1645  final_targets,
1646  final_targets_contain_srfs);
1647  }
1648 
1649  /*
1650  * Now we are prepared to build the final-output upperrel.
1651  */
1652  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1653 
1654  /*
1655  * If the input rel is marked consider_parallel and there's nothing that's
1656  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1657  * consider_parallel as well. Note that if the query has rowMarks or is
1658  * not a SELECT, consider_parallel will be false for every relation in the
1659  * query.
1660  */
1661  if (current_rel->consider_parallel &&
1662  is_parallel_safe(root, parse->limitOffset) &&
1663  is_parallel_safe(root, parse->limitCount))
1664  final_rel->consider_parallel = true;
1665 
1666  /*
1667  * If the current_rel belongs to a single FDW, so does the final_rel.
1668  */
1669  final_rel->serverid = current_rel->serverid;
1670  final_rel->userid = current_rel->userid;
1671  final_rel->useridiscurrent = current_rel->useridiscurrent;
1672  final_rel->fdwroutine = current_rel->fdwroutine;
1673 
1674  /*
1675  * Generate paths for the final_rel. Insert all surviving paths, with
1676  * LockRows, Limit, and/or ModifyTable steps added if needed.
1677  */
1678  foreach(lc, current_rel->pathlist)
1679  {
1680  Path *path = (Path *) lfirst(lc);
1681 
1682  /*
1683  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1684  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1685  * here. If there are only non-locking rowmarks, they should be
1686  * handled by the ModifyTable node instead. However, root->rowMarks
1687  * is what goes into the LockRows node.)
1688  */
1689  if (parse->rowMarks)
1690  {
1691  path = (Path *) create_lockrows_path(root, final_rel, path,
1692  root->rowMarks,
1693  assign_special_exec_param(root));
1694  }
1695 
1696  /*
1697  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
1698  */
1699  if (limit_needed(parse))
1700  {
1701  path = (Path *) create_limit_path(root, final_rel, path,
1702  parse->limitOffset,
1703  parse->limitCount,
1704  parse->limitOption,
1705  offset_est, count_est);
1706  }
1707 
1708  /*
1709  * If this is an INSERT/UPDATE/DELETE, add the ModifyTable node.
1710  */
1711  if (parse->commandType != CMD_SELECT)
1712  {
1713  Index rootRelation;
1714  List *resultRelations = NIL;
1715  List *updateColnosLists = NIL;
1716  List *withCheckOptionLists = NIL;
1717  List *returningLists = NIL;
1718  List *rowMarks;
1719 
1720  if (bms_membership(root->all_result_relids) == BMS_MULTIPLE)
1721  {
1722  /* Inherited UPDATE/DELETE */
1723  RelOptInfo *top_result_rel = find_base_rel(root,
1724  parse->resultRelation);
1725  int resultRelation = -1;
1726 
1727  /* Add only leaf children to ModifyTable. */
1728  while ((resultRelation = bms_next_member(root->leaf_result_relids,
1729  resultRelation)) >= 0)
1730  {
1731  RelOptInfo *this_result_rel = find_base_rel(root,
1732  resultRelation);
1733 
1734  /*
1735  * Also exclude any leaf rels that have turned dummy since
1736  * being added to the list, for example, by being excluded
1737  * by constraint exclusion.
1738  */
1739  if (IS_DUMMY_REL(this_result_rel))
1740  continue;
1741 
1742  /* Build per-target-rel lists needed by ModifyTable */
1743  resultRelations = lappend_int(resultRelations,
1744  resultRelation);
1745  if (parse->commandType == CMD_UPDATE)
1746  {
1747  List *update_colnos = root->update_colnos;
1748 
1749  if (this_result_rel != top_result_rel)
1750  update_colnos =
1751  adjust_inherited_attnums_multilevel(root,
1752  update_colnos,
1753  this_result_rel->relid,
1754  top_result_rel->relid);
1755  updateColnosLists = lappend(updateColnosLists,
1756  update_colnos);
1757  }
1758  if (parse->withCheckOptions)
1759  {
1760  List *withCheckOptions = parse->withCheckOptions;
1761 
1762  if (this_result_rel != top_result_rel)
1763  withCheckOptions = (List *)
1764  adjust_appendrel_attrs_multilevel(root,
1765  (Node *) withCheckOptions,
1766  this_result_rel->relids,
1767  top_result_rel->relids);
1768  withCheckOptionLists = lappend(withCheckOptionLists,
1769  withCheckOptions);
1770  }
1771  if (parse->returningList)
1772  {
1773  List *returningList = parse->returningList;
1774 
1775  if (this_result_rel != top_result_rel)
1776  returningList = (List *)
1777  adjust_appendrel_attrs_multilevel(root,
1778  (Node *) returningList,
1779  this_result_rel->relids,
1780  top_result_rel->relids);
1781  returningLists = lappend(returningLists,
1782  returningList);
1783  }
1784  }
1785 
1786  if (resultRelations == NIL)
1787  {
1788  /*
1789  * We managed to exclude every child rel, so generate a
1790  * dummy one-relation plan using info for the top target
1791  * rel (even though that may not be a leaf target).
1792  * Although it's clear that no data will be updated or
1793  * deleted, we still need to have a ModifyTable node so
1794  * that any statement triggers will be executed. (This
1795  * could be cleaner if we fixed nodeModifyTable.c to allow
1796  * zero target relations, but that probably wouldn't be a
1797  * net win.)
1798  */
1799  resultRelations = list_make1_int(parse->resultRelation);
1800  if (parse->commandType == CMD_UPDATE)
1801  updateColnosLists = list_make1(root->update_colnos);
1802  if (parse->withCheckOptions)
1803  withCheckOptionLists = list_make1(parse->withCheckOptions);
1804  if (parse->returningList)
1805  returningLists = list_make1(parse->returningList);
1806  }
1807  }
1808  else
1809  {
1810  /* Single-relation INSERT/UPDATE/DELETE. */
1811  resultRelations = list_make1_int(parse->resultRelation);
1812  if (parse->commandType == CMD_UPDATE)
1813  updateColnosLists = list_make1(root->update_colnos);
1814  if (parse->withCheckOptions)
1815  withCheckOptionLists = list_make1(parse->withCheckOptions);
1816  if (parse->returningList)
1817  returningLists = list_make1(parse->returningList);
1818  }
1819 
1820  /*
1821  * If target is a partition root table, we need to mark the
1822  * ModifyTable node appropriately for that.
1823  */
1824  if (rt_fetch(parse->resultRelation, parse->rtable)->relkind ==
1825  RELKIND_PARTITIONED_TABLE)
1826  rootRelation = parse->resultRelation;
1827  else
1828  rootRelation = 0;
1829 
1830  /*
1831  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
1832  * will have dealt with fetching non-locked marked rows, else we
1833  * need to have ModifyTable do that.
1834  */
1835  if (parse->rowMarks)
1836  rowMarks = NIL;
1837  else
1838  rowMarks = root->rowMarks;
1839 
1840  path = (Path *)
1841  create_modifytable_path(root, final_rel,
1842  path,
1843  parse->commandType,
1844  parse->canSetTag,
1845  parse->resultRelation,
1846  rootRelation,
1847  root->partColsUpdated,
1848  resultRelations,
1849  updateColnosLists,
1850  withCheckOptionLists,
1851  returningLists,
1852  rowMarks,
1853  parse->onConflict,
1854  assign_special_exec_param(root));
1855  }
1856 
1857  /* And shove it into final_rel */
1858  add_path(final_rel, path);
1859  }
1860 
1861  /*
1862  * Generate partial paths for final_rel, too, if outer query levels might
1863  * be able to make use of them.
1864  */
1865  if (final_rel->consider_parallel && root->query_level > 1 &&
1866  !limit_needed(parse))
1867  {
1868  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
1869  foreach(lc, current_rel->partial_pathlist)
1870  {
1871  Path *partial_path = (Path *) lfirst(lc);
1872 
1873  add_partial_path(final_rel, partial_path);
1874  }
1875  }
1876 
1877  extra.limit_needed = limit_needed(parse);
1878  extra.limit_tuples = limit_tuples;
1879  extra.count_est = count_est;
1880  extra.offset_est = offset_est;
1881 
1882  /*
1883  * If there is an FDW that's responsible for all baserels of the query,
1884  * let it consider adding ForeignPaths.
1885  */
1886  if (final_rel->fdwroutine &&
1887  final_rel->fdwroutine->GetForeignUpperPaths)
1888  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
1889  current_rel, final_rel,
1890  &extra);
1891 
1892  /* Let extensions possibly add some more paths */
1893  if (create_upper_paths_hook)
1894  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
1895  current_rel, final_rel, &extra);
1896 
1897  /* Note: currently, we leave it to callers to do set_cheapest() */
1898 }
1899 
1900 /*
1901  * Do preprocessing for groupingSets clause and related data. This handles the
1902  * preliminary steps of expanding the grouping sets, organizing them into lists
1903  * of rollups, and preparing annotations which will later be filled in with
1904  * size estimates.
1905  */
1906 static grouping_sets_data *
1907 preprocess_grouping_sets(PlannerInfo *root)
1908 {
1909  Query *parse = root->parse;
1910  List *sets;
1911  int maxref = 0;
1912  ListCell *lc;
1913  ListCell *lc_set;
1914  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
1915 
1916  parse->groupingSets = expand_grouping_sets(parse->groupingSets, parse->groupDistinct, -1);
1917 
1918  gd->any_hashable = false;
1919  gd->unhashable_refs = NULL;
1920  gd->unsortable_refs = NULL;
1921  gd->unsortable_sets = NIL;
1922 
1923  if (parse->groupClause)
1924  {
1925  ListCell *lc;
1926 
1927  foreach(lc, parse->groupClause)
1928  {
1929  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
1930  Index ref = gc->tleSortGroupRef;
1931 
1932  if (ref > maxref)
1933  maxref = ref;
1934 
1935  if (!gc->hashable)
1936  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
1937 
1938  if (!OidIsValid(gc->sortop))
1939  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
1940  }
1941  }
1942 
1943  /* Allocate workspace array for remapping */
1944  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
1945 
1946  /*
1947  * If we have any unsortable sets, we must extract them before trying to
1948  * prepare rollups. Unsortable sets don't go through
1949  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
1950  * here.
1951  */
1952  if (!bms_is_empty(gd->unsortable_refs))
1953  {
1954  List *sortable_sets = NIL;
1955 
1956  foreach(lc, parse->groupingSets)
1957  {
1958  List *gset = (List *) lfirst(lc);
1959 
1960  if (bms_overlap_list(gd->unsortable_refs, gset))
1961  {
1962  GroupingSetData *gs = makeNode(GroupingSetData);
1963 
1964  gs->set = gset;
1965  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
1966 
1967  /*
1968  * We must enforce here that an unsortable set is hashable;
1969  * later code assumes this. Parse analysis only checks that
1970  * every individual column is either hashable or sortable.
1971  *
1972  * Note that passing this test doesn't guarantee we can
1973  * generate a plan; there might be other showstoppers.
1974  */
1975  if (bms_overlap_list(gd->unhashable_refs, gset))
1976  ereport(ERROR,
1977  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1978  errmsg("could not implement GROUP BY"),
1979  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1980  }
1981  else
1982  sortable_sets = lappend(sortable_sets, gset);
1983  }
1984 
1985  if (sortable_sets)
1986  sets = extract_rollup_sets(sortable_sets);
1987  else
1988  sets = NIL;
1989  }
1990  else
1991  sets = extract_rollup_sets(parse->groupingSets);
1992 
1993  foreach(lc_set, sets)
1994  {
1995  List *current_sets = (List *) lfirst(lc_set);
1996  RollupData *rollup = makeNode(RollupData);
1997  GroupingSetData *gs;
1998 
1999  /*
2000  * Reorder the current list of grouping sets into correct prefix
2001  * order. If only one aggregation pass is needed, try to make the
2002  * list match the ORDER BY clause; if more than one pass is needed, we
2003  * don't bother with that.
2004  *
2005  * Note that this reorders the sets from smallest-member-first to
2006  * largest-member-first, and applies the GroupingSetData annotations,
2007  * though the data will be filled in later.
2008  */
2009  current_sets = reorder_grouping_sets(current_sets,
2010  (list_length(sets) == 1
2011  ? parse->sortClause
2012  : NIL));
2013 
2014  /*
2015  * Get the initial (and therefore largest) grouping set.
2016  */
2017  gs = linitial_node(GroupingSetData, current_sets);
2018 
2019  /*
2020  * Order the groupClause appropriately. If the first grouping set is
2021  * empty, then the groupClause must also be empty; otherwise we have
2022  * to force the groupClause to match that grouping set's order.
2023  *
2024  * (The first grouping set can be empty even though parse->groupClause
2025  * is not empty only if all non-empty grouping sets are unsortable.
2026  * The groupClauses for hashed grouping sets are built later on.)
2027  */
2028  if (gs->set)
2029  rollup->groupClause = preprocess_groupclause(root, gs->set);
2030  else
2031  rollup->groupClause = NIL;
2032 
2033  /*
2034  * Is it hashable? We pretend empty sets are hashable even though we
2035  * actually force them not to be hashed later. But don't bother if
2036  * there's nothing but empty sets (since in that case we can't hash
2037  * anything).
2038  */
2039  if (gs->set &&
2040  !bms_overlap_list(gd->unhashable_refs, gs->set))
2041  {
2042  rollup->hashable = true;
2043  gd->any_hashable = true;
2044  }
2045 
2046  /*
2047  * Now that we've pinned down an order for the groupClause for this
2048  * list of grouping sets, we need to remap the entries in the grouping
2049  * sets from sortgrouprefs to plain indices (0-based) into the
2050  * groupClause for this collection of grouping sets. We keep the
2051  * original form for later use, though.
2052  */
2053  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2054  current_sets,
2055  gd->tleref_to_colnum_map);
2056  rollup->gsets_data = current_sets;
2057 
2058  gd->rollups = lappend(gd->rollups, rollup);
2059  }
2060 
2061  if (gd->unsortable_sets)
2062  {
2063  /*
2064  * We have not yet pinned down a groupclause for this, but we will
2065  * need index-based lists for estimation purposes. Construct
2066  * hash_sets_idx based on the entire original groupclause for now.
2067  */
2068  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2069  gd->unsortable_sets,
2070  gd->tleref_to_colnum_map);
2071  gd->any_hashable = true;
2072  }
2073 
2074  return gd;
2075 }
2076 
2077 /*
2078  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2079  * (without annotation) mapped to indexes into the given groupclause.
2080  */
2081 static List *
2082 remap_to_groupclause_idx(List *groupClause,
2083  List *gsets,
2084  int *tleref_to_colnum_map)
2085 {
2086  int ref = 0;
2087  List *result = NIL;
2088  ListCell *lc;
2089 
2090  foreach(lc, groupClause)
2091  {
2092  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2093 
2094  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2095  }
2096 
2097  foreach(lc, gsets)
2098  {
2099  List *set = NIL;
2100  ListCell *lc2;
2101  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2102 
2103  foreach(lc2, gs->set)
2104  {
2105  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2106  }
2107 
2108  result = lappend(result, set);
2109  }
2110 
2111  return result;
2112 }
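
Illustrative aside (editor's sketch, not part of planner.c): the remapping above turns sortgroupref numbers into 0-based positions within the group clause. The hard-coded refs and sets below are hypothetical.

    #include <stdio.h>

    /* Sketch: remap sortgrouprefs to 0-based positions in a group clause. */
    int
    main(void)
    {
        /* Hypothetical groupClause listing sortgrouprefs 3, 1, 2 in that order */
        int groupclause_refs[] = {3, 1, 2};
        /* Hypothetical grouping set referencing sortgrouprefs 1 and 3 */
        int gset_refs[] = {1, 3};

        int tleref_to_colnum_map[4];    /* indexed by sortgroupref, as above */
        int i;

        for (i = 0; i < 3; i++)
            tleref_to_colnum_map[groupclause_refs[i]] = i;

        /* Prints "1 0": ref 1 is column 1, ref 3 is column 0 of the clause */
        for (i = 0; i < 2; i++)
            printf("%d ", tleref_to_colnum_map[gset_refs[i]]);
        printf("\n");
        return 0;
    }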
2113 
2114 
2115 /*
2116  * preprocess_rowmarks - set up PlanRowMarks if needed
2117  */
2118 static void
2119 preprocess_rowmarks(PlannerInfo *root)
2120 {
2121  Query *parse = root->parse;
2122  Bitmapset *rels;
2123  List *prowmarks;
2124  ListCell *l;
2125  int i;
2126 
2127  if (parse->rowMarks)
2128  {
2129  /*
2130  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2131  * grouping, since grouping renders a reference to individual tuple
2132  * CTIDs invalid. This is also checked at parse time, but that's
2133  * insufficient because of rule substitution, query pullup, etc.
2134  */
2135  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2136  parse->rowMarks)->strength);
2137  }
2138  else
2139  {
2140  /*
2141  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2142  * UPDATE/SHARE.
2143  */
2144  if (parse->commandType != CMD_UPDATE &&
2145  parse->commandType != CMD_DELETE)
2146  return;
2147  }
2148 
2149  /*
2150  * We need to have rowmarks for all base relations except the target. We
2151  * make a bitmapset of all base rels and then remove the items we don't
2152  * need or have FOR [KEY] UPDATE/SHARE marks for.
2153  */
2154  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2155  if (parse->resultRelation)
2156  rels = bms_del_member(rels, parse->resultRelation);
2157 
2158  /*
2159  * Convert RowMarkClauses to PlanRowMark representation.
2160  */
2161  prowmarks = NIL;
2162  foreach(l, parse->rowMarks)
2163  {
2164  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2165  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2166  PlanRowMark *newrc;
2167 
2168  /*
2169  * Currently, it is syntactically impossible to have FOR UPDATE et al
2170  * applied to an update/delete target rel. If that ever becomes
2171  * possible, we should drop the target from the PlanRowMark list.
2172  */
2173  Assert(rc->rti != parse->resultRelation);
2174 
2175  /*
2176  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2177  * can't support true locking. Subqueries that got flattened into the
2178  * main query should be ignored completely. Any that didn't will get
2179  * ROW_MARK_COPY items in the next loop.
2180  */
2181  if (rte->rtekind != RTE_RELATION)
2182  continue;
2183 
2184  rels = bms_del_member(rels, rc->rti);
2185 
2186  newrc = makeNode(PlanRowMark);
2187  newrc->rti = newrc->prti = rc->rti;
2188  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2189  newrc->markType = select_rowmark_type(rte, rc->strength);
2190  newrc->allMarkTypes = (1 << newrc->markType);
2191  newrc->strength = rc->strength;
2192  newrc->waitPolicy = rc->waitPolicy;
2193  newrc->isParent = false;
2194 
2195  prowmarks = lappend(prowmarks, newrc);
2196  }
2197 
2198  /*
2199  * Now, add rowmarks for any non-target, non-locked base relations.
2200  */
2201  i = 0;
2202  foreach(l, parse->rtable)
2203  {
2204  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2205  PlanRowMark *newrc;
2206 
2207  i++;
2208  if (!bms_is_member(i, rels))
2209  continue;
2210 
2211  newrc = makeNode(PlanRowMark);
2212  newrc->rti = newrc->prti = i;
2213  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2214  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2215  newrc->allMarkTypes = (1 << newrc->markType);
2216  newrc->strength = LCS_NONE;
2217  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2218  newrc->isParent = false;
2219 
2220  prowmarks = lappend(prowmarks, newrc);
2221  }
2222 
2223  root->rowMarks = prowmarks;
2224 }
2225 
2226 /*
2227  * Select RowMarkType to use for a given table
2228  */
2229 RowMarkType
2230 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2231 {
2232  if (rte->rtekind != RTE_RELATION)
2233  {
2234  /* If it's not a table at all, use ROW_MARK_COPY */
2235  return ROW_MARK_COPY;
2236  }
2237  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2238  {
2239  /* Let the FDW select the rowmark type, if it wants to */
2240  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2241 
2242  if (fdwroutine->GetForeignRowMarkType != NULL)
2243  return fdwroutine->GetForeignRowMarkType(rte, strength);
2244  /* Otherwise, use ROW_MARK_COPY by default */
2245  return ROW_MARK_COPY;
2246  }
2247  else
2248  {
2249  /* Regular table, apply the appropriate lock type */
2250  switch (strength)
2251  {
2252  case LCS_NONE:
2253 
2254  /*
2255  * We don't need a tuple lock, only the ability to re-fetch
2256  * the row.
2257  */
2258  return ROW_MARK_REFERENCE;
2259  break;
2260  case LCS_FORKEYSHARE:
2261  return ROW_MARK_KEYSHARE;
2262  break;
2263  case LCS_FORSHARE:
2264  return ROW_MARK_SHARE;
2265  break;
2266  case LCS_FORNOKEYUPDATE:
2267  return ROW_MARK_NOKEYEXCLUSIVE;
2268  break;
2269  case LCS_FORUPDATE:
2270  return ROW_MARK_EXCLUSIVE;
2271  break;
2272  }
2273  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2274  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2275  }
2276 }
2277 
2278 /*
2279  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2280  *
2281  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2282  * results back in *count_est and *offset_est. These variables are set to
2283  * 0 if the corresponding clause is not present, and -1 if it's present
2284  * but we couldn't estimate the value for it. (The "0" convention is OK
2285  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2286  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2287  * usual practice of never estimating less than one row.) These values will
2288  * be passed to create_limit_path, which see if you change this code.
2289  *
2290  * The return value is the suitably adjusted tuple_fraction to use for
2291  * planning the query. This adjustment is not overridable, since it reflects
2292  * plan actions that grouping_planner() will certainly take, not assumptions
2293  * about context.
2294  */
2295 static double
2296 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2297  int64 *offset_est, int64 *count_est)
2298 {
2299  Query *parse = root->parse;
2300  Node *est;
2301  double limit_fraction;
2302 
2303  /* Should not be called unless LIMIT or OFFSET */
2304  Assert(parse->limitCount || parse->limitOffset);
2305 
2306  /*
2307  * Try to obtain the clause values. We use estimate_expression_value
2308  * primarily because it can sometimes do something useful with Params.
2309  */
2310  if (parse->limitCount)
2311  {
2312  est = estimate_expression_value(root, parse->limitCount);
2313  if (est && IsA(est, Const))
2314  {
2315  if (((Const *) est)->constisnull)
2316  {
2317  /* NULL indicates LIMIT ALL, ie, no limit */
2318  *count_est = 0; /* treat as not present */
2319  }
2320  else
2321  {
2322  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2323  if (*count_est <= 0)
2324  *count_est = 1; /* force to at least 1 */
2325  }
2326  }
2327  else
2328  *count_est = -1; /* can't estimate */
2329  }
2330  else
2331  *count_est = 0; /* not present */
2332 
2333  if (parse->limitOffset)
2334  {
2335  est = estimate_expression_value(root, parse->limitOffset);
2336  if (est && IsA(est, Const))
2337  {
2338  if (((Const *) est)->constisnull)
2339  {
2340  /* Treat NULL as no offset; the executor will too */
2341  *offset_est = 0; /* treat as not present */
2342  }
2343  else
2344  {
2345  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2346  if (*offset_est < 0)
2347  *offset_est = 0; /* treat as not present */
2348  }
2349  }
2350  else
2351  *offset_est = -1; /* can't estimate */
2352  }
2353  else
2354  *offset_est = 0; /* not present */
2355 
2356  if (*count_est != 0)
2357  {
2358  /*
2359  * A LIMIT clause limits the absolute number of tuples returned.
2360  * However, if it's not a constant LIMIT then we have to guess; for
2361  * lack of a better idea, assume 10% of the plan's result is wanted.
2362  */
2363  if (*count_est < 0 || *offset_est < 0)
2364  {
2365  /* LIMIT or OFFSET is an expression ... punt ... */
2366  limit_fraction = 0.10;
2367  }
2368  else
2369  {
2370  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2371  limit_fraction = (double) *count_est + (double) *offset_est;
2372  }
2373 
2374  /*
2375  * If we have absolute limits from both caller and LIMIT, use the
2376  * smaller value; likewise if they are both fractional. If one is
2377  * fractional and the other absolute, we can't easily determine which
2378  * is smaller, but we use the heuristic that the absolute will usually
2379  * be smaller.
2380  */
2381  if (tuple_fraction >= 1.0)
2382  {
2383  if (limit_fraction >= 1.0)
2384  {
2385  /* both absolute */
2386  tuple_fraction = Min(tuple_fraction, limit_fraction);
2387  }
2388  else
2389  {
2390  /* caller absolute, limit fractional; use caller's value */
2391  }
2392  }
2393  else if (tuple_fraction > 0.0)
2394  {
2395  if (limit_fraction >= 1.0)
2396  {
2397  /* caller fractional, limit absolute; use limit */
2398  tuple_fraction = limit_fraction;
2399  }
2400  else
2401  {
2402  /* both fractional */
2403  tuple_fraction = Min(tuple_fraction, limit_fraction);
2404  }
2405  }
2406  else
2407  {
2408  /* no info from caller, just use limit */
2409  tuple_fraction = limit_fraction;
2410  }
2411  }
2412  else if (*offset_est != 0 && tuple_fraction > 0.0)
2413  {
2414  /*
2415  * We have an OFFSET but no LIMIT. This acts entirely differently
2416  * from the LIMIT case: here, we need to increase rather than decrease
2417  * the caller's tuple_fraction, because the OFFSET acts to cause more
2418  * tuples to be fetched instead of fewer. This only matters if we got
2419  * a tuple_fraction > 0, however.
2420  *
2421  * As above, use 10% if OFFSET is present but unestimatable.
2422  */
2423  if (*offset_est < 0)
2424  limit_fraction = 0.10;
2425  else
2426  limit_fraction = (double) *offset_est;
2427 
2428  /*
2429  * If we have absolute counts from both caller and OFFSET, add them
2430  * together; likewise if they are both fractional. If one is
2431  * fractional and the other absolute, we want to take the larger, and
2432  * we heuristically assume that's the fractional one.
2433  */
2434  if (tuple_fraction >= 1.0)
2435  {
2436  if (limit_fraction >= 1.0)
2437  {
2438  /* both absolute, so add them together */
2439  tuple_fraction += limit_fraction;
2440  }
2441  else
2442  {
2443  /* caller absolute, limit fractional; use limit */
2444  tuple_fraction = limit_fraction;
2445  }
2446  }
2447  else
2448  {
2449  if (limit_fraction >= 1.0)
2450  {
2451  /* caller fractional, limit absolute; use caller's value */
2452  }
2453  else
2454  {
2455  /* both fractional, so add them together */
2456  tuple_fraction += limit_fraction;
2457  if (tuple_fraction >= 1.0)
2458  tuple_fraction = 0.0; /* assume fetch all */
2459  }
2460  }
2461  }
2462 
2463  return tuple_fraction;
2464 }
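
Illustrative aside (editor's sketch, not part of planner.c): the combination rules above can be exercised in isolation. The stand-alone combine_limit() helper and the sample values are hypothetical; absolute row counts are represented as values >= 1.0 and fractions as values below 1.0, as in the comments.

    #include <stdio.h>

    /*
     * Sketch of how a caller-supplied tuple_fraction is combined with a
     * LIMIT-derived limit_fraction, following the rules described above.
     */
    static double
    combine_limit(double tuple_fraction, double limit_fraction)
    {
        if (tuple_fraction >= 1.0)
        {
            if (limit_fraction >= 1.0)
                return (tuple_fraction < limit_fraction) ? tuple_fraction : limit_fraction;
            return tuple_fraction;      /* caller absolute, limit fractional */
        }
        if (tuple_fraction > 0.0)
        {
            if (limit_fraction >= 1.0)
                return limit_fraction;  /* caller fractional, limit absolute */
            return (tuple_fraction < limit_fraction) ? tuple_fraction : limit_fraction;
        }
        return limit_fraction;          /* no info from caller */
    }

    int
    main(void)
    {
        /* e.g. LIMIT 10 OFFSET 20 gives limit_fraction = 30 (absolute rows) */
        printf("%g\n", combine_limit(0.0, 30.0));   /* 30: no caller info, use the limit */
        printf("%g\n", combine_limit(100.0, 30.0)); /* 30: smaller absolute wins */
        printf("%g\n", combine_limit(0.25, 30.0));  /* 30: absolute limit wins over fraction */
        printf("%g\n", combine_limit(0.25, 0.10));  /* 0.1: smaller fraction wins */
        return 0;
    }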
2465 
2466 /*
2467  * limit_needed - do we actually need a Limit plan node?
2468  *
2469  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2470  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2471  * locution for an optimization fence. (Because other places in the planner
2472  * merely check whether parse->limitOffset isn't NULL, it will still work as
2473  * an optimization fence --- we're just suppressing unnecessary run-time
2474  * overhead.)
2475  *
2476  * This might look like it could be merged into preprocess_limit, but there's
2477  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2478  * in preprocess_limit it's good enough to consider estimated values.
2479  */
2480 bool
2481 limit_needed(Query *parse)
2482 {
2483  Node *node;
2484 
2485  node = parse->limitCount;
2486  if (node)
2487  {
2488  if (IsA(node, Const))
2489  {
2490  /* NULL indicates LIMIT ALL, ie, no limit */
2491  if (!((Const *) node)->constisnull)
2492  return true; /* LIMIT with a constant value */
2493  }
2494  else
2495  return true; /* non-constant LIMIT */
2496  }
2497 
2498  node = parse->limitOffset;
2499  if (node)
2500  {
2501  if (IsA(node, Const))
2502  {
2503  /* Treat NULL as no offset; the executor would too */
2504  if (!((Const *) node)->constisnull)
2505  {
2506  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2507 
2508  if (offset != 0)
2509  return true; /* OFFSET with a nonzero value */
2510  }
2511  }
2512  else
2513  return true; /* non-constant OFFSET */
2514  }
2515 
2516  return false; /* don't need a Limit plan node */
2517 }
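
Illustrative aside (editor's sketch, not part of planner.c): the same decision on plain values, with a hypothetical struct standing in for the Const-versus-expression distinction. It shows that a lone "OFFSET 0" needs no Limit node even though it still acts as an optimization fence.

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for a LIMIT/OFFSET clause: either a known
     * constant (possibly NULL) or something evaluated at run time. */
    typedef struct
    {
        bool present;       /* clause present at all? */
        bool is_const;      /* is it a simple constant? */
        bool is_null;       /* constant NULL (LIMIT ALL / no OFFSET)? */
        long value;         /* constant value when not NULL */
    } clause_sketch;

    static bool
    limit_node_needed(clause_sketch limit, clause_sketch offset)
    {
        if (limit.present &&
            (!limit.is_const || !limit.is_null))
            return true;            /* real or non-constant LIMIT */
        if (offset.present &&
            (!offset.is_const || (!offset.is_null && offset.value != 0)))
            return true;            /* nonzero or non-constant OFFSET */
        return false;
    }

    int
    main(void)
    {
        clause_sketch none = {false, false, false, 0};
        clause_sketch offset0 = {true, true, false, 0};

        /* "OFFSET 0" alone: no Limit node required */
        printf("%d\n", limit_node_needed(none, offset0));   /* prints 0 */
        return 0;
    }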
2518 
2519 
2520 /*
2521  * remove_useless_groupby_columns
2522  * Remove any columns in the GROUP BY clause that are redundant due to
2523  * being functionally dependent on other GROUP BY columns.
2524  *
2525  * Since some other DBMSes do not allow references to ungrouped columns, it's
2526  * not unusual to find all columns listed in GROUP BY even though listing the
2527  * primary-key columns would be sufficient. Deleting such excess columns
2528  * avoids redundant sorting work, so it's worth doing.
2529  *
2530  * Relcache invalidations will ensure that cached plans become invalidated
2531  * when the underlying index of the pkey constraint is dropped.
2532  *
2533  * Currently, we only make use of pkey constraints for this, however, we may
2534  * wish to take this further in the future and also use unique constraints
2535  * which have NOT NULL columns. In that case, plan invalidation will still
2536  * work since relations will receive a relcache invalidation when a NOT NULL
2537  * constraint is dropped.
2538  */
2539 static void
2540 remove_useless_groupby_columns(PlannerInfo *root)
2541 {
2542  Query *parse = root->parse;
2543  Bitmapset **groupbyattnos;
2544  Bitmapset **surplusvars;
2545  ListCell *lc;
2546  int relid;
2547 
2548  /* No chance to do anything if there are less than two GROUP BY items */
2549  if (list_length(parse->groupClause) < 2)
2550  return;
2551 
2552  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2553  if (parse->groupingSets)
2554  return;
2555 
2556  /*
2557  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2558  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2559  * that are GROUP BY items.
2560  */
2561  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2562  (list_length(parse->rtable) + 1));
2563  foreach(lc, parse->groupClause)
2564  {
2565  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2566  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2567  Var *var = (Var *) tle->expr;
2568 
2569  /*
2570  * Ignore non-Vars and Vars from other query levels.
2571  *
2572  * XXX in principle, stable expressions containing Vars could also be
2573  * removed, if all the Vars are functionally dependent on other GROUP
2574  * BY items. But it's not clear that such cases occur often enough to
2575  * be worth troubling over.
2576  */
2577  if (!IsA(var, Var) ||
2578  var->varlevelsup > 0)
2579  continue;
2580 
2581  /* OK, remember we have this Var */
2582  relid = var->varno;
2583  Assert(relid <= list_length(parse->rtable));
2584  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2585  var->varattno - FirstLowInvalidHeapAttributeNumber);
2586  }
2587 
2588  /*
2589  * Consider each relation and see if it is possible to remove some of its
2590  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2591  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2592  * of the column attnos of RTE k that are removable GROUP BY items.
2593  */
2594  surplusvars = NULL; /* don't allocate array unless required */
2595  relid = 0;
2596  foreach(lc, parse->rtable)
2597  {
2598  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
2599  Bitmapset *relattnos;
2600  Bitmapset *pkattnos;
2601  Oid constraintOid;
2602 
2603  relid++;
2604 
2605  /* Only plain relations could have primary-key constraints */
2606  if (rte->rtekind != RTE_RELATION)
2607  continue;
2608 
2609  /*
2610  * We must skip inheritance parent tables as some of the child rels
2611  * may cause duplicate rows. This cannot happen with partitioned
2612  * tables, however.
2613  */
2614  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
2615  continue;
2616 
2617  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2618  relattnos = groupbyattnos[relid];
2619  if (bms_membership(relattnos) != BMS_MULTIPLE)
2620  continue;
2621 
2622  /*
2623  * Can't remove any columns for this rel if there is no suitable
2624  * (i.e., nondeferrable) primary key constraint.
2625  */
2626  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2627  if (pkattnos == NULL)
2628  continue;
2629 
2630  /*
2631  * If the primary key is a proper subset of relattnos then we have
2632  * some items in the GROUP BY that can be removed.
2633  */
2634  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2635  {
2636  /*
2637  * To easily remember whether we've found anything to do, we don't
2638  * allocate the surplusvars[] array until we find something.
2639  */
2640  if (surplusvars == NULL)
2641  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2642  (list_length(parse->rtable) + 1));
2643 
2644  /* Remember the attnos of the removable columns */
2645  surplusvars[relid] = bms_difference(relattnos, pkattnos);
2646  }
2647  }
2648 
2649  /*
2650  * If we found any surplus Vars, build a new GROUP BY clause without them.
2651  * (Note: this may leave some TLEs with unreferenced ressortgroupref
2652  * markings, but that's harmless.)
2653  */
2654  if (surplusvars != NULL)
2655  {
2656  List *new_groupby = NIL;
2657 
2658  foreach(lc, parse->groupClause)
2659  {
2660  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
2661  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2662  Var *var = (Var *) tle->expr;
2663 
2664  /*
2665  * New list must include non-Vars, outer Vars, and anything not
2666  * marked as surplus.
2667  */
2668  if (!IsA(var, Var) ||
2669  var->varlevelsup > 0 ||
2670  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2671  surplusvars[var->varno]))
2672  new_groupby = lappend(new_groupby, sgc);
2673  }
2674 
2675  parse->groupClause = new_groupby;
2676  }
2677 }
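
Illustrative aside (editor's sketch, not part of planner.c): the subset test that drives the removal above, with plain bitmasks standing in for Bitmapsets and hypothetical column numbers.

    #include <stdio.h>

    int
    main(void)
    {
        /*
         * Suppose GROUP BY mentions columns 1, 2 and 5 of some table whose
         * primary key is column 1.  Columns are represented as bits here.
         */
        unsigned int relattnos = (1u << 1) | (1u << 2) | (1u << 5);
        unsigned int pkattnos = (1u << 1);

        /* pkattnos must be a proper subset of relattnos for removal to apply */
        if ((pkattnos & ~relattnos) == 0 && pkattnos != relattnos)
        {
            unsigned int surplus = relattnos & ~pkattnos;

            /* Columns 2 and 5 are functionally dependent on the pkey: drop them */
            printf("surplus columns mask: 0x%x\n", surplus);    /* 0x24 */
        }
        return 0;
    }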
2678 
2679 /*
2680  * preprocess_groupclause - do preparatory work on GROUP BY clause
2681  *
2682  * The idea here is to adjust the ordering of the GROUP BY elements
2683  * (which in itself is semantically insignificant) to match ORDER BY,
2684  * thereby allowing a single sort operation to both implement the ORDER BY
2685  * requirement and set up for a Unique step that implements GROUP BY.
2686  *
2687  * In principle it might be interesting to consider other orderings of the
2688  * GROUP BY elements, which could match the sort ordering of other
2689  * possible plans (eg an indexscan) and thereby reduce cost. We don't
2690  * bother with that, though. Hashed grouping will frequently win anyway.
2691  *
2692  * Note: we need no comparable processing of the distinctClause because
2693  * the parser already enforced that that matches ORDER BY.
2694  *
2695  * For grouping sets, the order of items is instead forced to agree with that
2696  * of the grouping set (and items not in the grouping set are skipped). The
2697  * work of sorting the order of grouping set elements to match the ORDER BY if
2698  * possible is done elsewhere.
2699  */
2700 static List *
2701 preprocess_groupclause(PlannerInfo *root, List *force)
2702 {
2703  Query *parse = root->parse;
2704  List *new_groupclause = NIL;
2705  bool partial_match;
2706  ListCell *sl;
2707  ListCell *gl;
2708 
2709  /* For grouping sets, we need to force the ordering */
2710  if (force)
2711  {
2712  foreach(sl, force)
2713  {
2714  Index ref = lfirst_int(sl);
2715  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2716 
2717  new_groupclause = lappend(new_groupclause, cl);
2718  }
2719 
2720  return new_groupclause;
2721  }
2722 
2723  /* If no ORDER BY, nothing useful to do here */
2724  if (parse->sortClause == NIL)
2725  return parse->groupClause;
2726 
2727  /*
2728  * Scan the ORDER BY clause and construct a list of matching GROUP BY
2729  * items, but only as far as we can make a matching prefix.
2730  *
2731  * This code assumes that the sortClause contains no duplicate items.
2732  */
2733  foreach(sl, parse->sortClause)
2734  {
2735  SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
2736 
2737  foreach(gl, parse->groupClause)
2738  {
2739  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2740 
2741  if (equal(gc, sc))
2742  {
2743  new_groupclause = lappend(new_groupclause, gc);
2744  break;
2745  }
2746  }
2747  if (gl == NULL)
2748  break; /* no match, so stop scanning */
2749  }
2750 
2751  /* Did we match all of the ORDER BY list, or just some of it? */
2752  partial_match = (sl != NULL);
2753 
2754  /* If no match at all, no point in reordering GROUP BY */
2755  if (new_groupclause == NIL)
2756  return parse->groupClause;
2757 
2758  /*
2759  * Add any remaining GROUP BY items to the new list, but only if we were
2760  * able to make a complete match. In other words, we only rearrange the
2761  * GROUP BY list if the result is that one list is a prefix of the other
2762  * --- otherwise there's no possibility of a common sort. Also, give up
2763  * if there are any non-sortable GROUP BY items, since then there's no
2764  * hope anyway.
2765  */
2766  foreach(gl, parse->groupClause)
2767  {
2768  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
2769 
2770  if (list_member_ptr(new_groupclause, gc))
2771  continue; /* it matched an ORDER BY item */
2772  if (partial_match)
2773  return parse->groupClause; /* give up, no common sort possible */
2774  if (!OidIsValid(gc->sortop))
2775  return parse->groupClause; /* give up, GROUP BY can't be sorted */
2776  new_groupclause = lappend(new_groupclause, gc);
2777  }
2778 
2779  /* Success --- install the rearranged GROUP BY list */
2780  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2781  return new_groupclause;
2782 }
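
Illustrative aside (editor's sketch, not part of planner.c): the prefix-matching rearrangement described above, applied to small integers instead of SortGroupClause lists. reorder_groupby() and the sample clause identities are hypothetical, and the sortability check on leftover items is omitted.

    #include <stdbool.h>
    #include <stdio.h>

    #define MAXITEMS 8

    /*
     * Sketch: reorder group-by items (identified by small integers) so that a
     * common prefix with the order-by list comes first.  Returns false when no
     * useful rearrangement is possible (the caller keeps the original order).
     */
    static bool
    reorder_groupby(const int *groupby, int ngroup,
                    const int *orderby, int norder,
                    int *result)
    {
        bool used[MAXITEMS] = {false};
        int nout = 0;
        int i, j;

        /* Take order-by items as long as each one appears in the group-by */
        for (i = 0; i < norder; i++)
        {
            bool found = false;

            for (j = 0; j < ngroup; j++)
            {
                if (!used[j] && groupby[j] == orderby[i])
                {
                    result[nout++] = groupby[j];
                    used[j] = true;
                    found = true;
                    break;
                }
            }
            if (!found)
                break;
        }

        if (nout == 0)
            return false;               /* no common prefix at all */
        if (i < norder && nout < ngroup)
            return false;               /* neither list is a prefix of the other */

        /* Full match of the order-by: append the leftover group-by items */
        for (j = 0; j < ngroup; j++)
            if (!used[j])
                result[nout++] = groupby[j];
        return true;
    }

    int
    main(void)
    {
        /* Hypothetical: GROUP BY b, a, c with ORDER BY a, b (1=a, 2=b, 3=c) */
        int groupby[] = {2, 1, 3};
        int orderby[] = {1, 2};
        int result[MAXITEMS];
        int n;

        if (reorder_groupby(groupby, 3, orderby, 2, result))
            for (n = 0; n < 3; n++)
                printf("%d ", result[n]);   /* prints "1 2 3": a, b, c */
        printf("\n");
        return 0;
    }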
2783 
2784 /*
2785  * Extract lists of grouping sets that can be implemented using a single
2786  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2787  *
2788  * Input must be sorted with smallest sets first. Result has each sublist
2789  * sorted with smallest sets first.
2790  *
2791  * We want to produce the absolute minimum possible number of lists here to
2792  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2793  * of finding the minimal partition of a partially-ordered set into chains
2794  * (which is what we need, taking the list of grouping sets as a poset ordered
2795  * by set inclusion) can be mapped to the problem of finding the maximum
2796  * cardinality matching on a bipartite graph, which is solvable in polynomial
2797  * time with a worst case of no worse than O(n^2.5) and usually much
2798  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2799  * heuristic or approximate methods. (Planning time for a 12-d cube is under
2800  * half a second on my modest system even with optimization off and assertions
2801  * on.)
2802  */
2803 static List *
2804 extract_rollup_sets(List *groupingSets)
2805 {
2806  int num_sets_raw = list_length(groupingSets);
2807  int num_empty = 0;
2808  int num_sets = 0; /* distinct sets */
2809  int num_chains = 0;
2810  List *result = NIL;
2811  List **results;
2812  List **orig_sets;
2813  Bitmapset **set_masks;
2814  int *chains;
2815  short **adjacency;
2816  short *adjacency_buf;
2817  BipartiteMatchState *state;
2818  int i;
2819  int j;
2820  int j_size;
2821  ListCell *lc1 = list_head(groupingSets);
2822  ListCell *lc;
2823 
2824  /*
2825  * Start by stripping out empty sets. The algorithm doesn't require this,
2826  * but the planner currently needs all empty sets to be returned in the
2827  * first list, so we strip them here and add them back after.
2828  */
2829  while (lc1 && lfirst(lc1) == NIL)
2830  {
2831  ++num_empty;
2832  lc1 = lnext(groupingSets, lc1);
2833  }
2834 
2835  /* bail out now if it turns out that all we had were empty sets. */
2836  if (!lc1)
2837  return list_make1(groupingSets);
2838 
2839  /*----------
2840  * We don't strictly need to remove duplicate sets here, but if we don't,
2841  * they tend to become scattered through the result, which is a bit
2842  * confusing (and irritating if we ever decide to optimize them out).
2843  * So we remove them here and add them back after.
2844  *
2845  * For each non-duplicate set, we fill in the following:
2846  *
2847  * orig_sets[i] = list of the original set lists
2848  * set_masks[i] = bitmapset for testing inclusion
2849  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2850  *
2851  * chains[i] will be the result group this set is assigned to.
2852  *
2853  * We index all of these from 1 rather than 0 because it is convenient
2854  * to leave 0 free for the NIL node in the graph algorithm.
2855  *----------
2856  */
2857  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2858  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2859  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2860  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2861 
2862  j_size = 0;
2863  j = 0;
2864  i = 1;
2865 
2866  for_each_cell(lc, groupingSets, lc1)
2867  {
2868  List *candidate = (List *) lfirst(lc);
2869  Bitmapset *candidate_set = NULL;
2870  ListCell *lc2;
2871  int dup_of = 0;
2872 
2873  foreach(lc2, candidate)
2874  {
2875  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2876  }
2877 
2878  /* we can only be a dup if we're the same length as a previous set */
2879  if (j_size == list_length(candidate))
2880  {
2881  int k;
2882 
2883  for (k = j; k < i; ++k)
2884  {
2885  if (bms_equal(set_masks[k], candidate_set))
2886  {
2887  dup_of = k;
2888  break;
2889  }
2890  }
2891  }
2892  else if (j_size < list_length(candidate))
2893  {
2894  j_size = list_length(candidate);
2895  j = i;
2896  }
2897 
2898  if (dup_of > 0)
2899  {
2900  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2901  bms_free(candidate_set);
2902  }
2903  else
2904  {
2905  int k;
2906  int n_adj = 0;
2907 
2908  orig_sets[i] = list_make1(candidate);
2909  set_masks[i] = candidate_set;
2910 
2911  /* fill in adjacency list; no need to compare equal-size sets */
2912 
2913  for (k = j - 1; k > 0; --k)
2914  {
2915  if (bms_is_subset(set_masks[k], candidate_set))
2916  adjacency_buf[++n_adj] = k;
2917  }
2918 
2919  if (n_adj > 0)
2920  {
2921  adjacency_buf[0] = n_adj;
2922  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2923  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2924  }
2925  else
2926  adjacency[i] = NULL;
2927 
2928  ++i;
2929  }
2930  }
2931 
2932  num_sets = i - 1;
2933 
2934  /*
2935  * Apply the graph matching algorithm to do the work.
2936  */
2937  state = BipartiteMatch(num_sets, num_sets, adjacency);
2938 
2939  /*
2940  * Now, the state->pair* fields have the info we need to assign sets to
2941  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
2942  * pair_vu[v] = u (both will be true, but we check both so that we can do
2943  * it in one pass)
2944  */
2945  chains = palloc0((num_sets + 1) * sizeof(int));
2946 
2947  for (i = 1; i <= num_sets; ++i)
2948  {
2949  int u = state->pair_vu[i];
2950  int v = state->pair_uv[i];
2951 
2952  if (u > 0 && u < i)
2953  chains[i] = chains[u];
2954  else if (v > 0 && v < i)
2955  chains[i] = chains[v];
2956  else
2957  chains[i] = ++num_chains;
2958  }
2959 
2960  /* build result lists. */
2961  results = palloc0((num_chains + 1) * sizeof(List *));
2962 
2963  for (i = 1; i <= num_sets; ++i)
2964  {
2965  int c = chains[i];
2966 
2967  Assert(c > 0);
2968 
2969  results[c] = list_concat(results[c], orig_sets[i]);
2970  }
2971 
2972  /* push any empty sets back on the first list. */
2973  while (num_empty-- > 0)
2974  results[1] = lcons(NIL, results[1]);
2975 
2976  /* make result list */
2977  for (i = 1; i <= num_chains; ++i)
2978  result = lappend(result, results[i]);
2979 
2980  /*
2981  * Free all the things.
2982  *
2983  * (This is over-fussy for small sets but for large sets we could have
2984  * tied up a nontrivial amount of memory.)
2985  */
2986  BipartiteMatchFree(state);
2987  pfree(results);
2988  pfree(chains);
2989  for (i = 1; i <= num_sets; ++i)
2990  if (adjacency[i])
2991  pfree(adjacency[i]);
2992  pfree(adjacency);
2993  pfree(adjacency_buf);
2994  pfree(orig_sets);
2995  for (i = 1; i <= num_sets; ++i)
2996  bms_free(set_masks[i]);
2997  pfree(set_masks);
2998 
2999  return result;
3000 }
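
Illustrative aside (editor's sketch, not part of planner.c): one minimal chain decomposition for a hypothetical collection of grouping sets, checked here only for the inclusion property that makes each chain a valid rollup.

    #include <assert.h>
    #include <stdio.h>

    int
    main(void)
    {
        /*
         * GROUPING SETS ((), (a), (c), (a,b), (a,b,c)) as bitmasks with
         * 1=a, 2=b, 4=c.  One minimal chain decomposition is
         * {() < (a) < (a,b) < (a,b,c)} and {(c)}: two rollups, hence at
         * most two sorts.
         */
        unsigned int chains[2][4] = {{0u, 1u, 3u, 7u}, {4u}};
        int chainlens[2] = {4, 1};

        for (int c = 0; c < 2; c++)
            for (int i = 1; i < chainlens[c]; i++)
            {
                /* every set must include its predecessor in the chain */
                assert((chains[c][i - 1] & ~chains[c][i]) == 0);
            }
        printf("2 rollups cover 5 grouping sets\n");
        return 0;
    }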
3001 
3002 /*
3003  * Reorder the elements of a list of grouping sets such that they have correct
3004  * prefix relationships. Also inserts the GroupingSetData annotations.
3005  *
3006  * The input must be ordered with smallest sets first; the result is returned
3007  * with largest sets first. Note that the result shares no list substructure
3008  * with the input, so it's safe for the caller to modify it later.
3009  *
3010  * If we're passed in a sortclause, we follow its order of columns to the
3011  * extent possible, to minimize the chance that we add unnecessary sorts.
3012  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3013  * gets implemented in one pass.)
3014  */
3015 static List *
3016 reorder_grouping_sets(List *groupingsets, List *sortclause)
3017 {
3018  ListCell *lc;
3019  List *previous = NIL;
3020  List *result = NIL;
3021 
3022  foreach(lc, groupingsets)
3023  {
3024  List *candidate = (List *) lfirst(lc);
3025  List *new_elems = list_difference_int(candidate, previous);
3026  GroupingSetData *gs = makeNode(GroupingSetData);
3027 
3028  while (list_length(sortclause) > list_length(previous) &&
3029  list_length(new_elems) > 0)
3030  {
3031  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3032  int ref = sc->tleSortGroupRef;
3033 
3034  if (list_member_int(new_elems, ref))
3035  {
3036  previous = lappend_int(previous, ref);
3037  new_elems = list_delete_int(new_elems, ref);
3038  }
3039  else
3040  {
3041  /* diverged from the sortclause; give up on it */
3042  sortclause = NIL;
3043  break;
3044  }
3045  }
3046 
3047  previous = list_concat(previous, new_elems);
3048 
3049  gs->set = list_copy(previous);
3050  result = lcons(gs, result);
3051  }
3052 
3053  list_free(previous);
3054 
3055  return result;
3056 }
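
Illustrative aside (editor's sketch, not part of planner.c): the prefix-building step above on hypothetical data, showing how GROUPING SETS ((c),(a,b,c)) with ORDER BY c, b, a comes out as the single rollup (c) then (c,b,a). Unlike the real code, this simplified loop does not permanently abandon the sort clause once it diverges.

    #include <stdbool.h>
    #include <stdio.h>

    #define MAXCOLS 8

    static bool
    contains(const int *arr, int n, int v)
    {
        for (int i = 0; i < n; i++)
            if (arr[i] == v)
                return true;
        return false;
    }

    int
    main(void)
    {
        /* GROUPING SETS ((c),(a,b,c)) with ORDER BY c, b, a; 1=a, 2=b, 3=c */
        int sets[2][MAXCOLS] = {{3}, {1, 2, 3}};
        int setlens[2] = {1, 3};
        int sortclause[] = {3, 2, 1};
        int nsort = 3;

        int previous[MAXCOLS];
        int nprev = 0;

        for (int s = 0; s < 2; s++)
        {
            /* first consume sort-clause columns that are new members of this set */
            while (nprev < nsort &&
                   contains(sets[s], setlens[s], sortclause[nprev]) &&
                   !contains(previous, nprev, sortclause[nprev]))
            {
                previous[nprev] = sortclause[nprev];
                nprev++;
            }
            /* then any remaining new members, in their original order */
            for (int i = 0; i < setlens[s]; i++)
                if (!contains(previous, nprev, sets[s][i]))
                    previous[nprev++] = sets[s][i];

            /* the set, as it will actually be grouped */
            for (int i = 0; i < nprev; i++)
                printf("%d ", previous[i]);
            printf("\n");       /* prints "3" then "3 2 1": (c), then (c,b,a) */
        }
        return 0;
    }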
3057 
3058 /*
3059  * Compute query_pathkeys and other pathkeys during plan generation
3060  */
3061 static void
3062 standard_qp_callback(PlannerInfo *root, void *extra)
3063 {
3064  Query *parse = root->parse;
3065  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3066  List *tlist = root->processed_tlist;
3067  List *activeWindows = qp_extra->activeWindows;
3068 
3069  /*
3070  * Calculate pathkeys that represent grouping/ordering requirements. The
3071  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3072  * be, in which case we just leave their pathkeys empty.
3073  */
3074  if (qp_extra->groupClause &&
3075  grouping_is_sortable(qp_extra->groupClause))
3076  root->group_pathkeys =
3077  make_pathkeys_for_sortclauses(root,
3078  qp_extra->groupClause,
3079  tlist);
3080  else
3081  root->group_pathkeys = NIL;
3082 
3083  /* We consider only the first (bottom) window in pathkeys logic */
3084  if (activeWindows != NIL)
3085  {
3086  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3087 
3088  root->window_pathkeys = make_pathkeys_for_window(root,
3089  wc,
3090  tlist);
3091  }
3092  else
3093  root->window_pathkeys = NIL;
3094 
3095  if (parse->distinctClause &&
3096  grouping_is_sortable(parse->distinctClause))
3097  root->distinct_pathkeys =
3098  make_pathkeys_for_sortclauses(root,
3099  parse->distinctClause,
3100  tlist);
3101  else
3102  root->distinct_pathkeys = NIL;
3103 
3104  root->sort_pathkeys =
3105  make_pathkeys_for_sortclauses(root,
3106  parse->sortClause,
3107  tlist);
3108 
3109  /*
3110  * Figure out whether we want a sorted result from query_planner.
3111  *
3112  * If we have a sortable GROUP BY clause, then we want a result sorted
3113  * properly for grouping. Otherwise, if we have window functions to
3114  * evaluate, we try to sort for the first window. Otherwise, if there's a
3115  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3116  * we try to produce output that's sufficiently well sorted for the
3117  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3118  * by the ORDER BY clause.
3119  *
3120  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3121  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3122  * that might just leave us failing to exploit an available sort order at
3123  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3124  * much easier, since we know that the parser ensured that one is a
3125  * superset of the other.
3126  */
3127  if (root->group_pathkeys)
3128  root->query_pathkeys = root->group_pathkeys;
3129  else if (root->window_pathkeys)
3130  root->query_pathkeys = root->window_pathkeys;
3131  else if (list_length(root->distinct_pathkeys) >
3132  list_length(root->sort_pathkeys))
3133  root->query_pathkeys = root->distinct_pathkeys;
3134  else if (root->sort_pathkeys)
3135  root->query_pathkeys = root->sort_pathkeys;
3136  else
3137  root->query_pathkeys = NIL;
3138 }
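
Illustrative aside (editor's sketch, not part of planner.c): the precedence used above for choosing query_pathkeys, with list lengths standing in for the pathkey lists; pick_query_pathkeys() is hypothetical.

    #include <stdio.h>

    /*
     * Sketch of the precedence for the desired output ordering: GROUP BY,
     * else window, else DISTINCT when it is stricter than ORDER BY, else
     * ORDER BY.
     */
    static const char *
    pick_query_pathkeys(int ngroup, int nwindow, int ndistinct, int nsort)
    {
        if (ngroup > 0)
            return "group_pathkeys";
        if (nwindow > 0)
            return "window_pathkeys";
        if (ndistinct > nsort)
            return "distinct_pathkeys";
        if (nsort > 0)
            return "sort_pathkeys";
        return "none";
    }

    int
    main(void)
    {
        /* e.g. SELECT DISTINCT a, b ... ORDER BY a: DISTINCT is stricter */
        printf("%s\n", pick_query_pathkeys(0, 0, 2, 1));    /* distinct_pathkeys */
        return 0;
    }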
3139 
3140 /*
3141  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3142  *
3143  * path_rows: number of output rows from scan/join step
3144  * gd: grouping sets data including list of grouping sets and their clauses
3145  * target_list: target list containing group clause references
3146  *
3147  * If doing grouping sets, we also annotate the gsets data with the estimates
3148  * for each set and each individual rollup list, with a view to later
3149  * determining whether some combination of them could be hashed instead.
3150  */
3151 static double
3152 get_number_of_groups(PlannerInfo *root,
3153  double path_rows,
3154  grouping_sets_data *gd,
3155  List *target_list)
3156 {
3157  Query *parse = root->parse;
3158  double dNumGroups;
3159 
3160  if (parse->groupClause)
3161  {
3162  List *groupExprs;
3163 
3164  if (parse->groupingSets)
3165  {
3166  /* Add up the estimates for each grouping set */
3167  ListCell *lc;
3168  ListCell *lc2;
3169 
3170  Assert(gd); /* keep Coverity happy */
3171 
3172  dNumGroups = 0;
3173 
3174  foreach(lc, gd->rollups)
3175  {
3176  RollupData *rollup = lfirst_node(RollupData, lc);
3177  ListCell *lc;
3178 
3179  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3180  target_list);
3181 
3182  rollup->numGroups = 0.0;
3183 
3184  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3185  {
3186  List *gset = (List *) lfirst(lc);
3187  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3188  double numGroups = estimate_num_groups(root,
3189  groupExprs,
3190  path_rows,
3191  &gset,
3192  NULL);
3193 
3194  gs->numGroups = numGroups;
3195  rollup->numGroups += numGroups;
3196  }
3197 
3198  dNumGroups += rollup->numGroups;
3199  }
3200 
3201  if (gd->hash_sets_idx)
3202  {
3203  ListCell *lc;
3204 
3205  gd->dNumHashGroups = 0;
3206 
3207  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3208  target_list);
3209 
3210  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3211  {
3212  List *gset = (List *) lfirst(lc);
3213  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3214  double numGroups = estimate_num_groups(root,
3215  groupExprs,
3216  path_rows,
3217  &gset,
3218  NULL);
3219 
3220  gs->numGroups = numGroups;
3221  gd->dNumHashGroups += numGroups;
3222  }
3223 
3224  dNumGroups += gd->dNumHashGroups;
3225  }
3226  }
3227  else
3228  {
3229  /* Plain GROUP BY */
3230  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3231  target_list);
3232 
3233  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3234  NULL, NULL);
3235  }
3236  }
3237  else if (parse->groupingSets)
3238  {
3239  /* Empty grouping sets ... one result row for each one */
3240  dNumGroups = list_length(parse->groupingSets);
3241  }
3242  else if (parse->hasAggs || root->hasHavingQual)
3243  {
3244  /* Plain aggregation, one result row */
3245  dNumGroups = 1;
3246  }
3247  else
3248  {
3249  /* Not grouping */
3250  dNumGroups = 1;
3251  }
3252 
3253  return dNumGroups;
3254 }
3255 
3256 /*
3257  * create_grouping_paths
3258  *
3259  * Build a new upperrel containing Paths for grouping and/or aggregation.
3260  * Along the way, we also build an upperrel for Paths which are partially
3261  * grouped and/or aggregated. A partially grouped and/or aggregated path
3262  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3263  * the only partially grouped paths we build are also partial paths; that
3264  * is, they need a Gather and then a FinalizeAggregate.
3265  *
3266  * input_rel: contains the source-data Paths
3267  * target: the pathtarget for the result Paths to compute
3268  * gd: grouping sets data including list of grouping sets and their clauses
3269  *
3270  * Note: all Paths in input_rel are expected to return the target computed
3271  * by make_group_input_target.
3272  */
3273 static RelOptInfo *
3274 create_grouping_paths(PlannerInfo *root,
3275  RelOptInfo *input_rel,
3276  PathTarget *target,
3277  bool target_parallel_safe,
3278  grouping_sets_data *gd)
3279 {
3280  Query *parse = root->parse;
3281  RelOptInfo *grouped_rel;
3282  RelOptInfo *partially_grouped_rel;
3283  AggClauseCosts agg_costs;
3284 
3285  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
3286  get_agg_clause_costs(root, AGGSPLIT_SIMPLE, &agg_costs);
3287 
3288  /*
3289  * Create grouping relation to hold fully aggregated grouping and/or
3290  * aggregation paths.
3291  */
3292  grouped_rel = make_grouping_rel(root, input_rel, target,
3293  target_parallel_safe, parse->havingQual);
3294 
3295  /*
3296  * Create either paths for a degenerate grouping or paths for ordinary
3297  * grouping, as appropriate.
3298  */
3299  if (is_degenerate_grouping(root))
3300  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3301  else
3302  {
3303  int flags = 0;
3304  GroupPathExtraData extra;
3305 
3306  /*
3307  * Determine whether it's possible to perform sort-based
3308  * implementations of grouping. (Note that if groupClause is empty,
3309  * grouping_is_sortable() is trivially true, and all the
3310  * pathkeys_contained_in() tests will succeed too, so that we'll
3311  * consider every surviving input path.)
3312  *
3313  * If we have grouping sets, we might be able to sort some but not all
3314  * of them; in this case, we need can_sort to be true as long as we
3315  * must consider any sorted-input plan.
3316  */
3317  if ((gd && gd->rollups != NIL)
3318  || grouping_is_sortable(parse->groupClause))
3319  flags |= GROUPING_CAN_USE_SORT;
3320 
3321  /*
3322  * Determine whether we should consider hash-based implementations of
3323  * grouping.
3324  *
3325  * Hashed aggregation only applies if we're grouping. If we have
3326  * grouping sets, some groups might be hashable but others not; in
3327  * this case we set can_hash true as long as there is nothing globally
3328  * preventing us from hashing (and we should therefore consider plans
3329  * with hashes).
3330  *
3331  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3332  * BY aggregates. (Doing so would imply storing *all* the input
3333  * values in the hash table, and/or running many sorts in parallel,
3334  * either of which seems like a certain loser.) We similarly don't
3335  * support ordered-set aggregates in hashed aggregation, but that case
3336  * is also included in the numOrderedAggs count.
3337  *
3338  * Note: grouping_is_hashable() is much more expensive to check than
3339  * the other gating conditions, so we want to do it last.
3340  */
3341  if ((parse->groupClause != NIL &&
3342  root->numOrderedAggs == 0 &&
3343  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
3344  flags |= GROUPING_CAN_USE_HASH;
3345 
3346  /*
3347  * Determine whether partial aggregation is possible.
3348  */
3349  if (can_partial_agg(root))
3350  flags |= GROUPING_CAN_PARTIAL_AGG;
3351 
3352  extra.flags = flags;
3353  extra.target_parallel_safe = target_parallel_safe;
3354  extra.havingQual = parse->havingQual;
3355  extra.targetList = parse->targetList;
3356  extra.partial_costs_set = false;
3357 
3358  /*
3359  * Determine whether partitionwise aggregation is in theory possible.
3360  * It can be disabled by the user, and for now, we don't try to
3361  * support grouping sets. create_ordinary_grouping_paths() will check
3362  * additional conditions, such as whether input_rel is partitioned.
3363  */
3364  if (enable_partitionwise_aggregate && !parse->groupingSets)
3365  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3366  else
3367  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3368 
3369  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3370  &agg_costs, gd, &extra,
3371  &partially_grouped_rel);
3372  }
3373 
3374  set_cheapest(grouped_rel);
3375  return grouped_rel;
3376 }
3377 
3378 /*
3379  * make_grouping_rel
3380  *
3381  * Create a new grouping rel and set basic properties.
3382  *
3383  * input_rel represents the underlying scan/join relation.
3384  * target is the output expected from the grouping relation.
3385  */
3386 static RelOptInfo *
3387 make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3388  PathTarget *target, bool target_parallel_safe,
3389  Node *havingQual)
3390 {
3391  RelOptInfo *grouped_rel;
3392 
3393  if (IS_OTHER_REL(input_rel))
3394  {
3395  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3396  input_rel->relids);
3397  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3398  }
3399  else
3400  {
3401  /*
3402  * By tradition, the relids set for the main grouping relation is
3403  * NULL. (This could be changed, but might require adjustments
3404  * elsewhere.)
3405  */
3406  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3407  }
3408 
3409  /* Set target. */
3410  grouped_rel->reltarget = target;
3411 
3412  /*
3413  * If the input relation is not parallel-safe, then the grouped relation
3414  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3415  * target list and HAVING quals are parallel-safe.
3416  */
3417  if (input_rel->consider_parallel && target_parallel_safe &&
3418  is_parallel_safe(root, (Node *) havingQual))
3419  grouped_rel->consider_parallel = true;
3420 
3421  /*
3422  * If the input rel belongs to a single FDW, so does the grouped rel.
3423  */
3424  grouped_rel->serverid = input_rel->serverid;
3425  grouped_rel->userid = input_rel->userid;
3426  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3427  grouped_rel->fdwroutine = input_rel->fdwroutine;
3428 
3429  return grouped_rel;
3430 }
3431 
3432 /*
3433  * is_degenerate_grouping
3434  *
3435  * A degenerate grouping is one in which the query has a HAVING qual and/or
3436  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3437  * grouping sets are all empty).
3438  */
3439 static bool
3440 is_degenerate_grouping(PlannerInfo *root)
3441 {
3442  Query *parse = root->parse;
3443 
3444  return (root->hasHavingQual || parse->groupingSets) &&
3445  !parse->hasAggs && parse->groupClause == NIL;
3446 }
3447 
3448 /*
3449  * create_degenerate_grouping_paths
3450  *
3451  * When the grouping is degenerate (see is_degenerate_grouping), we are
3452  * supposed to emit either zero or one row for each grouping set depending on
3453  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3454  * either HAVING or the targetlist, so we actually do not need the FROM table
3455  * at all! We can just throw away the plan-so-far and generate a Result node.
3456  * This is a sufficiently unusual corner case that it's not worth contorting
3457  * the structure of this module to avoid having to generate the earlier paths
3458  * in the first place.
3459  */
3460 static void
3461 create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3462  RelOptInfo *grouped_rel)
3463 {
3464  Query *parse = root->parse;
3465  int nrows;
3466  Path *path;
3467 
3468  nrows = list_length(parse->groupingSets);
3469  if (nrows > 1)
3470  {
3471  /*
3472  * Doesn't seem worthwhile writing code to cons up a generate_series
3473  * or a values scan to emit multiple rows. Instead just make N clones
3474  * and append them. (With a volatile HAVING clause, this means you
3475  * might get between 0 and N output rows. Offhand I think that's
3476  * desired.)
3477  */
3478  List *paths = NIL;
3479 
3480  while (--nrows >= 0)
3481  {
3482  path = (Path *)
3483  create_group_result_path(root, grouped_rel,
3484  grouped_rel->reltarget,
3485  (List *) parse->havingQual);
3486  paths = lappend(paths, path);
3487  }
3488  path = (Path *)
3489  create_append_path(root,
3490  grouped_rel,
3491  paths,
3492  NIL,
3493  NIL,
3494  NULL,
3495  0,
3496  false,
3497  -1);
3498  }
3499  else
3500  {
3501  /* No grouping sets, or just one, so one output row */
3502  path = (Path *)
3503  create_group_result_path(root, grouped_rel,
3504  grouped_rel->reltarget,
3505  (List *) parse->havingQual);
3506  }
3507 
3508  add_path(grouped_rel, path);
3509 }
3510 
3511 /*
3512  * create_ordinary_grouping_paths
3513  *
3514  * Create grouping paths for the ordinary (that is, non-degenerate) case.
3515  *
3516  * We need to consider sorted and hashed aggregation in the same function,
3517  * because otherwise (1) it would be harder to throw an appropriate error
3518  * message if neither way works, and (2) we should not allow hashtable size
3519  * considerations to dissuade us from using hashing if sorting is not possible.
3520  *
3521  * *partially_grouped_rel_p will be set to the partially grouped rel which this
3522  * function creates, or to NULL if it doesn't create one.
3523  */
3524 static void
3525 create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3526  RelOptInfo *grouped_rel,
3527  const AggClauseCosts *agg_costs,
3528  grouping_sets_data *gd,
3529  GroupPathExtraData *extra,
3530  RelOptInfo **partially_grouped_rel_p)
3531 {
3532  Path *cheapest_path = input_rel->cheapest_total_path;
3533  RelOptInfo *partially_grouped_rel = NULL;
3534  double dNumGroups;
3535  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
3536 
3537  /*
3538  * If this is the topmost grouping relation or if the parent relation is
3539  * doing some form of partitionwise aggregation, then we may be able to do
3540  * it at this level also. However, if the input relation is not
3541  * partitioned, partitionwise aggregate is impossible.
3542  */
3543  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
3544  IS_PARTITIONED_REL(input_rel))
3545  {
3546  /*
3547  * If this is the topmost relation or if the parent relation is doing
3548  * full partitionwise aggregation, then we can do full partitionwise
3549  * aggregation provided that the GROUP BY clause contains all of the
3550  * partitioning columns at this level. Otherwise, we can do at most
3551  * partial partitionwise aggregation. But if partial aggregation is
3552  * not supported in general then we can't use it for partitionwise
3553  * aggregation either.
3554  */
3555  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
3556  group_by_has_partkey(input_rel, extra->targetList,
3557  root->parse->groupClause))
3558  patype = PARTITIONWISE_AGGREGATE_FULL;
3559  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3560  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
3561  else
3562  patype = PARTITIONWISE_AGGREGATE_NONE;
3563  }
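/*
 * Editorial illustration (not part of planner.c): if input_rel is a table
 * partitioned by (region), then "GROUP BY region, product" includes the
 * partition key, so every group falls entirely within one partition and
 * full partitionwise aggregation is possible; "GROUP BY product" alone does
 * not, so at best each partition is partially aggregated and the partial
 * results are combined afterwards (PARTITIONWISE_AGGREGATE_PARTIAL).
 */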
3564 
3565  /*
3566  * Before generating paths for grouped_rel, we first generate any possible
3567  * partially grouped paths; that way, later code can easily consider both
3568  * parallel and non-parallel approaches to grouping.
3569  */
3570  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
3571  {
3572  bool force_rel_creation;
3573 
3574  /*
3575  * If we're doing partitionwise aggregation at this level, force
3576  * creation of a partially_grouped_rel so we can add partitionwise
3577  * paths to it.
3578  */
3579  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
3580 
3581  partially_grouped_rel =
3582  create_partial_grouping_paths(root,
3583  grouped_rel,
3584  input_rel,
3585  gd,
3586  extra,
3587  force_rel_creation);
3588  }
3589 
3590  /* Set out parameter. */
3591  *partially_grouped_rel_p = partially_grouped_rel;
3592 
3593  /* Apply partitionwise aggregation technique, if possible. */
3594  if (patype != PARTITIONWISE_AGGREGATE_NONE)
3595  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
3596  partially_grouped_rel, agg_costs,
3597  gd, patype, extra);
3598 
3599  /* If we are doing partial aggregation only, return. */
3600  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
3601  {
3602  Assert(partially_grouped_rel);
3603 
3604  if (partially_grouped_rel->pathlist)
3605  set_cheapest(partially_grouped_rel);
3606 
3607  return;
3608  }
3609 
3610  /* Gather any partially grouped partial paths. */
3611  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
3612  {
3613  gather_grouping_paths(root, partially_grouped_rel);
3614  set_cheapest(partially_grouped_rel);
3615  }
3616 
3617  /*
3618  * Estimate number of groups.
3619  */
3620  dNumGroups = get_number_of_groups(root,
3621  cheapest_path->rows,
3622  gd,
3623  extra->targetList);
3624 
3625  /* Build final grouping paths */
3626  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
3627  partially_grouped_rel, agg_costs, gd,
3628  dNumGroups, extra);
3629 
3630  /* Give a helpful error if we failed to find any implementation */
3631  if (grouped_rel->pathlist == NIL)
3632  ereport(ERROR,
3633  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3634  errmsg("could not implement GROUP BY"),
3635  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
3636 
3637  /*
3638  * If there is an FDW that's responsible for all baserels of the query,
3639  * let it consider adding ForeignPaths.
3640  */
3641  if (grouped_rel->fdwroutine &&
3642  grouped_rel->fdwroutine->GetForeignUpperPaths)
3643  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
3644  input_rel, grouped_rel,
3645  extra);
3646 
3647  /* Let extensions possibly add some more paths */
3648  if (create_upper_paths_hook)
3649  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
3650  input_rel, grouped_rel,
3651  extra);
3652 }
3653 
3654 /*
3655  * For a given input path, consider the possible ways of doing grouping sets on
3656  * it, by combinations of hashing and sorting. This can be called multiple
3657  * times, so it's important that it not scribble on input. No result is
3658  * returned, but any generated paths are added to grouped_rel.
3659  */
3660 static void
3661 consider_groupingsets_paths(PlannerInfo *root,
3662  RelOptInfo *grouped_rel,
3663  Path *path,
3664  bool is_sorted,
3665  bool can_hash,
3666  grouping_sets_data *gd,
3667  const AggClauseCosts *agg_costs,
3668  double dNumGroups)
3669 {
3670  Query *parse = root->parse;
3671  int hash_mem = get_hash_mem();
3672 
3673  /*
3674  * If we're not being offered sorted input, then only consider plans that
3675  * can be done entirely by hashing.
3676  *
3677  * We can hash everything if it looks like it'll fit in hash_mem. But if
3678  * the input is actually sorted despite not being advertised as such, we
3679  * prefer to make use of that in order to use less memory.
3680  *
3681  * If none of the grouping sets are sortable, then ignore the hash_mem
3682  * limit and generate a path anyway, since otherwise we'll just fail.
3683  */
3684  if (!is_sorted)
3685  {
3686  List *new_rollups = NIL;
3687  RollupData *unhashed_rollup = NULL;
3688  List *sets_data;
3689  List *empty_sets_data = NIL;
3690  List *empty_sets = NIL;
3691  ListCell *lc;
3692  ListCell *l_start = list_head(gd->rollups);
3693  AggStrategy strat = AGG_HASHED;
3694  double hashsize;
3695  double exclude_groups = 0.0;
3696 
3697  Assert(can_hash);
3698 
3699  /*
3700  * If the input is coincidentally sorted usefully (which can happen
3701  * even if is_sorted is false, since that only means that our caller
3702  * has set up the sorting for us), then save some hashtable space by
3703  * making use of that. But we need to watch out for degenerate cases:
3704  *
3705  * 1) If there are any empty grouping sets, then group_pathkeys might
3706  * be NIL if all non-empty grouping sets are unsortable. In this case,
3707  * there will be a rollup containing only empty groups, and the
3708  * pathkeys_contained_in test is vacuously true; this is ok.
3709  *
3710  * XXX: the above relies on the fact that group_pathkeys is generated
3711  * from the first rollup. If we add the ability to consider multiple
3712  * sort orders for grouping input, this assumption might fail.
3713  *
3714  * 2) If there are no empty sets and only unsortable sets, then the
3715  * rollups list will be empty (and thus l_start == NULL), and
3716  * group_pathkeys will be NIL; we must ensure that the vacuously-true
3717  * pathkeys_contained_in test doesn't cause us to crash.
3718  */
3719  if (l_start != NULL &&
3720  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
3721  {
3722  unhashed_rollup = lfirst_node(RollupData, l_start);
3723  exclude_groups = unhashed_rollup->numGroups;
3724  l_start = lnext(gd->rollups, l_start);
3725  }
3726 
3727  hashsize = estimate_hashagg_tablesize(root,
3728  path,
3729  agg_costs,
3730  dNumGroups - exclude_groups);
3731 
3732  /*
3733  * gd->rollups is empty if we have only unsortable columns to work
3734  * with. Override hash_mem in that case; otherwise, we'll rely on the
3735  * sorted-input case to generate usable mixed paths.
3736  */
3737  if (hashsize > hash_mem * 1024L && gd->rollups)
3738  return; /* nope, won't fit */
3739 
3740  /*
3741  * We need to burst the existing rollups list into individual grouping
3742  * sets and recompute a groupClause for each set.
3743  */
3744  sets_data = list_copy(gd->unsortable_sets);
3745 
3746  for_each_cell(lc, gd->rollups, l_start)
3747  {
3748  RollupData *rollup = lfirst_node(RollupData, lc);
3749 
3750  /*
3751  * If we find an unhashable rollup that's not been skipped by the
3752  * "actually sorted" check above, we can't cope; we'd need sorted
3753  * input (with a different sort order) but we can't get that here.
3754  * So bail out; we'll get a valid path from the is_sorted case
3755  * instead.
3756  *
3757  * The mere presence of empty grouping sets doesn't make a rollup
3758  * unhashable (see preprocess_grouping_sets); we handle those
3759  * specially below.
3760  */
3761  if (!rollup->hashable)
3762  return;
3763 
3764  sets_data = list_concat(sets_data, rollup->gsets_data);
3765  }
3766  foreach(lc, sets_data)
3767  {
3768  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
3769  List *gset = gs->set;
3770  RollupData *rollup;
3771 
3772  if (gset == NIL)
3773  {
3774  /* Empty grouping sets can't be hashed. */
3775  empty_sets_data = lappend(empty_sets_data, gs);
3776  empty_sets = lappend(empty_sets, NIL);
3777  }
3778  else
3779  {
3780  rollup = makeNode(RollupData);
3781 
3782  rollup->groupClause = preprocess_groupclause(root, gset);
3783  rollup->gsets_data = list_make1(gs);
3784  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
3785  rollup->gsets_data,
3786  gd->tleref_to_colnum_map);
3787  rollup->numGroups = gs->numGroups;
3788  rollup->hashable = true;
3789  rollup->is_hashed = true;
3790  new_rollups = lappend(new_rollups, rollup);
3791  }
3792  }
3793 
3794  /*
3795  * If we didn't find anything nonempty to hash, then bail. We'll
3796  * generate a path from the is_sorted case.
3797  */
3798  if (new_rollups == NIL)
3799  return;
3800 
3801  /*
3802  * If there were empty grouping sets they should have been in the
3803  * first rollup.
3804  */
3805  Assert(!unhashed_rollup || !empty_sets);
3806 
3807  if (unhashed_rollup)
3808  {
3809  new_rollups = lappend(new_rollups, unhashed_rollup);
3810  strat = AGG_MIXED;
3811  }
3812  else if (empty_sets)
3813  {
3814  RollupData *rollup = makeNode(RollupData);
3815 
3816  rollup->groupClause = NIL;
3817  rollup->gsets_data = empty_sets_data;
3818  rollup->gsets = empty_sets;
3819  rollup->numGroups = list_length(empty_sets);
3820  rollup->hashable = false;
3821  rollup->is_hashed = false;
3822  new_rollups = lappend(new_rollups, rollup);
3823  strat = AGG_MIXED;
3824  }
3825 
3826  add_path(grouped_rel, (Path *)
3827  create_groupingsets_path(root,
3828  grouped_rel,
3829  path,
3830  (List *) parse->havingQual,
3831  strat,
3832  new_rollups,
3833  agg_costs,
3834  dNumGroups));
3835  return;
3836  }
3837 
3838  /*
3839  * If we have sorted input but nothing we can do with it, bail.
3840  */
3841  if (list_length(gd->rollups) == 0)
3842  return;
3843 
3844  /*
3845  * Given sorted input, we try and make two paths: one sorted and one mixed
3846  * sort/hash. (We need to try both because hashagg might be disabled, or
3847  * some columns might not be sortable.)
3848  *
3849  * can_hash is passed in as false if some obstacle elsewhere (such as
3850  * ordered aggs) means that we shouldn't consider hashing at all.
3851  */
3852  if (can_hash && gd->any_hashable)
3853  {
3854  List *rollups = NIL;
3855  List *hash_sets = list_copy(gd->unsortable_sets);
3856  double availspace = (hash_mem * 1024.0);
3857  ListCell *lc;
3858 
3859  /*
3860  * Account first for space needed for groups we can't sort at all.
3861  */
3862  availspace -= estimate_hashagg_tablesize(root,
3863  path,
3864  agg_costs,
3865  gd->dNumHashGroups);
3866 
3867  if (availspace > 0 && list_length(gd->rollups) > 1)
3868  {
3869  double scale;
3870  int num_rollups = list_length(gd->rollups);
3871  int k_capacity;
3872  int *k_weights = palloc(num_rollups * sizeof(int));
3873  Bitmapset *hash_items = NULL;
3874  int i;
3875 
3876  /*
3877  * We treat this as a knapsack problem: the knapsack capacity
3878  * represents hash_mem, the item weights are the estimated memory
3879  * usage of the hashtables needed to implement a single rollup,
3880  * and we really ought to use the cost saving as the item value;
3881  * however, currently the costs assigned to sort nodes don't
3882  * reflect the comparison costs well, and so we treat all items as
3883  * of equal value (each rollup we hash instead saves us one sort).
3884  *
3885  * To use the discrete knapsack, we need to scale the values to a
3886  * reasonably small bounded range. We choose to allow a 5% error
3887  * margin; we have no more than 4096 rollups in the worst possible
3888  * case, which with a 5% error margin will require a bit over 42MB
3889  * of workspace. (Anyone wanting to plan queries that complex had
3890  * better have the memory for it. In more reasonable cases, with
3891  * no more than a couple of dozen rollups, the memory usage will
3892  * be negligible.)
3893  *
3894  * k_capacity is naturally bounded, but we clamp the values for
3895  * scale and weight (below) to avoid overflows or underflows (or
3896  * uselessly trying to use a scale factor less than 1 byte).
3897  */
3898  scale = Max(availspace / (20.0 * num_rollups), 1.0);
3899  k_capacity = (int) floor(availspace / scale);
3900 
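/*
 * Editorial worked example (not part of planner.c): with availspace of
 * 4 MB (4194304 bytes) and 10 candidate rollups,
 *     scale      = Max(4194304 / (20.0 * 10), 1.0) = 20971.52
 *     k_capacity = floor(4194304 / 20971.52)       = 200
 * so item weights are quantized to units of roughly 20 kB, i.e. 5% of the
 * average per-rollup share of the budget, which is where the 5% error
 * margin cited above comes from.
 */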
3901  /*
3902  * We leave the first rollup out of consideration since it's the
3903  * one that matches the input sort order. We assign indexes "i"
3904  * to only those entries considered for hashing; the second loop,
3905  * below, must use the same condition.
3906  */
3907  i = 0;
3908  for_each_from(lc, gd->rollups, 1)
3909  {
3910  RollupData *rollup = lfirst_node(RollupData, lc);
3911 
3912  if (rollup->hashable)
3913  {
3914  double sz = estimate_hashagg_tablesize(root,
3915  path,
3916  agg_costs,
3917  rollup->numGroups);
3918 
3919  /*
3920  * If sz is enormous, but hash_mem (and hence scale) is
3921  * small, avoid integer overflow here.
3922  */
3923  k_weights[i] = (int) Min(floor(sz / scale),
3924  k_capacity + 1.0);
3925  ++i;
3926  }
3927  }
3928 
3929  /*
3930  * Apply knapsack algorithm; compute the set of items which
3931  * maximizes the value stored (in this case the number of sorts
3932  * saved) while keeping the total size (approximately) within
3933  * capacity.
3934  */
3935  if (i > 0)
3936  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
3937 
3938  if (!bms_is_empty(hash_items))
3939  {
3940  rollups = list_make1(linitial(gd->rollups));
3941 
3942  i = 0;
3943  for_each_from(lc, gd->rollups, 1)
3944  {
3945  RollupData *rollup = lfirst_node(RollupData, lc);
3946 
3947  if (rollup->hashable)
3948  {
3949  if (bms_is_member(i, hash_items))
3950  hash_sets = list_concat(hash_sets,
3951  rollup->gsets_data);
3952  else
3953  rollups = lappend(rollups, rollup);
3954  ++i;
3955  }
3956  else
3957  rollups = lappend(rollups, rollup);
3958  }
3959  }
3960  }
3961 
3962  if (!rollups && hash_sets)
3963  rollups = list_copy(gd->rollups);
3964 
3965  foreach(lc, hash_sets)
3966  {
3967  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
3968  RollupData *rollup = makeNode(RollupData);
3969 
3970  Assert(gs->set != NIL);
3971 
3972  rollup->groupClause = preprocess_groupclause(root, gs->set);
3973  rollup->gsets_data = list_make1(gs);
3974  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
3975  rollup->gsets_data,
3976  gd->tleref_to_colnum_map);
3977  rollup->numGroups = gs->numGroups;
3978  rollup->hashable = true;
3979  rollup->is_hashed = true;
3980  rollups = lcons(rollup, rollups);
3981  }
3982 
3983  if (rollups)
3984  {
3985  add_path(grouped_rel, (Path *)
3986  create_groupingsets_path(root,
3987  grouped_rel,
3988  path,
3989  (List *) parse->havingQual,
3990  AGG_MIXED,
3991  rollups,
3992  agg_costs,
3993  dNumGroups));
3994  }
3995  }
3996 
3997  /*
3998  * Now try the simple sorted case.
3999  */
4000  if (!gd->unsortable_sets)
4001  add_path(grouped_rel, (Path *)
4002  create_groupingsets_path(root,
4003  grouped_rel,
4004  path,
4005  (List *) parse->havingQual,
4006  AGG_SORTED,
4007  gd->rollups,
4008  agg_costs,
4009  dNumGroups));
4010 }
4011 
4012 /*
4013  * create_window_paths
4014  *
4015  * Build a new upperrel containing Paths for window-function evaluation.
4016  *
4017  * input_rel: contains the source-data Paths
4018  * input_target: result of make_window_input_target
4019  * output_target: what the topmost WindowAggPath should return
4020  * wflists: result of find_window_functions
4021  * activeWindows: result of select_active_windows
4022  *
4023  * Note: all Paths in input_rel are expected to return input_target.
4024  */
4025 static RelOptInfo *
4026 create_window_paths(PlannerInfo *root,
4027  RelOptInfo *input_rel,
4028  PathTarget *input_target,
4029  PathTarget *output_target,
4030  bool output_target_parallel_safe,
4031  WindowFuncLists *wflists,
4032  List *activeWindows)
4033 {
4034  RelOptInfo *window_rel;
4035  ListCell *lc;
4036 
4037  /* For now, do all work in the (WINDOW, NULL) upperrel */
4038  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4039 
4040  /*
4041  * If the input relation is not parallel-safe, then the window relation
4042  * can't be parallel-safe, either. Otherwise, we need to examine the
4043  * target list and active windows for non-parallel-safe constructs.
4044  */
4045  if (input_rel->consider_parallel && output_target_parallel_safe &&
4046  is_parallel_safe(root, (Node *) activeWindows))
4047  window_rel->consider_parallel = true;
4048 
4049  /*
4050  * If the input rel belongs to a single FDW, so does the window rel.
4051  */
4052  window_rel->serverid = input_rel->serverid;
4053  window_rel->userid = input_rel->userid;
4054  window_rel->useridiscurrent = input_rel->useridiscurrent;
4055  window_rel->fdwroutine = input_rel->fdwroutine;
4056 
4057  /*
4058  * Consider computing window functions starting from the existing
4059  * cheapest-total path (which will likely require a sort) as well as any
4060  * existing paths that satisfy or partially satisfy root->window_pathkeys.
4061  */
4062  foreach(lc, input_rel->pathlist)
4063  {
4064  Path *path = (Path *) lfirst(lc);
4065  int presorted_keys;
4066 
4067  if (path == input_rel->cheapest_total_path ||
4068  pathkeys_count_contained_in(root->window_pathkeys, path->pathkeys,
4069  &presorted_keys) ||
4070  presorted_keys > 0)
4071  create_one_window_path(root,
4072  window_rel,
4073  path,
4074  input_target,
4075  output_target,
4076  wflists,
4077  activeWindows);
4078  }
4079 
4080  /*
4081  * If there is an FDW that's responsible for all baserels of the query,
4082  * let it consider adding ForeignPaths.
4083  */
4084  if (window_rel->fdwroutine &&
4085  window_rel->fdwroutine->GetForeignUpperPaths)
4086  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4087  input_rel, window_rel,
4088  NULL);
4089 
4090  /* Let extensions possibly add some more paths */
4091  if (create_upper_paths_hook)
4092  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4093  input_rel, window_rel, NULL);
4094 
4095  /* Now choose the best path(s) */
4096  set_cheapest(window_rel);
4097 
4098  return window_rel;
4099 }
4100 
4101 /*
4102  * Stack window-function implementation steps atop the given Path, and
4103  * add the result to window_rel.
4104  *
4105  * window_rel: upperrel to contain result
4106  * path: input Path to use (must return input_target)
4107  * input_target: result of make_window_input_target
4108  * output_target: what the topmost WindowAggPath should return
4109  * wflists: result of find_window_functions
4110  * activeWindows: result of select_active_windows
4111  */
4112 static void
4113 create_one_window_path(PlannerInfo *root,
4114  RelOptInfo *window_rel,
4115  Path *path,
4116  PathTarget *input_target,
4117  PathTarget *output_target,
4118  WindowFuncLists *wflists,
4119  List *activeWindows)
4120 {
4121  PathTarget *window_target;
4122  ListCell *l;
4123 
4124  /*
4125  * Since each window clause could require a different sort order, we stack
4126  * up a WindowAgg node for each clause, with sort steps between them as
4127  * needed. (We assume that select_active_windows chose a good order for
4128  * executing the clauses in.)
4129  *
4130  * input_target should contain all Vars and Aggs needed for the result.
4131  * (In some cases we wouldn't need to propagate all of these all the way
4132  * to the top, since they might only be needed as inputs to WindowFuncs.
4133  * It's probably not worth trying to optimize that though.) It must also
4134  * contain all window partitioning and sorting expressions, to ensure
4135  * they're computed only once at the bottom of the stack (that's critical
4136  * for volatile functions). As we climb up the stack, we'll add outputs
4137  * for the WindowFuncs computed at each level.
4138  */
4139  window_target = input_target;
4140 
4141  foreach(l, activeWindows)
4142  {
4143  WindowClause *wc = lfirst_node(WindowClause, l);
4144  List *window_pathkeys;
4145  int presorted_keys;
4146  bool is_sorted;
4147 
4148  window_pathkeys = make_pathkeys_for_window(root,
4149  wc,
4150  root->processed_tlist);
4151 
4152  is_sorted = pathkeys_count_contained_in(window_pathkeys,
4153  path->pathkeys,
4154  &presorted_keys);
4155 
4156  /* Sort if necessary */
4157  if (!is_sorted)
4158  {
4159  /*
4160  * No presorted keys or incremental sort disabled, just perform a
4161  * complete sort.
4162  */
4163  if (presorted_keys == 0 || !enable_incremental_sort)
4164  path = (Path *) create_sort_path(root, window_rel,
4165  path,
4166  window_pathkeys,
4167  -1.0);
4168  else
4169  {
4170  /*
4171  * Since we have presorted keys and incremental sort is
4172  * enabled, just use incremental sort.
4173  */
4174  path = (Path *) create_incremental_sort_path(root,
4175  window_rel,
4176  path,
4177  window_pathkeys,
4178  presorted_keys,
4179  -1.0);
4180  }
4181  }
4182 
4183  if (lnext(activeWindows, l))
4184  {
4185  /*
4186  * Add the current WindowFuncs to the output target for this
4187  * intermediate WindowAggPath. We must copy window_target to
4188  * avoid changing the previous path's target.
4189  *
4190  * Note: a WindowFunc adds nothing to the target's eval costs; but
4191  * we do need to account for the increase in tlist width.
4192  */
4193  ListCell *lc2;
4194 
4195  window_target = copy_pathtarget(window_target);
4196  foreach(lc2, wflists->windowFuncs[wc->winref])
4197  {
4198  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4199 
4200  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4201  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4202  }
4203  }
4204  else
4205  {
4206  /* Install the goal target in the topmost WindowAgg */
4207  window_target = output_target;
4208  }
4209 
4210  path = (Path *)
4211  create_windowagg_path(root, window_rel, path, window_target,
4212  wflists->windowFuncs[wc->winref],
4213  wc);
4214  }
4215 
4216  add_path(window_rel, path);
4217 }
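/*
 * Editorial illustration (not part of planner.c): with two active windows,
 * say w1 = (ORDER BY b) and w2 = (ORDER BY c), the loop above stacks
 *     WindowAgg (w2, target = output_target)
 *       -> Sort (by c)
 *            -> WindowAgg (w1, target = input_target plus w1's WindowFuncs)
 *                 -> Sort (by b)
 *                      -> input path
 * assuming the input satisfies neither ordering; only the topmost WindowAgg
 * computes the final target list.
 */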
4218 
4219 /*
4220  * create_distinct_paths
4221  *
4222  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4223  *
4224  * input_rel: contains the source-data Paths
4225  *
4226  * Note: input paths should already compute the desired pathtarget, since
4227  * Sort/Unique won't project anything.
4228  */
4229 static RelOptInfo *
4230 create_distinct_paths(PlannerInfo *root,
4231  RelOptInfo *input_rel)
4232 {
4233  Query *parse = root->parse;
4234  Path *cheapest_input_path = input_rel->cheapest_total_path;
4235  RelOptInfo *distinct_rel;
4236  double numDistinctRows;
4237  bool allow_hash;
4238  Path *path;
4239  ListCell *lc;
4240 
4241  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4242  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4243 
4244  /*
4245  * We don't compute anything at this level, so distinct_rel will be
4246  * parallel-safe if the input rel is parallel-safe. In particular, if
4247  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4248  * output those expressions, and will not be parallel-safe unless those
4249  * expressions are parallel-safe.
4250  */
4251  distinct_rel->consider_parallel = input_rel->consider_parallel;
4252 
4253  /*
4254  * If the input rel belongs to a single FDW, so does the distinct_rel.
4255  */
4256  distinct_rel->serverid = input_rel->serverid;
4257  distinct_rel->userid = input_rel->userid;
4258  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4259  distinct_rel->fdwroutine = input_rel->fdwroutine;
4260 
4261  /* Estimate number of distinct rows there will be */
4262  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4263  root->hasHavingQual)
4264  {
4265  /*
4266  * If there was grouping or aggregation, use the number of input rows
4267  * as the estimated number of DISTINCT rows (ie, assume the input is
4268  * already mostly unique).
4269  */
4270  numDistinctRows = cheapest_input_path->rows;
4271  }
4272  else
4273  {
4274  /*
4275  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4276  */
4277  List *distinctExprs;
4278 
4279  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4280  parse->targetList);
4281  numDistinctRows = estimate_num_groups(root, distinctExprs,
4282  cheapest_input_path->rows,
4283  NULL, NULL);
4284  }
4285 
4286  /*
4287  * Consider sort-based implementations of DISTINCT, if possible.
4288  */
4289  if (grouping_is_sortable(parse->distinctClause))
4290  {
4291  /*
4292  * First, if we have any adequately-presorted paths, just stick a
4293  * Unique node on those. Then consider doing an explicit sort of the
4294  * cheapest input path and Unique'ing that.
4295  *
4296  * When we have DISTINCT ON, we must sort by the more rigorous of
4297  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4298  * Also, if we do have to do an explicit sort, we might as well use
4299  * the more rigorous ordering to avoid a second sort later. (Note
4300  * that the parser will have ensured that one clause is a prefix of
4301  * the other.)
4302  */
4303  List *needed_pathkeys;
4304 
4305  if (parse->hasDistinctOn &&
4306  list_length(root->distinct_pathkeys) <
4307  list_length(root->sort_pathkeys))
4308  needed_pathkeys = root->sort_pathkeys;
4309  else
4310  needed_pathkeys = root->distinct_pathkeys;
4311 
4312  foreach(lc, input_rel->pathlist)
4313  {
4314  Path *path = (Path *) lfirst(lc);
4315 
4316  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4317  {
4318  add_path(distinct_rel, (Path *)
4319  create_upper_unique_path(root, distinct_rel,
4320  path,
4321  list_length(root->distinct_pathkeys),
4322  numDistinctRows));
4323  }
4324  }
4325 
4326  /* For explicit-sort case, always use the more rigorous clause */
4327  if (list_length(root->distinct_pathkeys) <
4328  list_length(root->sort_pathkeys))
4329  {
4330  needed_pathkeys = root->sort_pathkeys;
4331  /* Assert checks that parser didn't mess up... */
4332  Assert(pathkeys_contained_in(root->distinct_pathkeys,
4333  needed_pathkeys));
4334  }
4335  else
4336  needed_pathkeys = root->distinct_pathkeys;
4337 
4338  path = cheapest_input_path;
4339  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4340  path = (Path *) create_sort_path(root, distinct_rel,
4341  path,
4342  needed_pathkeys,
4343  -1.0);
4344 
4345  add_path(distinct_rel, (Path *)
4346  create_upper_unique_path(root, distinct_rel,
4347  path,
4348  list_length(root->distinct_pathkeys),
4349  numDistinctRows));
4350  }
4351 
4352  /*
4353  * Consider hash-based implementations of DISTINCT, if possible.
4354  *
4355  * If we were not able to make any other types of path, we *must* hash or
4356  * die trying. If we do have other choices, there are two things that
4357  * should prevent selection of hashing: if the query uses DISTINCT ON
4358  * (because it won't really have the expected behavior if we hash), or if
4359  * enable_hashagg is off.
4360  *
4361  * Note: grouping_is_hashable() is much more expensive to check than the
4362  * other gating conditions, so we want to do it last.
4363  */
4364  if (distinct_rel->pathlist == NIL)
4365  allow_hash = true; /* we have no alternatives */
4366  else if (parse->hasDistinctOn || !enable_hashagg)
4367  allow_hash = false; /* policy-based decision not to hash */
4368  else
4369  allow_hash = true; /* default */
4370 
4371  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4372  {
4373  /* Generate hashed aggregate path --- no sort needed */
4374  add_path(distinct_rel, (Path *)
4375  create_agg_path(root,
4376  distinct_rel,
4377  cheapest_input_path,
4378  cheapest_input_path->pathtarget,
4379  AGG_HASHED,
4380  AGGSPLIT_SIMPLE,
4381  parse->distinctClause,
4382  NIL,
4383  NULL,
4384  numDistinctRows));
4385  }
4386 
4387  /* Give a helpful error if we failed to find any implementation */
4388  if (distinct_rel->pathlist == NIL)
4389  ereport(ERROR,
4390  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4391  errmsg("could not implement DISTINCT"),
4392  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4393 
4394  /*
4395  * If there is an FDW that's responsible for all baserels of the query,
4396  * let it consider adding ForeignPaths.
4397  */
4398  if (distinct_rel->fdwroutine &&
4399  distinct_rel->fdwroutine->GetForeignUpperPaths)
4400  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4401  input_rel, distinct_rel,
4402  NULL);
4403 
4404  /* Let extensions possibly add some more paths */
4405  if (create_upper_paths_hook)
4406  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4407  input_rel, distinct_rel, NULL);
4408 
4409  /* Now choose the best path(s) */
4410  set_cheapest(distinct_rel);
4411 
4412  return distinct_rel;
4413 }
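/*
 * Editorial illustration (not part of planner.c): for a hypothetical
 *     SELECT DISTINCT ON (a) a, b FROM tab ORDER BY a, b;
 * hashing is not considered, since a sort-based path exists and
 * parse->hasDistinctOn rules hashing out (a hashed implementation would not
 * preserve which row of each group is kept); the sort-based branch sorts by
 * the more rigorous of the DISTINCT and ORDER BY keys (here a, b) and puts
 * a Unique node on top.
 */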
4414 
4415 /*
4416  * create_ordered_paths
4417  *
4418  * Build a new upperrel containing Paths for ORDER BY evaluation.
4419  *
4420  * All paths in the result must satisfy the ORDER BY ordering.
4421  * The only new paths we need consider are an explicit full sort
4422  * and incremental sort on the cheapest-total existing path.
4423  *
4424  * input_rel: contains the source-data Paths
4425  * target: the output tlist the result Paths must emit
4426  * limit_tuples: estimated bound on the number of output tuples,
4427  * or -1 if no LIMIT or couldn't estimate
4428  *
4429  * XXX This only looks at sort_pathkeys. I wonder if it needs to look at the
4430  * other pathkeys (grouping, ...) like generate_useful_gather_paths.
4431  */
4432 static RelOptInfo *
4433 create_ordered_paths(PlannerInfo *root,
4434  RelOptInfo *input_rel,
4435  PathTarget *target,
4436  bool target_parallel_safe,
4437  double limit_tuples)
4438 {
4439  Path *cheapest_input_path = input_rel->cheapest_total_path;
4440  RelOptInfo *ordered_rel;
4441  ListCell *lc;
4442 
4443  /* For now, do all work in the (ORDERED, NULL) upperrel */
4444  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4445 
4446  /*
4447  * If the input relation is not parallel-safe, then the ordered relation
4448  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4449  * target list is parallel-safe.
4450  */
4451  if (input_rel->consider_parallel && target_parallel_safe)
4452  ordered_rel->consider_parallel = true;
4453 
4454  /*
4455  * If the input rel belongs to a single FDW, so does the ordered_rel.
4456  */
4457  ordered_rel->serverid = input_rel->serverid;
4458  ordered_rel->userid = input_rel->userid;
4459  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4460  ordered_rel->fdwroutine = input_rel->fdwroutine;
4461 
4462  foreach(lc, input_rel->pathlist)
4463  {
4464  Path *input_path = (Path *) lfirst(lc);
4465  Path *sorted_path = input_path;
4466  bool is_sorted;
4467  int presorted_keys;
4468 
4469  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
4470  input_path->pathkeys, &presorted_keys);
4471 
4472  if (is_sorted)
4473  {
4474  /* Use the input path as is, but add a projection step if needed */
4475  if (sorted_path->pathtarget != target)
4476  sorted_path = apply_projection_to_path(root, ordered_rel,
4477  sorted_path, target);
4478 
4479  add_path(ordered_rel, sorted_path);
4480  }
4481  else
4482  {
4483  /*
4484  * Try adding an explicit sort, but only to the cheapest total
4485  * path since a full sort should generally add the same cost to
4486  * all paths.
4487  */
4488  if (input_path == cheapest_input_path)
4489  {
4490  /*
4491  * Sort the cheapest input path. An explicit sort here can
4492  * take advantage of LIMIT.
4493  */
4494  sorted_path = (Path *) create_sort_path(root,
4495  ordered_rel,
4496  input_path,
4497  root->sort_pathkeys,
4498  limit_tuples);
4499  /* Add projection step if needed */
4500  if (sorted_path->pathtarget != target)
4501  sorted_path = apply_projection_to_path(root, ordered_rel,
4502  sorted_path, target);
4503 
4504  add_path(ordered_rel, sorted_path);
4505  }
4506 
4507  /*
4508  * If incremental sort is enabled, then try it as well. Unlike
4509  * with regular sorts, we can't just look at the cheapest path,
4510  * because the cost of incremental sort depends on how well
4511  * presorted the path is. Additionally incremental sort may enable
4512  * a cheaper startup path to win out despite higher total cost.
4513  */
4514  if (!enable_incremental_sort)
4515  continue;
4516 
4517  /* Likewise, if the path can't be used for incremental sort. */
4518  if (!presorted_keys)
4519  continue;
4520 
4521  /* Also consider incremental sort. */
4522  sorted_path = (Path *) create_incremental_sort_path(root,
4523  ordered_rel,
4524  input_path,
4525  root->sort_pathkeys,
4526  presorted_keys,
4527  limit_tuples);
4528 
4529  /* Add projection step if needed */
4530  if (sorted_path->pathtarget != target)
4531  sorted_path = apply_projection_to_path(root, ordered_rel,
4532  sorted_path, target);
4533 
4534  add_path(ordered_rel, sorted_path);
4535  }
4536  }
4537 
4538  /*
4539  * generate_gather_paths() will have already generated a simple Gather
4540  * path for the best parallel path, if any, and the loop above will have
4541  * considered sorting it. Similarly, generate_gather_paths() will also
4542  * have generated order-preserving Gather Merge plans which can be used
4543  * without sorting if they happen to match the sort_pathkeys, and the loop
4544  * above will have handled those as well. However, there's one more
4545  * possibility: it may make sense to sort the cheapest partial path
4546  * according to the required output order and then use Gather Merge.
4547  */
4548  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
4549  input_rel->partial_pathlist != NIL)
4550  {
4551  Path *cheapest_partial_path;
4552 
4553  cheapest_partial_path = linitial(input_rel->partial_pathlist);
4554 
4555  /*
4556  * If cheapest partial path doesn't need a sort, this is redundant
4557  * with what's already been tried.
4558  */
4559  if (!pathkeys_contained_in(root->sort_pathkeys,
4560  cheapest_partial_path->pathkeys))
4561  {
4562  Path *path;
4563  double total_groups;
4564 
4565  path = (Path *) create_sort_path(root,
4566  ordered_rel,
4567  cheapest_partial_path,
4568  root->sort_pathkeys,
4569  limit_tuples);
4570 
4571  total_groups = cheapest_partial_path->rows *
4572  cheapest_partial_path->parallel_workers;
4573  path = (Path *)
4574  create_gather_merge_path(root, ordered_rel,
4575  path,
4576  path->pathtarget,
4577  root->sort_pathkeys, NULL,
4578  &total_groups);
4579 
4580  /* Add projection step if needed */
4581  if (path->pathtarget != target)
4582  path = apply_projection_to_path(root, ordered_rel,
4583  path, target);
4584 
4585  add_path(ordered_rel, path);
4586  }
4587 
4588  /*
4589  * Consider incremental sort with a gather merge on partial paths.
4590  *
4591  * We can also skip the entire loop when we only have a single-item
4592  * sort_pathkeys because then we can't possibly have a presorted
4593  * prefix of the list without having the list be fully sorted.
4594  */
4595  if (enable_incremental_sort && list_length(root->sort_pathkeys) > 1)
4596  {
4597  ListCell *lc;
4598 
4599  foreach(lc, input_rel->partial_pathlist)
4600  {
4601  Path *input_path = (Path *) lfirst(lc);
4602  Path *sorted_path;
4603  bool is_sorted;
4604  int presorted_keys;
4605  double total_groups;
4606 
4607  /*
4608  * We don't care if this is the cheapest partial path - we
4609  * can't simply skip it, because it may be partially sorted in
4610  * which case we want to consider adding incremental sort
4611  * (instead of full sort, which is what happens above).
4612  */
4613 
4614  is_sorted = pathkeys_count_contained_in(root->sort_pathkeys,
4615  input_path->pathkeys,
4616  &presorted_keys);
4617 
4618  /* No point in adding incremental sort on fully sorted paths. */
4619  if (is_sorted)
4620  continue;
4621 
4622  if (presorted_keys == 0)
4623  continue;
4624 
4625  /* Since we have presorted keys, consider incremental sort. */
4626  sorted_path = (Path *) create_incremental_sort_path(root,
4627  ordered_rel,
4628  input_path,
4629  root->sort_pathkeys,
4630  presorted_keys,
4631  limit_tuples);
4632  total_groups = input_path->rows *
4633  input_path->parallel_workers;
4634  sorted_path = (Path *)
4635  create_gather_merge_path(root, ordered_rel,
4636  sorted_path,
4637  sorted_path->pathtarget,
4638  root->sort_pathkeys, NULL,
4639  &total_groups);
4640 
4641  /* Add projection step if needed */
4642  if (sorted_path->pathtarget != target)
4643  sorted_path = apply_projection_to_path(root, ordered_rel,
4644  sorted_path, target);
4645 
4646  add_path(ordered_rel, sorted_path);
4647  }
4648  }
4649  }
4650 
4651  /*
4652  * If there is an FDW that's responsible for all baserels of the query,
4653  * let it consider adding ForeignPaths.
4654  */
4655  if (ordered_rel->fdwroutine &&
4656  ordered_rel->fdwroutine->GetForeignUpperPaths)
4657  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
4658  input_rel, ordered_rel,
4659  NULL);
4660 
4661  /* Let extensions possibly add some more paths */
4662  if (create_upper_paths_hook)
4663  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
4664  input_rel, ordered_rel, NULL);
4665 
4666  /*
4667  * No need to bother with set_cheapest here; grouping_planner does not
4668  * need us to do it.
4669  */
4670  Assert(ordered_rel->pathlist != NIL);
4671 
4672  return ordered_rel;
4673 }
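/*
 * Editorial illustration (not part of planner.c): for a parallel ORDER BY,
 * the block above can produce a plan of the shape
 *     Gather Merge
 *       -> Sort (by the ORDER BY keys)
 *            -> cheapest partial path (e.g. Parallel Seq Scan)
 * i.e. each worker sorts its share of the rows and Gather Merge combines
 * the streams while preserving the ordering.
 */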
4674 
4675 
4676 /*
4677  * make_group_input_target
4678  * Generate appropriate PathTarget for initial input to grouping nodes.
4679  *
4680  * If there is grouping or aggregation, the scan/join subplan cannot emit
4681  * the query's final targetlist; for example, it certainly can't emit any
4682  * aggregate function calls. This routine generates the correct target
4683  * for the scan/join subplan.
4684  *
4685  * The query target list passed from the parser already contains entries
4686  * for all ORDER BY and GROUP BY expressions, but it will not have entries
4687  * for variables used only in HAVING clauses; so we need to add those
4688  * variables to the subplan target list. Also, we flatten all expressions
4689  * except GROUP BY items into their component variables; other expressions
4690  * will be computed by the upper plan nodes rather than by the subplan.
4691  * For example, given a query like
4692  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
4693  * we want to pass this targetlist to the subplan:
4694  * a+b,c,d
4695  * where the a+b target will be used by the Sort/Group steps, and the
4696  * other targets will be used for computing the final results.
4697  *
4698  * 'final_target' is the query's final target list (in PathTarget form)
4699  *
4700  * The result is the PathTarget to be computed by the Paths returned from
4701  * query_planner().
4702  */
4703 static PathTarget *
4704 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
4705 {
4706  Query *parse = root->parse;
4707  PathTarget *input_target;
4708  List *non_group_cols;
4709  List *non_group_vars;
4710  int i;
4711  ListCell *lc;
4712 
4713  /*
4714  * We must build a target containing all grouping columns, plus any other
4715  * Vars mentioned in the query's targetlist and HAVING qual.
4716  */
4717  input_target = create_empty_pathtarget();
4718  non_group_cols = NIL;
4719 
4720  i = 0;
4721  foreach(lc, final_target->exprs)
4722  {
4723  Expr *expr = (Expr *) lfirst(lc);
4724  Index sgref = get_pathtarget_sortgroupref(final_target, i);
4725 
4726  if (sgref && parse->groupClause &&
4727  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
4728  {
4729  /*
4730  * It's a grouping column, so add it to the input target as-is.
4731  */
4732  add_column_to_pathtarget(input_target, expr, sgref);
4733  }
4734  else
4735  {
4736  /*
4737  * Non-grouping column, so just remember the expression for later
4738  * call to pull_var_clause.
4739  */
4740  non_group_cols = lappend(non_group_cols, expr);
4741  }
4742 
4743  i++;
4744  }
4745 
4746  /*
4747  * If there's a HAVING clause, we'll need the Vars it uses, too.
4748  */
4749  if (parse->havingQual)
4750  non_group_cols = lappend(non_group_cols, parse->havingQual);
4751 
4752  /*
4753  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
4754  * add them to the input target if not already present. (A Var used
4755  * directly as a GROUP BY item will be present already.) Note this
4756  * includes Vars used in resjunk items, so we are covering the needs of
4757  * ORDER BY and window specifications. Vars used within Aggrefs and
4758  * WindowFuncs will be pulled out here, too.
4759  */
4760  non_group_vars = pull_var_clause((Node *) non_group_cols,
4761  PVC_RECURSE_AGGREGATES |
4762  PVC_RECURSE_WINDOWFUNCS |
4763  PVC_INCLUDE_PLACEHOLDERS);
4764  add_new_columns_to_pathtarget(input_target, non_group_vars);
4765 
4766  /* clean up cruft */
4767  list_free(non_group_vars);
4768  list_free(non_group_cols);
4769 
4770  /* XXX this causes some redundant cost calculation ... */
4771  return set_pathtarget_cost_width(root, input_target);
4772 }
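/*
 * Editorial illustration (not part of planner.c), extending the example in
 * the header comment above: for a hypothetical
 *     SELECT a+b, SUM(c+d) FROM tab GROUP BY a+b HAVING MIN(e) > 0;
 * the grouping expression a+b is kept as-is, while pull_var_clause() digs
 * c and d out of the aggregate argument and e out of the HAVING clause, so
 * the scan/join target becomes a+b, c, d, e.
 */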
4773 
4774 /*
4775  * make_partial_grouping_target
4776  * Generate appropriate PathTarget for output of partial aggregate
4777  * (or partial grouping, if there are no aggregates) nodes.
4778  *
4779  * A partial aggregation node needs to emit all the same aggregates that
4780  * a regular aggregation node would, plus any aggregates used in HAVING;
4781  * except that the Aggref nodes should be marked as partial aggregates.
4782  *
4783  * In addition, we'd better emit any Vars and PlaceHolderVars that are
4784  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
4785  * these would be Vars that are grouped by or used in grouping expressions.)
4786  *
4787  * grouping_target is the tlist to be emitted by the topmost aggregation step.
4788  * havingQual represents the HAVING clause.
4789  */
4790 static PathTarget *
4791 make_partial_grouping_target(PlannerInfo *root,
4792  PathTarget *grouping_target,
4793  Node *havingQual)
4794 {
4795  Query *parse = root->parse;
4796  PathTarget *partial_target;
4797  List *non_group_cols;
4798  List *non_group_exprs;
4799  int i;
4800  ListCell *lc;
4801 
4802  partial_target = create_empty_pathtarget();
4803  non_group_cols = NIL;
4804 
4805  i = 0;
4806  foreach(lc, grouping_target->exprs)
4807  {
4808  Expr *expr = (Expr *) lfirst(lc);
4809  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
4810 
4811  if (sgref && parse->groupClause &&
4812  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
4813  {
4814  /*
4815  * It's a grouping column, so add it to the partial_target as-is.
4816  * (This allows the upper agg step to repeat the grouping calcs.)
4817  */
4818  add_column_to_pathtarget(partial_target, expr, sgref);
4819  }
4820  else
4821  {
4822  /*
4823  * Non-grouping column, so just remember the expression for later
4824  * call to pull_var_clause.
4825  */
4826  non_group_cols = lappend(non_group_cols, expr);
4827  }
4828 
4829  i++;
4830  }
4831 
4832  /*
4833  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
4834  */
4835  if (havingQual)
4836  non_group_cols = lappend(non_group_cols, havingQual);
4837 
4838  /*
4839  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
4840  * non-group cols (plus HAVING), and add them to the partial_target if not
4841  * already present. (An expression used directly as a GROUP BY item will
4842  * be present already.) Note this includes Vars used in resjunk items, so
4843  * we are covering the needs of ORDER BY and window specifications.
4844  */
4845  non_group_exprs = pull_var_clause((Node *) non_group_cols,
4846  PVC_INCLUDE_AGGREGATES |
4847  PVC_RECURSE_WINDOWFUNCS |
4848  PVC_INCLUDE_PLACEHOLDERS);
4849 
4850  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
4851 
4852  /*
4853  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
4854  * are at the top level of the target list, so we can just scan the list
4855  * rather than recursing through the expression trees.
4856  */
4857  foreach(lc, partial_target->exprs)
4858  {
4859  Aggref *aggref = (Aggref *) lfirst(lc);
4860 
4861  if (IsA(aggref, Aggref))
4862  {
4863  Aggref *newaggref;
4864 
4865  /*
4866  * We shouldn't need to copy the substructure of the Aggref node,
4867  * but flat-copy the node itself to avoid damaging other trees.
4868  */
4869  newaggref = makeNode(Aggref);
4870  memcpy(newaggref, aggref, sizeof(Aggref));
4871 
4872  /* For now, assume serialization is required */
4873  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
4874 
4875  lfirst(lc) = newaggref;
4876  }
4877  }
4878 
4879  /* clean up cruft */
4880  list_free(non_group_exprs);
4881  list_free(non_group_cols);
4882 
4883  /* XXX this causes some redundant cost calculation ... */
4884  return set_pathtarget_cost_width(root, partial_target);
4885 }
4886 
4887 /*
4888  * mark_partial_aggref
4889  * Adjust an Aggref to make it represent a partial-aggregation step.
4890  *
4891  * The Aggref node is modified in-place; caller must do any copying required.
4892  */
4893 void
4894 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
4895 {
4896  /* aggtranstype should be computed by this point */
4897  Assert(OidIsValid(agg->aggtranstype));
4898  /* ... but aggsplit should still be as the parser left it */
4899  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
4900 
4901  /* Mark the Aggref with the intended partial-aggregation mode */
4902  agg->aggsplit = aggsplit;
4903 
4904  /*
4905  * Adjust result type if needed. Normally, a partial aggregate returns
4906  * the aggregate's transition type; but if that's INTERNAL and we're
4907  * serializing, it returns BYTEA instead.
4908  */
4909  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
4910  {
4911  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
4912  agg->aggtype = BYTEAOID;
4913  else
4914  agg->aggtype = agg->aggtranstype;
4915  }
4916 }
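/*
 * Editorial note (not part of planner.c): as a minimal illustration, an
 * aggregate whose transition type is INTERNAL (e.g. sum(bigint) in a stock
 * installation) cannot pass its raw transition state between processes, so
 * under AGGSPLIT_INITIAL_SERIAL the partial Aggref's result type becomes
 * BYTEA, carrying the serialized state; aggregates with ordinary transition
 * types simply return their transition type from the partial step.
 */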
4917 
4918 /*
4919  * postprocess_setop_tlist
4920  * Fix up targetlist returned by plan_set_operations().
4921  *
4922  * We need to transpose sort key info from the orig_tlist into new_tlist.
4923  * NOTE: this would not be good enough if we supported resjunk sort keys
4924  * for results of set operations --- then, we'd need to project a whole
4925  * new tlist to evaluate the resjunk columns. For now, just ereport if we
4926  * find any resjunk columns in orig_tlist.
4927  */
4928 static List *
4929 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
4930 {
4931  ListCell *l;
4932  ListCell *orig_tlist_item = list_head(orig_tlist);
4933 
4934  foreach(l, new_tlist)
4935  {
4936  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
4937  TargetEntry *orig_tle;
4938 
4939  /* ignore resjunk columns in setop result */
4940  if (new_tle->resjunk)
4941  continue;
4942 
4943  Assert(orig_tlist_item != NULL);
4944  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
4945  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
4946  if (orig_tle->resjunk) /* should not happen */
4947  elog(ERROR, "resjunk output columns are not implemented");
4948  Assert(new_tle->resno == orig_tle->resno);
4949  new_tle->ressortgroupref = orig_tle->ressortgroupref;
4950  }
4951  if (orig_tlist_item != NULL)
4952  elog(ERROR, "resjunk output columns are not implemented");
4953  return new_tlist;
4954 }
4955 
4956 /*
4957  * select_active_windows
4958  * Create a list of the "active" window clauses (ie, those referenced
4959  * by non-deleted WindowFuncs) in the order they are to be executed.
4960  */
4961 static List *
4962 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
4963 {
4964  List *windowClause = root->parse->windowClause;
4965  List *result = NIL;
4966  ListCell *lc;
4967  int nActive = 0;
4968  WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
4969  * list_length(windowClause));
4970 
4971  /* First, construct an array of the active windows */
4972  foreach(lc, windowClause)
4973  {
4974  WindowClause *wc = lfirst_node(WindowClause, lc);
4975 
4976  /* It's only active if wflists shows some related WindowFuncs */
4977  Assert(wc->winref <= wflists->maxWinRef);
4978  if (wflists->windowFuncs[wc->winref] == NIL)
4979  continue;
4980 
4981  actives[nActive].wc = wc; /* original clause */
4982 
4983  /*
4984  * For sorting, we want the list of partition keys followed by the
4985  * list of sort keys. But pathkeys construction will remove duplicates
4986  * between the two, so we can as well (even though we can't detect all
4987  * of the duplicates, since some may come from ECs - that might mean
4988  * we miss optimization chances here). We must, however, ensure that
4989  * the order of entries is preserved with respect to the ones we do
4990  * keep.
4991  *
4992  * partitionClause and orderClause had their own duplicates removed in
4993  * parse analysis, so we're only concerned here with removing
4994  * orderClause entries that also appear in partitionClause.
4995  */
4996  actives[nActive].uniqueOrder =
4997  list_concat_unique(list_copy(wc->partitionClause),
4998  wc->orderClause);
4999  nActive++;
5000  }
5001 
5002  /*
5003  * Sort active windows by their partitioning/ordering clauses, ignoring
5004  * any framing clauses, so that the windows that need the same sorting are
5005  * adjacent in the list. When we come to generate paths, this will avoid
5006  * inserting additional Sort nodes.
5007  *
5008  * This is how we implement a specific requirement from the SQL standard,
5009  * which says that when two or more windows are order-equivalent (i.e.
5010  * have matching partition and order clauses, even if their names or
5011  * framing clauses differ), then all peer rows must be presented in the
5012  * same order in all of them. If we allowed multiple sort nodes for such
5013  * cases, we'd risk having the peer rows end up in different orders in
5014  * equivalent windows due to sort instability. (See General Rule 4 of
5015  * <window clause> in SQL2008 - SQL2016.)
5016  *
5017  * Additionally, if the entire list of clauses of one window is a prefix
5018  * of another, put first the window with stronger sorting requirements.
5019  * This way we will first sort for the stronger window, and won't have to sort
5020  * again for the weaker one.
5021  */
5022  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5023 
5024  /* build ordered list of the original WindowClause nodes */
5025  for (int i = 0; i < nActive; i++)
5026  result = lappend(result, actives[i].wc);
5027 
5028  pfree(actives);
5029 
5030  return result;
5031 }
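/*
 * Editorial illustration (not part of planner.c): given
 *     WINDOW w1 AS (PARTITION BY a ORDER BY b),
 *            w2 AS (PARTITION BY a ORDER BY b ROWS UNBOUNDED PRECEDING),
 *            w3 AS (PARTITION BY a)
 * w1 and w2 have identical partitioning/ordering (only framing differs), so
 * they sort together and are planned adjacently; w3's clause list is a
 * prefix of the others, so the more strongly sorted windows come first and
 * the sort done for them also satisfies w3.
 */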
5032 
5033 /*
5034  * common_prefix_cmp
5035  * QSort comparison function for WindowClauseSortData
5036  *
5037  * Sort the windows by the required sorting clauses. First, compare the sort
5038  * clauses themselves. Second, if one window's clauses are a prefix of another
5039  * one's clauses, put the window with more sort clauses first.
5040  */
5041 static int
5042 common_prefix_cmp(const void *a, const void *b)
5043 {
5044  const WindowClauseSortData *wcsa = a;
5045  const WindowClauseSortData *wcsb = b;
5046  ListCell *item_a;
5047  ListCell *item_b;
5048 
5049  forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5050  {
5051  SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5052  SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5053 
5054  if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5055  return -1;
5056  else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5057  return 1;
5058  else if (sca->sortop > scb->sortop)
5059  return -1;
5060  else if (sca->sortop < scb->sortop)
5061  return 1;
5062  else if (sca->nulls_first && !scb->nulls_first)
5063  return -1;
5064  else if (!sca->nulls_first && scb->nulls_first)
5065  return 1;
5066  /* no need to compare eqop, since it is fully determined by sortop */
5067  }
5068 
5069  if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5070  return -1;
5071  else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5072  return 1;
5073 
5074  return 0;
5075 }
5076 
5077 /*
5078  * make_window_input_target
5079  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5080  *
5081  * When the query has window functions, this function computes the desired
5082  * target to be computed by the node just below the first WindowAgg.
5083  * This tlist must contain all values needed to evaluate the window functions,
5084  * compute the final target list, and perform any required final sort step.
5085  * If multiple WindowAggs are needed, each intermediate one adds its window
5086  * function results onto this base tlist; only the topmost WindowAgg computes
5087  * the actual desired target list.
5088  *
5089  * This function is much like make_group_input_target, though not quite enough
5090  * like it to share code. As in that function, we flatten most expressions
5091  * into their component variables. But we do not want to flatten window
5092  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5093  * evaluations of them, which would be bad (possibly even resulting in
5094  * inconsistent answers, if they contain volatile functions).
5095  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5096  * make_group_input_target, because we may no longer have access to the
5097  * individual Vars in them.
5098  *
5099  * Another key difference from make_group_input_target is that we don't
5100  * flatten Aggref expressions, since those are to be computed below the
5101  * window functions and just referenced like Vars above that.
5102  *
5103  * 'final_target' is the query's final target list (in PathTarget form)
5104  * 'activeWindows' is the list of active windows previously identified by
5105  * select_active_windows.
5106  *
5107  * The result is the PathTarget to be computed by the plan node immediately
5108  * below the first WindowAgg node.
5109  */
5110 static PathTarget *
5111 make_window_input_target(PlannerInfo *root,
5112  PathTarget *final_target,
5113  List *activeWindows)
5114 {
5115  Query *parse = root->parse;
5116  PathTarget *input_target;
5117  Bitmapset *sgrefs;
5118  List *flattenable_cols;
5119  List *flattenable_vars;
5120  int i;
5121  ListCell *lc;
5122 
5123  Assert(parse->hasWindowFuncs);
5124 
5125  /*
5126  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5127  * into a bitmapset for convenient reference below.
5128  */
5129  sgrefs = NULL;
5130  foreach(lc, activeWindows)
5131  {
5132  WindowClause *wc = lfirst_node(WindowClause, lc);
5133  ListCell *lc2;
5134 
5135  foreach(lc2, wc->partitionClause)
5136  {
5137  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5138 
5139  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5140  }
5141  foreach(lc2, wc->orderClause)
5142  {
5143  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5144 
5145  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5146  }
5147  }
5148 
5149  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5150  foreach(lc, parse->groupClause)
5151  {
5152  SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
5153 
5154  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5155  }
5156 
5157  /*
5158  * Construct a target containing all the non-flattenable targetlist items,
5159  * and save aside the others for a moment.
5160  */
5161  input_target = create_empty_pathtarget();
5162  flattenable_cols = NIL;
5163 
5164  i = 0;
5165  foreach(lc, final_target->exprs)
5166  {
5167  Expr *expr = (Expr *) lfirst(lc);
5168  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5169 
5170  /*
5171  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5172  * that such items can't contain window functions, so it's okay to
5173  * compute them below the WindowAgg nodes.)
5174  */
5175  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5176  {
5177  /*
5178  * Don't want to deconstruct this value, so add it to the input
5179  * target as-is.
5180  */
5181  add_column_to_pathtarget(input_target, expr, sgref);
5182  }
5183  else
5184  {
5185  /*
5186  * Column is to be flattened, so just remember the expression for
5187  * later call to pull_var_clause.
5188  */
5189  flattenable_cols = lappend(flattenable_cols, expr);
5190  }
5191 
5192  i++;
5193  }
5194 
5195  /*
5196  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5197  * add them to the input target if not already present. (Some might be
5198  * there already because they're used directly as window/group clauses.)
5199  *
5200  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5201  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5202  * at higher levels. On the other hand, we should recurse into
5203  * WindowFuncs to make sure their input expressions are available.
5204  */
5205  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5206  PVC_INCLUDE_AGGREGATES |
5207  PVC_RECURSE_WINDOWFUNCS |
5208  PVC_INCLUDE_PLACEHOLDERS);
5209  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5210 
5211  /* clean up cruft */
5212  list_free(flattenable_vars);
5213  list_free(flattenable_cols);
5214 
5215  /* XXX this causes some redundant cost calculation ... */
5216  return set_pathtarget_cost_width(root, input_target);
5217 }
5218 
5219 /*
5220  * make_pathkeys_for_window
5221  * Create a pathkeys list describing the required input ordering
5222  * for the given WindowClause.
5223  *
5224  * The required ordering is first the PARTITION keys, then the ORDER keys.
5225  * In the future we might try to implement windowing using hashing, in which
5226  * case the ordering could be relaxed, but for now we always sort.
5227  */
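 /*
  * For instance (hypothetical clause), a window declared as
  * OVER (PARTITION BY a ORDER BY b DESC) requires its input sorted by
  * (a, b DESC): partition keys first, then ordering keys, which is the
  * pathkey list built below.
  */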
5228 static List *
5229 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
5230  List *tlist)
5231 {
5232  List *window_pathkeys;
5233  List *window_sortclauses;
5234 
5235  /* Throw error if can't sort */
5236  if (!grouping_is_sortable(wc->partitionClause))
5237  ereport(ERROR,
5238  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5239  errmsg("could not implement window PARTITION BY"),
5240  errdetail("Window partitioning columns must be of sortable datatypes.")));
5241  if (!grouping_is_sortable(wc->orderClause))
5242  ereport(ERROR,
5243  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5244  errmsg("could not implement window ORDER BY"),
5245  errdetail("Window ordering columns must be of sortable datatypes.")));
5246 
5247  /* Okay, make the combined pathkeys */
5248  window_sortclauses = list_concat_copy(wc->partitionClause, wc->orderClause);
5249  window_pathkeys = make_pathkeys_for_sortclauses(root,
5250  window_sortclauses,
5251  tlist);
5252  list_free(window_sortclauses);
5253  return window_pathkeys;
5254 }
5255 
5256 /*
5257  * make_sort_input_target
5258  * Generate appropriate PathTarget for initial input to Sort step.
5259  *
5260  * If the query has ORDER BY, this function chooses the target to be computed
5261  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5262  * project) steps. This might or might not be identical to the query's final
5263  * output target.
5264  *
5265  * The main argument for keeping the sort-input tlist the same as the final
5266  * is that we avoid a separate projection node (which will be needed if
5267  * they're different, because Sort can't project). However, there are also
5268  * advantages to postponing tlist evaluation till after the Sort: it ensures
5269  * a consistent order of evaluation for any volatile functions in the tlist,
5270  * and if there's also a LIMIT, we can stop the query without ever computing
5271  * tlist functions for later rows, which is beneficial for both volatile and
5272  * expensive functions.
5273  *
5274  * Our current policy is to postpone volatile expressions till after the sort
5275  * unconditionally (assuming that that's possible, ie they are in plain tlist
5276  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5277  * postpone set-returning expressions, because running them beforehand would
5278  * bloat the sort dataset, and because it might cause unexpected output order
5279  * if the sort isn't stable. However there's a constraint on that: all SRFs
5280  * in the tlist should be evaluated at the same plan step, so that they can
5281  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5282  * mustn't postpone any SRFs. (Note that in principle that policy should
5283  * probably get applied to the group/window input targetlists too, but we
5284  * have not done that historically.) Lastly, expensive expressions are
5285  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5286  * partial evaluation of the query is possible (if neither is true, we expect
5287  * to have to evaluate the expressions for every row anyway), or if there are
5288  * any volatile or set-returning expressions (since once we've put in a
5289  * projection at all, it won't cost any more to postpone more stuff).
5290  *
5291  * Another issue that could potentially be considered here is that
5292  * evaluating tlist expressions could result in data that's either wider
5293  * or narrower than the input Vars, thus changing the volume of data that
5294  * has to go through the Sort. However, we usually have only a very bad
5295  * idea of the output width of any expression more complex than a Var,
5296  * so for now it seems too risky to try to optimize on that basis.
5297  *
5298  * Note that if we do produce a modified sort-input target, and then the
5299  * query ends up not using an explicit Sort, no particular harm is done:
5300  * we'll initially use the modified target for the preceding path nodes,
5301  * but then change them to the final target with apply_projection_to_path.
5302  * Moreover, in such a case the guarantees about evaluation order of
5303  * volatile functions still hold, since the rows are sorted already.
5304  *
5305  * This function has some things in common with make_group_input_target and
5306  * make_window_input_target, though the detailed rules for what to do are
5307  * different. We never flatten/postpone any grouping or ordering columns;
5308  * those are needed before the sort. If we do flatten a particular
5309  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5310  * computed earlier.
5311  *
5312  * 'final_target' is the query's final target list (in PathTarget form)
5313  * 'have_postponed_srfs' is an output argument, see below
5314  *
5315  * The result is the PathTarget to be computed by the plan node immediately
5316  * below the Sort step (and the Distinct step, if any). This will be
5317  * exactly final_target if we decide a projection step wouldn't be helpful.
5318  *
5319  * In addition, *have_postponed_srfs is set to true if we choose to postpone
5320  * any set-returning functions to after the Sort.
5321  */
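 /*
  * A hypothetical example of the policy above: in
  *
  *     SELECT id, random(), slow_fn(id) FROM tab ORDER BY id LIMIT 10;
  *
  * (slow_fn being some expensive user function), random() is volatile and
  * slow_fn(id) is expensive with a LIMIT present, so both are postponed.
  * The sort-input target then contains only "id", and the two function
  * calls are evaluated in a projection above the Sort, hence only for the
  * ten rows actually returned.
  */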
5322 static PathTarget *
5323 make_sort_input_target(PlannerInfo *root,
5324  PathTarget *final_target,
5325  bool *have_postponed_srfs)
5326 {
5327  Query *parse = root->parse;
5328  PathTarget *input_target;
5329  int ncols;
5330  bool *col_is_srf;
5331  bool *postpone_col;
5332  bool have_srf;
5333  bool have_volatile;
5334  bool have_expensive;
5335  bool have_srf_sortcols;
5336  bool postpone_srfs;
5337  List *postponable_cols;
5338  List *postponable_vars;
5339  int i;
5340  ListCell *lc;
5341 
5342  /* Shouldn't get here unless query has ORDER BY */
5343  Assert(parse->sortClause);
5344 
5345  *have_postponed_srfs = false; /* default result */
5346 
5347  /* Inspect tlist and collect per-column information */
5348  ncols = list_length(final_target->exprs);
5349  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5350  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5351  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5352 
5353  i = 0;
5354  foreach(lc, final_target->exprs)
5355  {
5356  Expr *expr = (Expr *) lfirst(lc);
5357 
5358  /*
5359  * If the column has a sortgroupref, assume it has to be evaluated
5360  * before sorting. Generally such columns would be ORDER BY, GROUP
5361  * BY, etc targets. One exception is columns that were removed from
5362  * GROUP BY by remove_useless_groupby_columns() ... but those would
5363  * only be Vars anyway. There don't seem to be any cases where it
5364  * would be worth the trouble to double-check.
5365  */
5366  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5367  {
5368  /*
5369  * Check for SRF or volatile functions. Check the SRF case first
5370  * because we must know whether we have any postponed SRFs.
5371  */
5372  if (parse->hasTargetSRFs &&
5373  expression_returns_set((Node *) expr))
5374  {
5375  /* We'll decide below whether these are postponable */
5376  col_is_srf[i] = true;
5377  have_srf = true;
5378  }
5379  else if (contain_volatile_functions((Node *) expr))
5380  {
5381  /* Unconditionally postpone */
5382  postpone_col[i] = true;
5383  have_volatile = true;
5384  }
5385  else
5386  {
5387  /*
5388  * Else check the cost. XXX it's annoying to have to do this
5389  * when set_pathtarget_cost_width() just did it. Refactor to
5390  * allow sharing the work?
5391  */
5392  QualCost cost;
5393 
5394  cost_qual_eval_node(&cost, (Node *) expr, root);
5395 
5396  /*
5397  * We arbitrarily define "expensive" as "more than 10X
5398  * cpu_operator_cost". Note this will take in any PL function
5399  * with default cost.
5400  */
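 /*
  * For instance, with the default cpu_operator_cost of 0.0025 the
  * threshold is a per-tuple cost of 0.025, while a PL function left at
  * its default COST of 100 contributes about 100 * cpu_operator_cost =
  * 0.25, well above it.
  */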
5401  if (cost.per_tuple > 10 * cpu_operator_cost)
5402  {
5403  postpone_col[i] = true;
5404  have_expensive = true;
5405  }
5406  }
5407  }
5408  else
5409  {
5410  /* For sortgroupref cols, just check if any contain SRFs */
5411  if (!have_srf_sortcols &&
5412  parse->hasTargetSRFs &&
5413  expression_returns_set((Node *) expr))
5414  have_srf_sortcols = true;
5415  }
5416 
5417  i++;
5418  }
5419 
5420  /*
5421  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5422  */
5423  postpone_srfs = (have_srf && !have_srf_sortcols);
5424 
5425  /*
5426  * If we don't need a post-sort projection, just return final_target.
5427  */
5428  if (!(postpone_srfs || have_volatile ||
5429  (have_expensive &&
5430  (parse->limitCount || root->tuple_fraction > 0))))
5431  return final_target;
5432 
5433  /*
5434  * Report whether the post-sort projection will contain set-returning
5435  * functions. This is important because it affects whether the Sort can
5436  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5437  * to return.
5438  */
5439  *have_postponed_srfs = postpone_srfs;
5440 
5441  /*
5442  * Construct the sort-input target, taking all non-postponable columns and
5443  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5444  * the postponable ones.
5445  */
5446  input_target = create_empty_pathtarget();
5447  postponable_cols = NIL;
5448 
5449  i = 0;
5450  foreach(lc, final_target->exprs)
5451  {
5452  Expr *expr = (Expr *) lfirst(lc);
5453 
5454  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5455  postponable_cols = lappend(postponable_cols, expr);
5456  else
5457  add_column_to_pathtarget(input_target, expr,
5458  get_pathtarget_sortgroupref(final_target, i));
5459 
5460  i++;
5461  }
5462 
5463  /*
5464  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5465  * postponable columns, and add them to the sort-input target if not
5466  * already present. (Some might be there already.) We mustn't
5467  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5468  * would be unable to recompute them.
5469  */
5470  postponable_vars = pull_var_clause((Node *) postponable_cols,
5471  PVC_INCLUDE_AGGREGATES |
5472  PVC_INCLUDE_WINDOWFUNCS |
5473  PVC_INCLUDE_PLACEHOLDERS);
5474  add_new_columns_to_pathtarget(input_target, postponable_vars);
5475 
5476  /* clean up cruft */
5477  list_free(postponable_vars);
5478  list_free(postponable_cols);
5479 
5480  /* XXX this represents even more redundant cost calculation ... */
5481  return set_pathtarget_cost_width(root, input_target);
5482 }
5483 
5484 /*
5485  * get_cheapest_fractional_path
5486  * Find the cheapest path for retrieving a specified fraction of all
5487  * the tuples expected to be returned by the given relation.
5488  *
5489  * We interpret tuple_fraction the same way as grouping_planner.
5490  *
5491  * We assume set_cheapest() has been run on the given rel.
5492  */
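 /*
  * Worked example (numbers invented): if tuple_fraction is 100 and the
  * cheapest-total path is expected to return 10000 rows, the absolute
  * count is converted below to the fraction 0.01, and we then prefer
  * whichever path is cheapest at fetching just the first 1% of the
  * output, e.g. a fast-start ordered path over one that must finish a
  * sort before returning anything.
  */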
5493 Path *
5494 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5495 {
5496  Path *best_path = rel->cheapest_total_path;
5497  ListCell *l;
5498 
5499  /* If all tuples will be retrieved, just return the cheapest-total path */
5500  if (tuple_fraction <= 0.0)
5501  return best_path;
5502 
5503  /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5504  if (tuple_fraction >= 1.0 && best_path->rows > 0)
5505  tuple_fraction /= best_path->rows;
5506 
5507  foreach(l, rel->pathlist)
5508  {
5509  Path *path = (Path *) lfirst(l);
5510 
5511  if (path == rel->cheapest_total_path ||
5512  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5513  continue;
5514 
5515  best_path = path;
5516  }
5517 
5518  return best_path;
5519 }
5520 
5521 /*
5522  * adjust_paths_for_srfs
5523  * Fix up the Paths of the given upperrel to handle tSRFs properly.
5524  *
5525  * The executor can only handle set-returning functions that appear at the
5526  * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5527  * that are not at top level, we need to split up the evaluation into multiple
5528  * plan levels in which each level satisfies this constraint. This function
5529  * modifies each Path of an upperrel that (might) compute any SRFs in its
5530  * output tlist to insert appropriate projection steps.
5531  *
5532  * The given targets and targets_contain_srfs lists are from
5533  * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5534  * target in targets.
5535  */
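 /*
  * For example, in "SELECT generate_series(1, 3) + 1" the SRF is nested
  * inside the addition, so split_pathtarget_at_srfs produces two further
  * levels: one evaluating generate_series(1, 3) (handled here with a
  * ProjectSet path) and one computing the "+ 1" on its output (handled
  * with an ordinary projection).
  */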
5536 static void
5537 adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
5538  List *targets, List *targets_contain_srfs)
5539 {
5540  ListCell *lc;
5541 
5542  Assert(list_length(targets) == list_length(targets_contain_srfs));
5543  Assert(!linitial_int(targets_contain_srfs));
5544 
5545  /* If no SRFs appear at this plan level, nothing to do */
5546  if (list_length(targets) == 1)
5547  return;
5548 
5549  /*
5550  * Stack SRF-evaluation nodes atop each path for the rel.
5551  *
5552  * In principle we should re-run set_cheapest() here to identify the
5553  * cheapest path, but it seems unlikely that adding the same tlist eval
5554  * costs to all the paths would change that, so we don't bother. Instead,
5555  * just assume that the cheapest-startup and cheapest-total paths remain
5556  * so. (There should be no parameterized paths anymore, so we needn't
5557  * worry about updating cheapest_parameterized_paths.)
5558  */
5559  foreach(lc, rel->pathlist)
5560  {
5561  Path *subpath = (Path *) lfirst(lc);
5562  Path *newpath = subpath;
5563  ListCell *lc1,
5564  *lc2;
5565 
5566  Assert(subpath->param_info == NULL);
5567  forboth(lc1, targets, lc2, targets_contain_srfs)
5568  {
5569  PathTarget *thistarget = lfirst_node(PathTarget, lc1);
5570  bool contains_srfs = (bool) lfirst_int(lc2);
5571 
5572  /* If this level doesn't contain SRFs, do regular projection */
5573  if (contains_srfs)
5574  newpath = (Path *) create_set_projection_path(root,
5575  rel,
5576  newpath,
5577  thistarget);
5578  else
5579  newpath = (Path *) apply_projection_to_path(root,
5580  rel,
5581  newpath,
5582  thistarget);
5583  }
5584  lfirst(lc) = newpath;
5585  if (subpath == rel->cheapest_startup_path)
5586  rel->cheapest_startup_path = newpath;
5587  if (subpath == rel->cheapest_total_path)
5588  rel->cheapest_total_path = newpath;
5589  }
5590 
5591  /* Likewise for partial paths, if any */
5592  foreach(lc, rel->partial_pathlist)
5593  {
5594  Path *subpath = (Path *) lfirst(lc);
5595  Path *newpath = subpath;
5596  ListCell *lc1,
5597  *lc2;
5598 
5599  Assert(subpath->param_info == NULL);
5600  forboth(lc1, targets, lc2, targets_contain_srfs)
5601  {
5602  PathTarget *thistarget = lfirst_node(PathTarget, lc1);
5603  bool contains_srfs = (bool) lfirst_int(lc2);
5604 
5605  /* If this level doesn't contain SRFs, do regular projection */
5606  if (contains_srfs)
5607  newpath = (Path *) create_set_projection_path(root,
5608  rel,
5609  newpath,
5610  thistarget);
5611  else
5612  {
5613  /* avoid apply_projection_to_path, in case of multiple refs */
5614  newpath = (Path *) create_projection_path(root,
5615  rel,
5616  newpath,
5617  thistarget);
5618  }
5619  }
5620  lfirst(lc) = newpath;
5621  }
5622 }
5623 
5624 /*
5625  * expression_planner
5626  * Perform planner's transformations on a standalone expression.
5627  *
5628  * Various utility commands need to evaluate expressions that are not part
5629  * of a plannable query. They can do so using the executor's regular
5630  * expression-execution machinery, but first the expression has to be fed
5631  * through here to transform it from parser output to something executable.
5632  *
5633  * Currently, we disallow sublinks in standalone expressions, so there's no
5634  * real "planning" involved here. (That might not always be true though.)
5635  * What we must do is run eval_const_expressions to ensure that any function
5636  * calls are converted to positional notation and function default arguments
5637  * get inserted. The fact that constant subexpressions get simplified is a
5638  * side-effect that is useful when the expression will get evaluated more than
5639  * once. Also, we must fix operator function IDs.
5640  *
5641  * This does not return any information about dependencies of the expression.
5642  * Hence callers should use the results only for the duration of the current
5643  * query. Callers that would like to cache the results for longer should use
5644  * expression_planner_with_deps, probably via the plancache.
5645  *
5646  * Note: this must not make any damaging changes to the passed-in expression
5647  * tree. (It would actually be okay to apply fix_opfuncids to it, but since
5648  * we first do an expression_tree_mutator-based walk, what is returned will
5649  * be a new node tree.) The result is constructed in the current memory
5650  * context; beware that this can leak a lot of additional stuff there, too.
5651  */
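 /*
  * A hypothetical example: a column DEFAULT written as round(1.5) + 1
  * passes through here (via build_column_default) and, both calls being
  * immutable, comes out folded to the constant 3, so it need not be
  * recomputed from scratch for each row of a multi-row INSERT.
  */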
5652 Expr *
5653 expression_planner(Expr *expr)
5654 {
5655  Node *result;
5656 
5657  /*
5658  * Convert named-argument function calls, insert default arguments and
5659  * simplify constant subexprs
5660  */
5661  result = eval_const_expressions(NULL, (Node *) expr);
5662 
5663  /* Fill in opfuncid values if missing */
5664  fix_opfuncids(result);
5665 
5666  return (Expr *) result;
5667 }
5668 
5669 /*
5670  * expression_planner_with_deps
5671  * Perform planner's transformations on a standalone expression,
5672  * returning expression dependency information along with the result.
5673  *
5674  * This is identical to expression_planner() except that it also returns
5675  * information about possible dependencies of the expression, ie identities of
5676  * objects whose definitions affect the result. As in a PlannedStmt, these
5677  * are expressed as a list of relation Oids and a list of PlanInvalItems.
5678  */
5679 Expr *
5680 expression_planner_with_deps(Expr *expr,
5681  List **relationOids,
5682  List **invalItems)
5683 {
5684  Node *result;
5685  PlannerGlobal glob;
5686  PlannerInfo root;
5687 
5688  /* Make up dummy planner state so we can use setrefs machinery */
5689  MemSet(&glob, 0, sizeof(glob));
5690  glob.type = T_PlannerGlobal;
5691  glob.relationOids = NIL;
5692  glob.invalItems = NIL;
5693 
5694  MemSet(&root, 0, sizeof(root));
5695  root.type = T_PlannerInfo;
5696  root.glob = &glob;
5697 
5698  /*
5699  * Convert named-argument function calls, insert default arguments and
5700  * simplify constant subexprs. Collect identities of inlined functions
5701  * and elided domains, too.
5702  */
5703  result = eval_const_expressions(&root, (Node *) expr);
5704 
5705  /* Fill in opfuncid values if missing */
5706  fix_opfuncids(result);
5707 
5708  /*
5709  * Now walk the finished expression to find anything else we ought to
5710  * record as an expression dependency.
5711  */
5712  (void) extract_query_dependencies_walker(result, &root);
5713 
5714  *relationOids = glob.relationOids;
5715  *invalItems = glob.invalItems;
5716 
5717  return (Expr *) result;
5718 }
5719 
5720 
5721 /*
5722  * plan_cluster_use_sort
5723  * Use the planner to decide how CLUSTER should implement sorting
5724  *
5725  * tableOid is the OID of a table to be clustered on its index indexOid
5726  * (which is already known to be a btree index). Decide whether it's
5727  * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
5728  * Return true to use sorting, false to use an indexscan.
5729  *
5730  * Note: caller had better already hold some type of lock on the table.
5731  */
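 /*
  * In essence this is a straight cost comparison on a throwaway planner
  * state: the estimated cost of a seqscan plus explicit sort (charging
  * extra for re-evaluating any index expressions during the sort) versus
  * a full-table index scan.  Typically the index scan only wins when the
  * heap is already well correlated with the index ordering, since
  * otherwise its random access costs dominate.
  */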
5732 bool
5733 plan_cluster_use_sort(Oid tableOid, Oid indexOid)
5734 {
5735  PlannerInfo *root;
5736  Query *query;
5737  PlannerGlobal *glob;
5738  RangeTblEntry *rte;
5739  RelOptInfo *rel;
5740  IndexOptInfo *indexInfo;
5741  QualCost indexExprCost;
5742  Cost comparisonCost;
5743  Path *seqScanPath;
5744  Path seqScanAndSortPath;
5745  IndexPath *indexScanPath;
5746  ListCell *lc;
5747 
5748  /* We can short-circuit the cost comparison if indexscans are disabled */
5749  if (!enable_indexscan)
5750  return true; /* use sort */
5751 
5752  /* Set up mostly-dummy planner state */
5753  query = makeNode(Query);
5754  query->commandType = CMD_SELECT;
5755 
5756  glob = makeNode(PlannerGlobal);
5757 
5758  root = makeNode(PlannerInfo);
5759  root->parse = query;
5760  root->glob = glob;
5761  root->query_level = 1;
5762  root->planner_cxt = CurrentMemoryContext;
5763  root->wt_param_id = -1;
5764 
5765  /* Build a minimal RTE for the rel */
5766  rte = makeNode(RangeTblEntry);
5767  rte->rtekind = RTE_RELATION;
5768  rte->relid = tableOid;
5769  rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
5770  rte->rellockmode = AccessShareLock;
5771  rte->lateral = false;
5772  rte->inh = false;
5773  rte->inFromCl = true;
5774  query->rtable = list_make1(rte);
5775 
5776  /* Set up RTE/RelOptInfo arrays */
5777  setup_simple_rel_arrays(root);
5778 
5779  /* Build RelOptInfo */
5780  rel = build_simple_rel(root, 1, NULL);
5781 
5782  /* Locate IndexOptInfo for the target index */
5783  indexInfo = NULL;
5784  foreach(lc, rel->indexlist)
5785  {
5786  indexInfo = lfirst_node(IndexOptInfo, lc);
5787  if (indexInfo->indexoid == indexOid)
5788  break;
5789  }
5790 
5791  /*
5792  * It's possible that get_relation_info did not generate an IndexOptInfo
5793  * for the desired index; this could happen if it's not yet reached its
5794  * indcheckxmin usability horizon, or if it's a system index and we're
5795  * ignoring system indexes. In such cases we should tell CLUSTER to not
5796  * trust the index contents but use seqscan-and-sort.
5797  */
5798  if (lc == NULL) /* not in the list? */
5799  return true; /* use sort */
5800 
5801  /*
5802  * Rather than doing all the pushups that would be needed to use
5803  * set_baserel_size_estimates, just do a quick hack for rows and width.
5804  */
5805  rel->rows = rel->tuples;
5806  rel->reltarget->width = get_relation_data_width(tableOid, NULL);
5807 
5808  root->total_table_pages = rel->pages;
5809 
5810  /*
5811  * Determine eval cost of the index expressions, if any. We need to
5812  * charge twice that amount for each tuple comparison that happens during
5813  * the sort, since tuplesort.c will have to re-evaluate the index
5814  * expressions each time. (XXX that's pretty inefficient...)
5815  */
5816  cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
5817  comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
5818 
5819  /* Estimate the cost of seq scan + sort */
5820  seqScanPath = create_seqscan_path(root, rel, NULL, 0);
5821  cost_sort(&seqScanAndSortPath, root, NIL,
5822  seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
5823  comparisonCost, maintenance_work_mem, -1.0);
5824 
5825  /* Estimate the cost of index scan */
5826  indexScanPath = create_index_path(root, indexInfo,
5827  NIL, NIL, NIL, NIL,
5828  ForwardScanDirection, false,
5829  NULL, 1.0, false);
5830 
5831  return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
5832 }
5833 
5834 /*
5835  * plan_create_index_workers
5836  * Use the planner to decide how many parallel worker processes
5837  * CREATE INDEX should request for use
5838  *
5839  * tableOid is the table on which the index is to be built. indexOid is the
5840  * OID of an index to be created or reindexed (which must be a btree index).
5841  *
5842  * Return value is the number of parallel worker processes to request. It
5843  * may be unsafe to proceed if this is 0. Note that this does not include the
5844  * leader participating as a worker (value is always a number of parallel
5845  * worker processes).
5846  *
5847  * Note: caller had better already hold some type of lock on the table and
5848  * index.
5849  */
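 /*
  * For instance, with the default max_parallel_maintenance_workers = 2
  * the result is at most 2; the leader normally participates as an
  * additional worker (unless parallel_leader_participation is off), so
  * the total number of processes scanning the heap can be one more than
  * the value returned here.
  */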
5850 int
5851 plan_create_index_workers(Oid tableOid, Oid indexOid)
5852 {
5853  PlannerInfo *root;
5854  Query *query;
5855  PlannerGlobal *glob;
5856  RangeTblEntry *rte;
5857  Relation heap;
5858  Relation index;
5859  RelOptInfo *rel;
5860  int parallel_workers;
5861  BlockNumber heap_blocks;
5862  double reltuples;
5863  double allvisfrac;
5864 
5865  /*
5866  * We don't allow performing parallel operation in standalone backend or
5867  * when parallelism is disabled.
5868  */
5869  if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
5870  return 0;
5871 
5872  /* Set up largely-dummy planner state */
5873  query = makeNode(Query);
5874  query->commandType = CMD_SELECT;
5875 
5876  glob = makeNode(PlannerGlobal);
5877 
5878  root = makeNode(PlannerInfo);
5879  root->parse = query;
5880  root->glob = glob;
5881  root->query_level = 1;
5882  root->planner_cxt = CurrentMemoryContext;
5883  root->wt_param_id = -1;
5884 
5885  /*
5886  * Build a minimal RTE.
5887  *
5888  * Mark the RTE with inh = true. This is a kludge to prevent
5889  * get_relation_info() from fetching index info, which is necessary
5890  * because it does not expect that any IndexOptInfo