1 /*-------------------------------------------------------------------------
2  *
3  * planner.c
4  * The query optimizer external interface.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/plan/planner.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include <limits.h>
19 #include <math.h>
20 
21 #include "access/genam.h"
22 #include "access/htup_details.h"
23 #include "access/parallel.h"
24 #include "access/sysattr.h"
25 #include "access/table.h"
26 #include "access/xact.h"
27 #include "catalog/pg_constraint.h"
28 #include "catalog/pg_inherits.h"
29 #include "catalog/pg_proc.h"
30 #include "catalog/pg_type.h"
31 #include "executor/executor.h"
32 #include "executor/nodeAgg.h"
33 #include "foreign/fdwapi.h"
34 #include "miscadmin.h"
35 #include "jit/jit.h"
36 #include "lib/bipartite_match.h"
37 #include "lib/knapsack.h"
38 #include "nodes/makefuncs.h"
39 #include "nodes/nodeFuncs.h"
40 #ifdef OPTIMIZER_DEBUG
41 #include "nodes/print.h"
42 #endif
43 #include "optimizer/appendinfo.h"
44 #include "optimizer/clauses.h"
45 #include "optimizer/cost.h"
46 #include "optimizer/inherit.h"
47 #include "optimizer/optimizer.h"
48 #include "optimizer/paramassign.h"
49 #include "optimizer/pathnode.h"
50 #include "optimizer/paths.h"
51 #include "optimizer/plancat.h"
52 #include "optimizer/planmain.h"
53 #include "optimizer/planner.h"
54 #include "optimizer/prep.h"
55 #include "optimizer/subselect.h"
56 #include "optimizer/tlist.h"
57 #include "parser/analyze.h"
58 #include "parser/parsetree.h"
59 #include "parser/parse_agg.h"
60 #include "partitioning/partdesc.h"
61 #include "rewrite/rewriteManip.h"
62 #include "storage/dsm_impl.h"
63 #include "utils/rel.h"
64 #include "utils/selfuncs.h"
65 #include "utils/lsyscache.h"
66 #include "utils/syscache.h"
67 
68 
69 /* GUC parameters */
70 double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
71 int  force_parallel_mode = FORCE_PARALLEL_OFF;
72 bool  parallel_leader_participation = true;
73 
74 /* Hook for plugins to get control in planner() */
75 planner_hook_type planner_hook = NULL;
76 
77 /* Hook for plugins to get control when grouping_planner() plans upper rels */
78 create_upper_paths_hook_type create_upper_paths_hook = NULL;
79 
80 
81 /* Expression kind codes for preprocess_expression */
82 #define EXPRKIND_QUAL 0
83 #define EXPRKIND_TARGET 1
84 #define EXPRKIND_RTFUNC 2
85 #define EXPRKIND_RTFUNC_LATERAL 3
86 #define EXPRKIND_VALUES 4
87 #define EXPRKIND_VALUES_LATERAL 5
88 #define EXPRKIND_LIMIT 6
89 #define EXPRKIND_APPINFO 7
90 #define EXPRKIND_PHV 8
91 #define EXPRKIND_TABLESAMPLE 9
92 #define EXPRKIND_ARBITER_ELEM 10
93 #define EXPRKIND_TABLEFUNC 11
94 #define EXPRKIND_TABLEFUNC_LATERAL 12
95 
96 /* Passthrough data for standard_qp_callback */
97 typedef struct
98 {
99  List *activeWindows; /* active windows, if any */
100  List *groupClause; /* overrides parse->groupClause */
101 } standard_qp_extra;
102 
103 /*
104  * Data specific to grouping sets
105  */
106 
107 typedef struct
108 {
109  List *rollups;
110  List *hash_sets_idx;
111  double dNumHashGroups;
112  bool any_hashable;
113  Bitmapset *unhashable_refs;
114  Bitmapset *unsortable_refs;
115  List *unsortable_sets;
116  int *tleref_to_colnum_map;
117 } grouping_sets_data;
118 
119 /*
120  * Temporary structure for use during WindowClause reordering in order to be
121  * able to sort WindowClauses on partitioning/ordering prefix.
122  */
123 typedef struct
124 {
125  WindowClause *wc; /* The WindowClause being considered */
126  List *uniqueOrder; /* A List of unique ordering/partitioning
127  * clauses per Window */
128 } WindowClauseSortData;
129 
130 /* Local functions */
131 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
132 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
133 static void inheritance_planner(PlannerInfo *root);
134 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
135  double tuple_fraction);
136 static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
137 static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
138  int *tleref_to_colnum_map);
139 static void preprocess_rowmarks(PlannerInfo *root);
140 static double preprocess_limit(PlannerInfo *root,
141  double tuple_fraction,
142  int64 *offset_est, int64 *count_est);
143 static void remove_useless_groupby_columns(PlannerInfo *root);
144 static List *preprocess_groupclause(PlannerInfo *root, List *force);
145 static List *extract_rollup_sets(List *groupingSets);
146 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
147 static void standard_qp_callback(PlannerInfo *root, void *extra);
148 static double get_number_of_groups(PlannerInfo *root,
149  double path_rows,
150  grouping_sets_data *gd,
151  List *target_list);
152 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
153  RelOptInfo *input_rel,
154  PathTarget *target,
155  bool target_parallel_safe,
156  const AggClauseCosts *agg_costs,
157  grouping_sets_data *gd);
158 static bool is_degenerate_grouping(PlannerInfo *root);
159 static void create_degenerate_grouping_paths(PlannerInfo *root,
160  RelOptInfo *input_rel,
161  RelOptInfo *grouped_rel);
162 static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
163  PathTarget *target, bool target_parallel_safe,
164  Node *havingQual);
165 static void create_ordinary_grouping_paths(PlannerInfo *root,
166  RelOptInfo *input_rel,
167  RelOptInfo *grouped_rel,
168  const AggClauseCosts *agg_costs,
169  grouping_sets_data *gd,
170  GroupPathExtraData *extra,
171  RelOptInfo **partially_grouped_rel_p);
172 static void consider_groupingsets_paths(PlannerInfo *root,
173  RelOptInfo *grouped_rel,
174  Path *path,
175  bool is_sorted,
176  bool can_hash,
177  grouping_sets_data *gd,
178  const AggClauseCosts *agg_costs,
179  double dNumGroups);
180 static RelOptInfo *create_window_paths(PlannerInfo *root,
181  RelOptInfo *input_rel,
182  PathTarget *input_target,
183  PathTarget *output_target,
184  bool output_target_parallel_safe,
185  WindowFuncLists *wflists,
186  List *activeWindows);
187 static void create_one_window_path(PlannerInfo *root,
188  RelOptInfo *window_rel,
189  Path *path,
190  PathTarget *input_target,
191  PathTarget *output_target,
192  WindowFuncLists *wflists,
193  List *activeWindows);
194 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
195  RelOptInfo *input_rel);
196 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
197  RelOptInfo *input_rel,
198  PathTarget *target,
199  bool target_parallel_safe,
200  double limit_tuples);
201 static PathTarget *make_group_input_target(PlannerInfo *root,
202  PathTarget *final_target);
203 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
204  PathTarget *grouping_target,
205  Node *havingQual);
206 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
207 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
208 static PathTarget *make_window_input_target(PlannerInfo *root,
209  PathTarget *final_target,
210  List *activeWindows);
211 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
212  List *tlist);
213 static PathTarget *make_sort_input_target(PlannerInfo *root,
214  PathTarget *final_target,
215  bool *have_postponed_srfs);
216 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
217  List *targets, List *targets_contain_srfs);
218 static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
219  RelOptInfo *grouped_rel,
220  RelOptInfo *partially_grouped_rel,
221  const AggClauseCosts *agg_costs,
222  grouping_sets_data *gd,
223  double dNumGroups,
224  GroupPathExtraData *extra);
225 static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
226  RelOptInfo *grouped_rel,
227  RelOptInfo *input_rel,
228  grouping_sets_data *gd,
229  GroupPathExtraData *extra,
230  bool force_rel_creation);
231 static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
232 static bool can_partial_agg(PlannerInfo *root,
233  const AggClauseCosts *agg_costs);
234 static void apply_scanjoin_target_to_paths(PlannerInfo *root,
235  RelOptInfo *rel,
236  List *scanjoin_targets,
237  List *scanjoin_targets_contain_srfs,
238  bool scanjoin_target_parallel_safe,
239  bool tlist_same_exprs);
240 static void create_partitionwise_grouping_paths(PlannerInfo *root,
241  RelOptInfo *input_rel,
242  RelOptInfo *grouped_rel,
243  RelOptInfo *partially_grouped_rel,
244  const AggClauseCosts *agg_costs,
245  grouping_sets_data *gd,
246  PartitionwiseAggregateType patype,
247  GroupPathExtraData *extra);
248 static bool group_by_has_partkey(RelOptInfo *input_rel,
249  List *targetList,
250  List *groupClause);
251 static int common_prefix_cmp(const void *a, const void *b);
252 
253 
254 /*****************************************************************************
255  *
256  * Query optimizer entry point
257  *
258  * To support loadable plugins that monitor or modify planner behavior,
259  * we provide a hook variable that lets a plugin get control before and
260  * after the standard planning process. The plugin would normally call
261  * standard_planner().
262  *
263  * Note to plugin authors: standard_planner() scribbles on its Query input,
264  * so you'd better copy that data structure if you want to plan more than once.
265  *
266  *****************************************************************************/
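/*
 * A minimal sketch of how an extension might install this hook (the names
 * prev_planner_hook and my_planner, and the _PG_init body, are hypothetical
 * and not part of this file):
 *
 *	static planner_hook_type prev_planner_hook = NULL;
 *
 *	static PlannedStmt *
 *	my_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 *	{
 *		if (prev_planner_hook)
 *			return prev_planner_hook(parse, cursorOptions, boundParams);
 *		return standard_planner(parse, cursorOptions, boundParams);
 *	}
 *
 *	void
 *	_PG_init(void)
 *	{
 *		prev_planner_hook = planner_hook;
 *		planner_hook = my_planner;
 *	}
 */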
267 PlannedStmt *
268 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
269 {
270  PlannedStmt *result;
271 
272  if (planner_hook)
273  result = (*planner_hook) (parse, cursorOptions, boundParams);
274  else
275  result = standard_planner(parse, cursorOptions, boundParams);
276  return result;
277 }
278 
279 PlannedStmt *
280 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
281 {
282  PlannedStmt *result;
283  PlannerGlobal *glob;
284  double tuple_fraction;
285  PlannerInfo *root;
286  RelOptInfo *final_rel;
287  Path *best_path;
288  Plan *top_plan;
289  ListCell *lp,
290  *lr;
291 
292  /*
293  * Set up global state for this planner invocation. This data is needed
294  * across all levels of sub-Query that might exist in the given command,
295  * so we keep it in a separate struct that's linked to by each per-Query
296  * PlannerInfo.
297  */
298  glob = makeNode(PlannerGlobal);
299 
300  glob->boundParams = boundParams;
301  glob->subplans = NIL;
302  glob->subroots = NIL;
303  glob->rewindPlanIDs = NULL;
304  glob->finalrtable = NIL;
305  glob->finalrowmarks = NIL;
306  glob->resultRelations = NIL;
307  glob->rootResultRelations = NIL;
308  glob->relationOids = NIL;
309  glob->invalItems = NIL;
310  glob->paramExecTypes = NIL;
311  glob->lastPHId = 0;
312  glob->lastRowMarkId = 0;
313  glob->lastPlanNodeId = 0;
314  glob->transientPlan = false;
315  glob->dependsOnRole = false;
316 
317  /*
318  * Assess whether it's feasible to use parallel mode for this query. We
319  * can't do this in a standalone backend, or if the command will try to
320  * modify any data, or if this is a cursor operation, or if GUCs are set
321  * to values that don't permit parallelism, or if parallel-unsafe
322  * functions are present in the query tree.
323  *
324  * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE
325  * MATERIALIZED VIEW to use parallel plans, but this is safe only because
326  * the command is writing into a completely new table which workers won't
327  * be able to see. If the workers could see the table, the fact that
328  * group locking would cause them to ignore the leader's heavyweight
329  * relation extension lock and GIN page locks would make this unsafe.
330  * We'll have to fix that somehow if we want to allow parallel inserts in
331  * general; updates and deletes have additional problems especially around
332  * combo CIDs.)
333  *
334  * For now, we don't try to use parallel mode if we're running inside a
335  * parallel worker. We might eventually be able to relax this
336  * restriction, but for now it seems best not to have parallel workers
337  * trying to create their own parallel workers.
338  */
339  if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
340  IsUnderPostmaster &&
341  parse->commandType == CMD_SELECT &&
342  !parse->hasModifyingCTE &&
343  max_parallel_workers_per_gather > 0 &&
344  !IsParallelWorker())
345  {
346  /* all the cheap tests pass, so scan the query tree */
347  glob->maxParallelHazard = max_parallel_hazard(parse);
348  glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
349  }
350  else
351  {
352  /* skip the query tree scan, just assume it's unsafe */
353  glob->maxParallelHazard = PROPARALLEL_UNSAFE;
354  glob->parallelModeOK = false;
355  }
356 
357  /*
358  * glob->parallelModeNeeded is normally set to false here and changed to
359  * true during plan creation if a Gather or Gather Merge plan is actually
360  * created (cf. create_gather_plan, create_gather_merge_plan).
361  *
362  * However, if force_parallel_mode = on or force_parallel_mode = regress,
363  * then we impose parallel mode whenever it's safe to do so, even if the
364  * final plan doesn't use parallelism. It's not safe to do so if the
365  * query contains anything parallel-unsafe; parallelModeOK will be false
366  * in that case. Note that parallelModeOK can't change after this point.
367  * Otherwise, everything in the query is either parallel-safe or
368  * parallel-restricted, and in either case it should be OK to impose
369  * parallel-mode restrictions. If that ends up breaking something, then
370  * either some function the user included in the query is incorrectly
371  * labelled as parallel-safe or parallel-restricted when in reality it's
372  * parallel-unsafe, or else the query planner itself has a bug.
373  */
374  glob->parallelModeNeeded = glob->parallelModeOK &&
375  (force_parallel_mode != FORCE_PARALLEL_OFF);
376 
377  /* Determine what fraction of the plan is likely to be scanned */
378  if (cursorOptions & CURSOR_OPT_FAST_PLAN)
379  {
380  /*
381  * We have no real idea how many tuples the user will ultimately FETCH
382  * from a cursor, but it is often the case that he doesn't want 'em
383  * all, or would prefer a fast-start plan anyway so that he can
384  * process some of the tuples sooner. Use a GUC parameter to decide
385  * what fraction to optimize for.
386  */
387  tuple_fraction = cursor_tuple_fraction;
388 
389  /*
390  * We document cursor_tuple_fraction as simply being a fraction, which
391  * means the edge cases 0 and 1 have to be treated specially here. We
392  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
393  */
394  if (tuple_fraction >= 1.0)
395  tuple_fraction = 0.0;
396  else if (tuple_fraction <= 0.0)
397  tuple_fraction = 1e-10;
398  }
399  else
400  {
401  /* Default assumption is we need all the tuples */
402  tuple_fraction = 0.0;
403  }
404 
405  /* primary planning entry point (may recurse for subqueries) */
406  root = subquery_planner(glob, parse, NULL,
407  false, tuple_fraction);
408 
409  /* Select best Path and turn it into a Plan */
410  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
411  best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
412 
413  top_plan = create_plan(root, best_path);
414 
415  /*
416  * If creating a plan for a scrollable cursor, make sure it can run
417  * backwards on demand. Add a Material node at the top at need.
418  */
419  if (cursorOptions & CURSOR_OPT_SCROLL)
420  {
421  if (!ExecSupportsBackwardScan(top_plan))
422  top_plan = materialize_finished_plan(top_plan);
423  }
424 
425  /*
426  * Optionally add a Gather node for testing purposes, provided this is
427  * actually a safe thing to do.
428  */
429  if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
430  {
431  Gather *gather = makeNode(Gather);
432 
433  /*
434  * If there are any initPlans attached to the formerly-top plan node,
435  * move them up to the Gather node; same as we do for Material node in
436  * materialize_finished_plan.
437  */
438  gather->plan.initPlan = top_plan->initPlan;
439  top_plan->initPlan = NIL;
440 
441  gather->plan.targetlist = top_plan->targetlist;
442  gather->plan.qual = NIL;
443  gather->plan.lefttree = top_plan;
444  gather->plan.righttree = NULL;
445  gather->num_workers = 1;
446  gather->single_copy = true;
447  gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
448 
449  /*
450  * Since this Gather has no parallel-aware descendants to signal to,
451  * we don't need a rescan Param.
452  */
453  gather->rescan_param = -1;
454 
455  /*
456  * Ideally we'd use cost_gather here, but setting up dummy path data
457  * to satisfy it doesn't seem much cleaner than knowing what it does.
458  */
459  gather->plan.startup_cost = top_plan->startup_cost +
460  parallel_setup_cost;
461  gather->plan.total_cost = top_plan->total_cost +
462  parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
463  gather->plan.plan_rows = top_plan->plan_rows;
464  gather->plan.plan_width = top_plan->plan_width;
465  gather->plan.parallel_aware = false;
466  gather->plan.parallel_safe = false;
467 
468  /* use parallel mode for parallel plans. */
469  root->glob->parallelModeNeeded = true;
470 
471  top_plan = &gather->plan;
472  }
473 
474  /*
475  * If any Params were generated, run through the plan tree and compute
476  * each plan node's extParam/allParam sets. Ideally we'd merge this into
477  * set_plan_references' tree traversal, but for now it has to be separate
478  * because we need to visit subplans before, not after, the main plan.
479  */
480  if (glob->paramExecTypes != NIL)
481  {
482  Assert(list_length(glob->subplans) == list_length(glob->subroots));
483  forboth(lp, glob->subplans, lr, glob->subroots)
484  {
485  Plan *subplan = (Plan *) lfirst(lp);
486  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
487 
488  SS_finalize_plan(subroot, subplan);
489  }
490  SS_finalize_plan(root, top_plan);
491  }
492 
493  /* final cleanup of the plan */
494  Assert(glob->finalrtable == NIL);
495  Assert(glob->finalrowmarks == NIL);
496  Assert(glob->resultRelations == NIL);
497  Assert(glob->rootResultRelations == NIL);
498  top_plan = set_plan_references(root, top_plan);
499  /* ... and the subplans (both regular subplans and initplans) */
500  Assert(list_length(glob->subplans) == list_length(glob->subroots));
501  forboth(lp, glob->subplans, lr, glob->subroots)
502  {
503  Plan *subplan = (Plan *) lfirst(lp);
504  PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
505 
506  lfirst(lp) = set_plan_references(subroot, subplan);
507  }
508 
509  /* build the PlannedStmt result */
510  result = makeNode(PlannedStmt);
511 
512  result->commandType = parse->commandType;
513  result->queryId = parse->queryId;
514  result->hasReturning = (parse->returningList != NIL);
515  result->hasModifyingCTE = parse->hasModifyingCTE;
516  result->canSetTag = parse->canSetTag;
517  result->transientPlan = glob->transientPlan;
518  result->dependsOnRole = glob->dependsOnRole;
519  result->parallelModeNeeded = glob->parallelModeNeeded;
520  result->planTree = top_plan;
521  result->rtable = glob->finalrtable;
522  result->resultRelations = glob->resultRelations;
523  result->rootResultRelations = glob->rootResultRelations;
524  result->subplans = glob->subplans;
525  result->rewindPlanIDs = glob->rewindPlanIDs;
526  result->rowMarks = glob->finalrowmarks;
527  result->relationOids = glob->relationOids;
528  result->invalItems = glob->invalItems;
529  result->paramExecTypes = glob->paramExecTypes;
530  /* utilityStmt should be null, but we might as well copy it */
531  result->utilityStmt = parse->utilityStmt;
532  result->stmt_location = parse->stmt_location;
533  result->stmt_len = parse->stmt_len;
534 
535  result->jitFlags = PGJIT_NONE;
536  if (jit_enabled && jit_above_cost >= 0 &&
537  top_plan->total_cost > jit_above_cost)
538  {
539  result->jitFlags |= PGJIT_PERFORM;
540 
541  /*
542  * Decide how much effort should be put into generating better code.
543  */
544  if (jit_optimize_above_cost >= 0 &&
545  top_plan->total_cost > jit_optimize_above_cost)
546  result->jitFlags |= PGJIT_OPT3;
547  if (jit_inline_above_cost >= 0 &&
548  top_plan->total_cost > jit_inline_above_cost)
549  result->jitFlags |= PGJIT_INLINE;
550 
551  /*
552  * Decide which operations should be JITed.
553  */
554  if (jit_expressions)
555  result->jitFlags |= PGJIT_EXPR;
556  if (jit_tuple_deforming)
557  result->jitFlags |= PGJIT_DEFORM;
558  }
559 
560  if (glob->partition_directory != NULL)
561  DestroyPartitionDirectory(glob->partition_directory);
562 
563  return result;
564 }
565 
566 
567 /*--------------------
568  * subquery_planner
569  * Invokes the planner on a subquery. We recurse to here for each
570  * sub-SELECT found in the query tree.
571  *
572  * glob is the global state for the current planner run.
573  * parse is the querytree produced by the parser & rewriter.
574  * parent_root is the immediate parent Query's info (NULL at the top level).
575  * hasRecursion is true if this is a recursive WITH query.
576  * tuple_fraction is the fraction of tuples we expect will be retrieved.
577  * tuple_fraction is interpreted as explained for grouping_planner, below.
578  *
579  * Basically, this routine does the stuff that should only be done once
580  * per Query object. It then calls grouping_planner. At one time,
581  * grouping_planner could be invoked recursively on the same Query object;
582  * that's not currently true, but we keep the separation between the two
583  * routines anyway, in case we need it again someday.
584  *
585  * subquery_planner will be called recursively to handle sub-Query nodes
586  * found within the query's expressions and rangetable.
587  *
588  * Returns the PlannerInfo struct ("root") that contains all data generated
589  * while planning the subquery. In particular, the Path(s) attached to
590  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
591  * cheapest way(s) to implement the query. The top level will select the
592  * best Path and pass it through createplan.c to produce a finished Plan.
593  *--------------------
594  */
595 PlannerInfo *
596 subquery_planner(PlannerGlobal *glob, Query *parse,
597  PlannerInfo *parent_root,
598  bool hasRecursion, double tuple_fraction)
599 {
600  PlannerInfo *root;
601  List *newWithCheckOptions;
602  List *newHaving;
603  bool hasOuterJoins;
604  bool hasResultRTEs;
605  RelOptInfo *final_rel;
606  ListCell *l;
607 
608  /* Create a PlannerInfo data structure for this subquery */
609  root = makeNode(PlannerInfo);
610  root->parse = parse;
611  root->glob = glob;
612  root->query_level = parent_root ? parent_root->query_level + 1 : 1;
613  root->parent_root = parent_root;
614  root->plan_params = NIL;
615  root->outer_params = NULL;
616  root->planner_cxt = CurrentMemoryContext;
617  root->init_plans = NIL;
618  root->cte_plan_ids = NIL;
619  root->multiexpr_params = NIL;
620  root->eq_classes = NIL;
621  root->ec_merging_done = false;
622  root->append_rel_list = NIL;
623  root->rowMarks = NIL;
624  memset(root->upper_rels, 0, sizeof(root->upper_rels));
625  memset(root->upper_targets, 0, sizeof(root->upper_targets));
626  root->processed_tlist = NIL;
627  root->grouping_map = NULL;
628  root->minmax_aggs = NIL;
629  root->qual_security_level = 0;
630  root->inhTargetKind = INHKIND_NONE;
631  root->hasRecursion = hasRecursion;
632  if (hasRecursion)
633  root->wt_param_id = assign_special_exec_param(root);
634  else
635  root->wt_param_id = -1;
636  root->non_recursive_path = NULL;
637  root->partColsUpdated = false;
638 
639  /*
640  * If there is a WITH list, process each WITH query and either convert it
641  * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
642  */
643  if (parse->cteList)
644  SS_process_ctes(root);
645 
646  /*
647  * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
648  * that we don't need so many special cases to deal with that situation.
649  */
650  replace_empty_jointree(parse);
651 
652  /*
653  * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
654  * to transform them into joins. Note that this step does not descend
655  * into subqueries; if we pull up any subqueries below, their SubLinks are
656  * processed just before pulling them up.
657  */
658  if (parse->hasSubLinks)
659  pull_up_sublinks(root);
660 
661  /*
662  * Scan the rangetable for function RTEs, do const-simplification on them,
663  * and then inline them if possible (producing subqueries that might get
664  * pulled up next). Recursion issues here are handled in the same way as
665  * for SubLinks.
666  */
667  preprocess_function_rtes(root);
668 
669  /*
670  * Check to see if any subqueries in the jointree can be merged into this
671  * query.
672  */
673  pull_up_subqueries(root);
674 
675  /*
676  * If this is a simple UNION ALL query, flatten it into an appendrel. We
677  * do this now because it requires applying pull_up_subqueries to the leaf
678  * queries of the UNION ALL, which weren't touched above because they
679  * weren't referenced by the jointree (they will be after we do this).
680  */
681  if (parse->setOperations)
682  flatten_simple_union_all(root);
683 
684  /*
685  * Survey the rangetable to see what kinds of entries are present. We can
686  * skip some later processing if relevant SQL features are not used; for
687  * example if there are no JOIN RTEs we can avoid the expense of doing
688  * flatten_join_alias_vars(). This must be done after we have finished
689  * adding rangetable entries, of course. (Note: actually, processing of
690  * inherited or partitioned rels can cause RTEs for their child tables to
691  * get added later; but those must all be RTE_RELATION entries, so they
692  * don't invalidate the conclusions drawn here.)
693  */
694  root->hasJoinRTEs = false;
695  root->hasLateralRTEs = false;
696  hasOuterJoins = false;
697  hasResultRTEs = false;
698  foreach(l, parse->rtable)
699  {
700  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
701 
702  switch (rte->rtekind)
703  {
704  case RTE_RELATION:
705  if (rte->inh)
706  {
707  /*
708  * Check to see if the relation actually has any children;
709  * if not, clear the inh flag so we can treat it as a
710  * plain base relation.
711  *
712  * Note: this could give a false-positive result, if the
713  * rel once had children but no longer does. We used to
714  * be able to clear rte->inh later on when we discovered
715  * that, but no more; we have to handle such cases as
716  * full-fledged inheritance.
717  */
718  rte->inh = has_subclass(rte->relid);
719  }
720  break;
721  case RTE_JOIN:
722  root->hasJoinRTEs = true;
723  if (IS_OUTER_JOIN(rte->jointype))
724  hasOuterJoins = true;
725  break;
726  case RTE_RESULT:
727  hasResultRTEs = true;
728  break;
729  default:
730  /* No work here for other RTE types */
731  break;
732  }
733 
734  if (rte->lateral)
735  root->hasLateralRTEs = true;
736 
737  /*
738  * We can also determine the maximum security level required for any
739  * securityQuals now. Addition of inheritance-child RTEs won't affect
740  * this, because child tables don't have their own securityQuals; see
741  * expand_single_inheritance_child().
742  */
743  if (rte->securityQuals)
744  root->qual_security_level = Max(root->qual_security_level,
745  list_length(rte->securityQuals));
746  }
747 
748  /*
749  * Preprocess RowMark information. We need to do this after subquery
750  * pullup, so that all base relations are present.
751  */
752  preprocess_rowmarks(root);
753 
754  /*
755  * Set hasHavingQual to remember if HAVING clause is present. Needed
756  * because preprocess_expression will reduce a constant-true condition to
757  * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
758  */
759  root->hasHavingQual = (parse->havingQual != NULL);
760 
761  /* Clear this flag; might get set in distribute_qual_to_rels */
762  root->hasPseudoConstantQuals = false;
763 
764  /*
765  * Do expression preprocessing on targetlist and quals, as well as other
766  * random expressions in the querytree. Note that we do not need to
767  * handle sort/group expressions explicitly, because they are actually
768  * part of the targetlist.
769  */
770  parse->targetList = (List *)
771  preprocess_expression(root, (Node *) parse->targetList,
772  EXPRKIND_TARGET);
773 
774  /* Constant-folding might have removed all set-returning functions */
775  if (parse->hasTargetSRFs)
776  parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
777 
778  newWithCheckOptions = NIL;
779  foreach(l, parse->withCheckOptions)
780  {
781  WithCheckOption *wco = lfirst_node(WithCheckOption, l);
782 
783  wco->qual = preprocess_expression(root, wco->qual,
784  EXPRKIND_QUAL);
785  if (wco->qual != NULL)
786  newWithCheckOptions = lappend(newWithCheckOptions, wco);
787  }
788  parse->withCheckOptions = newWithCheckOptions;
789 
790  parse->returningList = (List *)
791  preprocess_expression(root, (Node *) parse->returningList,
792  EXPRKIND_TARGET);
793 
794  preprocess_qual_conditions(root, (Node *) parse->jointree);
795 
796  parse->havingQual = preprocess_expression(root, parse->havingQual,
797  EXPRKIND_QUAL);
798 
799  foreach(l, parse->windowClause)
800  {
801  WindowClause *wc = lfirst_node(WindowClause, l);
802 
803  /* partitionClause/orderClause are sort/group expressions */
804  wc->startOffset = preprocess_expression(root, wc->startOffset,
805  EXPRKIND_LIMIT);
806  wc->endOffset = preprocess_expression(root, wc->endOffset,
807  EXPRKIND_LIMIT);
808  }
809 
810  parse->limitOffset = preprocess_expression(root, parse->limitOffset,
811  EXPRKIND_LIMIT);
812  parse->limitCount = preprocess_expression(root, parse->limitCount,
813  EXPRKIND_LIMIT);
814 
815  if (parse->onConflict)
816  {
817  parse->onConflict->arbiterElems = (List *)
818  preprocess_expression(root,
819  (Node *) parse->onConflict->arbiterElems,
820  EXPRKIND_ARBITER_ELEM);
821  parse->onConflict->arbiterWhere =
822  preprocess_expression(root,
823  parse->onConflict->arbiterWhere,
824  EXPRKIND_QUAL);
825  parse->onConflict->onConflictSet = (List *)
826  preprocess_expression(root,
827  (Node *) parse->onConflict->onConflictSet,
828  EXPRKIND_TARGET);
829  parse->onConflict->onConflictWhere =
830  preprocess_expression(root,
831  parse->onConflict->onConflictWhere,
832  EXPRKIND_QUAL);
833  /* exclRelTlist contains only Vars, so no preprocessing needed */
834  }
835 
836  root->append_rel_list = (List *)
837  preprocess_expression(root, (Node *) root->append_rel_list,
838  EXPRKIND_APPINFO);
839 
840  /* Also need to preprocess expressions within RTEs */
841  foreach(l, parse->rtable)
842  {
843  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
844  int kind;
845  ListCell *lcsq;
846 
847  if (rte->rtekind == RTE_RELATION)
848  {
849  if (rte->tablesample)
850  rte->tablesample = (TableSampleClause *)
851  preprocess_expression(root,
852  (Node *) rte->tablesample,
853  EXPRKIND_TABLESAMPLE);
854  }
855  else if (rte->rtekind == RTE_SUBQUERY)
856  {
857  /*
858  * We don't want to do all preprocessing yet on the subquery's
859  * expressions, since that will happen when we plan it. But if it
860  * contains any join aliases of our level, those have to get
861  * expanded now, because planning of the subquery won't do it.
862  * That's only possible if the subquery is LATERAL.
863  */
864  if (rte->lateral && root->hasJoinRTEs)
865  rte->subquery = (Query *)
866  flatten_join_alias_vars(root->parse,
867  (Node *) rte->subquery);
868  }
869  else if (rte->rtekind == RTE_FUNCTION)
870  {
871  /* Preprocess the function expression(s) fully */
872  kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
873  rte->functions = (List *)
874  preprocess_expression(root, (Node *) rte->functions, kind);
875  }
876  else if (rte->rtekind == RTE_TABLEFUNC)
877  {
878  /* Preprocess the function expression(s) fully */
879  kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
880  rte->tablefunc = (TableFunc *)
881  preprocess_expression(root, (Node *) rte->tablefunc, kind);
882  }
883  else if (rte->rtekind == RTE_VALUES)
884  {
885  /* Preprocess the values lists fully */
886  kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
887  rte->values_lists = (List *)
888  preprocess_expression(root, (Node *) rte->values_lists, kind);
889  }
890 
891  /*
892  * Process each element of the securityQuals list as if it were a
893  * separate qual expression (as indeed it is). We need to do it this
894  * way to get proper canonicalization of AND/OR structure. Note that
895  * this converts each element into an implicit-AND sublist.
896  */
897  foreach(lcsq, rte->securityQuals)
898  {
899  lfirst(lcsq) = preprocess_expression(root,
900  (Node *) lfirst(lcsq),
901  EXPRKIND_QUAL);
902  }
903  }
904 
905  /*
906  * Now that we are done preprocessing expressions, and in particular done
907  * flattening join alias variables, get rid of the joinaliasvars lists.
908  * They no longer match what expressions in the rest of the tree look
909  * like, because we have not preprocessed expressions in those lists (and
910  * do not want to; for example, expanding a SubLink there would result in
911  * a useless unreferenced subplan). Leaving them in place simply creates
912  * a hazard for later scans of the tree. We could try to prevent that by
913  * using QTW_IGNORE_JOINALIASES in every tree scan done after this point,
914  * but that doesn't sound very reliable.
915  */
916  if (root->hasJoinRTEs)
917  {
918  foreach(l, parse->rtable)
919  {
920  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
921 
922  rte->joinaliasvars = NIL;
923  }
924  }
925 
926  /*
927  * In some cases we may want to transfer a HAVING clause into WHERE. We
928  * cannot do so if the HAVING clause contains aggregates (obviously) or
929  * volatile functions (since a HAVING clause is supposed to be executed
930  * only once per group). We also can't do this if there are any nonempty
931  * grouping sets; moving such a clause into WHERE would potentially change
932  * the results, if any referenced column isn't present in all the grouping
933  * sets. (If there are only empty grouping sets, then the HAVING clause
934  * must be degenerate as discussed below.)
935  *
936  * Also, it may be that the clause is so expensive to execute that we're
937  * better off doing it only once per group, despite the loss of
938  * selectivity. This is hard to estimate short of doing the entire
939  * planning process twice, so we use a heuristic: clauses containing
940  * subplans are left in HAVING. Otherwise, we move or copy the HAVING
941  * clause into WHERE, in hopes of eliminating tuples before aggregation
942  * instead of after.
943  *
944  * If the query has explicit grouping then we can simply move such a
945  * clause into WHERE; any group that fails the clause will not be in the
946  * output because none of its tuples will reach the grouping or
947  * aggregation stage. Otherwise we must have a degenerate (variable-free)
948  * HAVING clause, which we put in WHERE so that query_planner() can use it
949  * in a gating Result node, but also keep in HAVING to ensure that we
950  * don't emit a bogus aggregated row. (This could be done better, but it
951  * seems not worth optimizing.)
952  *
953  * Note that both havingQual and parse->jointree->quals are in
954  * implicitly-ANDed-list form at this point, even though they are declared
955  * as Node *.
956  */
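/*
 * For example, in
 *		SELECT a, sum(b) FROM t GROUP BY a HAVING a > 0
 * the clause "a > 0" contains no aggregates, volatile functions, or
 * subplans, so the loop below moves it into WHERE; a clause like
 * "HAVING sum(b) > 10" is kept in HAVING.
 */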
957  newHaving = NIL;
958  foreach(l, (List *) parse->havingQual)
959  {
960  Node *havingclause = (Node *) lfirst(l);
961 
962  if ((parse->groupClause && parse->groupingSets) ||
963  contain_agg_clause(havingclause) ||
964  contain_volatile_functions(havingclause) ||
965  contain_subplans(havingclause))
966  {
967  /* keep it in HAVING */
968  newHaving = lappend(newHaving, havingclause);
969  }
970  else if (parse->groupClause && !parse->groupingSets)
971  {
972  /* move it to WHERE */
973  parse->jointree->quals = (Node *)
974  lappend((List *) parse->jointree->quals, havingclause);
975  }
976  else
977  {
978  /* put a copy in WHERE, keep it in HAVING */
979  parse->jointree->quals = (Node *)
980  lappend((List *) parse->jointree->quals,
981  copyObject(havingclause));
982  newHaving = lappend(newHaving, havingclause);
983  }
984  }
985  parse->havingQual = (Node *) newHaving;
986 
987  /* Remove any redundant GROUP BY columns */
988  remove_useless_groupby_columns(root);
989 
990  /*
991  * If we have any outer joins, try to reduce them to plain inner joins.
992  * This step is most easily done after we've done expression
993  * preprocessing.
994  */
995  if (hasOuterJoins)
996  reduce_outer_joins(root);
997 
998  /*
999  * If we have any RTE_RESULT relations, see if they can be deleted from
1000  * the jointree. This step is most effectively done after we've done
1001  * expression preprocessing and outer join reduction.
1002  */
1003  if (hasResultRTEs)
1004  remove_useless_result_rtes(root);
1005 
1006  /*
1007  * Do the main planning. If we have an inherited target relation, that
1008  * needs special processing, else go straight to grouping_planner.
1009  */
1010  if (parse->resultRelation &&
1011  rt_fetch(parse->resultRelation, parse->rtable)->inh)
1012  inheritance_planner(root);
1013  else
1014  grouping_planner(root, false, tuple_fraction);
1015 
1016  /*
1017  * Capture the set of outer-level param IDs we have access to, for use in
1018  * extParam/allParam calculations later.
1019  */
1020  SS_identify_outer_params(root);
1021 
1022  /*
1023  * If any initPlans were created in this query level, adjust the surviving
1024  * Paths' costs and parallel-safety flags to account for them. The
1025  * initPlans won't actually get attached to the plan tree till
1026  * create_plan() runs, but we must include their effects now.
1027  */
1028  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1029  SS_charge_for_initplans(root, final_rel);
1030 
1031  /*
1032  * Make sure we've identified the cheapest Path for the final rel. (By
1033  * doing this here not in grouping_planner, we include initPlan costs in
1034  * the decision, though it's unlikely that will change anything.)
1035  */
1036  set_cheapest(final_rel);
1037 
1038  return root;
1039 }
1040 
1041 /*
1042  * preprocess_expression
1043  * Do subquery_planner's preprocessing work for an expression,
1044  * which can be a targetlist, a WHERE clause (including JOIN/ON
1045  * conditions), a HAVING clause, or a few other things.
1046  */
1047 static Node *
1048 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
1049 {
1050  /*
1051  * Fall out quickly if expression is empty. This occurs often enough to
1052  * be worth checking. Note that null->null is the correct conversion for
1053  * implicit-AND result format, too.
1054  */
1055  if (expr == NULL)
1056  return NULL;
1057 
1058  /*
1059  * If the query has any join RTEs, replace join alias variables with
1060  * base-relation variables. We must do this first, since any expressions
1061  * we may extract from the joinaliasvars lists have not been preprocessed.
1062  * For example, if we did this after sublink processing, sublinks expanded
1063  * out from join aliases would not get processed. But we can skip this in
1064  * non-lateral RTE functions, VALUES lists, and TABLESAMPLE clauses, since
1065  * they can't contain any Vars of the current query level.
1066  */
1067  if (root->hasJoinRTEs &&
1068  !(kind == EXPRKIND_RTFUNC ||
1069  kind == EXPRKIND_VALUES ||
1070  kind == EXPRKIND_TABLESAMPLE ||
1071  kind == EXPRKIND_TABLEFUNC))
1072  expr = flatten_join_alias_vars(root->parse, expr);
1073 
1074  /*
1075  * Simplify constant expressions. For function RTEs, this was already
1076  * done by preprocess_function_rtes ... but we have to do it again if the
1077  * RTE is LATERAL and might have contained join alias variables.
1078  *
1079  * Note: an essential effect of this is to convert named-argument function
1080  * calls to positional notation and insert the current actual values of
1081  * any default arguments for functions. To ensure that happens, we *must*
1082  * process all expressions here. Previous PG versions sometimes skipped
1083  * const-simplification if it didn't seem worth the trouble, but we can't
1084  * do that anymore.
1085  *
1086  * Note: this also flattens nested AND and OR expressions into N-argument
1087  * form. All processing of a qual expression after this point must be
1088  * careful to maintain AND/OR flatness --- that is, do not generate a tree
1089  * with AND directly under AND, nor OR directly under OR.
1090  */
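	/*
	 * For example, given a function declared as f(a int, b int DEFAULT 0),
	 * the named-notation call f(b => 5, a => 1) is rewritten here into
	 * positional form, f(1, 5), and a call f(1) has the default inserted,
	 * becoming f(1, 0).
	 */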
1091  if (!(kind == EXPRKIND_RTFUNC ||
1092  (kind == EXPRKIND_RTFUNC_LATERAL && !root->hasJoinRTEs)))
1093  expr = eval_const_expressions(root, expr);
1094 
1095  /*
1096  * If it's a qual or havingQual, canonicalize it.
1097  */
1098  if (kind == EXPRKIND_QUAL)
1099  {
1100  expr = (Node *) canonicalize_qual((Expr *) expr, false);
1101 
1102 #ifdef OPTIMIZER_DEBUG
1103  printf("After canonicalize_qual()\n");
1104  pprint(expr);
1105 #endif
1106  }
1107 
1108  /* Expand SubLinks to SubPlans */
1109  if (root->parse->hasSubLinks)
1110  expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
1111 
1112  /*
1113  * XXX do not insert anything here unless you have grokked the comments in
1114  * SS_replace_correlation_vars ...
1115  */
1116 
1117  /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
1118  if (root->query_level > 1)
1119  expr = SS_replace_correlation_vars(root, expr);
1120 
1121  /*
1122  * If it's a qual or havingQual, convert it to implicit-AND format. (We
1123  * don't want to do this before eval_const_expressions, since the latter
1124  * would be unable to simplify a top-level AND correctly. Also,
1125  * SS_process_sublinks expects explicit-AND format.)
1126  */
1127  if (kind == EXPRKIND_QUAL)
1128  expr = (Node *) make_ands_implicit((Expr *) expr);
1129 
1130  return expr;
1131 }
1132 
1133 /*
1134  * preprocess_qual_conditions
1135  * Recursively scan the query's jointree and do subquery_planner's
1136  * preprocessing work on each qual condition found therein.
1137  */
1138 static void
1139 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
1140 {
1141  if (jtnode == NULL)
1142  return;
1143  if (IsA(jtnode, RangeTblRef))
1144  {
1145  /* nothing to do here */
1146  }
1147  else if (IsA(jtnode, FromExpr))
1148  {
1149  FromExpr *f = (FromExpr *) jtnode;
1150  ListCell *l;
1151 
1152  foreach(l, f->fromlist)
1153  preprocess_qual_conditions(root, lfirst(l));
1154 
1155  f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
1156  }
1157  else if (IsA(jtnode, JoinExpr))
1158  {
1159  JoinExpr *j = (JoinExpr *) jtnode;
1160 
1161  preprocess_qual_conditions(root, j->larg);
1162  preprocess_qual_conditions(root, j->rarg);
1163 
1164  j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
1165  }
1166  else
1167  elog(ERROR, "unrecognized node type: %d",
1168  (int) nodeTag(jtnode));
1169 }
1170 
1171 /*
1172  * preprocess_phv_expression
1173  * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1174  *
1175  * If a LATERAL subquery references an output of another subquery, and that
1176  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1177  * join, then we'll push the PlaceHolderVar expression down into the subquery
1178  * and later pull it back up during find_lateral_references, which runs after
1179  * subquery_planner has preprocessed all the expressions that were in the
1180  * current query level to start with. So we need to preprocess it then.
1181  */
1182 Expr *
1183 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1184 {
1185  return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1186 }
1187 
1188 /*
1189  * inheritance_planner
1190  * Generate Paths in the case where the result relation is an
1191  * inheritance set.
1192  *
1193  * We have to handle this case differently from cases where a source relation
1194  * is an inheritance set. Source inheritance is expanded at the bottom of the
1195  * plan tree (see allpaths.c), but target inheritance has to be expanded at
1196  * the top. The reason is that for UPDATE, each target relation needs a
1197  * different targetlist matching its own column set. Fortunately,
1198  * the UPDATE/DELETE target can never be the nullable side of an outer join,
1199  * so it's OK to generate the plan this way.
1200  *
1201  * Returns nothing; the useful output is in the Paths we attach to
1202  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1203  *
1204  * Note that we have not done set_cheapest() on the final rel; it's convenient
1205  * to leave this to the caller.
1206  */
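/*
 * For example, for "UPDATE parent SET x = x + 1" where parent has
 * inheritance children c1 and c2, this function plans the UPDATE once per
 * target relation, each with its own targetlist, and the resulting
 * subpaths are later combined under a single ModifyTable node.
 */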
1207 static void
1208 inheritance_planner(PlannerInfo *root)
1209 {
1210  Query *parse = root->parse;
1211  int top_parentRTindex = parse->resultRelation;
1212  List *select_rtable;
1213  List *select_appinfos;
1214  List *child_appinfos;
1215  List *old_child_rtis;
1216  List *new_child_rtis;
1217  Bitmapset *subqueryRTindexes;
1218  Index next_subquery_rti;
1219  int nominalRelation = -1;
1220  Index rootRelation = 0;
1221  List *final_rtable = NIL;
1222  List *final_rowmarks = NIL;
1223  int save_rel_array_size = 0;
1224  RelOptInfo **save_rel_array = NULL;
1225  AppendRelInfo **save_append_rel_array = NULL;
1226  List *subpaths = NIL;
1227  List *subroots = NIL;
1228  List *resultRelations = NIL;
1229  List *withCheckOptionLists = NIL;
1230  List *returningLists = NIL;
1231  List *rowMarks;
1232  RelOptInfo *final_rel;
1233  ListCell *lc;
1234  ListCell *lc2;
1235  Index rti;
1236  RangeTblEntry *parent_rte;
1237  Bitmapset *parent_relids;
1238  Query **parent_parses;
1239 
1240  /* Should only get here for UPDATE or DELETE */
1241  Assert(parse->commandType == CMD_UPDATE ||
1242  parse->commandType == CMD_DELETE);
1243 
1244  /*
1245  * We generate a modified instance of the original Query for each target
1246  * relation, plan that, and put all the plans into a list that will be
1247  * controlled by a single ModifyTable node. All the instances share the
1248  * same rangetable, but each instance must have its own set of subquery
1249  * RTEs within the finished rangetable because (1) they are likely to get
1250  * scribbled on during planning, and (2) it's not inconceivable that
1251  * subqueries could get planned differently in different cases. We need
1252  * not create duplicate copies of other RTE kinds, in particular not the
1253  * target relations, because they don't have either of those issues. Not
1254  * having to duplicate the target relations is important because doing so
1255  * (1) would result in a rangetable of length O(N^2) for N targets, with
1256  * at least O(N^3) work expended here; and (2) would greatly complicate
1257  * management of the rowMarks list.
1258  *
1259  * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1260  */
1261  subqueryRTindexes = NULL;
1262  rti = 1;
1263  foreach(lc, parse->rtable)
1264  {
1265  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
1266 
1267  if (rte->rtekind == RTE_SUBQUERY)
1268  subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1269  rti++;
1270  }
1271 
1272  /*
1273  * If the parent RTE is a partitioned table, we should use that as the
1274  * nominal target relation, because the RTEs added for partitioned tables
1275  * (including the root parent) as child members of the inheritance set do
1276  * not appear anywhere else in the plan, so the confusion explained below
1277  * for non-partitioning inheritance cases is not possible.
1278  */
1279  parent_rte = rt_fetch(top_parentRTindex, parse->rtable);
1280  Assert(parent_rte->inh);
1281  if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1282  {
1283  nominalRelation = top_parentRTindex;
1284  rootRelation = top_parentRTindex;
1285  }
1286 
1287  /*
1288  * Before generating the real per-child-relation plans, do a cycle of
1289  * planning as though the query were a SELECT. The objective here is to
1290  * find out which child relations need to be processed, using the same
1291  * expansion and pruning logic as for a SELECT. We'll then pull out the
1292  * RangeTblEntry-s generated for the child rels, and make use of the
1293  * AppendRelInfo entries for them to guide the real planning. (This is
1294  * rather inefficient; we could perhaps stop short of making a full Path
1295  * tree. But this whole function is inefficient and slated for
1296  * destruction, so let's not contort query_planner for that.)
1297  */
1298  {
1299  PlannerInfo *subroot;
1300 
1301  /*
1302  * Flat-copy the PlannerInfo to prevent modification of the original.
1303  */
1304  subroot = makeNode(PlannerInfo);
1305  memcpy(subroot, root, sizeof(PlannerInfo));
1306 
1307  /*
1308  * Make a deep copy of the parsetree for this planning cycle to mess
1309  * around with, and change it to look like a SELECT. (Hack alert: the
1310  * target RTE still has updatedCols set if this is an UPDATE, so that
1311  * expand_partitioned_rtentry will correctly update
1312  * subroot->partColsUpdated.)
1313  */
1314  subroot->parse = copyObject(root->parse);
1315 
1316  subroot->parse->commandType = CMD_SELECT;
1317  subroot->parse->resultRelation = 0;
1318 
1319  /*
1320  * Ensure the subroot has its own copy of the original
1321  * append_rel_list, since it'll be scribbled on. (Note that at this
1322  * point, the list only contains AppendRelInfos for flattened UNION
1323  * ALL subqueries.)
1324  */
1325  subroot->append_rel_list = copyObject(root->append_rel_list);
1326 
1327  /*
1328  * Better make a private copy of the rowMarks, too.
1329  */
1330  subroot->rowMarks = copyObject(root->rowMarks);
1331 
1332  /* There shouldn't be any OJ info to translate, as yet */
1333  Assert(subroot->join_info_list == NIL);
1334  /* and we haven't created PlaceHolderInfos, either */
1335  Assert(subroot->placeholder_list == NIL);
1336 
1337  /* Generate Path(s) for accessing this result relation */
1338  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1339 
1340  /* Extract the info we need. */
1341  select_rtable = subroot->parse->rtable;
1342  select_appinfos = subroot->append_rel_list;
1343 
1344  /*
1345  * We need to propagate partColsUpdated back, too. (The later
1346  * planning cycles will not set this because they won't run
1347  * expand_partitioned_rtentry for the UPDATE target.)
1348  */
1349  root->partColsUpdated = subroot->partColsUpdated;
1350  }
1351 
1352  /*----------
1353  * Since only one rangetable can exist in the final plan, we need to make
1354  * sure that it contains all the RTEs needed for any child plan. This is
1355  * complicated by the need to use separate subquery RTEs for each child.
1356  * We arrange the final rtable as follows:
1357  * 1. All original rtable entries (with their original RT indexes).
1358  * 2. All the relation RTEs generated for children of the target table.
1359  * 3. Subquery RTEs for children after the first. We need N * (K - 1)
1360  * RT slots for this, if there are N subqueries and K child tables.
1361  * 4. Additional RTEs generated during the child planning runs, such as
1362  * children of inheritable RTEs other than the target table.
1363  * We assume that each child planning run will create an identical set
1364  * of type-4 RTEs.
1365  *
1366  * So the next thing to do is append the type-2 RTEs (the target table's
1367  * children) to the original rtable. We look through select_appinfos
1368  * to find them.
1369  *
1370  * To identify which AppendRelInfos are relevant as we thumb through
1371  * select_appinfos, we need to look for both direct and indirect children
1372  * of top_parentRTindex, so we use a bitmap of known parent relids.
1373  * expand_inherited_rtentry() always processes a parent before any of that
1374  * parent's children, so we should see an intermediate parent before its
1375  * children.
1376  *----------
1377  */
1378  child_appinfos = NIL;
1379  old_child_rtis = NIL;
1380  new_child_rtis = NIL;
1381  parent_relids = bms_make_singleton(top_parentRTindex);
1382  foreach(lc, select_appinfos)
1383  {
1384  AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
1385  RangeTblEntry *child_rte;
1386 
1387  /* append_rel_list contains all append rels; ignore others */
1388  if (!bms_is_member(appinfo->parent_relid, parent_relids))
1389  continue;
1390 
1391  /* remember relevant AppendRelInfos for use below */
1392  child_appinfos = lappend(child_appinfos, appinfo);
1393 
1394  /* extract RTE for this child rel */
1395  child_rte = rt_fetch(appinfo->child_relid, select_rtable);
1396 
1397  /* and append it to the original rtable */
1398  parse->rtable = lappend(parse->rtable, child_rte);
1399 
1400  /* remember child's index in the SELECT rtable */
1401  old_child_rtis = lappend_int(old_child_rtis, appinfo->child_relid);
1402 
1403  /* and its new index in the final rtable */
1404  new_child_rtis = lappend_int(new_child_rtis, list_length(parse->rtable));
1405 
1406  /* if child is itself partitioned, update parent_relids */
1407  if (child_rte->inh)
1408  {
1409  Assert(child_rte->relkind == RELKIND_PARTITIONED_TABLE);
1410  parent_relids = bms_add_member(parent_relids, appinfo->child_relid);
1411  }
1412  }
1413 
1414  /*
1415  * It's possible that the RTIs we just assigned for the child rels in the
1416  * final rtable are different from what they were in the SELECT query.
1417  * Adjust the AppendRelInfos so that they will correctly map RT indexes to
1418  * the final indexes. We can do this left-to-right since no child rel's
1419  * final RT index could be greater than what it had in the SELECT query.
1420  */
1421  forboth(lc, old_child_rtis, lc2, new_child_rtis)
1422  {
1423  int old_child_rti = lfirst_int(lc);
1424  int new_child_rti = lfirst_int(lc2);
1425 
1426  if (old_child_rti == new_child_rti)
1427  continue; /* nothing to do */
1428 
1429  Assert(old_child_rti > new_child_rti);
1430 
1431  ChangeVarNodes((Node *) child_appinfos,
1432  old_child_rti, new_child_rti, 0);
1433  }
1434 
1435  /*
1436  * Now set up rangetable entries for subqueries for additional children
1437  * (the first child will just use the original ones). These all have to
1438  * look more or less real, or EXPLAIN will get unhappy; so we just make
1439  * them all clones of the original subqueries.
1440  */
1441  next_subquery_rti = list_length(parse->rtable) + 1;
1442  if (subqueryRTindexes != NULL)
1443  {
1444  int n_children = list_length(child_appinfos);
1445 
1446  while (n_children-- > 1)
1447  {
1448  int oldrti = -1;
1449 
1450  while ((oldrti = bms_next_member(subqueryRTindexes, oldrti)) >= 0)
1451  {
1452  RangeTblEntry *subqrte;
1453 
1454  subqrte = rt_fetch(oldrti, parse->rtable);
1455  parse->rtable = lappend(parse->rtable, copyObject(subqrte));
1456  }
1457  }
1458  }
1459 
1460  /*
1461  * The query for each child is obtained by translating the query for its
1462  * immediate parent, since the AppendRelInfo data we have shows deltas
1463  * between parents and children. We use the parent_parses array to
1464  * remember the appropriate query trees. This is indexed by parent relid.
1465  * Since the maximum number of parents is limited by the number of RTEs in
1466  * the SELECT query, we use that number to allocate the array. An extra
1467  * entry is needed since relids start from 1.
1468  */
1469  parent_parses = (Query **) palloc0((list_length(select_rtable) + 1) *
1470  sizeof(Query *));
1471  parent_parses[top_parentRTindex] = parse;
1472 
1473  /*
1474  * And now we can get on with generating a plan for each child table.
1475  */
1476  foreach(lc, child_appinfos)
1477  {
1478  AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
1479  Index this_subquery_rti = next_subquery_rti;
1480  Query *parent_parse;
1481  PlannerInfo *subroot;
1482  RangeTblEntry *child_rte;
1483  RelOptInfo *sub_final_rel;
1484  Path *subpath;
1485 
1486  /*
1487  * expand_inherited_rtentry() always processes a parent before any of
1488  * that parent's children, so the parent query for this relation
1489  * should already be available.
1490  */
1491  parent_parse = parent_parses[appinfo->parent_relid];
1492  Assert(parent_parse != NULL);
1493 
1494  /*
1495  * We need a working copy of the PlannerInfo so that we can control
1496  * propagation of information back to the main copy.
1497  */
1498  subroot = makeNode(PlannerInfo);
1499  memcpy(subroot, root, sizeof(PlannerInfo));
1500 
1501  /*
1502  * Generate modified query with this rel as target. We first apply
1503  * adjust_appendrel_attrs, which copies the Query and changes
1504  * references to the parent RTE to refer to the current child RTE,
1505  * then fool around with subquery RTEs.
1506  */
1507  subroot->parse = (Query *)
1508  adjust_appendrel_attrs(subroot,
1509  (Node *) parent_parse,
1510  1, &appinfo);
1511 
1512  /*
1513  * If there are securityQuals attached to the parent, move them to the
1514  * child rel (they've already been transformed properly for that).
1515  */
1516  parent_rte = rt_fetch(appinfo->parent_relid, subroot->parse->rtable);
1517  child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1518  child_rte->securityQuals = parent_rte->securityQuals;
1519  parent_rte->securityQuals = NIL;
1520 
1521  /*
1522  * HACK: setting this to a value other than INHKIND_NONE signals to
1523  * relation_excluded_by_constraints() to treat the result relation as
1524  * being an appendrel member.
1525  */
1526  subroot->inhTargetKind =
1527  (rootRelation != 0) ? INHKIND_PARTITIONED : INHKIND_INHERITED;
1528 
1529  /*
1530  * If this child is further partitioned, remember it as a parent.
1531  * Since a partitioned table does not have any data, we don't need to
1532  * create a plan for it, and we can stop processing it here. We do,
1533  * however, need to remember its modified PlannerInfo for use when
1534  * processing its children, since we'll update their varnos based on
1535  * the delta from immediate parent to child, not from top to child.
1536  *
1537  * Note: a very non-obvious point is that we have not yet added
1538  * duplicate subquery RTEs to the subroot's rtable. We mustn't,
1539  * because then its children would have two sets of duplicates,
1540  * confusing matters.
1541  */
1542  if (child_rte->inh)
1543  {
1544  Assert(child_rte->relkind == RELKIND_PARTITIONED_TABLE);
1545  parent_parses[appinfo->child_relid] = subroot->parse;
1546  continue;
1547  }
1548 
1549  /*
1550  * Set the nominal target relation of the ModifyTable node if not
1551  * already done. If the target is a partitioned table, we already set
1552  * nominalRelation to refer to the partition root, above. For
1553  * non-partitioned inheritance cases, we'll use the first child
1554  * relation (even if it's excluded) as the nominal target relation.
1555  * Because of the way expand_inherited_rtentry works, that should be
1556  * the RTE representing the parent table in its role as a simple
1557  * member of the inheritance set.
1558  *
1559  * It would be logically cleaner to *always* use the inheritance
1560  * parent RTE as the nominal relation; but that RTE is not otherwise
1561  * referenced in the plan in the non-partitioned inheritance case.
1562  * Instead the duplicate child RTE created by expand_inherited_rtentry
1563  * is used elsewhere in the plan, so using the original parent RTE
1564  * would give rise to confusing use of multiple aliases in EXPLAIN
1565  * output for what the user will think is the "same" table. OTOH,
1566  * it's not a problem in the partitioned inheritance case, because
1567  * there is no duplicate RTE for the parent.
1568  */
1569  if (nominalRelation < 0)
1570  nominalRelation = appinfo->child_relid;
1571 
1572  /*
1573  * As above, each child plan run needs its own append_rel_list and
1574  * rowmarks, which should start out as pristine copies of the
1575  * originals. There can't be any references to UPDATE/DELETE target
1576  * rels in them; but there could be subquery references, which we'll
1577  * fix up in a moment.
1578  */
1579  subroot->append_rel_list = copyObject(root->append_rel_list);
1580  subroot->rowMarks = copyObject(root->rowMarks);
1581 
1582  /*
1583  * If this isn't the first child Query, adjust Vars and jointree
1584  * entries to reference the appropriate set of subquery RTEs.
1585  */
1586  if (final_rtable != NIL && subqueryRTindexes != NULL)
1587  {
1588  int oldrti = -1;
1589 
1590  while ((oldrti = bms_next_member(subqueryRTindexes, oldrti)) >= 0)
1591  {
1592  Index newrti = next_subquery_rti++;
1593 
1594  ChangeVarNodes((Node *) subroot->parse, oldrti, newrti, 0);
1595  ChangeVarNodes((Node *) subroot->append_rel_list,
1596  oldrti, newrti, 0);
1597  ChangeVarNodes((Node *) subroot->rowMarks, oldrti, newrti, 0);
1598  }
1599  }
1600 
1601  /* There shouldn't be any OJ info to translate, as yet */
1602  Assert(subroot->join_info_list == NIL);
1603  /* and we haven't created PlaceHolderInfos, either */
1604  Assert(subroot->placeholder_list == NIL);
1605 
1606  /* Generate Path(s) for accessing this result relation */
1607  grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1608 
1609  /*
1610  * Select cheapest path in case there's more than one. We always run
1611  * modification queries to conclusion, so we care only for the
1612  * cheapest-total path.
1613  */
1614  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1615  set_cheapest(sub_final_rel);
1616  subpath = sub_final_rel->cheapest_total_path;
1617 
1618  /*
1619  * If this child rel was excluded by constraint exclusion, exclude it
1620  * from the result plan.
1621  */
1622  if (IS_DUMMY_REL(sub_final_rel))
1623  continue;
1624 
1625  /*
1626  * If this is the first non-excluded child, its post-planning rtable
1627  * becomes the initial contents of final_rtable; otherwise, copy its
1628  * modified subquery RTEs into final_rtable, to ensure we have sane
1629  * copies of those. Also save the first non-excluded child's version
1630  * of the rowmarks list; we assume all children will end up with
1631  * equivalent versions of that.
1632  */
1633  if (final_rtable == NIL)
1634  {
1635  final_rtable = subroot->parse->rtable;
1636  final_rowmarks = subroot->rowMarks;
1637  }
1638  else
1639  {
1640  Assert(list_length(final_rtable) ==
1641  list_length(subroot->parse->rtable));
1642  if (subqueryRTindexes != NULL)
1643  {
1644  int oldrti = -1;
1645 
1646  while ((oldrti = bms_next_member(subqueryRTindexes, oldrti)) >= 0)
1647  {
1648  Index newrti = this_subquery_rti++;
1649  RangeTblEntry *subqrte;
1650  ListCell *newrticell;
1651 
1652  subqrte = rt_fetch(newrti, subroot->parse->rtable);
1653  newrticell = list_nth_cell(final_rtable, newrti - 1);
1654  lfirst(newrticell) = subqrte;
1655  }
1656  }
1657  }
1658 
1659  /*
1660  * We need to collect all the RelOptInfos from all child plans into
1661  * the main PlannerInfo, since setrefs.c will need them. We use the
1662  * last child's simple_rel_array, so we have to propagate forward the
1663  * RelOptInfos that were already built in previous children.
1664  */
1665  Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1666  for (rti = 1; rti < save_rel_array_size; rti++)
1667  {
1668  RelOptInfo *brel = save_rel_array[rti];
1669 
1670  if (brel)
1671  subroot->simple_rel_array[rti] = brel;
1672  }
1673  save_rel_array_size = subroot->simple_rel_array_size;
1674  save_rel_array = subroot->simple_rel_array;
1675  save_append_rel_array = subroot->append_rel_array;
1676 
1677  /*
1678  * Make sure any initplans from this rel get into the outer list. Note
1679  * we're effectively assuming all children generate the same
1680  * init_plans.
1681  */
1682  root->init_plans = subroot->init_plans;
1683 
1684  /* Build list of sub-paths */
1685  subpaths = lappend(subpaths, subpath);
1686 
1687  /* Build list of modified subroots, too */
1688  subroots = lappend(subroots, subroot);
1689 
1690  /* Build list of target-relation RT indexes */
1691  resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1692 
1693  /* Build lists of per-relation WCO and RETURNING targetlists */
1694  if (parse->withCheckOptions)
1695  withCheckOptionLists = lappend(withCheckOptionLists,
1696  subroot->parse->withCheckOptions);
1697  if (parse->returningList)
1698  returningLists = lappend(returningLists,
1699  subroot->parse->returningList);
1700 
1701  Assert(!parse->onConflict);
1702  }
1703 
1704  /* Result path must go into outer query's FINAL upperrel */
1705  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1706 
1707  /*
1708  * We don't currently worry about setting final_rel's consider_parallel
1709  * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1710  * to get control here.
1711  */
1712 
1713  if (subpaths == NIL)
1714  {
1715  /*
1716  * We managed to exclude every child rel, so generate a dummy path
1717  * representing the empty set. Although it's clear that no data will
1718  * be updated or deleted, we will still need to have a ModifyTable
1719  * node so that any statement triggers are executed. (This could be
1720  * cleaner if we fixed nodeModifyTable.c to support zero child nodes,
1721  * but that probably wouldn't be a net win.)
1722  */
1723  Path *dummy_path;
1724 
1725  /* tlist processing never got done, either */
1726  root->processed_tlist = preprocess_targetlist(root);
1727  final_rel->reltarget = create_pathtarget(root, root->processed_tlist);
1728 
1729  /* Make a dummy path, cf set_dummy_rel_pathlist() */
1730  dummy_path = (Path *) create_append_path(NULL, final_rel, NIL, NIL,
1731  NIL, NULL, 0, false,
1732  NIL, -1);
1733 
1734  /* These lists must be nonempty to make a valid ModifyTable node */
1735  subpaths = list_make1(dummy_path);
1736  subroots = list_make1(root);
1737  resultRelations = list_make1_int(parse->resultRelation);
1738  if (parse->withCheckOptions)
1739  withCheckOptionLists = list_make1(parse->withCheckOptions);
1740  if (parse->returningList)
1741  returningLists = list_make1(parse->returningList);
1742  /* Disable tuple routing, too, just to be safe */
1743  root->partColsUpdated = false;
1744  }
1745  else
1746  {
1747  /*
1748  * Put back the final adjusted rtable into the master copy of the
1749  * Query. (We mustn't do this if we found no non-excluded children,
1750  * since we never saved an adjusted rtable at all.)
1751  */
1752  parse->rtable = final_rtable;
1753  root->simple_rel_array_size = save_rel_array_size;
1754  root->simple_rel_array = save_rel_array;
1755  root->append_rel_array = save_append_rel_array;
1756 
1757  /* Must reconstruct master's simple_rte_array, too */
1758  root->simple_rte_array = (RangeTblEntry **)
1759  palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1760  rti = 1;
1761  foreach(lc, final_rtable)
1762  {
1763  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
1764 
1765  root->simple_rte_array[rti++] = rte;
1766  }
1767 
1768  /* Put back adjusted rowmarks, too */
1769  root->rowMarks = final_rowmarks;
1770  }
1771 
1772  /*
1773  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1774  * have dealt with fetching non-locked marked rows, else we need to have
1775  * ModifyTable do that.
1776  */
1777  if (parse->rowMarks)
1778  rowMarks = NIL;
1779  else
1780  rowMarks = root->rowMarks;
1781 
1782  /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1783  add_path(final_rel, (Path *)
1784  create_modifytable_path(root, final_rel,
1785  parse->commandType,
1786  parse->canSetTag,
1787  nominalRelation,
1788  rootRelation,
1789  root->partColsUpdated,
1790  resultRelations,
1791  subpaths,
1792  subroots,
1793  withCheckOptionLists,
1794  returningLists,
1795  rowMarks,
1796  NULL,
1797  assign_special_exec_param(root)));
1798 }
1799 
1800 /*--------------------
1801  * grouping_planner
1802  * Perform planning steps related to grouping, aggregation, etc.
1803  *
1804  * This function adds all required top-level processing to the scan/join
1805  * Path(s) produced by query_planner.
1806  *
1807  * If inheritance_update is true, we're being called from inheritance_planner
1808  * and should not include a ModifyTable step in the resulting Path(s).
1809  * (inheritance_planner will create a single ModifyTable node covering all the
1810  * target tables.)
1811  *
1812  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1813  * tuple_fraction is interpreted as follows:
1814  * 0: expect all tuples to be retrieved (normal case)
1815  * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1816  * from the plan to be retrieved
1817  * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1818  * expected to be retrieved (ie, a LIMIT specification)
1819  *
1820  * Returns nothing; the useful output is in the Paths we attach to the
1821  * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1822  * root->processed_tlist contains the final processed targetlist.
1823  *
1824  * Note that we have not done set_cheapest() on the final rel; it's convenient
1825  * to leave this to the caller.
1826  *--------------------
1827  */
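/*
 * Rough illustration of the tuple_fraction conventions above (hypothetical
 * queries, not taken from any particular caller): a plain "SELECT * FROM t"
 * is planned with tuple_fraction = 0.0 (fetch everything); a query with a
 * constant "LIMIT 20" ends up with an absolute fraction of 20 after
 * preprocess_limit(); and a cursor expected to fetch only part of its
 * result might be planned with a fractional value such as 0.10.
 */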
1828 static void
1829 grouping_planner(PlannerInfo *root, bool inheritance_update,
1830  double tuple_fraction)
1831 {
1832  Query *parse = root->parse;
1833  int64 offset_est = 0;
1834  int64 count_est = 0;
1835  double limit_tuples = -1.0;
1836  bool have_postponed_srfs = false;
1837  PathTarget *final_target;
1838  List *final_targets;
1839  List *final_targets_contain_srfs;
1840  bool final_target_parallel_safe;
1841  RelOptInfo *current_rel;
1842  RelOptInfo *final_rel;
1843  FinalPathExtraData extra;
1844  ListCell *lc;
1845 
1846  /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1847  if (parse->limitCount || parse->limitOffset)
1848  {
1849  tuple_fraction = preprocess_limit(root, tuple_fraction,
1850  &offset_est, &count_est);
1851 
1852  /*
1853  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1854  * estimate the effects of using a bounded sort.
1855  */
1856  if (count_est > 0 && offset_est >= 0)
1857  limit_tuples = (double) count_est + (double) offset_est;
1858  }
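/*
 * Worked example of the computation above (assuming constant clauses):
 * "LIMIT 10 OFFSET 5" gives count_est = 10 and offset_est = 5, so
 * limit_tuples becomes 15.0 and a bounded sort can be costed for at most
 * 15 tuples.  If the OFFSET cannot be estimated (offset_est = -1), the
 * condition fails and limit_tuples stays at -1.0, i.e. unknown.
 */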
1859 
1860  /* Make tuple_fraction accessible to lower-level routines */
1861  root->tuple_fraction = tuple_fraction;
1862 
1863  if (parse->setOperations)
1864  {
1865  /*
1866  * If there's a top-level ORDER BY, assume we have to fetch all the
1867  * tuples. This might be too simplistic given all the hackery below
1868  * to possibly avoid the sort; but the odds of accurate estimates here
1869  * are pretty low anyway. XXX try to get rid of this in favor of
1870  * letting plan_set_operations generate both fast-start and
1871  * cheapest-total paths.
1872  */
1873  if (parse->sortClause)
1874  root->tuple_fraction = 0.0;
1875 
1876  /*
1877  * Construct Paths for set operations. The results will not need any
1878  * work except perhaps a top-level sort and/or LIMIT. Note that any
1879  * special work for recursive unions is the responsibility of
1880  * plan_set_operations.
1881  */
1882  current_rel = plan_set_operations(root);
1883 
1884  /*
1885  * We should not need to call preprocess_targetlist, since we must be
1886  * in a SELECT query node. Instead, use the processed_tlist returned
1887  * by plan_set_operations (since this tells whether it returned any
1888  * resjunk columns!), and transfer any sort key information from the
1889  * original tlist.
1890  */
1891  Assert(parse->commandType == CMD_SELECT);
1892 
1893  /* for safety, copy processed_tlist instead of modifying in-place */
1894  root->processed_tlist =
1895  postprocess_setop_tlist(copyObject(root->processed_tlist),
1896  parse->targetList);
1897 
1898  /* Also extract the PathTarget form of the setop result tlist */
1899  final_target = current_rel->cheapest_total_path->pathtarget;
1900 
1901  /* And check whether it's parallel safe */
1902  final_target_parallel_safe =
1903  is_parallel_safe(root, (Node *) final_target->exprs);
1904 
1905  /* The setop result tlist couldn't contain any SRFs */
1906  Assert(!parse->hasTargetSRFs);
1907  final_targets = final_targets_contain_srfs = NIL;
1908 
1909  /*
1910  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1911  * checked already, but let's make sure).
1912  */
1913  if (parse->rowMarks)
1914  ereport(ERROR,
1915  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1916  /*------
1917  translator: %s is a SQL row locking clause such as FOR UPDATE */
1918  errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1919  LCS_asString(linitial_node(RowMarkClause,
1920  parse->rowMarks)->strength))));
1921 
1922  /*
1923  * Calculate pathkeys that represent result ordering requirements
1924  */
1925  Assert(parse->distinctClause == NIL);
1926  root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1927  parse->sortClause,
1928  root->processed_tlist);
1929  }
1930  else
1931  {
1932  /* No set operations, do regular planning */
1933  PathTarget *sort_input_target;
1934  List *sort_input_targets;
1935  List *sort_input_targets_contain_srfs;
1936  bool sort_input_target_parallel_safe;
1937  PathTarget *grouping_target;
1938  List *grouping_targets;
1939  List *grouping_targets_contain_srfs;
1940  bool grouping_target_parallel_safe;
1941  PathTarget *scanjoin_target;
1942  List *scanjoin_targets;
1943  List *scanjoin_targets_contain_srfs;
1944  bool scanjoin_target_parallel_safe;
1945  bool scanjoin_target_same_exprs;
1946  bool have_grouping;
1947  AggClauseCosts agg_costs;
1948  WindowFuncLists *wflists = NULL;
1949  List *activeWindows = NIL;
1950  grouping_sets_data *gset_data = NULL;
1951  standard_qp_extra qp_extra;
1952 
1953  /* A recursive query should always have setOperations */
1954  Assert(!root->hasRecursion);
1955 
1956  /* Preprocess grouping sets and GROUP BY clause, if any */
1957  if (parse->groupingSets)
1958  {
1959  gset_data = preprocess_grouping_sets(root);
1960  }
1961  else
1962  {
1963  /* Preprocess regular GROUP BY clause, if any */
1964  if (parse->groupClause)
1965  parse->groupClause = preprocess_groupclause(root, NIL);
1966  }
1967 
1968  /*
1969  * Preprocess targetlist. Note that much of the remaining planning
1970  * work will be done with the PathTarget representation of tlists, but
1971  * we must also maintain the full representation of the final tlist so
1972  * that we can transfer its decoration (resnames etc) to the topmost
1973  * tlist of the finished Plan. This is kept in processed_tlist.
1974  */
1975  root->processed_tlist = preprocess_targetlist(root);
1976 
1977  /*
1978  * Collect statistics about aggregates for estimating costs, and mark
1979  * all the aggregates with resolved aggtranstypes. We must do this
1980  * before slicing and dicing the tlist into various pathtargets, else
1981  * some copies of the Aggref nodes might escape being marked with the
1982  * correct transtypes.
1983  *
1984  * Note: currently, we do not detect duplicate aggregates here. This
1985  * may result in somewhat-overestimated cost, which is fine for our
1986  * purposes since all Paths will get charged the same. But at some
1987  * point we might wish to do that detection in the planner, rather
1988  * than during executor startup.
1989  */
1990  MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1991  if (parse->hasAggs)
1992  {
1993  get_agg_clause_costs(root, (Node *) root->processed_tlist,
1994  AGGSPLIT_SIMPLE, &agg_costs);
1995  get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
1996  &agg_costs);
1997  }
1998 
1999  /*
2000  * Locate any window functions in the tlist. (We don't need to look
2001  * anywhere else, since expressions used in ORDER BY will be in there
2002  * too.) Note that they could all have been eliminated by constant
2003  * folding, in which case we don't need to do any more work.
2004  */
2005  if (parse->hasWindowFuncs)
2006  {
2007  wflists = find_window_functions((Node *) root->processed_tlist,
2008  list_length(parse->windowClause));
2009  if (wflists->numWindowFuncs > 0)
2010  activeWindows = select_active_windows(root, wflists);
2011  else
2012  parse->hasWindowFuncs = false;
2013  }
2014 
2015  /*
2016  * Preprocess MIN/MAX aggregates, if any. Note: be careful about
2017  * adding logic between here and the query_planner() call. Anything
2018  * that is needed in MIN/MAX-optimizable cases will have to be
2019  * duplicated in planagg.c.
2020  */
2021  if (parse->hasAggs)
2022  preprocess_minmax_aggregates(root);
2023 
2024  /*
2025  * Figure out whether there's a hard limit on the number of rows that
2026  * query_planner's result subplan needs to return. Even if we know a
2027  * hard limit overall, it doesn't apply if the query has any
2028  * grouping/aggregation operations, or SRFs in the tlist.
2029  */
2030  if (parse->groupClause ||
2031  parse->groupingSets ||
2032  parse->distinctClause ||
2033  parse->hasAggs ||
2034  parse->hasWindowFuncs ||
2035  parse->hasTargetSRFs ||
2036  root->hasHavingQual)
2037  root->limit_tuples = -1.0;
2038  else
2039  root->limit_tuples = limit_tuples;
2040 
2041  /* Set up data needed by standard_qp_callback */
2042  qp_extra.activeWindows = activeWindows;
2043  qp_extra.groupClause = (gset_data
2044  ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
2045  : parse->groupClause);
2046 
2047  /*
2048  * Generate the best unsorted and presorted paths for the scan/join
2049  * portion of this Query, ie the processing represented by the
2050  * FROM/WHERE clauses. (Note there may not be any presorted paths.)
2051  * We also generate (in standard_qp_callback) pathkey representations
2052  * of the query's sort clause, distinct clause, etc.
2053  */
2054  current_rel = query_planner(root, standard_qp_callback, &qp_extra);
2055 
2056  /*
2057  * Convert the query's result tlist into PathTarget format.
2058  *
2059  * Note: this cannot be done before query_planner() has performed
2060  * appendrel expansion, because that might add resjunk entries to
2061  * root->processed_tlist. Waiting till afterwards is also helpful
2062  * because the target width estimates can use per-Var width numbers
2063  * that were obtained within query_planner().
2064  */
2065  final_target = create_pathtarget(root, root->processed_tlist);
2066  final_target_parallel_safe =
2067  is_parallel_safe(root, (Node *) final_target->exprs);
2068 
2069  /*
2070  * If ORDER BY was given, consider whether we should use a post-sort
2071  * projection, and compute the adjusted target for preceding steps if
2072  * so.
2073  */
2074  if (parse->sortClause)
2075  {
2076  sort_input_target = make_sort_input_target(root,
2077  final_target,
2078  &have_postponed_srfs);
2079  sort_input_target_parallel_safe =
2080  is_parallel_safe(root, (Node *) sort_input_target->exprs);
2081  }
2082  else
2083  {
2084  sort_input_target = final_target;
2085  sort_input_target_parallel_safe = final_target_parallel_safe;
2086  }
2087 
2088  /*
2089  * If we have window functions to deal with, the output from any
2090  * grouping step needs to be what the window functions want;
2091  * otherwise, it should be sort_input_target.
2092  */
2093  if (activeWindows)
2094  {
2095  grouping_target = make_window_input_target(root,
2096  final_target,
2097  activeWindows);
2098  grouping_target_parallel_safe =
2099  is_parallel_safe(root, (Node *) grouping_target->exprs);
2100  }
2101  else
2102  {
2103  grouping_target = sort_input_target;
2104  grouping_target_parallel_safe = sort_input_target_parallel_safe;
2105  }
2106 
2107  /*
2108  * If we have grouping or aggregation to do, the topmost scan/join
2109  * plan node must emit what the grouping step wants; otherwise, it
2110  * should emit grouping_target.
2111  */
2112  have_grouping = (parse->groupClause || parse->groupingSets ||
2113  parse->hasAggs || root->hasHavingQual);
2114  if (have_grouping)
2115  {
2116  scanjoin_target = make_group_input_target(root, final_target);
2117  scanjoin_target_parallel_safe =
2118  is_parallel_safe(root, (Node *) scanjoin_target->exprs);
2119  }
2120  else
2121  {
2122  scanjoin_target = grouping_target;
2123  scanjoin_target_parallel_safe = grouping_target_parallel_safe;
2124  }
2125 
2126  /*
2127  * If there are any SRFs in the targetlist, we must separate each of
2128  * these PathTargets into SRF-computing and SRF-free targets. Replace
2129  * each of the named targets with a SRF-free version, and remember the
2130  * list of additional projection steps we need to add afterwards.
2131  */
2132  if (parse->hasTargetSRFs)
2133  {
2134  /* final_target doesn't recompute any SRFs in sort_input_target */
2135  split_pathtarget_at_srfs(root, final_target, sort_input_target,
2136  &final_targets,
2137  &final_targets_contain_srfs);
2138  final_target = linitial_node(PathTarget, final_targets);
2139  Assert(!linitial_int(final_targets_contain_srfs));
2140  /* likewise for sort_input_target vs. grouping_target */
2141  split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
2142  &sort_input_targets,
2143  &sort_input_targets_contain_srfs);
2144  sort_input_target = linitial_node(PathTarget, sort_input_targets);
2145  Assert(!linitial_int(sort_input_targets_contain_srfs));
2146  /* likewise for grouping_target vs. scanjoin_target */
2147  split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
2148  &grouping_targets,
2149  &grouping_targets_contain_srfs);
2150  grouping_target = linitial_node(PathTarget, grouping_targets);
2151  Assert(!linitial_int(grouping_targets_contain_srfs));
2152  /* scanjoin_target will not have any SRFs precomputed for it */
2153  split_pathtarget_at_srfs(root, scanjoin_target, NULL,
2154  &scanjoin_targets,
2155  &scanjoin_targets_contain_srfs);
2156  scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
2157  Assert(!linitial_int(scanjoin_targets_contain_srfs));
2158  }
2159  else
2160  {
2161  /* initialize lists; for most of these, dummy values are OK */
2162  final_targets = final_targets_contain_srfs = NIL;
2163  sort_input_targets = sort_input_targets_contain_srfs = NIL;
2164  grouping_targets = grouping_targets_contain_srfs = NIL;
2165  scanjoin_targets = list_make1(scanjoin_target);
2166  scanjoin_targets_contain_srfs = NIL;
2167  }
2168 
2169  /* Apply scan/join target. */
2170  scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
2171  && equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
2172  apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
2173  scanjoin_targets_contain_srfs,
2174  scanjoin_target_parallel_safe,
2175  scanjoin_target_same_exprs);
2176 
2177  /*
2178  * Save the various upper-rel PathTargets we just computed into
2179  * root->upper_targets[]. The core code doesn't use this, but it
2180  * provides a convenient place for extensions to get at the info. For
2181  * consistency, we save all the intermediate targets, even though some
2182  * of the corresponding upperrels might not be needed for this query.
2183  */
2184  root->upper_targets[UPPERREL_FINAL] = final_target;
2185  root->upper_targets[UPPERREL_ORDERED] = final_target;
2186  root->upper_targets[UPPERREL_DISTINCT] = sort_input_target;
2187  root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
2188  root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
2189 
2190  /*
2191  * If we have grouping and/or aggregation, consider ways to implement
2192  * that. We build a new upperrel representing the output of this
2193  * phase.
2194  */
2195  if (have_grouping)
2196  {
2197  current_rel = create_grouping_paths(root,
2198  current_rel,
2199  grouping_target,
2200  grouping_target_parallel_safe,
2201  &agg_costs,
2202  gset_data);
2203  /* Fix things up if grouping_target contains SRFs */
2204  if (parse->hasTargetSRFs)
2205  adjust_paths_for_srfs(root, current_rel,
2206  grouping_targets,
2207  grouping_targets_contain_srfs);
2208  }
2209 
2210  /*
2211  * If we have window functions, consider ways to implement those. We
2212  * build a new upperrel representing the output of this phase.
2213  */
2214  if (activeWindows)
2215  {
2216  current_rel = create_window_paths(root,
2217  current_rel,
2218  grouping_target,
2219  sort_input_target,
2220  sort_input_target_parallel_safe,
2221  wflists,
2222  activeWindows);
2223  /* Fix things up if sort_input_target contains SRFs */
2224  if (parse->hasTargetSRFs)
2225  adjust_paths_for_srfs(root, current_rel,
2226  sort_input_targets,
2227  sort_input_targets_contain_srfs);
2228  }
2229 
2230  /*
2231  * If there is a DISTINCT clause, consider ways to implement that. We
2232  * build a new upperrel representing the output of this phase.
2233  */
2234  if (parse->distinctClause)
2235  {
2236  current_rel = create_distinct_paths(root,
2237  current_rel);
2238  }
2239  } /* end of if (setOperations) */
2240 
2241  /*
2242  * If ORDER BY was given, consider ways to implement that, and generate a
2243  * new upperrel containing only paths that emit the correct ordering and
2244  * project the correct final_target. We can apply the original
2245  * limit_tuples limit in sort costing here, but only if there are no
2246  * postponed SRFs.
2247  */
2248  if (parse->sortClause)
2249  {
2250  current_rel = create_ordered_paths(root,
2251  current_rel,
2252  final_target,
2253  final_target_parallel_safe,
2254  have_postponed_srfs ? -1.0 :
2255  limit_tuples);
2256  /* Fix things up if final_target contains SRFs */
2257  if (parse->hasTargetSRFs)
2258  adjust_paths_for_srfs(root, current_rel,
2259  final_targets,
2260  final_targets_contain_srfs);
2261  }
2262 
2263  /*
2264  * Now we are prepared to build the final-output upperrel.
2265  */
2266  final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
2267 
2268  /*
2269  * If the input rel is marked consider_parallel and there's nothing that's
2270  * not parallel-safe in the LIMIT clause, then the final_rel can be marked
2271  * consider_parallel as well. Note that if the query has rowMarks or is
2272  * not a SELECT, consider_parallel will be false for every relation in the
2273  * query.
2274  */
2275  if (current_rel->consider_parallel &&
2276  is_parallel_safe(root, parse->limitOffset) &&
2277  is_parallel_safe(root, parse->limitCount))
2278  final_rel->consider_parallel = true;
2279 
2280  /*
2281  * If the current_rel belongs to a single FDW, so does the final_rel.
2282  */
2283  final_rel->serverid = current_rel->serverid;
2284  final_rel->userid = current_rel->userid;
2285  final_rel->useridiscurrent = current_rel->useridiscurrent;
2286  final_rel->fdwroutine = current_rel->fdwroutine;
2287 
2288  /*
2289  * Generate paths for the final_rel. Insert all surviving paths, with
2290  * LockRows, Limit, and/or ModifyTable steps added if needed.
2291  */
2292  foreach(lc, current_rel->pathlist)
2293  {
2294  Path *path = (Path *) lfirst(lc);
2295 
2296  /*
2297  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
2298  * (Note: we intentionally test parse->rowMarks not root->rowMarks
2299  * here. If there are only non-locking rowmarks, they should be
2300  * handled by the ModifyTable node instead. However, root->rowMarks
2301  * is what goes into the LockRows node.)
2302  */
2303  if (parse->rowMarks)
2304  {
2305  path = (Path *) create_lockrows_path(root, final_rel, path,
2306  root->rowMarks,
2307  assign_special_exec_param(root));
2308  }
2309 
2310  /*
2311  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
2312  */
2313  if (limit_needed(parse))
2314  {
2315  path = (Path *) create_limit_path(root, final_rel, path,
2316  parse->limitOffset,
2317  parse->limitCount,
2318  offset_est, count_est);
2319  }
2320 
2321  /*
2322  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
2323  * inheritance_planner, add the ModifyTable node.
2324  */
2325  if (parse->commandType != CMD_SELECT && !inheritance_update)
2326  {
2327  Index rootRelation;
2328  List *withCheckOptionLists;
2329  List *returningLists;
2330  List *rowMarks;
2331 
2332  /*
2333  * If target is a partition root table, we need to mark the
2334  * ModifyTable node appropriately for that.
2335  */
2336  if (rt_fetch(parse->resultRelation, parse->rtable)->relkind ==
2337  RELKIND_PARTITIONED_TABLE)
2338  rootRelation = parse->resultRelation;
2339  else
2340  rootRelation = 0;
2341 
2342  /*
2343  * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2344  * needed.
2345  */
2346  if (parse->withCheckOptions)
2347  withCheckOptionLists = list_make1(parse->withCheckOptions);
2348  else
2349  withCheckOptionLists = NIL;
2350 
2351  if (parse->returningList)
2352  returningLists = list_make1(parse->returningList);
2353  else
2354  returningLists = NIL;
2355 
2356  /*
2357  * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2358  * will have dealt with fetching non-locked marked rows, else we
2359  * need to have ModifyTable do that.
2360  */
2361  if (parse->rowMarks)
2362  rowMarks = NIL;
2363  else
2364  rowMarks = root->rowMarks;
2365 
2366  path = (Path *)
2367  create_modifytable_path(root, final_rel,
2368  parse->commandType,
2369  parse->canSetTag,
2370  parse->resultRelation,
2371  rootRelation,
2372  false,
2373  list_make1_int(parse->resultRelation),
2374  list_make1(path),
2375  list_make1(root),
2376  withCheckOptionLists,
2377  returningLists,
2378  rowMarks,
2379  parse->onConflict,
2380  assign_special_exec_param(root));
2381  }
2382 
2383  /* And shove it into final_rel */
2384  add_path(final_rel, path);
2385  }
2386 
2387  /*
2388  * Generate partial paths for final_rel, too, if outer query levels might
2389  * be able to make use of them.
2390  */
2391  if (final_rel->consider_parallel && root->query_level > 1 &&
2392  !limit_needed(parse))
2393  {
2394  Assert(!parse->rowMarks && parse->commandType == CMD_SELECT);
2395  foreach(lc, current_rel->partial_pathlist)
2396  {
2397  Path *partial_path = (Path *) lfirst(lc);
2398 
2399  add_partial_path(final_rel, partial_path);
2400  }
2401  }
2402 
2403  extra.limit_needed = limit_needed(parse);
2404  extra.limit_tuples = limit_tuples;
2405  extra.count_est = count_est;
2406  extra.offset_est = offset_est;
2407 
2408  /*
2409  * If there is an FDW that's responsible for all baserels of the query,
2410  * let it consider adding ForeignPaths.
2411  */
2412  if (final_rel->fdwroutine &&
2413  final_rel->fdwroutine->GetForeignUpperPaths)
2414  final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2415  current_rel, final_rel,
2416  &extra);
2417 
2418  /* Let extensions possibly add some more paths */
2419  if (create_upper_paths_hook)
2420  (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2421  current_rel, final_rel, &extra);
2422 
2423  /* Note: currently, we leave it to callers to do set_cheapest() */
2424 }
2425 
2426 /*
2427  * Do preprocessing for groupingSets clause and related data. This handles the
2428  * preliminary steps of expanding the grouping sets, organizing them into lists
2429  * of rollups, and preparing annotations which will later be filled in with
2430  * size estimates.
2431  */
2432 static grouping_sets_data *
2433 preprocess_grouping_sets(PlannerInfo *root)
2434 {
2435  Query *parse = root->parse;
2436  List *sets;
2437  int maxref = 0;
2438  ListCell *lc;
2439  ListCell *lc_set;
2440  grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2441 
2442  parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2443 
2444  gd->any_hashable = false;
2445  gd->unhashable_refs = NULL;
2446  gd->unsortable_refs = NULL;
2447  gd->unsortable_sets = NIL;
2448 
2449  if (parse->groupClause)
2450  {
2451  ListCell *lc;
2452 
2453  foreach(lc, parse->groupClause)
2454  {
2455  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2456  Index ref = gc->tleSortGroupRef;
2457 
2458  if (ref > maxref)
2459  maxref = ref;
2460 
2461  if (!gc->hashable)
2462  gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2463 
2464  if (!OidIsValid(gc->sortop))
2465  gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2466  }
2467  }
2468 
2469  /* Allocate workspace array for remapping */
2470  gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2471 
2472  /*
2473  * If we have any unsortable sets, we must extract them before trying to
2474  * prepare rollups. Unsortable sets don't go through
2475  * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2476  * here.
2477  */
2478  if (!bms_is_empty(gd->unsortable_refs))
2479  {
2480  List *sortable_sets = NIL;
2481 
2482  foreach(lc, parse->groupingSets)
2483  {
2484  List *gset = (List *) lfirst(lc);
2485 
2486  if (bms_overlap_list(gd->unsortable_refs, gset))
2487  {
2488  GroupingSetData *gs = makeNode(GroupingSetData);
2489 
2490  gs->set = gset;
2491  gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2492 
2493  /*
2494  * We must enforce here that an unsortable set is hashable;
2495  * later code assumes this. Parse analysis only checks that
2496  * every individual column is either hashable or sortable.
2497  *
2498  * Note that passing this test doesn't guarantee we can
2499  * generate a plan; there might be other showstoppers.
2500  */
2501  if (bms_overlap_list(gd->unhashable_refs, gset))
2502  ereport(ERROR,
2503  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2504  errmsg("could not implement GROUP BY"),
2505  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2506  }
2507  else
2508  sortable_sets = lappend(sortable_sets, gset);
2509  }
2510 
2511  if (sortable_sets)
2512  sets = extract_rollup_sets(sortable_sets);
2513  else
2514  sets = NIL;
2515  }
2516  else
2517  sets = extract_rollup_sets(parse->groupingSets);
2518 
2519  foreach(lc_set, sets)
2520  {
2521  List *current_sets = (List *) lfirst(lc_set);
2522  RollupData *rollup = makeNode(RollupData);
2523  GroupingSetData *gs;
2524 
2525  /*
2526  * Reorder the current list of grouping sets into correct prefix
2527  * order. If only one aggregation pass is needed, try to make the
2528  * list match the ORDER BY clause; if more than one pass is needed, we
2529  * don't bother with that.
2530  *
2531  * Note that this reorders the sets from smallest-member-first to
2532  * largest-member-first, and applies the GroupingSetData annotations,
2533  * though the data will be filled in later.
2534  */
2535  current_sets = reorder_grouping_sets(current_sets,
2536  (list_length(sets) == 1
2537  ? parse->sortClause
2538  : NIL));
2539 
2540  /*
2541  * Get the initial (and therefore largest) grouping set.
2542  */
2543  gs = linitial_node(GroupingSetData, current_sets);
2544 
2545  /*
2546  * Order the groupClause appropriately. If the first grouping set is
2547  * empty, then the groupClause must also be empty; otherwise we have
2548  * to force the groupClause to match that grouping set's order.
2549  *
2550  * (The first grouping set can be empty even though parse->groupClause
2551  * is not empty only if all non-empty grouping sets are unsortable.
2552  * The groupClauses for hashed grouping sets are built later on.)
2553  */
2554  if (gs->set)
2555  rollup->groupClause = preprocess_groupclause(root, gs->set);
2556  else
2557  rollup->groupClause = NIL;
2558 
2559  /*
2560  * Is it hashable? We pretend empty sets are hashable even though we
2561  * actually force them not to be hashed later. But don't bother if
2562  * there's nothing but empty sets (since in that case we can't hash
2563  * anything).
2564  */
2565  if (gs->set &&
2566  !bms_overlap_list(gd->unhashable_refs, gs->set))
2567  {
2568  rollup->hashable = true;
2569  gd->any_hashable = true;
2570  }
2571 
2572  /*
2573  * Now that we've pinned down an order for the groupClause for this
2574  * list of grouping sets, we need to remap the entries in the grouping
2575  * sets from sortgrouprefs to plain indices (0-based) into the
2576  * groupClause for this collection of grouping sets. We keep the
2577  * original form for later use, though.
2578  */
2579  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2580  current_sets,
2581  gd->tleref_to_colnum_map);
2582  rollup->gsets_data = current_sets;
2583 
2584  gd->rollups = lappend(gd->rollups, rollup);
2585  }
2586 
2587  if (gd->unsortable_sets)
2588  {
2589  /*
2590  * We have not yet pinned down a groupclause for this, but we will
2591  * need index-based lists for estimation purposes. Construct
2592  * hash_sets_idx based on the entire original groupclause for now.
2593  */
2594  gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2595  gd->unsortable_sets,
2596  gd->tleref_to_colnum_map);
2597  gd->any_hashable = true;
2598  }
2599 
2600  return gd;
2601 }
2602 
2603 /*
2604  * Given a groupclause and a list of GroupingSetData, return equivalent sets
2605  * (without annotation) mapped to indexes into the given groupclause.
2606  */
2607 static List *
2608 remap_to_groupclause_idx(List *groupClause,
2609  List *gsets,
2610  int *tleref_to_colnum_map)
2611 {
2612  int ref = 0;
2613  List *result = NIL;
2614  ListCell *lc;
2615 
2616  foreach(lc, groupClause)
2617  {
2618  SortGroupClause *gc = lfirst_node(SortGroupClause, lc);
2619 
2620  tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2621  }
2622 
2623  foreach(lc, gsets)
2624  {
2625  List *set = NIL;
2626  ListCell *lc2;
2627  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
2628 
2629  foreach(lc2, gs->set)
2630  {
2631  set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2632  }
2633 
2634  result = lappend(result, set);
2635  }
2636 
2637  return result;
2638 }
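/*
 * Small worked example of the mapping above: if the groupClause entries
 * carry tleSortGroupRefs 5, 7 and 9 (in that order), the map reads
 * 5 -> 0, 7 -> 1, 9 -> 2, and an input grouping set (7, 9) is returned
 * as the index list (1, 2).
 */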
2639 
2640 
2641 /*
2642  * preprocess_rowmarks - set up PlanRowMarks if needed
2643  */
2644 static void
2645 preprocess_rowmarks(PlannerInfo *root)
2646 {
2647  Query *parse = root->parse;
2648  Bitmapset *rels;
2649  List *prowmarks;
2650  ListCell *l;
2651  int i;
2652 
2653  if (parse->rowMarks)
2654  {
2655  /*
2656  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2657  * grouping, since grouping renders a reference to individual tuple
2658  * CTIDs invalid. This is also checked at parse time, but that's
2659  * insufficient because of rule substitution, query pullup, etc.
2660  */
2661  CheckSelectLocking(parse, linitial_node(RowMarkClause,
2662  parse->rowMarks)->strength);
2663  }
2664  else
2665  {
2666  /*
2667  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2668  * UPDATE/SHARE.
2669  */
2670  if (parse->commandType != CMD_UPDATE &&
2671  parse->commandType != CMD_DELETE)
2672  return;
2673  }
2674 
2675  /*
2676  * We need to have rowmarks for all base relations except the target. We
2677  * make a bitmapset of all base rels and then remove the items we don't
2678  * need or have FOR [KEY] UPDATE/SHARE marks for.
2679  */
2680  rels = get_relids_in_jointree((Node *) parse->jointree, false);
2681  if (parse->resultRelation)
2682  rels = bms_del_member(rels, parse->resultRelation);
2683 
2684  /*
2685  * Convert RowMarkClauses to PlanRowMark representation.
2686  */
2687  prowmarks = NIL;
2688  foreach(l, parse->rowMarks)
2689  {
2690  RowMarkClause *rc = lfirst_node(RowMarkClause, l);
2691  RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2692  PlanRowMark *newrc;
2693 
2694  /*
2695  * Currently, it is syntactically impossible to have FOR UPDATE et al
2696  * applied to an update/delete target rel. If that ever becomes
2697  * possible, we should drop the target from the PlanRowMark list.
2698  */
2699  Assert(rc->rti != parse->resultRelation);
2700 
2701  /*
2702  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2703  * can't support true locking. Subqueries that got flattened into the
2704  * main query should be ignored completely. Any that didn't will get
2705  * ROW_MARK_COPY items in the next loop.
2706  */
2707  if (rte->rtekind != RTE_RELATION)
2708  continue;
2709 
2710  rels = bms_del_member(rels, rc->rti);
2711 
2712  newrc = makeNode(PlanRowMark);
2713  newrc->rti = newrc->prti = rc->rti;
2714  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2715  newrc->markType = select_rowmark_type(rte, rc->strength);
2716  newrc->allMarkTypes = (1 << newrc->markType);
2717  newrc->strength = rc->strength;
2718  newrc->waitPolicy = rc->waitPolicy;
2719  newrc->isParent = false;
2720 
2721  prowmarks = lappend(prowmarks, newrc);
2722  }
2723 
2724  /*
2725  * Now, add rowmarks for any non-target, non-locked base relations.
2726  */
2727  i = 0;
2728  foreach(l, parse->rtable)
2729  {
2730  RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
2731  PlanRowMark *newrc;
2732 
2733  i++;
2734  if (!bms_is_member(i, rels))
2735  continue;
2736 
2737  newrc = makeNode(PlanRowMark);
2738  newrc->rti = newrc->prti = i;
2739  newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2740  newrc->markType = select_rowmark_type(rte, LCS_NONE);
2741  newrc->allMarkTypes = (1 << newrc->markType);
2742  newrc->strength = LCS_NONE;
2743  newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2744  newrc->isParent = false;
2745 
2746  prowmarks = lappend(prowmarks, newrc);
2747  }
2748 
2749  root->rowMarks = prowmarks;
2750 }
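/*
 * Sketch of the outcome for a hypothetical "SELECT ... FROM a JOIN b ON ...
 * FOR UPDATE OF a": the RowMarkClause on "a" becomes a PlanRowMark with
 * ROW_MARK_EXCLUSIVE (assuming a regular table), while "b", being a
 * non-target, non-locked base rel, gets a mark chosen via
 * select_rowmark_type(rte, LCS_NONE), typically ROW_MARK_REFERENCE.
 */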
2751 
2752 /*
2753  * Select RowMarkType to use for a given table
2754  */
2755 RowMarkType
2756 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2757 {
2758  if (rte->rtekind != RTE_RELATION)
2759  {
2760  /* If it's not a table at all, use ROW_MARK_COPY */
2761  return ROW_MARK_COPY;
2762  }
2763  else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2764  {
2765  /* Let the FDW select the rowmark type, if it wants to */
2766  FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2767 
2768  if (fdwroutine->GetForeignRowMarkType != NULL)
2769  return fdwroutine->GetForeignRowMarkType(rte, strength);
2770  /* Otherwise, use ROW_MARK_COPY by default */
2771  return ROW_MARK_COPY;
2772  }
2773  else
2774  {
2775  /* Regular table, apply the appropriate lock type */
2776  switch (strength)
2777  {
2778  case LCS_NONE:
2779 
2780  /*
2781  * We don't need a tuple lock, only the ability to re-fetch
2782  * the row.
2783  */
2784  return ROW_MARK_REFERENCE;
2785  break;
2786  case LCS_FORKEYSHARE:
2787  return ROW_MARK_KEYSHARE;
2788  break;
2789  case LCS_FORSHARE:
2790  return ROW_MARK_SHARE;
2791  break;
2792  case LCS_FORNOKEYUPDATE:
2793  return ROW_MARK_NOKEYEXCLUSIVE;
2794  break;
2795  case LCS_FORUPDATE:
2796  return ROW_MARK_EXCLUSIVE;
2797  break;
2798  }
2799  elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2800  return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2801  }
2802 }
2803 
2804 /*
2805  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2806  *
2807  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2808  * results back in *count_est and *offset_est. These variables are set to
2809  * 0 if the corresponding clause is not present, and -1 if it's present
2810  * but we couldn't estimate the value for it. (The "0" convention is OK
2811  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2812  * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2813  * usual practice of never estimating less than one row.) These values will
2814  * be passed to create_limit_path, which see if you change this code.
2815  *
2816  * The return value is the suitably adjusted tuple_fraction to use for
2817  * planning the query. This adjustment is not overridable, since it reflects
2818  * plan actions that grouping_planner() will certainly take, not assumptions
2819  * about context.
2820  */
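/*
 * Sketch of those conventions with hypothetical clauses: "LIMIT 10 OFFSET 20"
 * yields *count_est = 10 and *offset_est = 20; "LIMIT ALL" (a NULL constant)
 * yields *count_est = 0, as though no LIMIT were present; and a LIMIT
 * expression whose value cannot be reduced to a constant yields
 * *count_est = -1.
 */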
2821 static double
2822 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2823  int64 *offset_est, int64 *count_est)
2824 {
2825  Query *parse = root->parse;
2826  Node *est;
2827  double limit_fraction;
2828 
2829  /* Should not be called unless LIMIT or OFFSET */
2830  Assert(parse->limitCount || parse->limitOffset);
2831 
2832  /*
2833  * Try to obtain the clause values. We use estimate_expression_value
2834  * primarily because it can sometimes do something useful with Params.
2835  */
2836  if (parse->limitCount)
2837  {
2838  est = estimate_expression_value(root, parse->limitCount);
2839  if (est && IsA(est, Const))
2840  {
2841  if (((Const *) est)->constisnull)
2842  {
2843  /* NULL indicates LIMIT ALL, ie, no limit */
2844  *count_est = 0; /* treat as not present */
2845  }
2846  else
2847  {
2848  *count_est = DatumGetInt64(((Const *) est)->constvalue);
2849  if (*count_est <= 0)
2850  *count_est = 1; /* force to at least 1 */
2851  }
2852  }
2853  else
2854  *count_est = -1; /* can't estimate */
2855  }
2856  else
2857  *count_est = 0; /* not present */
2858 
2859  if (parse->limitOffset)
2860  {
2861  est = estimate_expression_value(root, parse->limitOffset);
2862  if (est && IsA(est, Const))
2863  {
2864  if (((Const *) est)->constisnull)
2865  {
2866  /* Treat NULL as no offset; the executor will too */
2867  *offset_est = 0; /* treat as not present */
2868  }
2869  else
2870  {
2871  *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2872  if (*offset_est < 0)
2873  *offset_est = 0; /* treat as not present */
2874  }
2875  }
2876  else
2877  *offset_est = -1; /* can't estimate */
2878  }
2879  else
2880  *offset_est = 0; /* not present */
2881 
2882  if (*count_est != 0)
2883  {
2884  /*
2885  * A LIMIT clause limits the absolute number of tuples returned.
2886  * However, if it's not a constant LIMIT then we have to guess; for
2887  * lack of a better idea, assume 10% of the plan's result is wanted.
2888  */
2889  if (*count_est < 0 || *offset_est < 0)
2890  {
2891  /* LIMIT or OFFSET is an expression ... punt ... */
2892  limit_fraction = 0.10;
2893  }
2894  else
2895  {
2896  /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2897  limit_fraction = (double) *count_est + (double) *offset_est;
2898  }
2899 
2900  /*
2901  * If we have absolute limits from both caller and LIMIT, use the
2902  * smaller value; likewise if they are both fractional. If one is
2903  * fractional and the other absolute, we can't easily determine which
2904  * is smaller, but we use the heuristic that the absolute will usually
2905  * be smaller.
2906  */
2907  if (tuple_fraction >= 1.0)
2908  {
2909  if (limit_fraction >= 1.0)
2910  {
2911  /* both absolute */
2912  tuple_fraction = Min(tuple_fraction, limit_fraction);
2913  }
2914  else
2915  {
2916  /* caller absolute, limit fractional; use caller's value */
2917  }
2918  }
2919  else if (tuple_fraction > 0.0)
2920  {
2921  if (limit_fraction >= 1.0)
2922  {
2923  /* caller fractional, limit absolute; use limit */
2924  tuple_fraction = limit_fraction;
2925  }
2926  else
2927  {
2928  /* both fractional */
2929  tuple_fraction = Min(tuple_fraction, limit_fraction);
2930  }
2931  }
2932  else
2933  {
2934  /* no info from caller, just use limit */
2935  tuple_fraction = limit_fraction;
2936  }
2937  }
2938  else if (*offset_est != 0 && tuple_fraction > 0.0)
2939  {
2940  /*
2941  * We have an OFFSET but no LIMIT. This acts entirely differently
2942  * from the LIMIT case: here, we need to increase rather than decrease
2943  * the caller's tuple_fraction, because the OFFSET acts to cause more
2944  * tuples to be fetched instead of fewer. This only matters if we got
2945  * a tuple_fraction > 0, however.
2946  *
2947  * As above, use 10% if OFFSET is present but unestimatable.
2948  */
2949  if (*offset_est < 0)
2950  limit_fraction = 0.10;
2951  else
2952  limit_fraction = (double) *offset_est;
2953 
2954  /*
2955  * If we have absolute counts from both caller and OFFSET, add them
2956  * together; likewise if they are both fractional. If one is
2957  * fractional and the other absolute, we want to take the larger, and
2958  * we heuristically assume that's the fractional one.
2959  */
2960  if (tuple_fraction >= 1.0)
2961  {
2962  if (limit_fraction >= 1.0)
2963  {
2964  /* both absolute, so add them together */
2965  tuple_fraction += limit_fraction;
2966  }
2967  else
2968  {
2969  /* caller absolute, limit fractional; use limit */
2970  tuple_fraction = limit_fraction;
2971  }
2972  }
2973  else
2974  {
2975  if (limit_fraction >= 1.0)
2976  {
2977  /* caller fractional, limit absolute; use caller's value */
2978  }
2979  else
2980  {
2981  /* both fractional, so add them together */
2982  tuple_fraction += limit_fraction;
2983  if (tuple_fraction >= 1.0)
2984  tuple_fraction = 0.0; /* assume fetch all */
2985  }
2986  }
2987  }
2988 
2989  return tuple_fraction;
2990 }
2991 
2992 /*
2993  * limit_needed - do we actually need a Limit plan node?
2994  *
2995  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2996  * a Limit node. This is worth checking for because "OFFSET 0" is a common
2997  * locution for an optimization fence. (Because other places in the planner
2998  * merely check whether parse->limitOffset isn't NULL, it will still work as
2999  * an optimization fence --- we're just suppressing unnecessary run-time
3000  * overhead.)
3001  *
3002  * This might look like it could be merged into preprocess_limit, but there's
3003  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
3004  * in preprocess_limit it's good enough to consider estimated values.
3005  */
3006 bool
3007 limit_needed(Query *parse)
3008 {
3009  Node *node;
3010 
3011  node = parse->limitCount;
3012  if (node)
3013  {
3014  if (IsA(node, Const))
3015  {
3016  /* NULL indicates LIMIT ALL, ie, no limit */
3017  if (!((Const *) node)->constisnull)
3018  return true; /* LIMIT with a constant value */
3019  }
3020  else
3021  return true; /* non-constant LIMIT */
3022  }
3023 
3024  node = parse->limitOffset;
3025  if (node)
3026  {
3027  if (IsA(node, Const))
3028  {
3029  /* Treat NULL as no offset; the executor would too */
3030  if (!((Const *) node)->constisnull)
3031  {
3032  int64 offset = DatumGetInt64(((Const *) node)->constvalue);
3033 
3034  if (offset != 0)
3035  return true; /* OFFSET with a nonzero value */
3036  }
3037  }
3038  else
3039  return true; /* non-constant OFFSET */
3040  }
3041 
3042  return false; /* don't need a Limit plan node */
3043 }
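/*
 * Examples (hypothetical clauses): "OFFSET 0" and "LIMIT ALL" each leave
 * limit_needed() returning false, so no Limit node is added, though
 * "OFFSET 0" still acts as an optimization fence elsewhere.  "LIMIT 10",
 * "OFFSET 1", or any non-constant LIMIT/OFFSET expression makes it
 * return true.
 */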
3044 
3045 
3046 /*
3047  * remove_useless_groupby_columns
3048  * Remove any columns in the GROUP BY clause that are redundant due to
3049  * being functionally dependent on other GROUP BY columns.
3050  *
3051  * Since some other DBMSes do not allow references to ungrouped columns, it's
3052  * not unusual to find all columns listed in GROUP BY even though listing the
3053  * primary-key columns would be sufficient. Deleting such excess columns
3054  * avoids redundant sorting work, so it's worth doing. When we do this, we
3055  * must mark the plan as dependent on the pkey constraint (compare the
3056  * parser's check_ungrouped_columns() and check_functional_grouping()).
3057  *
3058  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
3059  * index as the determining columns. But as with check_functional_grouping(),
3060  * there's currently no way to represent dependency on a NOT NULL constraint,
3061  * so we consider only the pkey for now.
3062  */
3063 static void
3064 remove_useless_groupby_columns(PlannerInfo *root)
3065 {
3066  Query *parse = root->parse;
3067  Bitmapset **groupbyattnos;
3068  Bitmapset **surplusvars;
3069  ListCell *lc;
3070  int relid;
3071 
3072  /* No chance to do anything if there are less than two GROUP BY items */
3073  if (list_length(parse->groupClause) < 2)
3074  return;
3075 
3076  /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
3077  if (parse->groupingSets)
3078  return;
3079 
3080  /*
3081  * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
3082  * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
3083  * that are GROUP BY items.
3084  */
3085  groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
3086  (list_length(parse->rtable) + 1));
3087  foreach(lc, parse->groupClause)
3088  {
3089  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
3090  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
3091  Var *var = (Var *) tle->expr;
3092 
3093  /*
3094  * Ignore non-Vars and Vars from other query levels.
3095  *
3096  * XXX in principle, stable expressions containing Vars could also be
3097  * removed, if all the Vars are functionally dependent on other GROUP
3098  * BY items. But it's not clear that such cases occur often enough to
3099  * be worth troubling over.
3100  */
3101  if (!IsA(var, Var) ||
3102  var->varlevelsup > 0)
3103  continue;
3104 
3105  /* OK, remember we have this Var */
3106  relid = var->varno;
3107  Assert(relid <= list_length(parse->rtable));
3108  groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
3109  var->varattno - FirstLowInvalidHeapAttributeNumber);
3110  }
3111 
3112  /*
3113  * Consider each relation and see if it is possible to remove some of its
3114  * Vars from GROUP BY. For simplicity and speed, we do the actual removal
3115  * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
3116  * of the column attnos of RTE k that are removable GROUP BY items.
3117  */
3118  surplusvars = NULL; /* don't allocate array unless required */
3119  relid = 0;
3120  foreach(lc, parse->rtable)
3121  {
3122  RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
3123  Bitmapset *relattnos;
3124  Bitmapset *pkattnos;
3125  Oid constraintOid;
3126 
3127  relid++;
3128 
3129  /* Only plain relations could have primary-key constraints */
3130  if (rte->rtekind != RTE_RELATION)
3131  continue;
3132 
3133  /*
3134  * We must skip inheritance parent tables as some of the child rels
3135  * may cause duplicate rows. This cannot happen with partitioned
3136  * tables, however.
3137  */
3138  if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
3139  continue;
3140 
3141  /* Nothing to do unless this rel has multiple Vars in GROUP BY */
3142  relattnos = groupbyattnos[relid];
3143  if (bms_membership(relattnos) != BMS_MULTIPLE)
3144  continue;
3145 
3146  /*
3147  * Can't remove any columns for this rel if there is no suitable
3148  * (i.e., nondeferrable) primary key constraint.
3149  */
3150  pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
3151  if (pkattnos == NULL)
3152  continue;
3153 
3154  /*
3155  * If the primary key is a proper subset of relattnos then we have
3156  * some items in the GROUP BY that can be removed.
3157  */
3158  if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
3159  {
3160  /*
3161  * To easily remember whether we've found anything to do, we don't
3162  * allocate the surplusvars[] array until we find something.
3163  */
3164  if (surplusvars == NULL)
3165  surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
3166  (list_length(parse->rtable) + 1));
3167 
3168  /* Remember the attnos of the removable columns */
3169  surplusvars[relid] = bms_difference(relattnos, pkattnos);
3170 
3171  /* Also, mark the resulting plan as dependent on this constraint */
3172  parse->constraintDeps = lappend_oid(parse->constraintDeps,
3173  constraintOid);
3174  }
3175  }
3176 
3177  /*
3178  * If we found any surplus Vars, build a new GROUP BY clause without them.
3179  * (Note: this may leave some TLEs with unreferenced ressortgroupref
3180  * markings, but that's harmless.)
3181  */
3182  if (surplusvars != NULL)
3183  {
3184  List *new_groupby = NIL;
3185 
3186  foreach(lc, parse->groupClause)
3187  {
3188  SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
3189  TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
3190  Var *var = (Var *) tle->expr;
3191 
3192  /*
3193  * New list must include non-Vars, outer Vars, and anything not
3194  * marked as surplus.
3195  */
3196  if (!IsA(var, Var) ||
3197  var->varlevelsup > 0 ||
3198  !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
3199  surplusvars[var->varno]))
3200  new_groupby = lappend(new_groupby, sgc);
3201  }
3202 
3203  parse->groupClause = new_groupby;
3204  }
3205 }
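/*
 * Example with a hypothetical table: given "CREATE TABLE orders (id int
 * PRIMARY KEY, customer text, total numeric)", a query written as
 * "... GROUP BY id, customer, total" is trimmed here to the equivalent
 * "GROUP BY id", and the plan is recorded as depending on the pkey
 * constraint via parse->constraintDeps.
 */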
3206 
3207 /*
3208  * preprocess_groupclause - do preparatory work on GROUP BY clause
3209  *
3210  * The idea here is to adjust the ordering of the GROUP BY elements
3211  * (which in itself is semantically insignificant) to match ORDER BY,
3212  * thereby allowing a single sort operation to both implement the ORDER BY
3213  * requirement and set up for a Unique step that implements GROUP BY.
3214  *
3215  * In principle it might be interesting to consider other orderings of the
3216  * GROUP BY elements, which could match the sort ordering of other
3217  * possible plans (eg an indexscan) and thereby reduce cost. We don't
3218  * bother with that, though. Hashed grouping will frequently win anyway.
3219  *
3220  * Note: we need no comparable processing of the distinctClause because
3221  * the parser already enforced that that matches ORDER BY.
3222  *
3223  * For grouping sets, the order of items is instead forced to agree with that
3224  * of the grouping set (and items not in the grouping set are skipped). The
3225  * work of sorting the order of grouping set elements to match the ORDER BY if
3226  * possible is done elsewhere.
3227  */
3228 static List *
3229 preprocess_groupclause(PlannerInfo *root, List *force)
3230 {
3231  Query *parse = root->parse;
3232  List *new_groupclause = NIL;
3233  bool partial_match;
3234  ListCell *sl;
3235  ListCell *gl;
3236 
3237  /* For grouping sets, we need to force the ordering */
3238  if (force)
3239  {
3240  foreach(sl, force)
3241  {
3242  Index ref = lfirst_int(sl);
3243  SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
3244 
3245  new_groupclause = lappend(new_groupclause, cl);
3246  }
3247 
3248  return new_groupclause;
3249  }
3250 
3251  /* If no ORDER BY, nothing useful to do here */
3252  if (parse->sortClause == NIL)
3253  return parse->groupClause;
3254 
3255  /*
3256  * Scan the ORDER BY clause and construct a list of matching GROUP BY
3257  * items, but only as far as we can make a matching prefix.
3258  *
3259  * This code assumes that the sortClause contains no duplicate items.
3260  */
3261  foreach(sl, parse->sortClause)
3262  {
3263  SortGroupClause *sc = lfirst_node(SortGroupClause, sl);
3264 
3265  foreach(gl, parse->groupClause)
3266  {
3267  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
3268 
3269  if (equal(gc, sc))
3270  {
3271  new_groupclause = lappend(new_groupclause, gc);
3272  break;
3273  }
3274  }
3275  if (gl == NULL)
3276  break; /* no match, so stop scanning */
3277  }
3278 
3279  /* Did we match all of the ORDER BY list, or just some of it? */
3280  partial_match = (sl != NULL);
3281 
3282  /* If no match at all, no point in reordering GROUP BY */
3283  if (new_groupclause == NIL)
3284  return parse->groupClause;
3285 
3286  /*
3287  * Add any remaining GROUP BY items to the new list, but only if we were
3288  * able to make a complete match. In other words, we only rearrange the
3289  * GROUP BY list if the result is that one list is a prefix of the other
3290  * --- otherwise there's no possibility of a common sort. Also, give up
3291  * if there are any non-sortable GROUP BY items, since then there's no
3292  * hope anyway.
3293  */
3294  foreach(gl, parse->groupClause)
3295  {
3296  SortGroupClause *gc = lfirst_node(SortGroupClause, gl);
3297 
3298  if (list_member_ptr(new_groupclause, gc))
3299  continue; /* it matched an ORDER BY item */
3300  if (partial_match)
3301  return parse->groupClause; /* give up, no common sort possible */
3302  if (!OidIsValid(gc->sortop))
3303  return parse->groupClause; /* give up, GROUP BY can't be sorted */
3304  new_groupclause = lappend(new_groupclause, gc);
3305  }
3306 
3307  /* Success --- install the rearranged GROUP BY list */
3308  Assert(list_length(parse->groupClause) == list_length(new_groupclause));
3309  return new_groupclause;
3310 }
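/*
 * [Editor's illustration, not part of planner.c] Assuming a hypothetical
 * table t(a, b, c), the reordering above turns
 *     SELECT a, b, count(*) FROM t GROUP BY b, a ORDER BY a, b;
 * into an effective GROUP BY (a, b), so one sort on (a, b) serves both the
 * grouping step and the ORDER BY.  If the query instead said ORDER BY a, c,
 * only the prefix (a) matches; the remaining GROUP BY item b then fails the
 * complete-match requirement, so the original GROUP BY order is kept.
 */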
3311 
3312 /*
3313  * Extract lists of grouping sets that can be implemented using a single
3314  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
3315  *
3316  * Input must be sorted with smallest sets first. Result has each sublist
3317  * sorted with smallest sets first.
3318  *
3319  * We want to produce the absolute minimum possible number of lists here to
3320  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
3321  * of finding the minimal partition of a partially-ordered set into chains
3322  * (which is what we need, taking the list of grouping sets as a poset ordered
3323  * by set inclusion) can be mapped to the problem of finding the maximum
3324  * cardinality matching on a bipartite graph, which is solvable in polynomial
3325  * time with a worst case of no worse than O(n^2.5) and usually much
3326  * better. Since our N is at most 4096, we don't need to consider fallbacks to
3327  * heuristic or approximate methods. (Planning time for a 12-d cube is under
3328  * half a second on my modest system even with optimization off and assertions
3329  * on.)
3330  */
3331 static List *
3332  extract_rollup_sets(List *groupingSets)
3333  {
3334  int num_sets_raw = list_length(groupingSets);
3335  int num_empty = 0;
3336  int num_sets = 0; /* distinct sets */
3337  int num_chains = 0;
3338  List *result = NIL;
3339  List **results;
3340  List **orig_sets;
3341  Bitmapset **set_masks;
3342  int *chains;
3343  short **adjacency;
3344  short *adjacency_buf;
3345  BipartiteMatchState *state;
3346  int i;
3347  int j;
3348  int j_size;
3349  ListCell *lc1 = list_head(groupingSets);
3350  ListCell *lc;
3351 
3352  /*
3353  * Start by stripping out empty sets. The algorithm doesn't require this,
3354  * but the planner currently needs all empty sets to be returned in the
3355  * first list, so we strip them here and add them back after.
3356  */
3357  while (lc1 && lfirst(lc1) == NIL)
3358  {
3359  ++num_empty;
3360  lc1 = lnext(groupingSets, lc1);
3361  }
3362 
3363  /* bail out now if it turns out that all we had were empty sets. */
3364  if (!lc1)
3365  return list_make1(groupingSets);
3366 
3367  /*----------
3368  * We don't strictly need to remove duplicate sets here, but if we don't,
3369  * they tend to become scattered through the result, which is a bit
3370  * confusing (and irritating if we ever decide to optimize them out).
3371  * So we remove them here and add them back after.
3372  *
3373  * For each non-duplicate set, we fill in the following:
3374  *
3375  * orig_sets[i] = list of the original set lists
3376  * set_masks[i] = bitmapset for testing inclusion
3377  * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3378  *
3379  * chains[i] will be the result group this set is assigned to.
3380  *
3381  * We index all of these from 1 rather than 0 because it is convenient
3382  * to leave 0 free for the NIL node in the graph algorithm.
3383  *----------
3384  */
3385  orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3386  set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3387  adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3388  adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3389 
3390  j_size = 0;
3391  j = 0;
3392  i = 1;
3393 
3394  for_each_cell(lc, groupingSets, lc1)
3395  {
3396  List *candidate = (List *) lfirst(lc);
3397  Bitmapset *candidate_set = NULL;
3398  ListCell *lc2;
3399  int dup_of = 0;
3400 
3401  foreach(lc2, candidate)
3402  {
3403  candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3404  }
3405 
3406  /* we can only be a dup if we're the same length as a previous set */
3407  if (j_size == list_length(candidate))
3408  {
3409  int k;
3410 
3411  for (k = j; k < i; ++k)
3412  {
3413  if (bms_equal(set_masks[k], candidate_set))
3414  {
3415  dup_of = k;
3416  break;
3417  }
3418  }
3419  }
3420  else if (j_size < list_length(candidate))
3421  {
3422  j_size = list_length(candidate);
3423  j = i;
3424  }
3425 
3426  if (dup_of > 0)
3427  {
3428  orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3429  bms_free(candidate_set);
3430  }
3431  else
3432  {
3433  int k;
3434  int n_adj = 0;
3435 
3436  orig_sets[i] = list_make1(candidate);
3437  set_masks[i] = candidate_set;
3438 
3439  /* fill in adjacency list; no need to compare equal-size sets */
3440 
3441  for (k = j - 1; k > 0; --k)
3442  {
3443  if (bms_is_subset(set_masks[k], candidate_set))
3444  adjacency_buf[++n_adj] = k;
3445  }
3446 
3447  if (n_adj > 0)
3448  {
3449  adjacency_buf[0] = n_adj;
3450  adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3451  memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3452  }
3453  else
3454  adjacency[i] = NULL;
3455 
3456  ++i;
3457  }
3458  }
3459 
3460  num_sets = i - 1;
3461 
3462  /*
3463  * Apply the graph matching algorithm to do the work.
3464  */
3465  state = BipartiteMatch(num_sets, num_sets, adjacency);
3466 
3467  /*
3468  * Now, the state->pair* fields have the info we need to assign sets to
3469  * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3470  * pair_vu[v] = u (both will be true, but we check both so that we can do
3471  * it in one pass)
3472  */
3473  chains = palloc0((num_sets + 1) * sizeof(int));
3474 
3475  for (i = 1; i <= num_sets; ++i)
3476  {
3477  int u = state->pair_vu[i];
3478  int v = state->pair_uv[i];
3479 
3480  if (u > 0 && u < i)
3481  chains[i] = chains[u];
3482  else if (v > 0 && v < i)
3483  chains[i] = chains[v];
3484  else
3485  chains[i] = ++num_chains;
3486  }
3487 
3488  /* build result lists. */
3489  results = palloc0((num_chains + 1) * sizeof(List *));
3490 
3491  for (i = 1; i <= num_sets; ++i)
3492  {
3493  int c = chains[i];
3494 
3495  Assert(c > 0);
3496 
3497  results[c] = list_concat(results[c], orig_sets[i]);
3498  }
3499 
3500  /* push any empty sets back on the first list. */
3501  while (num_empty-- > 0)
3502  results[1] = lcons(NIL, results[1]);
3503 
3504  /* make result list */
3505  for (i = 1; i <= num_chains; ++i)
3506  result = lappend(result, results[i]);
3507 
3508  /*
3509  * Free all the things.
3510  *
3511  * (This is over-fussy for small sets but for large sets we could have
3512  * tied up a nontrivial amount of memory.)
3513  */
3514  BipartiteMatchFree(state);
3515  pfree(results);
3516  pfree(chains);
3517  for (i = 1; i <= num_sets; ++i)
3518  if (adjacency[i])
3519  pfree(adjacency[i]);
3520  pfree(adjacency);
3521  pfree(adjacency_buf);
3522  pfree(orig_sets);
3523  for (i = 1; i <= num_sets; ++i)
3524  bms_free(set_masks[i]);
3525  pfree(set_masks);
3526 
3527  return result;
3528 }
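/*
 * [Editor's illustration, not part of planner.c] A worked example of the
 * chain decomposition above: for GROUPING SETS ((), (a), (c), (a,b), (a,b,c))
 * the non-empty sets, smallest first, are (a), (c), (a,b), (a,b,c).  Under
 * set inclusion, (a) < (a,b) < (a,b,c) forms one chain and (c) forms another,
 * so the minimal partition has two chains; the bipartite matching finds
 * exactly that pairing, the empty set is pushed back onto the first chain,
 * and each chain then becomes one rollup, i.e. one sorted aggregation pass.
 */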
3529 
3530 /*
3531  * Reorder the elements of a list of grouping sets such that they have correct
3532  * prefix relationships. Also inserts the GroupingSetData annotations.
3533  *
3534  * The input must be ordered with smallest sets first; the result is returned
3535  * with largest sets first. Note that the result shares no list substructure
3536  * with the input, so it's safe for the caller to modify it later.
3537  *
3538  * If we're passed in a sortclause, we follow its order of columns to the
3539  * extent possible, to minimize the chance that we add unnecessary sorts.
3540  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3541  * gets implemented in one pass.)
3542  */
3543 static List *
3544 reorder_grouping_sets(List *groupingsets, List *sortclause)
3545 {
3546  ListCell *lc;
3547  List *previous = NIL;
3548  List *result = NIL;
3549 
3550  foreach(lc, groupingsets)
3551  {
3552  List *candidate = (List *) lfirst(lc);
3553  List *new_elems = list_difference_int(candidate, previous);
3554  GroupingSetData *gs = makeNode(GroupingSetData);
3555 
3556  while (list_length(sortclause) > list_length(previous) &&
3557  list_length(new_elems) > 0)
3558  {
3559  SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3560  int ref = sc->tleSortGroupRef;
3561 
3562  if (list_member_int(new_elems, ref))
3563  {
3564  previous = lappend_int(previous, ref);
3565  new_elems = list_delete_int(new_elems, ref);
3566  }
3567  else
3568  {
3569  /* diverged from the sortclause; give up on it */
3570  sortclause = NIL;
3571  break;
3572  }
3573  }
3574 
3575  previous = list_concat(previous, new_elems);
3576 
3577  gs->set = list_copy(previous);
3578  result = lcons(gs, result);
3579  }
3580 
3581  list_free(previous);
3582 
3583  return result;
3584 }
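/*
 * [Editor's illustration, not part of planner.c] For the example in the
 * header comment, GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a arrives here
 * smallest-first as (c), (a,b,c).  The first set contributes column c; the
 * second set's new columns are then taken in ORDER BY order, extending the
 * prefix to (c,b,a).  The resulting rollup sorts on (c,b,a), which also
 * satisfies the ORDER BY, so the whole query needs only a single sort.
 */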
3585 
3586 /*
3587  * Compute query_pathkeys and other pathkeys during plan generation
3588  */
3589 static void
3590  standard_qp_callback(PlannerInfo *root, void *extra)
3591  {
3592  Query *parse = root->parse;
3593  standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3594  List *tlist = root->processed_tlist;
3595  List *activeWindows = qp_extra->activeWindows;
3596 
3597  /*
3598  * Calculate pathkeys that represent grouping/ordering requirements. The
3599  * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3600  * be, in which case we just leave their pathkeys empty.
3601  */
3602  if (qp_extra->groupClause &&
3603  grouping_is_sortable(qp_extra->groupClause))
3604  root->group_pathkeys =
3605  make_pathkeys_for_sortclauses(root,
3606  qp_extra->groupClause,
3607  tlist);
3608  else
3609  root->group_pathkeys = NIL;
3610 
3611  /* We consider only the first (bottom) window in pathkeys logic */
3612  if (activeWindows != NIL)
3613  {
3614  WindowClause *wc = linitial_node(WindowClause, activeWindows);
3615 
3616  root->window_pathkeys = make_pathkeys_for_window(root,
3617  wc,
3618  tlist);
3619  }
3620  else
3621  root->window_pathkeys = NIL;
3622 
3623  if (parse->distinctClause &&
3624  grouping_is_sortable(parse->distinctClause))
3625  root->distinct_pathkeys =
3626  make_pathkeys_for_sortclauses(root,
3627  parse->distinctClause,
3628  tlist);
3629  else
3630  root->distinct_pathkeys = NIL;
3631 
3632  root->sort_pathkeys =
3633  make_pathkeys_for_sortclauses(root,
3634  parse->sortClause,
3635  tlist);
3636 
3637  /*
3638  * Figure out whether we want a sorted result from query_planner.
3639  *
3640  * If we have a sortable GROUP BY clause, then we want a result sorted
3641  * properly for grouping. Otherwise, if we have window functions to
3642  * evaluate, we try to sort for the first window. Otherwise, if there's a
3643  * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3644  * we try to produce output that's sufficiently well sorted for the
3645  * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3646  * by the ORDER BY clause.
3647  *
3648  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3649  * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3650  * that might just leave us failing to exploit an available sort order at
3651  * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3652  * much easier, since we know that the parser ensured that one is a
3653  * superset of the other.
3654  */
3655  if (root->group_pathkeys)
3656  root->query_pathkeys = root->group_pathkeys;
3657  else if (root->window_pathkeys)
3658  root->query_pathkeys = root->window_pathkeys;
3659  else if (list_length(root->distinct_pathkeys) >
3660  list_length(root->sort_pathkeys))
3661  root->query_pathkeys = root->distinct_pathkeys;
3662  else if (root->sort_pathkeys)
3663  root->query_pathkeys = root->sort_pathkeys;
3664  else
3665  root->query_pathkeys = NIL;
3666 }
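/*
 * [Editor's illustration, not part of planner.c] Example of the priority
 * order applied above: for a query with GROUP BY a and ORDER BY b, the
 * sortable GROUP BY wins, so query_pathkeys asks the scan/join phase for
 * rows sorted by a; the ORDER BY on b is satisfied by a later sort above
 * the aggregation.  Only when there is no grouping and no window function
 * does DISTINCT (if more rigorous) or the ORDER BY drive query_pathkeys.
 */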
3667 
3668 /*
3669  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3670  *
3671  * path_rows: number of output rows from scan/join step
3672  * gd: grouping sets data including list of grouping sets and their clauses
3673  * target_list: target list containing group clause references
3674  *
3675  * If doing grouping sets, we also annotate the gsets data with the estimates
3676  * for each set and each individual rollup list, with a view to later
3677  * determining whether some combination of them could be hashed instead.
3678  */
3679 static double
3680  get_number_of_groups(PlannerInfo *root,
3681  double path_rows,
3682  grouping_sets_data *gd,
3683  List *target_list)
3684 {
3685  Query *parse = root->parse;
3686  double dNumGroups;
3687 
3688  if (parse->groupClause)
3689  {
3690  List *groupExprs;
3691 
3692  if (parse->groupingSets)
3693  {
3694  /* Add up the estimates for each grouping set */
3695  ListCell *lc;
3696  ListCell *lc2;
3697 
3698  Assert(gd); /* keep Coverity happy */
3699 
3700  dNumGroups = 0;
3701 
3702  foreach(lc, gd->rollups)
3703  {
3704  RollupData *rollup = lfirst_node(RollupData, lc);
3705  ListCell *lc;
3706 
3707  groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3708  target_list);
3709 
3710  rollup->numGroups = 0.0;
3711 
3712  forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3713  {
3714  List *gset = (List *) lfirst(lc);
3715  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3716  double numGroups = estimate_num_groups(root,
3717  groupExprs,
3718  path_rows,
3719  &gset);
3720 
3721  gs->numGroups = numGroups;
3722  rollup->numGroups += numGroups;
3723  }
3724 
3725  dNumGroups += rollup->numGroups;
3726  }
3727 
3728  if (gd->hash_sets_idx)
3729  {
3730  ListCell *lc;
3731 
3732  gd->dNumHashGroups = 0;
3733 
3734  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3735  target_list);
3736 
3737  forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3738  {
3739  List *gset = (List *) lfirst(lc);
3740  GroupingSetData *gs = lfirst_node(GroupingSetData, lc2);
3741  double numGroups = estimate_num_groups(root,
3742  groupExprs,
3743  path_rows,
3744  &gset);
3745 
3746  gs->numGroups = numGroups;
3747  gd->dNumHashGroups += numGroups;
3748  }
3749 
3750  dNumGroups += gd->dNumHashGroups;
3751  }
3752  }
3753  else
3754  {
3755  /* Plain GROUP BY */
3756  groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3757  target_list);
3758 
3759  dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3760  NULL);
3761  }
3762  }
3763  else if (parse->groupingSets)
3764  {
3765  /* Empty grouping sets ... one result row for each one */
3766  dNumGroups = list_length(parse->groupingSets);
3767  }
3768  else if (parse->hasAggs || root->hasHavingQual)
3769  {
3770  /* Plain aggregation, one result row */
3771  dNumGroups = 1;
3772  }
3773  else
3774  {
3775  /* Not grouping */
3776  dNumGroups = 1;
3777  }
3778 
3779  return dNumGroups;
3780 }
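/*
 * [Editor's illustration, not part of planner.c, numbers made up] For
 * ROLLUP (a, b) the single rollup contains the sets (a,b), (a) and ().
 * If estimate_num_groups() reports, say, 1000 groups for (a,b), 50 for (a)
 * and 1 for the empty set, the rollup's numGroups becomes 1051, and with no
 * other rollups or hashed sets that is also the dNumGroups returned.
 */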
3781 
3782 /*
3783  * create_grouping_paths
3784  *
3785  * Build a new upperrel containing Paths for grouping and/or aggregation.
3786  * Along the way, we also build an upperrel for Paths which are partially
3787  * grouped and/or aggregated. A partially grouped and/or aggregated path
3788  * needs a FinalizeAggregate node to complete the aggregation. Currently,
3789  * the only partially grouped paths we build are also partial paths; that
3790  * is, they need a Gather and then a FinalizeAggregate.
3791  *
3792  * input_rel: contains the source-data Paths
3793  * target: the pathtarget for the result Paths to compute
3794  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
3795  * gd: grouping sets data including list of grouping sets and their clauses
3796  *
3797  * Note: all Paths in input_rel are expected to return the target computed
3798  * by make_group_input_target.
3799  */
3800 static RelOptInfo *
3801  create_grouping_paths(PlannerInfo *root,
3802  RelOptInfo *input_rel,
3803  PathTarget *target,
3804  bool target_parallel_safe,
3805  const AggClauseCosts *agg_costs,
3806  grouping_sets_data *gd)
3807 {
3808  Query *parse = root->parse;
3809  RelOptInfo *grouped_rel;
3810  RelOptInfo *partially_grouped_rel;
3811 
3812  /*
3813  * Create grouping relation to hold fully aggregated grouping and/or
3814  * aggregation paths.
3815  */
3816  grouped_rel = make_grouping_rel(root, input_rel, target,
3817  target_parallel_safe, parse->havingQual);
3818 
3819  /*
3820  * Create either paths for a degenerate grouping or paths for ordinary
3821  * grouping, as appropriate.
3822  */
3823  if (is_degenerate_grouping(root))
3824  create_degenerate_grouping_paths(root, input_rel, grouped_rel);
3825  else
3826  {
3827  int flags = 0;
3828  GroupPathExtraData extra;
3829 
3830  /*
3831  * Determine whether it's possible to perform sort-based
3832  * implementations of grouping. (Note that if groupClause is empty,
3833  * grouping_is_sortable() is trivially true, and all the
3834  * pathkeys_contained_in() tests will succeed too, so that we'll
3835  * consider every surviving input path.)
3836  *
3837  * If we have grouping sets, we might be able to sort some but not all
3838  * of them; in this case, we need can_sort to be true as long as we
3839  * must consider any sorted-input plan.
3840  */
3841  if ((gd && gd->rollups != NIL)
3842  || grouping_is_sortable(parse->groupClause))
3843  flags |= GROUPING_CAN_USE_SORT;
3844 
3845  /*
3846  * Determine whether we should consider hash-based implementations of
3847  * grouping.
3848  *
3849  * Hashed aggregation only applies if we're grouping. If we have
3850  * grouping sets, some groups might be hashable but others not; in
3851  * this case we set can_hash true as long as there is nothing globally
3852  * preventing us from hashing (and we should therefore consider plans
3853  * with hashes).
3854  *
3855  * Executor doesn't support hashed aggregation with DISTINCT or ORDER
3856  * BY aggregates. (Doing so would imply storing *all* the input
3857  * values in the hash table, and/or running many sorts in parallel,
3858  * either of which seems like a certain loser.) We similarly don't
3859  * support ordered-set aggregates in hashed aggregation, but that case
3860  * is also included in the numOrderedAggs count.
3861  *
3862  * Note: grouping_is_hashable() is much more expensive to check than
3863  * the other gating conditions, so we want to do it last.
3864  */
3865  if ((parse->groupClause != NIL &&
3866  agg_costs->numOrderedAggs == 0 &&
3867  (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
3868  flags |= GROUPING_CAN_USE_HASH;
3869 
3870  /*
3871  * Determine whether partial aggregation is possible.
3872  */
3873  if (can_partial_agg(root, agg_costs))
3874  flags |= GROUPING_CAN_PARTIAL_AGG;
3875 
3876  extra.flags = flags;
3877  extra.target_parallel_safe = target_parallel_safe;
3878  extra.havingQual = parse->havingQual;
3879  extra.targetList = parse->targetList;
3880  extra.partial_costs_set = false;
3881 
3882  /*
3883  * Determine whether partitionwise aggregation is in theory possible.
3884  * It can be disabled by the user, and for now, we don't try to
3885  * support grouping sets. create_ordinary_grouping_paths() will check
3886  * additional conditions, such as whether input_rel is partitioned.
3887  */
3888  if (enable_partitionwise_aggregate && !parse->groupingSets)
3889  extra.patype = PARTITIONWISE_AGGREGATE_FULL;
3890  else
3891  extra.patype = PARTITIONWISE_AGGREGATE_NONE;
3892 
3893  create_ordinary_grouping_paths(root, input_rel, grouped_rel,
3894  agg_costs, gd, &extra,
3895  &partially_grouped_rel);
3896  }
3897 
3898  set_cheapest(grouped_rel);
3899  return grouped_rel;
3900 }
3901 
3902 /*
3903  * make_grouping_rel
3904  *
3905  * Create a new grouping rel and set basic properties.
3906  *
3907  * input_rel represents the underlying scan/join relation.
3908  * target is the output expected from the grouping relation.
3909  */
3910 static RelOptInfo *
3911  make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
3912  PathTarget *target, bool target_parallel_safe,
3913  Node *havingQual)
3914 {
3915  RelOptInfo *grouped_rel;
3916 
3917  if (IS_OTHER_REL(input_rel))
3918  {
3919  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
3920  input_rel->relids);
3921  grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
3922  }
3923  else
3924  {
3925  /*
3926  * By tradition, the relids set for the main grouping relation is
3927  * NULL. (This could be changed, but might require adjustments
3928  * elsewhere.)
3929  */
3930  grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3931  }
3932 
3933  /* Set target. */
3934  grouped_rel->reltarget = target;
3935 
3936  /*
3937  * If the input relation is not parallel-safe, then the grouped relation
3938  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3939  * target list and HAVING quals are parallel-safe.
3940  */
3941  if (input_rel->consider_parallel && target_parallel_safe &&
3942  is_parallel_safe(root, (Node *) havingQual))
3943  grouped_rel->consider_parallel = true;
3944 
3945  /*
3946  * If the input rel belongs to a single FDW, so does the grouped rel.
3947  */
3948  grouped_rel->serverid = input_rel->serverid;
3949  grouped_rel->userid = input_rel->userid;
3950  grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3951  grouped_rel->fdwroutine = input_rel->fdwroutine;
3952 
3953  return grouped_rel;
3954 }
3955 
3956 /*
3957  * is_degenerate_grouping
3958  *
3959  * A degenerate grouping is one in which the query has a HAVING qual and/or
3960  * grouping sets, but no aggregates and no GROUP BY (which implies that the
3961  * grouping sets are all empty).
3962  */
3963 static bool
3964  is_degenerate_grouping(PlannerInfo *root)
3965  {
3966  Query *parse = root->parse;
3967 
3968  return (root->hasHavingQual || parse->groupingSets) &&
3969  !parse->hasAggs && parse->groupClause == NIL;
3970 }
3971 
3972 /*
3973  * create_degenerate_grouping_paths
3974  *
3975  * When the grouping is degenerate (see is_degenerate_grouping), we are
3976  * supposed to emit either zero or one row for each grouping set depending on
3977  * whether HAVING succeeds. Furthermore, there cannot be any variables in
3978  * either HAVING or the targetlist, so we actually do not need the FROM table
3979  * at all! We can just throw away the plan-so-far and generate a Result node.
3980  * This is a sufficiently unusual corner case that it's not worth contorting
3981  * the structure of this module to avoid having to generate the earlier paths
3982  * in the first place.
3983  */
3984 static void
3985  create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
3986  RelOptInfo *grouped_rel)
3987 {
3988  Query *parse = root->parse;
3989  int nrows;
3990  Path *path;
3991 
3992  nrows = list_length(parse->groupingSets);
3993  if (nrows > 1)
3994  {
3995  /*
3996  * Doesn't seem worthwhile writing code to cons up a generate_series
3997  * or a values scan to emit multiple rows. Instead just make N clones
3998  * and append them. (With a volatile HAVING clause, this means you
3999  * might get between 0 and N output rows. Offhand I think that's
4000  * desired.)
4001  */
4002  List *paths = NIL;
4003 
4004  while (--nrows >= 0)
4005  {
4006  path = (Path *)
4007  create_group_result_path(root, grouped_rel,
4008  grouped_rel->reltarget,
4009  (List *) parse->havingQual);
4010  paths = lappend(paths, path);
4011  }
4012  path = (Path *)
4013  create_append_path(root,
4014  grouped_rel,
4015  paths,
4016  NIL,
4017  NIL,
4018  NULL,
4019  0,
4020  false,
4021  NIL,
4022  -1);
4023  }
4024  else
4025  {
4026  /* No grouping sets, or just one, so one output row */
4027  path = (Path *)
4028  create_group_result_path(root, grouped_rel,
4029  grouped_rel->reltarget,
4030  (List *) parse->havingQual);
4031  }
4032 
4033  add_path(grouped_rel, path);
4034 }
4035 
4036 /*
4037  * create_ordinary_grouping_paths
4038  *
4039  * Create grouping paths for the ordinary (that is, non-degenerate) case.
4040  *
4041  * We need to consider sorted and hashed aggregation in the same function,
4042  * because otherwise (1) it would be harder to throw an appropriate error
4043  * message if neither way works, and (2) we should not allow hashtable size
4044  * considerations to dissuade us from using hashing if sorting is not possible.
4045  *
4046  * *partially_grouped_rel_p will be set to the partially grouped rel which this
4047  * function creates, or to NULL if it doesn't create one.
4048  */
4049 static void
4050  create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
4051  RelOptInfo *grouped_rel,
4052  const AggClauseCosts *agg_costs,
4053  grouping_sets_data *gd,
4054  GroupPathExtraData *extra,
4055  RelOptInfo **partially_grouped_rel_p)
4056 {
4057  Path *cheapest_path = input_rel->cheapest_total_path;
4058  RelOptInfo *partially_grouped_rel = NULL;
4059  double dNumGroups;
4060  PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE;
4061 
4062  /*
4063  * If this is the topmost grouping relation or if the parent relation is
4064  * doing some form of partitionwise aggregation, then we may be able to do
4065  * it at this level also. However, if the input relation is not
4066  * partitioned, partitionwise aggregate is impossible.
4067  */
4068  if (extra->patype != PARTITIONWISE_AGGREGATE_NONE &&
4069  IS_PARTITIONED_REL(input_rel))
4070  {
4071  /*
4072  * If this is the topmost relation or if the parent relation is doing
4073  * full partitionwise aggregation, then we can do full partitionwise
4074  * aggregation provided that the GROUP BY clause contains all of the
4075  * partitioning columns at this level. Otherwise, we can do at most
4076  * partial partitionwise aggregation. But if partial aggregation is
4077  * not supported in general then we can't use it for partitionwise
4078  * aggregation either.
4079  */
4080  if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
4081  group_by_has_partkey(input_rel, extra->targetList,
4082  root->parse->groupClause))
4083  patype = PARTITIONWISE_AGGREGATE_FULL;
4084  else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4085  patype = PARTITIONWISE_AGGREGATE_PARTIAL;
4086  else
4087  patype = PARTITIONWISE_AGGREGATE_NONE;
4088  }
4089 
4090  /*
4091  * Before generating paths for grouped_rel, we first generate any possible
4092  * partially grouped paths; that way, later code can easily consider both
4093  * parallel and non-parallel approaches to grouping.
4094  */
4095  if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0)
4096  {
4097  bool force_rel_creation;
4098 
4099  /*
4100  * If we're doing partitionwise aggregation at this level, force
4101  * creation of a partially_grouped_rel so we can add partitionwise
4102  * paths to it.
4103  */
4104  force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL);
4105 
4106  partially_grouped_rel =
4107  create_partial_grouping_paths(root,
4108  grouped_rel,
4109  input_rel,
4110  gd,
4111  extra,
4112  force_rel_creation);
4113  }
4114 
4115  /* Set out parameter. */
4116  *partially_grouped_rel_p = partially_grouped_rel;
4117 
4118  /* Apply partitionwise aggregation technique, if possible. */
4119  if (patype != PARTITIONWISE_AGGREGATE_NONE)
4120  create_partitionwise_grouping_paths(root, input_rel, grouped_rel,
4121  partially_grouped_rel, agg_costs,
4122  gd, patype, extra);
4123 
4124  /* If we are doing partial aggregation only, return. */
4125  if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL)
4126  {
4127  Assert(partially_grouped_rel);
4128 
4129  if (partially_grouped_rel->pathlist)
4130  set_cheapest(partially_grouped_rel);
4131 
4132  return;
4133  }
4134 
4135  /* Gather any partially grouped partial paths. */
4136  if (partially_grouped_rel && partially_grouped_rel->partial_pathlist)
4137  {
4138  gather_grouping_paths(root, partially_grouped_rel);
4139  set_cheapest(partially_grouped_rel);
4140  }
4141 
4142  /*
4143  * Estimate number of groups.
4144  */
4145  dNumGroups = get_number_of_groups(root,
4146  cheapest_path->rows,
4147  gd,
4148  extra->targetList);
4149 
4150  /* Build final grouping paths */
4151  add_paths_to_grouping_rel(root, input_rel, grouped_rel,
4152  partially_grouped_rel, agg_costs, gd,
4153  dNumGroups, extra);
4154 
4155  /* Give a helpful error if we failed to find any implementation */
4156  if (grouped_rel->pathlist == NIL)
4157  ereport(ERROR,
4158  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4159  errmsg("could not implement GROUP BY"),
4160  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4161 
4162  /*
4163  * If there is an FDW that's responsible for all baserels of the query,
4164  * let it consider adding ForeignPaths.
4165  */
4166  if (grouped_rel->fdwroutine &&
4167  grouped_rel->fdwroutine->GetForeignUpperPaths)
4168  grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4169  input_rel, grouped_rel,
4170  extra);
4171 
4172  /* Let extensions possibly add some more paths */
4173  if (create_upper_paths_hook)
4174  (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4175  input_rel, grouped_rel,
4176  extra);
4177 }
4178 
4179 /*
4180  * For a given input path, consider the possible ways of doing grouping sets on
4181  * it, by combinations of hashing and sorting. This can be called multiple
4182  * times, so it's important that it not scribble on input. No result is
4183  * returned, but any generated paths are added to grouped_rel.
4184  */
4185 static void
4186  consider_groupingsets_paths(PlannerInfo *root,
4187  RelOptInfo *grouped_rel,
4188  Path *path,
4189  bool is_sorted,
4190  bool can_hash,
4191  grouping_sets_data *gd,
4192  const AggClauseCosts *agg_costs,
4193  double dNumGroups)
4194 {
4195  Query *parse = root->parse;
4196 
4197  /*
4198  * If we're not being offered sorted input, then only consider plans that
4199  * can be done entirely by hashing.
4200  *
4201  * We can hash everything if it looks like it'll fit in work_mem. But if
4202  * the input is actually sorted despite not being advertised as such, we
4203  * prefer to make use of that in order to use less memory.
4204  *
4205  * If none of the grouping sets are sortable, then ignore the work_mem
4206  * limit and generate a path anyway, since otherwise we'll just fail.
4207  */
4208  if (!is_sorted)
4209  {
4210  List *new_rollups = NIL;
4211  RollupData *unhashed_rollup = NULL;
4212  List *sets_data;
4213  List *empty_sets_data = NIL;
4214  List *empty_sets = NIL;
4215  ListCell *lc;
4216  ListCell *l_start = list_head(gd->rollups);
4217  AggStrategy strat = AGG_HASHED;
4218  double hashsize;
4219  double exclude_groups = 0.0;
4220 
4221  Assert(can_hash);
4222 
4223  /*
4224  * If the input is coincidentally sorted usefully (which can happen
4225  * even if is_sorted is false, since that only means that our caller
4226  * has set up the sorting for us), then save some hashtable space by
4227  * making use of that. But we need to watch out for degenerate cases:
4228  *
4229  * 1) If there are any empty grouping sets, then group_pathkeys might
4230  * be NIL if all non-empty grouping sets are unsortable. In this case,
4231  * there will be a rollup containing only empty groups, and the
4232  * pathkeys_contained_in test is vacuously true; this is ok.
4233  *
4234  * XXX: the above relies on the fact that group_pathkeys is generated
4235  * from the first rollup. If we add the ability to consider multiple
4236  * sort orders for grouping input, this assumption might fail.
4237  *
4238  * 2) If there are no empty sets and only unsortable sets, then the
4239  * rollups list will be empty (and thus l_start == NULL), and
4240  * group_pathkeys will be NIL; we must ensure that the vacuously-true
4241  * pathkeys_contained_in test doesn't cause us to crash.
4242  */
4243  if (l_start != NULL &&
4244  pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4245  {
4246  unhashed_rollup = lfirst_node(RollupData, l_start);
4247  exclude_groups = unhashed_rollup->numGroups;
4248  l_start = lnext(gd->rollups, l_start);
4249  }
4250 
4251  hashsize = estimate_hashagg_tablesize(path,
4252  agg_costs,
4253  dNumGroups - exclude_groups);
4254 
4255  /*
4256  * gd->rollups is empty if we have only unsortable columns to work
4257  * with. Override work_mem in that case; otherwise, we'll rely on the
4258  * sorted-input case to generate usable mixed paths.
4259  */
4260  if (hashsize > work_mem * 1024L && gd->rollups)
4261  return; /* nope, won't fit */
4262 
4263  /*
4264  * We need to burst the existing rollups list into individual grouping
4265  * sets and recompute a groupClause for each set.
4266  */
4267  sets_data = list_copy(gd->unsortable_sets);
4268 
4269  for_each_cell(lc, gd->rollups, l_start)
4270  {
4271  RollupData *rollup = lfirst_node(RollupData, lc);
4272 
4273  /*
4274  * If we find an unhashable rollup that's not been skipped by the
4275  * "actually sorted" check above, we can't cope; we'd need sorted
4276  * input (with a different sort order) but we can't get that here.
4277  * So bail out; we'll get a valid path from the is_sorted case
4278  * instead.
4279  *
4280  * The mere presence of empty grouping sets doesn't make a rollup
4281  * unhashable (see preprocess_grouping_sets), we handle those
4282  * specially below.
4283  */
4284  if (!rollup->hashable)
4285  return;
4286 
4287  sets_data = list_concat(sets_data, rollup->gsets_data);
4288  }
4289  foreach(lc, sets_data)
4290  {
4291  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4292  List *gset = gs->set;
4293  RollupData *rollup;
4294 
4295  if (gset == NIL)
4296  {
4297  /* Empty grouping sets can't be hashed. */
4298  empty_sets_data = lappend(empty_sets_data, gs);
4299  empty_sets = lappend(empty_sets, NIL);
4300  }
4301  else
4302  {
4303  rollup = makeNode(RollupData);
4304 
4305  rollup->groupClause = preprocess_groupclause(root, gset);
4306  rollup->gsets_data = list_make1(gs);
4307  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4308  rollup->gsets_data,
4309  gd->tleref_to_colnum_map);
4310  rollup->numGroups = gs->numGroups;
4311  rollup->hashable = true;
4312  rollup->is_hashed = true;
4313  new_rollups = lappend(new_rollups, rollup);
4314  }
4315  }
4316 
4317  /*
4318  * If we didn't find anything nonempty to hash, then bail. We'll
4319  * generate a path from the is_sorted case.
4320  */
4321  if (new_rollups == NIL)
4322  return;
4323 
4324  /*
4325  * If there were empty grouping sets they should have been in the
4326  * first rollup.
4327  */
4328  Assert(!unhashed_rollup || !empty_sets);
4329 
4330  if (unhashed_rollup)
4331  {
4332  new_rollups = lappend(new_rollups, unhashed_rollup);
4333  strat = AGG_MIXED;
4334  }
4335  else if (empty_sets)
4336  {
4337  RollupData *rollup = makeNode(RollupData);
4338 
4339  rollup->groupClause = NIL;
4340  rollup->gsets_data = empty_sets_data;
4341  rollup->gsets = empty_sets;
4342  rollup->numGroups = list_length(empty_sets);
4343  rollup->hashable = false;
4344  rollup->is_hashed = false;
4345  new_rollups = lappend(new_rollups, rollup);
4346  strat = AGG_MIXED;
4347  }
4348 
4349  add_path(grouped_rel, (Path *)
4350  create_groupingsets_path(root,
4351  grouped_rel,
4352  path,
4353  (List *) parse->havingQual,
4354  strat,
4355  new_rollups,
4356  agg_costs,
4357  dNumGroups));
4358  return;
4359  }
4360 
4361  /*
4362  * If we have sorted input but nothing we can do with it, bail.
4363  */
4364  if (list_length(gd->rollups) == 0)
4365  return;
4366 
4367  /*
4368  * Given sorted input, we try and make two paths: one sorted and one mixed
4369  * sort/hash. (We need to try both because hashagg might be disabled, or
4370  * some columns might not be sortable.)
4371  *
4372  * can_hash is passed in as false if some obstacle elsewhere (such as
4373  * ordered aggs) means that we shouldn't consider hashing at all.
4374  */
4375  if (can_hash && gd->any_hashable)
4376  {
4377  List *rollups = NIL;
4378  List *hash_sets = list_copy(gd->unsortable_sets);
4379  double availspace = (work_mem * 1024.0);
4380  ListCell *lc;
4381 
4382  /*
4383  * Account first for space needed for groups we can't sort at all.
4384  */
4385  availspace -= estimate_hashagg_tablesize(path,
4386  agg_costs,
4387  gd->dNumHashGroups);
4388 
4389  if (availspace > 0 && list_length(gd->rollups) > 1)
4390  {
4391  double scale;
4392  int num_rollups = list_length(gd->rollups);
4393  int k_capacity;
4394  int *k_weights = palloc(num_rollups * sizeof(int));
4395  Bitmapset *hash_items = NULL;
4396  int i;
4397 
4398  /*
4399  * We treat this as a knapsack problem: the knapsack capacity
4400  * represents work_mem, the item weights are the estimated memory
4401  * usage of the hashtables needed to implement a single rollup,
4402  * and we really ought to use the cost saving as the item value;
4403  * however, currently the costs assigned to sort nodes don't
4404  * reflect the comparison costs well, and so we treat all items as
4405  * of equal value (each rollup we hash instead saves us one sort).
4406  *
4407  * To use the discrete knapsack, we need to scale the values to a
4408  * reasonably small bounded range. We choose to allow a 5% error
4409  * margin; we have no more than 4096 rollups in the worst possible
4410  * case, which with a 5% error margin will require a bit over 42MB
4411  * of workspace. (Anyone wanting to plan queries that complex had
4412  * better have the memory for it. In more reasonable cases, with
4413  * no more than a couple of dozen rollups, the memory usage will
4414  * be negligible.)
4415  *
4416  * k_capacity is naturally bounded, but we clamp the values for
4417  * scale and weight (below) to avoid overflows or underflows (or
4418  * uselessly trying to use a scale factor less than 1 byte).
4419  */
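/*
 * [Editor's note, not part of planner.c] Worked numbers for the scaling
 * above: scale is at least availspace / (20 * num_rollups), so k_capacity
 * never exceeds 20 * num_rollups units and rounding a weight to whole units
 * loses at most about one unit in twenty, i.e. the 5% margin mentioned
 * above.  With the worst case of 4096 rollups the capacity is at most
 * 81920 units, and a dynamic-programming table of that many entries, each
 * tracking a ~4096-bit item set, is presumably where the "bit over 42MB"
 * workspace figure comes from.
 */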
4420  scale = Max(availspace / (20.0 * num_rollups), 1.0);
4421  k_capacity = (int) floor(availspace / scale);
4422 
4423  /*
4424  * We leave the first rollup out of consideration since it's the
4425  * one that matches the input sort order. We assign indexes "i"
4426  * to only those entries considered for hashing; the second loop,
4427  * below, must use the same condition.
4428  */
4429  i = 0;
4430  for_each_cell(lc, gd->rollups, list_second_cell(gd->rollups))
4431  {
4432  RollupData *rollup = lfirst_node(RollupData, lc);
4433 
4434  if (rollup->hashable)
4435  {
4436  double sz = estimate_hashagg_tablesize(path,
4437  agg_costs,
4438  rollup->numGroups);
4439 
4440  /*
4441  * If sz is enormous, but work_mem (and hence scale) is
4442  * small, avoid integer overflow here.
4443  */
4444  k_weights[i] = (int) Min(floor(sz / scale),
4445  k_capacity + 1.0);
4446  ++i;
4447  }
4448  }
4449 
4450  /*
4451  * Apply knapsack algorithm; compute the set of items which
4452  * maximizes the value stored (in this case the number of sorts
4453  * saved) while keeping the total size (approximately) within
4454  * capacity.
4455  */
4456  if (i > 0)
4457  hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4458 
4459  if (!bms_is_empty(hash_items))
4460  {
4461  rollups = list_make1(linitial(gd->rollups));
4462 
4463  i = 0;
4464  for_each_cell(lc, gd->rollups, list_second_cell(gd->rollups))
4465  {
4466  RollupData *rollup = lfirst_node(RollupData, lc);
4467 
4468  if (rollup->hashable)
4469  {
4470  if (bms_is_member(i, hash_items))
4471  hash_sets = list_concat(hash_sets,
4472  rollup->gsets_data);
4473  else
4474  rollups = lappend(rollups, rollup);
4475  ++i;
4476  }
4477  else
4478  rollups = lappend(rollups, rollup);
4479  }
4480  }
4481  }
4482 
4483  if (!rollups && hash_sets)
4484  rollups = list_copy(gd->rollups);
4485 
4486  foreach(lc, hash_sets)
4487  {
4488  GroupingSetData *gs = lfirst_node(GroupingSetData, lc);
4489  RollupData *rollup = makeNode(RollupData);
4490 
4491  Assert(gs->set != NIL);
4492 
4493  rollup->groupClause = preprocess_groupclause(root, gs->set);
4494  rollup->gsets_data = list_make1(gs);
4495  rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4496  rollup->gsets_data,
4497  gd->tleref_to_colnum_map);
4498  rollup->numGroups = gs->numGroups;
4499  rollup->hashable = true;
4500  rollup->is_hashed = true;
4501  rollups = lcons(rollup, rollups);
4502  }
4503 
4504  if (rollups)
4505  {
4506  add_path(grouped_rel, (Path *)
4507  create_groupingsets_path(root,
4508  grouped_rel,
4509  path,
4510  (List *) parse->havingQual,
4511  AGG_MIXED,
4512  rollups,
4513  agg_costs,
4514  dNumGroups));
4515  }
4516  }
4517 
4518  /*
4519  * Now try the simple sorted case.
4520  */
4521  if (!gd->unsortable_sets)
4522  add_path(grouped_rel, (Path *)
4523  create_groupingsets_path(root,
4524  grouped_rel,
4525  path,
4526  (List *) parse->havingQual,
4527  AGG_SORTED,
4528  gd->rollups,
4529  agg_costs,
4530  dNumGroups));
4531 }
4532 
4533 /*
4534  * create_window_paths
4535  *
4536  * Build a new upperrel containing Paths for window-function evaluation.
4537  *
4538  * input_rel: contains the source-data Paths
4539  * input_target: result of make_window_input_target
4540  * output_target: what the topmost WindowAggPath should return
4541  * wflists: result of find_window_functions
4542  * activeWindows: result of select_active_windows
4543  *
4544  * Note: all Paths in input_rel are expected to return input_target.
4545  */
4546 static RelOptInfo *
4547  create_window_paths(PlannerInfo *root,
4548  RelOptInfo *input_rel,
4549  PathTarget *input_target,
4550  PathTarget *output_target,
4551  bool output_target_parallel_safe,
4552  WindowFuncLists *wflists,
4553  List *activeWindows)
4554 {
4555  RelOptInfo *window_rel;
4556  ListCell *lc;
4557 
4558  /* For now, do all work in the (WINDOW, NULL) upperrel */
4559  window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4560 
4561  /*
4562  * If the input relation is not parallel-safe, then the window relation
4563  * can't be parallel-safe, either. Otherwise, we need to examine the
4564  * target list and active windows for non-parallel-safe constructs.
4565  */
4566  if (input_rel->consider_parallel && output_target_parallel_safe &&
4567  is_parallel_safe(root, (Node *) activeWindows))
4568  window_rel->consider_parallel = true;
4569 
4570  /*
4571  * If the input rel belongs to a single FDW, so does the window rel.
4572  */
4573  window_rel->serverid = input_rel->serverid;
4574  window_rel->userid = input_rel->userid;
4575  window_rel->useridiscurrent = input_rel->useridiscurrent;
4576  window_rel->fdwroutine = input_rel->fdwroutine;
4577 
4578  /*
4579  * Consider computing window functions starting from the existing
4580  * cheapest-total path (which will likely require a sort) as well as any
4581  * existing paths that satisfy root->window_pathkeys (which won't).
4582  */
4583  foreach(lc, input_rel->pathlist)
4584  {
4585  Path *path = (Path *) lfirst(lc);
4586 
4587  if (path == input_rel->cheapest_total_path ||
4588  pathkeys_contained_in(root->window_pathkeys, path->pathkeys))
4589  create_one_window_path(root,
4590  window_rel,
4591  path,
4592  input_target,
4593  output_target,
4594  wflists,
4595  activeWindows);
4596  }
4597 
4598  /*
4599  * If there is an FDW that's responsible for all baserels of the query,
4600  * let it consider adding ForeignPaths.
4601  */
4602  if (window_rel->fdwroutine &&
4603  window_rel->fdwroutine->GetForeignUpperPaths)
4604  window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4605  input_rel, window_rel,
4606  NULL);
4607 
4608  /* Let extensions possibly add some more paths */
4609  if (create_upper_paths_hook)
4610  (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4611  input_rel, window_rel, NULL);
4612 
4613  /* Now choose the best path(s) */
4614  set_cheapest(window_rel);
4615 
4616  return window_rel;
4617 }
4618 
4619 /*
4620  * Stack window-function implementation steps atop the given Path, and
4621  * add the result to window_rel.
4622  *
4623  * window_rel: upperrel to contain result
4624  * path: input Path to use (must return input_target)
4625  * input_target: result of make_window_input_target
4626  * output_target: what the topmost WindowAggPath should return
4627  * wflists: result of find_window_functions
4628  * activeWindows: result of select_active_windows
4629  */
4630 static void
4631  create_one_window_path(PlannerInfo *root,
4632  RelOptInfo *window_rel,
4633  Path *path,
4634  PathTarget *input_target,
4635  PathTarget *output_target,
4636  WindowFuncLists *wflists,
4637  List *activeWindows)
4638 {
4639  PathTarget *window_target;
4640  ListCell *l;
4641 
4642  /*
4643  * Since each window clause could require a different sort order, we stack
4644  * up a WindowAgg node for each clause, with sort steps between them as
4645  * needed. (We assume that select_active_windows chose a good order for
4646  * executing the clauses in.)
4647  *
4648  * input_target should contain all Vars and Aggs needed for the result.
4649  * (In some cases we wouldn't need to propagate all of these all the way
4650  * to the top, since they might only be needed as inputs to WindowFuncs.
4651  * It's probably not worth trying to optimize that though.) It must also
4652  * contain all window partitioning and sorting expressions, to ensure
4653  * they're computed only once at the bottom of the stack (that's critical
4654  * for volatile functions). As we climb up the stack, we'll add outputs
4655  * for the WindowFuncs computed at each level.
4656  */
4657  window_target = input_target;
4658 
4659  foreach(l, activeWindows)
4660  {
4661  WindowClause *wc = lfirst_node(WindowClause, l);
4662  List *window_pathkeys;
4663 
4664  window_pathkeys = make_pathkeys_for_window(root,
4665  wc,
4666  root->processed_tlist);
4667 
4668  /* Sort if necessary */
4669  if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4670  {
4671  path = (Path *) create_sort_path(root, window_rel,
4672  path,
4673  window_pathkeys,
4674  -1.0);
4675  }
4676 
4677  if (lnext(activeWindows, l))
4678  {
4679  /*
4680  * Add the current WindowFuncs to the output target for this
4681  * intermediate WindowAggPath. We must copy window_target to
4682  * avoid changing the previous path's target.
4683  *
4684  * Note: a WindowFunc adds nothing to the target's eval costs; but
4685  * we do need to account for the increase in tlist width.
4686  */
4687  ListCell *lc2;
4688 
4689  window_target = copy_pathtarget(window_target);
4690  foreach(lc2, wflists->windowFuncs[wc->winref])
4691  {
4692  WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4693 
4694  add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4695  window_target->width += get_typavgwidth(wfunc->wintype, -1);
4696  }
4697  }
4698  else
4699  {
4700  /* Install the goal target in the topmost WindowAgg */
4701  window_target = output_target;
4702  }
4703 
4704  path = (Path *)
4705  create_windowagg_path(root, window_rel, path, window_target,
4706  wflists->windowFuncs[wc->winref],
4707  wc);
4708  }
4709 
4710  add_path(window_rel, path);
4711 }
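/*
 * [Editor's illustration, not part of planner.c] Assuming two active
 * windows w1 (PARTITION BY a) and w2 (PARTITION BY b), ordered w1 then w2
 * by select_active_windows, the stacking above yields roughly:
 *
 *     WindowAgg (w2)
 *       -> Sort (b)
 *            -> WindowAgg (w1)
 *                 -> Sort (a)
 *                      -> scan/join output (input_target)
 *
 * Each intermediate level adds its own WindowFuncs to a copied target;
 * only the topmost WindowAgg switches to output_target.
 */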
4712 
4713 /*
4714  * create_distinct_paths
4715  *
4716  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4717  *
4718  * input_rel: contains the source-data Paths
4719  *
4720  * Note: input paths should already compute the desired pathtarget, since
4721  * Sort/Unique won't project anything.
4722  */
4723 static RelOptInfo *
4724  create_distinct_paths(PlannerInfo *root,
4725  RelOptInfo *input_rel)
4726 {
4727  Query *parse = root->parse;
4728  Path *cheapest_input_path = input_rel->cheapest_total_path;
4729  RelOptInfo *distinct_rel;
4730  double numDistinctRows;
4731  bool allow_hash;
4732  Path *path;
4733  ListCell *lc;
4734 
4735  /* For now, do all work in the (DISTINCT, NULL) upperrel */
4736  distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4737 
4738  /*
4739  * We don't compute anything at this level, so distinct_rel will be
4740  * parallel-safe if the input rel is parallel-safe. In particular, if
4741  * there is a DISTINCT ON (...) clause, any path for the input_rel will
4742  * output those expressions, and will not be parallel-safe unless those
4743  * expressions are parallel-safe.
4744  */
4745  distinct_rel->consider_parallel = input_rel->consider_parallel;
4746 
4747  /*
4748  * If the input rel belongs to a single FDW, so does the distinct_rel.
4749  */
4750  distinct_rel->serverid = input_rel->serverid;
4751  distinct_rel->userid = input_rel->userid;
4752  distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4753  distinct_rel->fdwroutine = input_rel->fdwroutine;
4754 
4755  /* Estimate number of distinct rows there will be */
4756  if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4757  root->hasHavingQual)
4758  {
4759  /*
4760  * If there was grouping or aggregation, use the number of input rows
4761  * as the estimated number of DISTINCT rows (ie, assume the input is
4762  * already mostly unique).
4763  */
4764  numDistinctRows = cheapest_input_path->rows;
4765  }
4766  else
4767  {
4768  /*
4769  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4770  */
4771  List *distinctExprs;
4772 
4773  distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4774  parse->targetList);
4775  numDistinctRows = estimate_num_groups(root, distinctExprs,
4776  cheapest_input_path->rows,
4777  NULL);
4778  }
4779 
4780  /*
4781  * Consider sort-based implementations of DISTINCT, if possible.
4782  */
4783  if (grouping_is_sortable(parse->distinctClause))
4784  {
4785  /*
4786  * First, if we have any adequately-presorted paths, just stick a
4787  * Unique node on those. Then consider doing an explicit sort of the
4788  * cheapest input path and Unique'ing that.
4789  *
4790  * When we have DISTINCT ON, we must sort by the more rigorous of
4791  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4792  * Also, if we do have to do an explicit sort, we might as well use
4793  * the more rigorous ordering to avoid a second sort later. (Note
4794  * that the parser will have ensured that one clause is a prefix of
4795  * the other.)
4796  */
4797  List *needed_pathkeys;
4798 
4799  if (parse->hasDistinctOn &&
4800  list_length(root->distinct_pathkeys) <
4801  list_length(root->sort_pathkeys))
4802  needed_pathkeys = root->sort_pathkeys;
4803  else
4804  needed_pathkeys = root->distinct_pathkeys;
4805 
4806  foreach(lc, input_rel->pathlist)
4807  {
4808  Path *path = (Path *) lfirst(lc);
4809 
4810  if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4811  {
4812  add_path(distinct_rel, (Path *)
4813  create_upper_unique_path(root, distinct_rel,
4814  path,
4815  list_length(root->distinct_pathkeys),
4816  numDistinctRows));
4817  }
4818  }
4819 
4820  /* For explicit-sort case, always use the more rigorous clause */
4821  if (list_length(root->distinct_pathkeys) <
4822  list_length(root->sort_pathkeys))
4823  {
4824  needed_pathkeys = root->sort_pathkeys;
4825  /* Assert checks that parser didn't mess up... */
4826  Assert(pathkeys_contained_in(root->distinct_pathkeys,
4827  needed_pathkeys));
4828  }
4829  else
4830  needed_pathkeys = root->distinct_pathkeys;
4831 
4832  path = cheapest_input_path;
4833  if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4834  path = (Path *) create_sort_path(root, distinct_rel,
4835  path,
4836  needed_pathkeys,
4837  -1.0);
4838 
4839  add_path(distinct_rel, (Path *)
4840  create_upper_unique_path(root, distinct_rel,
4841  path,
4842  list_length(root->distinct_pathkeys),
4843  numDistinctRows));
4844  }
4845 
4846  /*
4847  * Consider hash-based implementations of DISTINCT, if possible.
4848  *
4849  * If we were not able to make any other types of path, we *must* hash or
4850  * die trying. If we do have other choices, there are several things that
4851  * should prevent selection of hashing: if the query uses DISTINCT ON
4852  * (because it won't really have the expected behavior if we hash), or if
4853  * enable_hashagg is off, or if it looks like the hashtable will exceed
4854  * work_mem.
4855  *
4856  * Note: grouping_is_hashable() is much more expensive to check than the
4857  * other gating conditions, so we want to do it last.
4858  */
4859  if (distinct_rel->pathlist == NIL)
4860  allow_hash = true; /* we have no alternatives */
4861  else if (parse->hasDistinctOn || !enable_hashagg)
4862  allow_hash = false; /* policy-based decision not to hash */
4863  else
4864  {
4865  Size hashentrysize;
4866 
4867  /* Estimate per-hash-entry space at tuple width... */
4868  hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4869  MAXALIGN(SizeofMinimalTupleHeader);
4870  /* plus the per-hash-entry overhead */
4871  hashentrysize += hash_agg_entry_size(0);
4872 
4873  /* Allow hashing only if hashtable is predicted to fit in work_mem */
4874  allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4875  }
4876 
4877  if (allow_hash && grouping_is_hashable(parse->distinctClause))
4878  {
4879  /* Generate hashed aggregate path --- no sort needed */
4880  add_path(distinct_rel, (Path *)
4881  create_agg_path(root,
4882  distinct_rel,
4883  cheapest_input_path,
4884  cheapest_input_path->pathtarget,
4885  AGG_HASHED,
4886  AGGSPLIT_SIMPLE,
4887  parse->distinctClause,
4888  NIL,
4889  NULL,
4890  numDistinctRows));
4891  }
4892 
4893  /* Give a helpful error if we failed to find any implementation */
4894  if (distinct_rel->pathlist == NIL)
4895  ereport(ERROR,
4896  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4897  errmsg("could not implement DISTINCT"),
4898  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4899 
4900  /*
4901  * If there is an FDW that's responsible for all baserels of the query,
4902  * let it consider adding ForeignPaths.
4903  */
4904  if (distinct_rel->fdwroutine &&
4905  distinct_rel->fdwroutine->GetForeignUpperPaths)
4906  distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4907  input_rel, distinct_rel,
4908  NULL);
4909 
4910  /* Let extensions possibly add some more paths */
4911  if (create_upper_paths_hook)
4912  (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4913  input_rel, distinct_rel, NULL);
4914 
4915  /* Now choose the best path(s) */
4916  set_cheapest(distinct_rel);
4917 
4918  return distinct_rel;
4919 }
4920 
4921 /*
4922  * create_ordered_paths
4923  *
4924  * Build a new upperrel containing Paths for ORDER BY evaluation.
4925  *
4926  * All paths in the result must satisfy the ORDER BY ordering.
4927  * The only new path we need consider is an explicit sort on the
4928  * cheapest-total existing path.
4929  *
4930  * input_rel: contains the source-data Paths
4931  * target: the output tlist the result Paths must emit
4932  * limit_tuples: estimated bound on the number of output tuples,
4933  * or -1 if no LIMIT or couldn't estimate
4934  */
4935 static RelOptInfo *
4936  create_ordered_paths(PlannerInfo *root,
4937  RelOptInfo *input_rel,
4938  PathTarget *target,
4939  bool target_parallel_safe,
4940  double limit_tuples)
4941 {
4942  Path *cheapest_input_path = input_rel->cheapest_total_path;
4943  RelOptInfo *ordered_rel;
4944  ListCell *lc;
4945 
4946  /* For now, do all work in the (ORDERED, NULL) upperrel */
4947  ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4948 
4949  /*
4950  * If the input relation is not parallel-safe, then the ordered relation
4951  * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4952  * target list is parallel-safe.
4953  */
4954  if (input_rel->consider_parallel && target_parallel_safe)
4955  ordered_rel->consider_parallel = true;
4956 
4957  /*
4958  * If the input rel belongs to a single FDW, so does the ordered_rel.
4959  */
4960  ordered_rel->serverid = input_rel->serverid;
4961  ordered_rel->userid = input_rel->userid;
4962  ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4963  ordered_rel->fdwroutine = input_rel->fdwroutine;
4964 
4965  foreach(lc, input_rel->pathlist)
4966  {
4967  Path *path = (Path *) lfirst(lc);
4968  bool is_sorted;
4969 
4970  is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4971  path->pathkeys);
4972  if (path == cheapest_input_path || is_sorted)
4973  {
4974  if (!is_sorted)
4975  {
4976  /* An explicit sort here can take advantage of LIMIT */
4977  path = (Path *) create_sort_path(root,
4978  ordered_rel,
4979  path,
4980  root->sort_pathkeys,
4981  limit_tuples);
4982  }
4983 
4984  /* Add projection step if needed */
4985  if (path->pathtarget != target)
4986  path = apply_projection_to_path(root, ordered_rel,
4987  path, target);
4988 
4989  add_path(ordered_rel, path);
4990  }
4991  }
4992 
4993  /*
4994  * generate_gather_paths() will have already generated a simple Gather
4995  * path for the best parallel path, if any, and the loop above will have
4996  * considered sorting it. Similarly, generate_gather_paths() will also
4997  * have generated order-preserving Gather Merge plans which can be used
4998  * without sorting if they happen to match the sort_pathkeys, and the loop
4999  * above will have handled those as well. However, there's one more
5000  * possibility: it may make sense to sort the cheapest partial path
5001  * according to the required output order and then use Gather Merge.
5002  */
5003  if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
5004  input_rel->partial_pathlist != NIL)
5005  {
5006  Path *cheapest_partial_path;
5007 
5008  cheapest_partial_path = linitial(input_rel->partial_pathlist);
5009 
5010  /*
5011  * If cheapest partial path doesn't need a sort, this is redundant
5012  * with what's already been tried.
5013  */
5014  if (!pathkeys_contained_in(root->sort_pathkeys,
5015  cheapest_partial_path->pathkeys))
5016  {
5017  Path *path;
5018  double total_groups;
5019 
5020  path = (Path *) create_sort_path(root,
5021  ordered_rel,
5022  cheapest_partial_path,
5023  root->sort_pathkeys,
5024  limit_tuples);
5025 
5026  total_groups = cheapest_partial_path->rows *
5027  cheapest_partial_path->parallel_workers;
5028  path = (Path *)
5029  create_gather_merge_path(root, ordered_rel,
5030  path,
5031  path->pathtarget,
5032  root->sort_pathkeys, NULL,
5033  &total_groups);
5034 
5035  /* Add projection step if needed */
5036  if (path->pathtarget != target)
5037  path = apply_projection_to_path(root, ordered_rel,
5038  path, target);
5039 
5040  add_path(ordered_rel, path);
5041  }
5042  }
5043 
5044  /*
5045  * If there is an FDW that's responsible for all baserels of the query,
5046  * let it consider adding ForeignPaths.
5047  */
5048  if (ordered_rel->fdwroutine &&
5049  ordered_rel->fdwroutine->GetForeignUpperPaths)
5050  ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5051  input_rel, ordered_rel,
5052  NULL);
5053 
5054  /* Let extensions possibly add some more paths */
5055  if (create_upper_paths_hook)
5056  (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5057  input_rel, ordered_rel, NULL);
5058 
5059  /*
5060  * No need to bother with set_cheapest here; grouping_planner does not
5061  * need us to do it.
5062  */
5063  Assert(ordered_rel->pathlist != NIL);
5064 
5065  return ordered_rel;
5066 }
5067 
5068 
5069 /*
5070  * make_group_input_target
5071  * Generate appropriate PathTarget for initial input to grouping nodes.
5072  *
5073  * If there is grouping or aggregation, the scan/join subplan cannot emit
5074  * the query's final targetlist; for example, it certainly can't emit any
5075  * aggregate function calls. This routine generates the correct target
5076  * for the scan/join subplan.
5077  *
5078  * The query target list passed from the parser already contains entries
5079  * for all ORDER BY and GROUP BY expressions, but it will not have entries
5080  * for variables used only in HAVING clauses; so we need to add those
5081  * variables to the subplan target list. Also, we flatten all expressions
5082  * except GROUP BY items into their component variables; other expressions
5083  * will be computed by the upper plan nodes rather than by the subplan.
5084  * For example, given a query like
5085  * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5086  * we want to pass this targetlist to the subplan:
5087  * a+b,c,d
5088  * where the a+b target will be used by the Sort/Group steps, and the
5089  * other targets will be used for computing the final results.
5090  *
5091  * 'final_target' is the query's final target list (in PathTarget form)
5092  *
5093  * The result is the PathTarget to be computed by the Paths returned from
5094  * query_planner().
5095  */
5096 static PathTarget *
5097 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5098 {
5099  Query *parse = root->parse;
5100  PathTarget *input_target;
5101  List *non_group_cols;
5102  List *non_group_vars;
5103  int i;
5104  ListCell *lc;
5105 
5106  /*
5107  * We must build a target containing all grouping columns, plus any other
5108  * Vars mentioned in the query's targetlist and HAVING qual.
5109  */
5110  input_target = create_empty_pathtarget();
5111  non_group_cols = NIL;
5112 
5113  i = 0;
5114  foreach(lc, final_target->exprs)
5115  {
5116  Expr *expr = (Expr *) lfirst(lc);
5117  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5118 
5119  if (sgref && parse->groupClause &&
5120  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5121  {
5122  /*
5123  * It's a grouping column, so add it to the input target as-is.
5124  */
5125  add_column_to_pathtarget(input_target, expr, sgref);
5126  }
5127  else
5128  {
5129  /*
5130  * Non-grouping column, so just remember the expression for later
5131  * call to pull_var_clause.
5132  */
5133  non_group_cols = lappend(non_group_cols, expr);
5134  }
5135 
5136  i++;
5137  }
5138 
5139  /*
5140  * If there's a HAVING clause, we'll need the Vars it uses, too.
5141  */
5142  if (parse->havingQual)
5143  non_group_cols = lappend(non_group_cols, parse->havingQual);
5144 
5145  /*
5146  * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5147  * add them to the input target if not already present. (A Var used
5148  * directly as a GROUP BY item will be present already.) Note this
5149  * includes Vars used in resjunk items, so we are covering the needs of
5150  * ORDER BY and window specifications. Vars used within Aggrefs and
5151  * WindowFuncs will be pulled out here, too.
5152  */
5153  non_group_vars = pull_var_clause((Node *) non_group_cols,
5154  PVC_RECURSE_AGGREGATES |
5155  PVC_RECURSE_WINDOWFUNCS |
5156  PVC_INCLUDE_PLACEHOLDERS);
5157  add_new_columns_to_pathtarget(input_target, non_group_vars);
5158 
5159  /* clean up cruft */
5160  list_free(non_group_vars);
5161  list_free(non_group_cols);
5162 
5163  /* XXX this causes some redundant cost calculation ... */
5164  return set_pathtarget_cost_width(root, input_target);
5165 }
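
To make the flattening rule above concrete, the following worked trace (added for illustration; it does not appear in the original source) follows the header comment's example query through the loop and the pull_var_clause() call.

/*
 * Worked example (illustrative) for
 *     SELECT a+b, SUM(c+d) FROM table GROUP BY a+b;
 *
 *   final_target expr   sortgroupref      action in the loop above
 *   -----------------   ---------------   --------------------------------
 *   a+b                 set (GROUP BY)    add_column_to_pathtarget() as-is
 *   SUM(c+d)            0                 saved in non_group_cols
 *
 * pull_var_clause() with PVC_RECURSE_AGGREGATES then flattens SUM(c+d)
 * into the Vars c and d, giving the scan/join target a+b, c, d described
 * in the header comment.
 */
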
5166 
5167 /*
5168  * make_partial_grouping_target
5169  * Generate appropriate PathTarget for output of partial aggregate
5170  * (or partial grouping, if there are no aggregates) nodes.
5171  *
5172  * A partial aggregation node needs to emit all the same aggregates that
5173  * a regular aggregation node would, plus any aggregates used in HAVING;
5174  * except that the Aggref nodes should be marked as partial aggregates.
5175  *
5176  * In addition, we'd better emit any Vars and PlaceHolderVars that are
5177  * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5178  * these would be Vars that are grouped by or used in grouping expressions.)
5179  *
5180  * grouping_target is the tlist to be emitted by the topmost aggregation step.
5181  * havingQual represents the HAVING clause.
5182  */
5183 static PathTarget *
5184 make_partial_grouping_target(PlannerInfo *root,
5185  PathTarget *grouping_target,
5186  Node *havingQual)
5187 {
5188  Query *parse = root->parse;
5189  PathTarget *partial_target;
5190  List *non_group_cols;
5191  List *non_group_exprs;
5192  int i;
5193  ListCell *lc;
5194 
5195  partial_target = create_empty_pathtarget();
5196  non_group_cols = NIL;
5197 
5198  i = 0;
5199  foreach(lc, grouping_target->exprs)
5200  {
5201  Expr *expr = (Expr *) lfirst(lc);
5202  Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5203 
5204  if (sgref && parse->groupClause &&
5205  get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5206  {
5207  /*
5208  * It's a grouping column, so add it to the partial_target as-is.
5209  * (This allows the upper agg step to repeat the grouping calcs.)
5210  */
5211  add_column_to_pathtarget(partial_target, expr, sgref);
5212  }
5213  else
5214  {
5215  /*
5216  * Non-grouping column, so just remember the expression for later
5217  * call to pull_var_clause.
5218  */
5219  non_group_cols = lappend(non_group_cols, expr);
5220  }
5221 
5222  i++;
5223  }
5224 
5225  /*
5226  * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5227  */
5228  if (havingQual)
5229  non_group_cols = lappend(non_group_cols, havingQual);
5230 
5231  /*
5232  * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5233  * non-group cols (plus HAVING), and add them to the partial_target if not
5234  * already present. (An expression used directly as a GROUP BY item will
5235  * be present already.) Note this includes Vars used in resjunk items, so
5236  * we are covering the needs of ORDER BY and window specifications.
5237  */
5238  non_group_exprs = pull_var_clause((Node *) non_group_cols,
5239  PVC_INCLUDE_AGGREGATES |
5240  PVC_RECURSE_WINDOWFUNCS |
5241  PVC_INCLUDE_PLACEHOLDERS);
5242 
5243  add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5244 
5245  /*
5246  * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5247  * are at the top level of the target list, so we can just scan the list
5248  * rather than recursing through the expression trees.
5249  */
5250  foreach(lc, partial_target->exprs)
5251  {
5252  Aggref *aggref = (Aggref *) lfirst(lc);
5253 
5254  if (IsA(aggref, Aggref))
5255  {
5256  Aggref *newaggref;
5257 
5258  /*
5259  * We shouldn't need to copy the substructure of the Aggref node,
5260  * but flat-copy the node itself to avoid damaging other trees.
5261  */
5262  newaggref = makeNode(Aggref);
5263  memcpy(newaggref, aggref, sizeof(Aggref));
5264 
5265  /* For now, assume serialization is required */
5266  mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5267 
5268  lfirst(lc) = newaggref;
5269  }
5270  }
5271 
5272  /* clean up cruft */
5273  list_free(non_group_exprs);
5274  list_free(non_group_cols);
5275 
5276  /* XXX this causes some redundant cost calculation ... */
5277  return set_pathtarget_cost_width(root, partial_target);
5278 }
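
As an illustration of what this produces for a typical parallel aggregation (added here for clarity; not part of the original source, and the table name tab is a placeholder):

/*
 * Worked example (illustrative) for
 *     SELECT a, count(*) FROM tab GROUP BY a;
 * planned with parallel aggregation: the grouping column a is copied into
 * partial_target as-is, and the count(*) Aggref is flat-copied and marked
 * AGGSPLIT_INITIAL_SERIAL above.  The Partial Aggregate below the Gather
 * therefore emits (a, PARTIAL count(*)), and the Finalize Aggregate node
 * re-groups on a and combines the partial counts into the final result.
 */
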
5279 
5280 /*
5281  * mark_partial_aggref
5282  * Adjust an Aggref to make it represent a partial-aggregation step.
5283  *
5284  * The Aggref node is modified in-place; caller must do any copying required.
5285  */
5286 void
5287 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5288 {
5289  /* aggtranstype should be computed by this point */
5290  Assert(OidIsValid(agg->aggtranstype));
5291  /* ... but aggsplit should still be as the parser left it */
5292  Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5293 
5294  /* Mark the Aggref with the intended partial-aggregation mode */
5295  agg->aggsplit = aggsplit;
5296 
5297  /*
5298  * Adjust result type if needed. Normally, a partial aggregate returns
5299  * the aggregate's transition type; but if that's INTERNAL and we're
5300  * serializing, it returns BYTEA instead.
5301  */
5302  if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5303  {
5304  if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5305  agg->aggtype = BYTEAOID;
5306  else
5307  agg->aggtype = agg->aggtranstype;
5308  }
5309 }
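
A concrete illustration of the result-type rule above (added for clarity; not in the original source):

/*
 * Example: avg(int8) uses an INTERNAL transition state, so when its Aggref
 * is marked AGGSPLIT_INITIAL_SERIAL the partial result type becomes BYTEA
 * (the serialized state).  sum(int4) has transition type int8, so its
 * partial Aggref simply returns int8; in either case the Finalize step
 * combines the partial values into the final aggregate result.
 */
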
5310 
5311 /*
5312  * postprocess_setop_tlist
5313  * Fix up targetlist returned by plan_set_operations().
5314  *
5315  * We need to transpose sort key info from the orig_tlist into new_tlist.
5316  * NOTE: this would not be good enough if we supported resjunk sort keys
5317  * for results of set operations --- then, we'd need to project a whole
5318  * new tlist to evaluate the resjunk columns. For now, just ereport if we
5319  * find any resjunk columns in orig_tlist.
5320  */
5321 static List *
5322 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5323 {
5324  ListCell *l;
5325  ListCell *orig_tlist_item = list_head(orig_tlist);
5326 
5327  foreach(l, new_tlist)
5328  {
5329  TargetEntry *new_tle = lfirst_node(TargetEntry, l);
5330  TargetEntry *orig_tle;
5331 
5332  /* ignore resjunk columns in setop result */
5333  if (new_tle->resjunk)
5334  continue;
5335 
5336  Assert(orig_tlist_item != NULL);
5337  orig_tle = lfirst_node(TargetEntry, orig_tlist_item);
5338  orig_tlist_item = lnext(orig_tlist, orig_tlist_item);
5339  if (orig_tle->resjunk) /* should not happen */
5340  elog(ERROR, "resjunk output columns are not implemented");
5341  Assert(new_tle->resno == orig_tle->resno);
5342  new_tle->ressortgroupref = orig_tle->ressortgroupref;
5343  }
5344  if (orig_tlist_item != NULL)
5345  elog(ERROR, "resjunk output columns are not implemented");
5346  return new_tlist;
5347 }
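
For example (an illustration added here, not in the original source; the table names t1 and t2 are placeholders), the transposition matters for a query such as:

/*
 * Example:  SELECT a FROM t1 UNION SELECT a FROM t2 ORDER BY 1;
 * The parser attached the ORDER BY's sortgroupref to column a of the
 * original targetlist, but the tlist built by plan_set_operations() carries
 * no ressortgroupref marks.  Copying them across here lets grouping_planner()
 * build the sort pathkeys for the final ORDER BY on the set-op output.
 */
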
5348 
5349 /*
5350  * select_active_windows
5351  * Create a list of the "active" window clauses (ie, those referenced
5352  * by non-deleted WindowFuncs) in the order they are to be executed.
5353  */
5354 static List *
5355 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5356 {
5357  List *windowClause = root->parse->windowClause;
5358  List *result = NIL;
5359  ListCell *lc;
5360  int nActive = 0;
5361  WindowClauseSortData *actives = palloc(sizeof(WindowClauseSortData)
5362  * list_length(windowClause));
5363 
5364  /* First, construct an array of the active windows */
5365  foreach(lc, windowClause)
5366  {
5367  WindowClause *wc = lfirst_node(WindowClause, lc);
5368 
5369  /* It's only active if wflists shows some related WindowFuncs */
5370  Assert(wc->winref <= wflists->maxWinRef);
5371  if (wflists->windowFuncs[wc->winref] == NIL)
5372  continue;
5373 
5374  actives[nActive].wc = wc; /* original clause */
5375 
5376  /*
5377  * For sorting, we want the list of partition keys followed by the
5378  * list of sort keys. But pathkeys construction will remove duplicates
5379  * between the two, so we can as well (even though we can't detect all
5380  * of the duplicates, since some may come from ECs - that might mean
5381  * we miss optimization chances here). We must, however, ensure that
5382  * the order of entries is preserved with respect to the ones we do
5383  * keep.
5384  *
5385  * partitionClause and orderClause had their own duplicates removed in
5386  * parse analysis, so we're only concerned here with removing
5387  * orderClause entries that also appear in partitionClause.
5388  */
5389  actives[nActive].uniqueOrder =
5390  list_concat_unique(list_copy(wc->partitionClause),
5391  wc->orderClause);
5392  nActive++;
5393  }
5394 
5395  /*
5396  * Sort active windows by their partitioning/ordering clauses, ignoring
5397  * any framing clauses, so that the windows that need the same sorting are
5398  * adjacent in the list. When we come to generate paths, this will avoid
5399  * inserting additional Sort nodes.
5400  *
5401  * This is how we implement a specific requirement from the SQL standard,
5402  * which says that when two or more windows are order-equivalent (i.e.
5403  * have matching partition and order clauses, even if their names or
5404  * framing clauses differ), then all peer rows must be presented in the
5405  * same order in all of them. If we allowed multiple sort nodes for such
5406  * cases, we'd risk having the peer rows end up in different orders in
5407  * equivalent windows due to sort instability. (See General Rule 4 of
5408  * <window clause> in SQL2008 - SQL2016.)
5409  *
5410  * Additionally, if the entire list of clauses of one window is a prefix
5411  * of another, put first the window with stronger sorting requirements.
5412  * This way we will sort first for the stronger window, and won't have to sort
5413  * again for the weaker one.
5414  */
5415  qsort(actives, nActive, sizeof(WindowClauseSortData), common_prefix_cmp);
5416 
5417  /* build ordered list of the original WindowClause nodes */
5418  for (int i = 0; i < nActive; i++)
5419  result = lappend(result, actives[i].wc);
5420 
5421  pfree(actives);
5422 
5423  return result;
5424 }
5425 
5426 /*
5427  * common_prefix_cmp
5428  * QSort comparison function for WindowClauseSortData
5429  *
5430  * Sort the windows by the required sorting clauses. First, compare the sort
5431  * clauses themselves. Second, if one window's clauses are a prefix of another
5432  * one's clauses, put the window with more sort clauses first.
5433  */
5434 static int
5435 common_prefix_cmp(const void *a, const void *b)
5436 {
5437  const WindowClauseSortData *wcsa = a;
5438  const WindowClauseSortData *wcsb = b;
5439  ListCell *item_a;
5440  ListCell *item_b;
5441 
5442  forboth(item_a, wcsa->uniqueOrder, item_b, wcsb->uniqueOrder)
5443  {
5444  SortGroupClause *sca = lfirst_node(SortGroupClause, item_a);
5445  SortGroupClause *scb = lfirst_node(SortGroupClause, item_b);
5446 
5447  if (sca->tleSortGroupRef > scb->tleSortGroupRef)
5448  return -1;
5449  else if (sca->tleSortGroupRef < scb->tleSortGroupRef)
5450  return 1;
5451  else if (sca->sortop > scb->sortop)
5452  return -1;
5453  else if (sca->sortop < scb->sortop)
5454  return 1;
5455  else if (sca->nulls_first && !scb->nulls_first)
5456  return -1;
5457  else if (!sca->nulls_first && scb->nulls_first)
5458  return 1;
5459  /* no need to compare eqop, since it is fully determined by sortop */
5460  }
5461 
5462  if (list_length(wcsa->uniqueOrder) > list_length(wcsb->uniqueOrder))
5463  return -1;
5464  else if (list_length(wcsa->uniqueOrder) < list_length(wcsb->uniqueOrder))
5465  return 1;
5466 
5467  return 0;
5468 }
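
An illustrative ordering produced by this comparator (example added for clarity; not in the original source):

/*
 * Example: given
 *     w1 AS (PARTITION BY a ORDER BY b),
 *     w2 AS (PARTITION BY a),
 *     w3 AS (PARTITION BY a ORDER BY b)
 * the uniqueOrder lists are (a, b), (a) and (a, b).  The two-key windows
 * compare equal to each other and sort ahead of w2, whose clause list is a
 * prefix of theirs; so w1 and w3 end up adjacent and share one Sort, and
 * w2 can then run on input already ordered by (a, b).
 */
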
5469 
5470 /*
5471  * make_window_input_target
5472  * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5473  *
5474  * When the query has window functions, this function computes the desired
5475  * target to be computed by the node just below the first WindowAgg.
5476  * This tlist must contain all values needed to evaluate the window functions,
5477  * compute the final target list, and perform any required final sort step.
5478  * If multiple WindowAggs are needed, each intermediate one adds its window
5479  * function results onto this base tlist; only the topmost WindowAgg computes
5480  * the actual desired target list.
5481  *
5482  * This function is much like make_group_input_target, though not quite enough
5483  * like it to share code. As in that function, we flatten most expressions
5484  * into their component variables. But we do not want to flatten window
5485  * PARTITION BY/ORDER BY clauses, since that might result in multiple
5486  * evaluations of them, which would be bad (possibly even resulting in
5487  * inconsistent answers, if they contain volatile functions).
5488  * Also, we must not flatten GROUP BY clauses that were left unflattened by
5489  * make_group_input_target, because we may no longer have access to the
5490  * individual Vars in them.
5491  *
5492  * Another key difference from make_group_input_target is that we don't
5493  * flatten Aggref expressions, since those are to be computed below the
5494  * window functions and just referenced like Vars above that.
5495  *
5496  * 'final_target' is the query's final target list (in PathTarget form)
5497  * 'activeWindows' is the list of active windows previously identified by
5498  * select_active_windows.
5499  *
5500  * The result is the PathTarget to be computed by the plan node immediately
5501  * below the first WindowAgg node.
5502  */
5503 static PathTarget *
5504 make_window_input_target(PlannerInfo *root,
5505  PathTarget *final_target,
5506  List *activeWindows)
5507 {
5508  Query *parse = root->parse;
5509  PathTarget *input_target;
5510  Bitmapset *sgrefs;
5511  List *flattenable_cols;
5512  List *flattenable_vars;
5513  int i;
5514  ListCell *lc;
5515 
5516  Assert(parse->hasWindowFuncs);
5517 
5518  /*
5519  * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5520  * into a bitmapset for convenient reference below.
5521  */
5522  sgrefs = NULL;
5523  foreach(lc, activeWindows)
5524  {
5525  WindowClause *wc = lfirst_node(WindowClause, lc);
5526  ListCell *lc2;
5527 
5528  foreach(lc2, wc->partitionClause)
5529  {
5530  SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2);
5531 
5532  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5533  }
5534  foreach(lc2, wc->orderClause)
5535  {
5537 
5538  sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5539  }
5540  }
5541 
5542  /* Add in sortgroupref numbers of GROUP BY clauses, too */
5543  foreach(lc, parse->groupClause)
5544  {
5545  SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);
5546 
5547  sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5548  }
5549 
5550  /*
5551  * Construct a target containing all the non-flattenable targetlist items,
5552  * and save aside the others for a moment.
5553  */
5554  input_target = create_empty_pathtarget();
5555  flattenable_cols = NIL;
5556 
5557  i = 0;
5558  foreach(lc, final_target->exprs)
5559  {
5560  Expr *expr = (Expr *) lfirst(lc);
5561  Index sgref = get_pathtarget_sortgroupref(final_target, i);
5562 
5563  /*
5564  * Don't want to deconstruct window clauses or GROUP BY items. (Note
5565  * that such items can't contain window functions, so it's okay to
5566  * compute them below the WindowAgg nodes.)
5567  */
5568  if (sgref != 0 && bms_is_member(sgref, sgrefs))
5569  {
5570  /*
5571  * Don't want to deconstruct this value, so add it to the input
5572  * target as-is.
5573  */
5574  add_column_to_pathtarget(input_target, expr, sgref);
5575  }
5576  else
5577  {
5578  /*
5579  * Column is to be flattened, so just remember the expression for
5580  * later call to pull_var_clause.
5581  */
5582  flattenable_cols = lappend(flattenable_cols, expr);
5583  }
5584 
5585  i++;
5586  }
5587 
5588  /*
5589  * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5590  * add them to the input target if not already present. (Some might be
5591  * there already because they're used directly as window/group clauses.)
5592  *
5593  * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5594  * Aggrefs are placed in the Agg node's tlist and not left to be computed
5595  * at higher levels. On the other hand, we should recurse into
5596  * WindowFuncs to make sure their input expressions are available.
5597  */
5598  flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5599  PVC_INCLUDE_AGGREGATES |
5600  PVC_RECURSE_WINDOWFUNCS |
5601  PVC_INCLUDE_PLACEHOLDERS);
5602  add_new_columns_to_pathtarget(input_target, flattenable_vars);
5603 
5604  /* clean up cruft */
5605  list_free(flattenable_vars);
5606  list_free(flattenable_cols);
5607 
5608  /* XXX this causes some redundant cost calculation ... */
5609  return set_pathtarget_cost_width(root, input_target);
5610 }
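
A short worked example of which columns are kept versus flattened (illustrative; not in the original source, and the table name tab is a placeholder):

/*
 * Example:  SELECT a, b + 1, rank() OVER (PARTITION BY a ORDER BY c) FROM tab;
 * Columns a and c carry window sortgrouprefs, so they are added to the
 * input target as-is; b + 1 and the rank() call are flattenable, and
 * pull_var_clause() reduces them to the Var b (WindowFuncs are recursed
 * into, not kept).  The WindowAgg's input target is therefore (a, c, b),
 * and b + 1 is computed above the WindowAgg.
 */
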
5611 
5612 /*
5613  * make_pathkeys_for_window
5614  * Create a pathkeys list describing the required input ordering
5615  * for the given WindowClause.
5616  *
5617  * The required ordering is first the PARTITION keys, then the ORDER keys.
5618  * In the future we might try to implement windowing using hashing, in which
5619  * case the ordering could be relaxed, but for now we always sort.
5620  */
5621 static List *
5622 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
5623  List *tlist)
5624 {
5625  List *window_pathkeys;
5626  List *window_sortclauses;
5627 
5628  /* Throw error if can't sort */
5629  if (!grouping_is_sortable(wc->partitionClause))
5630  ereport(ERROR,
5631  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5632  errmsg("could not implement window PARTITION BY"),
5633  errdetail("Window partitioning columns must be of sortable datatypes.")));
5634  if (!grouping_is_sortable(wc->orderClause))
5635  ereport(ERROR,
5636  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5637  errmsg("could not implement window ORDER BY"),
5638  errdetail("Window ordering columns must be of sortable datatypes.")));
5639 
5640  /* Okay, make the combined pathkeys */
5641  window_sortclauses = list_concat_copy(wc->partitionClause, wc->orderClause);
5642  window_pathkeys = make_pathkeys_for_sortclauses(root,
5643  window_sortclauses,
5644  tlist);
5645  list_free(window_sortclauses);
5646  return window_pathkeys;
5647 }
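
For instance (illustrative; not part of the original source):

/*
 * Example: for OVER (PARTITION BY a ORDER BY b DESC) the concatenated sort
 * clauses are (a, b DESC), so the WindowAgg's input must be sorted by a and
 * then by b descending.  make_pathkeys_for_sortclauses() canonicalizes that
 * list, dropping keys that equivalence classes prove redundant (e.g. a
 * column known to be constant).
 */
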
5648 
5649 /*
5650  * make_sort_input_target
5651  * Generate appropriate PathTarget for initial input to Sort step.
5652  *
5653  * If the query has ORDER BY, this function chooses the target to be computed
5654  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5655  * project) steps. This might or might not be identical to the query's final
5656  * output target.
5657  *
5658  * The main argument for keeping the sort-input tlist the same as the final one
5659  * is that we avoid a separate projection node (which will be needed if
5660  * they're different, because Sort can't project). However, there are also
5661  * advantages to postponing tlist evaluation till after the Sort: it ensures
5662  * a consistent order of evaluation for any volatile functions in the tlist,
5663  * and if there's also a LIMIT, we can stop the query without ever computing
5664  * tlist functions for later rows, which is beneficial for both volatile and
5665  * expensive functions.
5666  *
5667  * Our current policy is to postpone volatile expressions till after the sort
5668  * unconditionally (assuming that that's possible, ie they are in plain tlist
5669  * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5670  * postpone set-returning expressions, because running them beforehand would
5671  * bloat the sort dataset, and because it might cause unexpected output order
5672  * if the sort isn't stable. However there's a constraint on that: all SRFs
5673  * in the tlist should be evaluated at the same plan step, so that they can
5674  * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5675  * mustn't postpone any SRFs. (Note that in principle that policy should
5676  * probably get applied to the group/window input targetlists too, but we
5677  * have not done that historically.) Lastly, expensive expressions are
5678  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5679  * partial evaluation of the query is possible (if neither is true, we expect
5680  * to have to evaluate the expressions for every row anyway), or if there are
5681  * any volatile or set-returning expressions (since once we've put in a
5682  * projection at all, it won't cost any more to postpone more stuff).
5683  *
5684  * Another issue that could potentially be considered here is that
5685  * evaluating tlist expressions could result in data that's either wider
5686  * or narrower than the input Vars, thus changing the volume of data that
5687  * has to go through the Sort. However, we usually have only a very bad
5688  * idea of the output width of any expression more complex than a Var,
5689  * so for now it seems too risky to try to optimize on that basis.
5690  *
5691  * Note that if we do produce a modified sort-input target, and then the
5692  * query ends up not using an explicit Sort, no particular harm is done:
5693  * we'll initially use the modified target for the preceding path nodes,
5694  * but then change them to the final target with apply_projection_to_path.
5695  * Moreover, in such a case the guarantees about evaluation order of
5696  * volatile functions still hold, since the rows are sorted already.
5697  *
5698  * This function has some things in common with make_group_input_target and
5699  * make_window_input_target, though the detailed rules for what to do are
5700  * different. We never flatten/postpone any grouping or ordering columns;
5701  * those are needed before the sort. If we do flatten a particular
5702  * expression, we leave Aggref and WindowFunc nodes alone, since those were
5703  * computed earlier.
5704  *
5705  * 'final_target' is the query's final target list (in PathTarget form)
5706  * 'have_postponed_srfs' is an output argument, see below
5707  *
5708  * The result is the PathTarget to be computed by the plan node immediately
5709  * below the Sort step (and the Distinct step, if any). This will be
5710  * exactly final_target if we decide a projection step wouldn't be helpful.
5711  *
5712  * In addition, *have_postponed_srfs is set to true if we choose to postpone
5713  * any set-returning functions to after the Sort.
5714  */
  */
5715 static PathTarget *
5716 make_sort_input_target(PlannerInfo *root,
5717  PathTarget *final_target,
5718  bool *have_postponed_srfs)
5719 {
5720  Query *parse = root->parse;
5721  PathTarget *input_target;
5722  int ncols;
5723  bool *col_is_srf;
5724  bool *postpone_col;
5725  bool have_srf;
5726  bool have_volatile;
5727  bool have_expensive;
5728  bool have_srf_sortcols;
5729  bool postpone_srfs;
5730  List *postponable_cols;
5731  List *postponable_vars;
5732  int i;
5733  ListCell *lc;
5734 
5735  /* Shouldn't get here unless query has ORDER BY */
5736  Assert(parse->sortClause);
5737 
5738  *have_postponed_srfs = false; /* default result */
5739 
5740  /* Inspect tlist and collect per-column information */
5741  ncols = list_length(final_target->exprs);
5742  col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5743  postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5744  have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5745 
5746  i = 0;
5747  foreach(lc, final_target->exprs)
5748  {
5749  Expr *expr = (Expr *) lfirst(lc);
5750 
5751  /*
5752  * If the column has a sortgroupref, assume it has to be evaluated
5753  * before sorting. Generally such columns would be ORDER BY, GROUP
5754  * BY, etc targets. One exception is columns that were removed from
5755  * GROUP BY by remove_useless_groupby_columns() ... but those would
5756  * only be Vars anyway. There don't seem to be any cases where it
5757  * would be worth the trouble to double-check.
5758  */
5759  if (get_pathtarget_sortgroupref(final_target, i) == 0)
5760  {
5761  /*
5762  * Check for SRF or volatile functions. Check the SRF case first
5763  * because we must know whether we have any postponed SRFs.
5764  */
5765  if (parse->hasTargetSRFs &&
5766  expression_returns_set((Node *) expr))
5767  {
5768  /* We'll decide below whether these are postponable */
5769  col_is_srf[i] = true;
5770  have_srf = true;
5771  }
5772  else if (contain_volatile_functions((Node *) expr))
5773  {
5774  /* Unconditionally postpone */
5775  postpone_col[i] = true;
5776  have_volatile = true;
5777  }
5778  else
5779  {
5780  /*
5781  * Else check the cost. XXX it's annoying to have to do this
5782  * when set_pathtarget_cost_width() just did it. Refactor to
5783  * allow sharing the work?
5784  */
5785  QualCost cost;
5786 
5787  cost_qual_eval_node(&cost, (Node *) expr, root);
5788 
5789  /*
5790  * We arbitrarily define "expensive" as "more than 10X
5791  * cpu_operator_cost". Note this will take in any PL function
5792  * with default cost.
5793  */
5794  if (cost.per_tuple > 10 * cpu_operator_cost)
5795  {
5796  postpone_col[i] = true;
5797  have_expensive = true;
5798  }
5799  }
5800  }
5801  else
5802  {
5803  /* For sortgroupref cols, just check if any contain SRFs */
5804  if (!have_srf_sortcols &&
5805  parse->hasTargetSRFs &&
5806  expression_returns_set((Node *) expr))
5807  have_srf_sortcols = true;
5808  }
5809 
5810  i++;
5811  }
5812 
5813  /*
5814  * We can postpone SRFs if we have some but none are in sortgroupref cols.
5815  */
5816  postpone_srfs = (have_srf && !have_srf_sortcols);
5817 
5818  /*
5819  * If we don't need a post-sort projection, just return final_target.
5820  */
5821  if (!(postpone_srfs || have_volatile ||
5822  (have_expensive &&
5823  (parse->limitCount || root->tuple_fraction > 0))))
5824  return final_target;
5825 
5826  /*
5827  * Report whether the post-sort projection will contain set-returning
5828  * functions. This is important because it affects whether the Sort can
5829  * rely on the query's LIMIT (if any) to bound the number of rows it needs
5830  * to return.
5831  */
5832  *have_postponed_srfs = postpone_srfs;
5833 
5834  /*
5835  * Construct the sort-input target, taking all non-postponable columns and
5836  * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5837  * the postponable ones.
5838  */
5839  input_target = create_empty_pathtarget();
5840  postponable_cols = NIL;
5841 
5842  i = 0;
5843  foreach(lc, final_target->exprs)
5844  {
5845  Expr *expr = (Expr *) lfirst(lc);
5846 
5847  if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5848  postponable_cols = lappend(postponable_cols, expr);
5849  else
5850  add_column_to_pathtarget(input_target, expr,
5851  get_pathtarget_sortgroupref(final_target, i));
5852 
5853  i++;
5854  }
5855 
5856  /*
5857  * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5858  * postponable columns, and add them to the sort-input target if not
5859  * already present. (Some might be there already.) We mustn't
5860  * deconstruct Aggrefs or WindowFuncs here, since the projection node
5861  * would be unable to recompute them.
5862  */
5863  postponable_vars = pull_var_clause((Node *) postponable_cols,
5864  PVC_INCLUDE_AGGREGATES |
5865  PVC_INCLUDE_WINDOWFUNCS |
5866  PVC_INCLUDE_PLACEHOLDERS);
5867  add_new_columns_to_pathtarget(input_target, postponable_vars);
5868 
5869  /* clean up cruft */
5870  list_free(postponable_vars);
5871  list_free(postponable_cols);
5872 
5873  /* XXX this represents even more redundant cost calculation ... */
5874  return set_pathtarget_cost_width(root, input_target);