PostgreSQL Source Code  git master
costsize.c
1 /*-------------------------------------------------------------------------
2  *
3  * costsize.c
4  * Routines to compute (and set) relation sizes and path costs
5  *
6  * Path costs are measured in arbitrary units established by these basic
7  * parameters:
8  *
9  * seq_page_cost Cost of a sequential page fetch
10  * random_page_cost Cost of a non-sequential page fetch
11  * cpu_tuple_cost Cost of typical CPU time to process a tuple
12  * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple
13  * cpu_operator_cost Cost of CPU time to execute an operator or function
14  * parallel_tuple_cost Cost of CPU time to pass a tuple from worker to leader backend
15  * parallel_setup_cost Cost of setting up shared memory for parallelism
16  *
17  * We expect that the kernel will typically do some amount of read-ahead
18  * optimization; this in conjunction with seek costs means that seq_page_cost
19  * is normally considerably less than random_page_cost. (However, if the
20  * database is fully cached in RAM, it is reasonable to set them equal.)
21  *
22  * We also use a rough estimate "effective_cache_size" of the number of
23  * disk pages in Postgres + OS-level disk cache. (We can't simply use
24  * NBuffers for this purpose because that would ignore the effects of
25  * the kernel's disk cache.)
26  *
27  * Obviously, taking constants for these values is an oversimplification,
28  * but it's tough enough to get any useful estimates even at this level of
29  * detail. Note that all of these parameters are user-settable, in case
30  * the default values are drastically off for a particular platform.
31  *
32  * seq_page_cost and random_page_cost can also be overridden for an individual
33  * tablespace, in case some data is on a fast disk and other data is on a slow
34  * disk. Per-tablespace overrides never apply to temporary work files such as
35  * an external sort or a materialize node that overflows work_mem.
36  *
37  * We compute two separate costs for each path:
38  * total_cost: total estimated cost to fetch all tuples
39  * startup_cost: cost that is expended before first tuple is fetched
40  * In some scenarios, such as when there is a LIMIT or we are implementing
41  * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
42  * path's result. A caller can estimate the cost of fetching a partial
43  * result by interpolating between startup_cost and total_cost. In detail:
44  * actual_cost = startup_cost +
45  * (total_cost - startup_cost) * tuples_to_fetch / path->rows;
46  * Note that a base relation's rows count (and, by extension, plan_rows for
47  * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
48  * that this equation works properly. (Note: while path->rows is never zero
49  * for ordinary relations, it is zero for paths for provably-empty relations,
50  * so beware of division-by-zero.) The LIMIT is applied as a top-level
51  * plan node.
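 *	 For example, with startup_cost = 5, total_cost = 105 and path->rows = 1000,
 *	 fetching only the first 100 tuples would be estimated at
 *	 5 + (105 - 5) * 100 / 1000 = 15 (illustrative numbers only).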
52  *
53  * For largely historical reasons, most of the routines in this module use
54  * the passed result Path only to store their results (rows, startup_cost and
55  * total_cost) into. All the input data they need is passed as separate
56  * parameters, even though much of it could be extracted from the Path.
57  * An exception is made for the cost_XXXjoin() routines, which expect all
58  * the other fields of the passed XXXPath to be filled in, and similarly
59  * cost_index() assumes the passed IndexPath is valid except for its output
60  * values.
61  *
62  *
63  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
64  * Portions Copyright (c) 1994, Regents of the University of California
65  *
66  * IDENTIFICATION
67  * src/backend/optimizer/path/costsize.c
68  *
69  *-------------------------------------------------------------------------
70  */
71 
72 #include "postgres.h"
73 
74 #include <limits.h>
75 #include <math.h>
76 
77 #include "access/amapi.h"
78 #include "access/htup_details.h"
79 #include "access/tsmapi.h"
80 #include "executor/executor.h"
81 #include "executor/nodeAgg.h"
82 #include "executor/nodeHash.h"
83 #include "executor/nodeMemoize.h"
84 #include "miscadmin.h"
85 #include "nodes/makefuncs.h"
86 #include "nodes/nodeFuncs.h"
87 #include "optimizer/clauses.h"
88 #include "optimizer/cost.h"
89 #include "optimizer/optimizer.h"
90 #include "optimizer/pathnode.h"
91 #include "optimizer/paths.h"
92 #include "optimizer/placeholder.h"
93 #include "optimizer/plancat.h"
94 #include "optimizer/planmain.h"
95 #include "optimizer/restrictinfo.h"
96 #include "parser/parsetree.h"
97 #include "utils/lsyscache.h"
98 #include "utils/selfuncs.h"
99 #include "utils/spccache.h"
100 #include "utils/tuplesort.h"
101 
102 
103 #define LOG2(x) (log(x) / 0.693147180559945)
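/* 0.693147180559945 is ln(2), so LOG2(x) computes the base-2 logarithm of x. */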
104 
105 /*
106  * Append and MergeAppend nodes are less expensive than some other operations
107  * which use cpu_tuple_cost; instead of adding a separate GUC, estimate the
108  * per-tuple cost as cpu_tuple_cost multiplied by this value.
109  */
110 #define APPEND_CPU_COST_MULTIPLIER 0.5
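/*
 * For illustration only: with the default cpu_tuple_cost of 0.01, each tuple
 * passing through an Append or MergeAppend is charged about 0.01 * 0.5 = 0.005.
 */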
111 
112 /*
113  * Maximum value for row estimates. We cap row estimates to this to help
114  * ensure that costs based on these estimates remain within the range of what
115  * double can represent. add_path() wouldn't act sanely given infinite or NaN
116  * cost values.
117  */
118 #define MAXIMUM_ROWCOUNT 1e100
119 
120 double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
121 double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
122 double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
123 double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
124 double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
125 double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
126 double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
127 double recursive_worktable_factor = DEFAULT_RECURSIVE_WORKTABLE_FACTOR;
128 
129 int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
130 
131 Cost disable_cost = 1.0e10;
132 
133 int max_parallel_workers_per_gather = 2;
134 
135 bool enable_seqscan = true;
136 bool enable_indexscan = true;
137 bool enable_indexonlyscan = true;
138 bool enable_bitmapscan = true;
139 bool enable_tidscan = true;
140 bool enable_sort = true;
141 bool enable_incremental_sort = true;
142 bool enable_hashagg = true;
143 bool enable_nestloop = true;
144 bool enable_material = true;
145 bool enable_memoize = true;
146 bool enable_mergejoin = true;
147 bool enable_hashjoin = true;
148 bool enable_gathermerge = true;
149 bool enable_partitionwise_join = false;
150 bool enable_partitionwise_aggregate = false;
151 bool enable_parallel_append = true;
152 bool enable_parallel_hash = true;
153 bool enable_partition_pruning = true;
154 bool enable_presorted_aggregate = true;
155 bool enable_async_append = true;
156 
157 typedef struct
158 {
159  PlannerInfo *root;
160  QualCost total;
161 } cost_qual_eval_context;
162 
163 static List *extract_nonindex_conditions(List *qual_clauses, List *indexclauses);
164 static MergeScanSelCache *cached_scansel(PlannerInfo *root,
165  RestrictInfo *rinfo,
166  PathKey *pathkey);
167 static void cost_rescan(PlannerInfo *root, Path *path,
168  Cost *rescan_startup_cost, Cost *rescan_total_cost);
169 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
170 static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
171  ParamPathInfo *param_info,
172  QualCost *qpqual_cost);
173 static bool has_indexed_join_quals(NestPath *path);
174 static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
175  List *quals);
176 static double calc_joinrel_size_estimate(PlannerInfo *root,
177  RelOptInfo *joinrel,
178  RelOptInfo *outer_rel,
179  RelOptInfo *inner_rel,
180  double outer_rows,
181  double inner_rows,
182  SpecialJoinInfo *sjinfo,
183  List *restrictlist);
184 static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
185  Relids outer_relids,
186  Relids inner_relids,
187  SpecialJoinInfo *sjinfo,
188  List **restrictlist);
189 static Cost append_nonpartial_cost(List *subpaths, int numpaths,
190  int parallel_workers);
191 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
192 static int32 get_expr_width(PlannerInfo *root, const Node *expr);
193 static double relation_byte_size(double tuples, int width);
194 static double page_size(double tuples, int width);
195 static double get_parallel_divisor(Path *path);
196 
197 
198 /*
199  * clamp_row_est
200  * Force a row-count estimate to a sane value.
201  */
202 double
203 clamp_row_est(double nrows)
204 {
205  /*
206  * Avoid infinite and NaN row estimates. Costs derived from such values
207  * are going to be useless. Also force the estimate to be at least one
208  * row, to make explain output look better and to avoid possible
209  * divide-by-zero when interpolating costs. Make it an integer, too.
210  */
211  if (nrows > MAXIMUM_ROWCOUNT || isnan(nrows))
212  nrows = MAXIMUM_ROWCOUNT;
213  else if (nrows <= 1.0)
214  nrows = 1.0;
215  else
216  nrows = rint(nrows);
217 
218  return nrows;
219 }
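/*
 * Illustrative behavior of clamp_row_est(): an estimate of 0.3 is clamped to
 * 1.0, 123.6 is rounded to 124, and NaN or anything above MAXIMUM_ROWCOUNT is
 * forced to 1e100.
 */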
220 
221 /*
222  * clamp_cardinality_to_long
223  * Cast a Cardinality value to a sane long value.
224  */
225 long
226 clamp_cardinality_to_long(Cardinality x)
227 {
228  /*
229  * Just for paranoia's sake, ensure we do something sane with negative or
230  * NaN values.
231  */
232  if (isnan(x))
233  return LONG_MAX;
234  if (x <= 0)
235  return 0;
236 
237  /*
238  * If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a
239  * double. Casting it to double and back may well result in overflow due
240  * to rounding, so avoid doing that. We trust that any double value that
241  * compares strictly less than "(double) LONG_MAX" will cast to a
242  * representable "long" value.
243  */
244  return (x < (double) LONG_MAX) ? (long) x : LONG_MAX;
245 }
246 
247 
248 /*
249  * cost_seqscan
250  * Determines and returns the cost of scanning a relation sequentially.
251  *
252  * 'baserel' is the relation to be scanned
253  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
254  */
255 void
256 cost_seqscan(Path *path, PlannerInfo *root,
257  RelOptInfo *baserel, ParamPathInfo *param_info)
258 {
259  Cost startup_cost = 0;
260  Cost cpu_run_cost;
261  Cost disk_run_cost;
262  double spc_seq_page_cost;
263  QualCost qpqual_cost;
264  Cost cpu_per_tuple;
265 
266  /* Should only be applied to base relations */
267  Assert(baserel->relid > 0);
268  Assert(baserel->rtekind == RTE_RELATION);
269 
270  /* Mark the path with the correct row estimate */
271  if (param_info)
272  path->rows = param_info->ppi_rows;
273  else
274  path->rows = baserel->rows;
275 
276  if (!enable_seqscan)
277  startup_cost += disable_cost;
278 
279  /* fetch estimated page cost for tablespace containing table */
280  get_tablespace_page_costs(baserel->reltablespace,
281  NULL,
282  &spc_seq_page_cost);
283 
284  /*
285  * disk costs
286  */
287  disk_run_cost = spc_seq_page_cost * baserel->pages;
288 
289  /* CPU costs */
290  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
291 
292  startup_cost += qpqual_cost.startup;
293  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
294  cpu_run_cost = cpu_per_tuple * baserel->tuples;
295  /* tlist eval costs are paid per output row, not per tuple scanned */
296  startup_cost += path->pathtarget->cost.startup;
297  cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;
298 
299  /* Adjust costing for parallelism, if used. */
300  if (path->parallel_workers > 0)
301  {
302  double parallel_divisor = get_parallel_divisor(path);
303 
304  /* The CPU cost is divided among all the workers. */
305  cpu_run_cost /= parallel_divisor;
306 
307  /*
308  * It may be possible to amortize some of the I/O cost, but probably
309  * not very much, because most operating systems already do aggressive
310  * prefetching. For now, we assume that the disk run cost can't be
311  * amortized at all.
312  */
313 
314  /*
315  * In the case of a parallel plan, the row count needs to represent
316  * the number of tuples processed per worker.
317  */
318  path->rows = clamp_row_est(path->rows / parallel_divisor);
319  }
320 
321  path->startup_cost = startup_cost;
322  path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
323 }
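/*
 * Rough worked example of the formula above, assuming default settings
 * (seq_page_cost = 1.0, cpu_tuple_cost = 0.01) and no quals: a table of
 * 100 pages and 10000 tuples costs about 100 * 1.0 + 10000 * 0.01 = 200
 * in total. (Illustrative numbers, not taken from a real table.)
 */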
324 
325 /*
326  * cost_samplescan
327  * Determines and returns the cost of scanning a relation using sampling.
328  *
329  * 'baserel' is the relation to be scanned
330  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
331  */
332 void
333 cost_samplescan(Path *path, PlannerInfo *root,
334  RelOptInfo *baserel, ParamPathInfo *param_info)
335 {
336  Cost startup_cost = 0;
337  Cost run_cost = 0;
338  RangeTblEntry *rte;
339  TableSampleClause *tsc;
340  TsmRoutine *tsm;
341  double spc_seq_page_cost,
342  spc_random_page_cost,
343  spc_page_cost;
344  QualCost qpqual_cost;
345  Cost cpu_per_tuple;
346 
347  /* Should only be applied to base relations with tablesample clauses */
348  Assert(baserel->relid > 0);
349  rte = planner_rt_fetch(baserel->relid, root);
350  Assert(rte->rtekind == RTE_RELATION);
351  tsc = rte->tablesample;
352  Assert(tsc != NULL);
353  tsm = GetTsmRoutine(tsc->tsmhandler);
354 
355  /* Mark the path with the correct row estimate */
356  if (param_info)
357  path->rows = param_info->ppi_rows;
358  else
359  path->rows = baserel->rows;
360 
361  /* fetch estimated page cost for tablespace containing table */
362  get_tablespace_page_costs(baserel->reltablespace,
363  &spc_random_page_cost,
364  &spc_seq_page_cost);
365 
366  /* if NextSampleBlock is used, assume random access, else sequential */
367  spc_page_cost = (tsm->NextSampleBlock != NULL) ?
368  spc_random_page_cost : spc_seq_page_cost;
369 
370  /*
371  * disk costs (recall that baserel->pages has already been set to the
372  * number of pages the sampling method will visit)
373  */
374  run_cost += spc_page_cost * baserel->pages;
375 
376  /*
377  * CPU costs (recall that baserel->tuples has already been set to the
378  * number of tuples the sampling method will select). Note that we ignore
379  * execution cost of the TABLESAMPLE parameter expressions; they will be
380  * evaluated only once per scan, and in most usages they'll likely be
381  * simple constants anyway. We also don't charge anything for the
382  * calculations the sampling method might do internally.
383  */
384  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
385 
386  startup_cost += qpqual_cost.startup;
387  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
388  run_cost += cpu_per_tuple * baserel->tuples;
389  /* tlist eval costs are paid per output row, not per tuple scanned */
390  startup_cost += path->pathtarget->cost.startup;
391  run_cost += path->pathtarget->cost.per_tuple * path->rows;
392 
393  path->startup_cost = startup_cost;
394  path->total_cost = startup_cost + run_cost;
395 }
396 
397 /*
398  * cost_gather
399  * Determines and returns the cost of gather path.
400  *
401  * 'rel' is the relation to be operated upon
402  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
403  * 'rows' may be used to point to a row estimate; if non-NULL, it overrides
404  * both 'rel' and 'param_info'. This is useful when the path doesn't exactly
405  * correspond to any particular RelOptInfo.
406  */
407 void
408 cost_gather(GatherPath *path, PlannerInfo *root,
409  RelOptInfo *rel, ParamPathInfo *param_info,
410  double *rows)
411 {
412  Cost startup_cost = 0;
413  Cost run_cost = 0;
414 
415  /* Mark the path with the correct row estimate */
416  if (rows)
417  path->path.rows = *rows;
418  else if (param_info)
419  path->path.rows = param_info->ppi_rows;
420  else
421  path->path.rows = rel->rows;
422 
423  startup_cost = path->subpath->startup_cost;
424 
425  run_cost = path->subpath->total_cost - path->subpath->startup_cost;
426 
427  /* Parallel setup and communication cost. */
428  startup_cost += parallel_setup_cost;
429  run_cost += parallel_tuple_cost * path->path.rows;
430 
431  path->path.startup_cost = startup_cost;
432  path->path.total_cost = (startup_cost + run_cost);
433 }
434 
435 /*
436  * cost_gather_merge
437  * Determines and returns the cost of gather merge path.
438  *
439  * GatherMerge merges several pre-sorted input streams, using a heap that at
440  * any given instant holds the next tuple from each stream. If there are N
441  * streams, we need about N*log2(N) tuple comparisons to construct the heap at
442  * startup, and then for each output tuple, about log2(N) comparisons to
443  * replace the top heap entry with the next tuple from the same stream.
444  */
445 void
446 cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
447  RelOptInfo *rel, ParamPathInfo *param_info,
448  Cost input_startup_cost, Cost input_total_cost,
449  double *rows)
450 {
451  Cost startup_cost = 0;
452  Cost run_cost = 0;
453  Cost comparison_cost;
454  double N;
455  double logN;
456 
457  /* Mark the path with the correct row estimate */
458  if (rows)
459  path->path.rows = *rows;
460  else if (param_info)
461  path->path.rows = param_info->ppi_rows;
462  else
463  path->path.rows = rel->rows;
464 
465  if (!enable_gathermerge)
466  startup_cost += disable_cost;
467 
468  /*
469  * Add one to the number of workers to account for the leader. This might
470  * be overgenerous since the leader will do less work than other workers
471  * in typical cases, but we'll go with it for now.
472  */
473  Assert(path->num_workers > 0);
474  N = (double) path->num_workers + 1;
475  logN = LOG2(N);
476 
477  /* Assumed cost per tuple comparison */
478  comparison_cost = 2.0 * cpu_operator_cost;
479 
480  /* Heap creation cost */
481  startup_cost += comparison_cost * N * logN;
482 
483  /* Per-tuple heap maintenance cost */
484  run_cost += path->path.rows * comparison_cost * logN;
485 
486  /* small cost for heap management, like cost_merge_append */
487  run_cost += cpu_operator_cost * path->path.rows;
488 
489  /*
490  * Parallel setup and communication cost. Since Gather Merge, unlike
491  * Gather, requires us to block until a tuple is available from every
492  * worker, we bump the IPC cost up a little bit as compared with Gather.
493  * For lack of a better idea, charge an extra 5%.
494  */
495  startup_cost += parallel_setup_cost;
496  run_cost += parallel_tuple_cost * path->path.rows * 1.05;
497 
498  path->path.startup_cost = startup_cost + input_startup_cost;
499  path->path.total_cost = (startup_cost + run_cost + input_total_cost);
500 }
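/*
 * Illustration with assumed default settings: 2 workers plus the leader give
 * N = 3 and logN = log2(3) ~= 1.58. With cpu_operator_cost = 0.0025 the heap
 * setup charge is 2 * 0.0025 * 3 * 1.58 ~= 0.024, and each output row adds
 * about 2 * 0.0025 * 1.58 + 0.0025 ~= 0.01 on top of the 1.05 *
 * parallel_tuple_cost IPC charge.
 */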
501 
502 /*
503  * cost_index
504  * Determines and returns the cost of scanning a relation using an index.
505  *
506  * 'path' describes the indexscan under consideration, and is complete
507  * except for the fields to be set by this routine
508  * 'loop_count' is the number of repetitions of the indexscan to factor into
509  * estimates of caching behavior
510  *
511  * In addition to rows, startup_cost and total_cost, cost_index() sets the
512  * path's indextotalcost and indexselectivity fields. These values will be
513  * needed if the IndexPath is used in a BitmapIndexScan.
514  *
515  * NOTE: path->indexquals must contain only clauses usable as index
516  * restrictions. Any additional quals evaluated as qpquals may reduce the
517  * number of returned tuples, but they won't reduce the number of tuples
518  * we have to fetch from the table, so they don't reduce the scan cost.
519  */
520 void
521 cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
522  bool partial_path)
523 {
524  IndexOptInfo *index = path->indexinfo;
525  RelOptInfo *baserel = index->rel;
526  bool indexonly = (path->path.pathtype == T_IndexOnlyScan);
527  amcostestimate_function amcostestimate;
528  List *qpquals;
529  Cost startup_cost = 0;
530  Cost run_cost = 0;
531  Cost cpu_run_cost = 0;
532  Cost indexStartupCost;
533  Cost indexTotalCost;
534  Selectivity indexSelectivity;
535  double indexCorrelation,
536  csquared;
537  double spc_seq_page_cost,
538  spc_random_page_cost;
539  Cost min_IO_cost,
540  max_IO_cost;
541  QualCost qpqual_cost;
542  Cost cpu_per_tuple;
543  double tuples_fetched;
544  double pages_fetched;
545  double rand_heap_pages;
546  double index_pages;
547 
548  /* Should only be applied to base relations */
549  Assert(IsA(baserel, RelOptInfo) &&
550  IsA(index, IndexOptInfo));
551  Assert(baserel->relid > 0);
552  Assert(baserel->rtekind == RTE_RELATION);
553 
554  /*
555  * Mark the path with the correct row estimate, and identify which quals
556  * will need to be enforced as qpquals. We need not check any quals that
557  * are implied by the index's predicate, so we can use indrestrictinfo not
558  * baserestrictinfo as the list of relevant restriction clauses for the
559  * rel.
560  */
561  if (path->path.param_info)
562  {
563  path->path.rows = path->path.param_info->ppi_rows;
564  /* qpquals come from the rel's restriction clauses and ppi_clauses */
565  qpquals = list_concat(extract_nonindex_conditions(path->indexinfo->indrestrictinfo,
566  path->indexclauses),
567  extract_nonindex_conditions(path->path.param_info->ppi_clauses,
568  path->indexclauses));
569  }
570  else
571  {
572  path->path.rows = baserel->rows;
573  /* qpquals come from just the rel's restriction clauses */
574  qpquals = extract_nonindex_conditions(path->indexinfo->indrestrictinfo,
575  path->indexclauses);
576  }
577 
578  if (!enable_indexscan)
579  startup_cost += disable_cost;
580  /* we don't need to check enable_indexonlyscan; indxpath.c does that */
581 
582  /*
583  * Call index-access-method-specific code to estimate the processing cost
584  * for scanning the index, as well as the selectivity of the index (ie,
585  * the fraction of main-table tuples we will have to retrieve) and its
586  * correlation to the main-table tuple order. We need a cast here because
587  * pathnodes.h uses a weak function type to avoid including amapi.h.
588  */
589  amcostestimate = (amcostestimate_function) index->amcostestimate;
590  amcostestimate(root, path, loop_count,
591  &indexStartupCost, &indexTotalCost,
592  &indexSelectivity, &indexCorrelation,
593  &index_pages);
594 
595  /*
596  * Save amcostestimate's results for possible use in bitmap scan planning.
597  * We don't bother to save indexStartupCost or indexCorrelation, because a
598  * bitmap scan doesn't care about either.
599  */
600  path->indextotalcost = indexTotalCost;
601  path->indexselectivity = indexSelectivity;
602 
603  /* all costs for touching index itself included here */
604  startup_cost += indexStartupCost;
605  run_cost += indexTotalCost - indexStartupCost;
606 
607  /* estimate number of main-table tuples fetched */
608  tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
609 
610  /* fetch estimated page costs for tablespace containing table */
611  get_tablespace_page_costs(baserel->reltablespace,
612  &spc_random_page_cost,
613  &spc_seq_page_cost);
614 
615  /*----------
616  * Estimate number of main-table pages fetched, and compute I/O cost.
617  *
618  * When the index ordering is uncorrelated with the table ordering,
619  * we use an approximation proposed by Mackert and Lohman (see
620  * index_pages_fetched() for details) to compute the number of pages
621  * fetched, and then charge spc_random_page_cost per page fetched.
622  *
623  * When the index ordering is exactly correlated with the table ordering
624  * (just after a CLUSTER, for example), the number of pages fetched should
625  * be exactly selectivity * table_size. What's more, all but the first
626  * will be sequential fetches, not the random fetches that occur in the
627  * uncorrelated case. So if the number of pages is more than 1, we
628  * ought to charge
629  * spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost
630  * For partially-correlated indexes, we ought to charge somewhere between
631  * these two estimates. We currently interpolate linearly between the
632  * estimates based on the correlation squared (XXX is that appropriate?).
633  *
634  * If it's an index-only scan, then we will not need to fetch any heap
635  * pages for which the visibility map shows all tuples are visible.
636  * Hence, reduce the estimated number of heap fetches accordingly.
637  * We use the measured fraction of the entire heap that is all-visible,
638  * which might not be particularly relevant to the subset of the heap
639  * that this query will fetch; but it's not clear how to do better.
640  *----------
641  */
642  if (loop_count > 1)
643  {
644  /*
645  * For repeated indexscans, the appropriate estimate for the
646  * uncorrelated case is to scale up the number of tuples fetched in
647  * the Mackert and Lohman formula by the number of scans, so that we
648  * estimate the number of pages fetched by all the scans; then
649  * pro-rate the costs for one scan. In this case we assume all the
650  * fetches are random accesses.
651  */
652  pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
653  baserel->pages,
654  (double) index->pages,
655  root);
656 
657  if (indexonly)
658  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
659 
660  rand_heap_pages = pages_fetched;
661 
662  max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
663 
664  /*
665  * In the perfectly correlated case, the number of pages touched by
666  * each scan is selectivity * table_size, and we can use the Mackert
667  * and Lohman formula at the page level to estimate how much work is
668  * saved by caching across scans. We still assume all the fetches are
669  * random, though, which is an overestimate that's hard to correct for
670  * without double-counting the cache effects. (But in most cases
671  * where such a plan is actually interesting, only one page would get
672  * fetched per scan anyway, so it shouldn't matter much.)
673  */
674  pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
675 
676  pages_fetched = index_pages_fetched(pages_fetched * loop_count,
677  baserel->pages,
678  (double) index->pages,
679  root);
680 
681  if (indexonly)
682  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
683 
684  min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
685  }
686  else
687  {
688  /*
689  * Normal case: apply the Mackert and Lohman formula, and then
690  * interpolate between that and the correlation-derived result.
691  */
692  pages_fetched = index_pages_fetched(tuples_fetched,
693  baserel->pages,
694  (double) index->pages,
695  root);
696 
697  if (indexonly)
698  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
699 
700  rand_heap_pages = pages_fetched;
701 
702  /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
703  max_IO_cost = pages_fetched * spc_random_page_cost;
704 
705  /* min_IO_cost is for the perfectly correlated case (csquared=1) */
706  pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
707 
708  if (indexonly)
709  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
710 
711  if (pages_fetched > 0)
712  {
713  min_IO_cost = spc_random_page_cost;
714  if (pages_fetched > 1)
715  min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
716  }
717  else
718  min_IO_cost = 0;
719  }
720 
721  if (partial_path)
722  {
723  /*
724  * For index only scans compute workers based on number of index pages
725  * fetched; the number of heap pages we fetch might be so small as to
726  * effectively rule out parallelism, which we don't want to do.
727  */
728  if (indexonly)
729  rand_heap_pages = -1;
730 
731  /*
732  * Estimate the number of parallel workers required to scan index. Use
733  * the number of heap pages computed considering heap fetches won't be
734  * sequential as for parallel scans the pages are accessed in random
735  * order.
736  */
737  path->path.parallel_workers = compute_parallel_worker(baserel,
738  rand_heap_pages,
739  index_pages,
740  max_parallel_workers_per_gather);
741 
742  /*
743  * Fall out if workers can't be assigned for parallel scan, because in
744  * such a case this path will be rejected. So there is no benefit in
745  * doing extra computation.
746  */
747  if (path->path.parallel_workers <= 0)
748  return;
749 
750  path->path.parallel_aware = true;
751  }
752 
753  /*
754  * Now interpolate based on estimated index order correlation to get total
755  * disk I/O cost for main table accesses.
756  */
757  csquared = indexCorrelation * indexCorrelation;
758 
759  run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
760 
761  /*
762  * Estimate CPU costs per tuple.
763  *
764  * What we want here is cpu_tuple_cost plus the evaluation costs of any
765  * qual clauses that we have to evaluate as qpquals.
766  */
767  cost_qual_eval(&qpqual_cost, qpquals, root);
768 
769  startup_cost += qpqual_cost.startup;
770  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
771 
772  cpu_run_cost += cpu_per_tuple * tuples_fetched;
773 
774  /* tlist eval costs are paid per output row, not per tuple scanned */
775  startup_cost += path->path.pathtarget->cost.startup;
776  cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
777 
778  /* Adjust costing for parallelism, if used. */
779  if (path->path.parallel_workers > 0)
780  {
781  double parallel_divisor = get_parallel_divisor(&path->path);
782 
783  path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
784 
785  /* The CPU cost is divided among all the workers. */
786  cpu_run_cost /= parallel_divisor;
787  }
788 
789  run_cost += cpu_run_cost;
790 
791  path->path.startup_cost = startup_cost;
792  path->path.total_cost = startup_cost + run_cost;
793 }
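/*
 * To illustrate the correlation interpolation used above: if max_IO_cost is
 * 400 (fully random), min_IO_cost is 100 (fully correlated) and the index
 * correlation is 0.5, then csquared = 0.25 and the charged I/O cost is
 * 400 + 0.25 * (100 - 400) = 325. (Made-up numbers.)
 */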
794 
795 /*
796  * extract_nonindex_conditions
797  *
798  * Given a list of quals to be enforced in an indexscan, extract the ones that
799  * will have to be applied as qpquals (ie, the index machinery won't handle
800  * them). Here we detect only whether a qual clause is directly redundant
801  * with some indexclause. If the index path is chosen for use, createplan.c
802  * will try a bit harder to get rid of redundant qual conditions; specifically
803  * it will see if quals can be proven to be implied by the indexquals. But
804  * it does not seem worth the cycles to try to factor that in at this stage,
805  * since we're only trying to estimate qual eval costs. Otherwise this must
806  * match the logic in create_indexscan_plan().
807  *
808  * qual_clauses, and the result, are lists of RestrictInfos.
809  * indexclauses is a list of IndexClauses.
810  */
811 static List *
812 extract_nonindex_conditions(List *qual_clauses, List *indexclauses)
813 {
814  List *result = NIL;
815  ListCell *lc;
816 
817  foreach(lc, qual_clauses)
818  {
819  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
820 
821  if (rinfo->pseudoconstant)
822  continue; /* we may drop pseudoconstants here */
823  if (is_redundant_with_indexclauses(rinfo, indexclauses))
824  continue; /* dup or derived from same EquivalenceClass */
825  /* ... skip the predicate proof attempt createplan.c will try ... */
826  result = lappend(result, rinfo);
827  }
828  return result;
829 }
830 
831 /*
832  * index_pages_fetched
833  * Estimate the number of pages actually fetched after accounting for
834  * cache effects.
835  *
836  * We use an approximation proposed by Mackert and Lohman, "Index Scans
837  * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
838  * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
839  * The Mackert and Lohman approximation is that the number of pages
840  * fetched is
841  * PF =
842  * min(2TNs/(2T+Ns), T) when T <= b
843  * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
844  * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
845  * where
846  * T = # pages in table
847  * N = # tuples in table
848  * s = selectivity = fraction of table to be scanned
849  * b = # buffer pages available (we include kernel space here)
850  *
851  * We assume that effective_cache_size is the total number of buffer pages
852  * available for the whole query, and pro-rate that space across all the
853  * tables in the query and the index currently under consideration. (This
854  * ignores space needed for other indexes used by the query, but since we
855  * don't know which indexes will get used, we can't estimate that very well;
856  * and in any case counting all the tables may well be an overestimate, since
857  * depending on the join plan not all the tables may be scanned concurrently.)
858  *
859  * The product Ns is the number of tuples fetched; we pass in that
860  * product rather than calculating it here. "pages" is the number of pages
861  * in the object under consideration (either an index or a table).
862  * "index_pages" is the amount to add to the total table space, which was
863  * computed for us by make_one_rel.
864  *
865  * Caller is expected to have ensured that tuples_fetched is greater than zero
866  * and rounded to integer (see clamp_row_est). The result will likewise be
867  * greater than zero and integral.
868  */
869 double
870 index_pages_fetched(double tuples_fetched, BlockNumber pages,
871  double index_pages, PlannerInfo *root)
872 {
873  double pages_fetched;
874  double total_pages;
875  double T,
876  b;
877 
878  /* T is # pages in table, but don't allow it to be zero */
879  T = (pages > 1) ? (double) pages : 1.0;
880 
881  /* Compute number of pages assumed to be competing for cache space */
882  total_pages = root->total_table_pages + index_pages;
883  total_pages = Max(total_pages, 1.0);
884  Assert(T <= total_pages);
885 
886  /* b is pro-rated share of effective_cache_size */
887  b = (double) effective_cache_size * T / total_pages;
888 
889  /* force it positive and integral */
890  if (b <= 1.0)
891  b = 1.0;
892  else
893  b = ceil(b);
894 
895  /* This part is the Mackert and Lohman formula */
896  if (T <= b)
897  {
898  pages_fetched =
899  (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
900  if (pages_fetched >= T)
901  pages_fetched = T;
902  else
903  pages_fetched = ceil(pages_fetched);
904  }
905  else
906  {
907  double lim;
908 
909  lim = (2.0 * T * b) / (2.0 * T - b);
910  if (tuples_fetched <= lim)
911  {
912  pages_fetched =
913  (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
914  }
915  else
916  {
917  pages_fetched =
918  b + (tuples_fetched - lim) * (T - b) / T;
919  }
920  pages_fetched = ceil(pages_fetched);
921  }
922  return pages_fetched;
923 }
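/*
 * Quick sanity check of the formula with made-up numbers: T = 1000 pages,
 * b = 1000 (cache covers the whole table) and Ns = 500 tuples fetched give
 * PF = 2*1000*500 / (2*1000 + 500) = 400 pages, reflecting that some tuples
 * share pages and repeat visits are assumed to hit cache.
 */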
924 
925 /*
926  * get_indexpath_pages
927  * Determine the total size of the indexes used in a bitmap index path.
928  *
929  * Note: if the same index is used more than once in a bitmap tree, we will
930  * count it multiple times, which perhaps is the wrong thing ... but it's
931  * not completely clear, and detecting duplicates is difficult, so ignore it
932  * for now.
933  */
934 static double
935 get_indexpath_pages(Path *bitmapqual)
936 {
937  double result = 0;
938  ListCell *l;
939 
940  if (IsA(bitmapqual, BitmapAndPath))
941  {
942  BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
943 
944  foreach(l, apath->bitmapquals)
945  {
946  result += get_indexpath_pages((Path *) lfirst(l));
947  }
948  }
949  else if (IsA(bitmapqual, BitmapOrPath))
950  {
951  BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
952 
953  foreach(l, opath->bitmapquals)
954  {
955  result += get_indexpath_pages((Path *) lfirst(l));
956  }
957  }
958  else if (IsA(bitmapqual, IndexPath))
959  {
960  IndexPath *ipath = (IndexPath *) bitmapqual;
961 
962  result = (double) ipath->indexinfo->pages;
963  }
964  else
965  elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
966 
967  return result;
968 }
969 
970 /*
971  * cost_bitmap_heap_scan
972  * Determines and returns the cost of scanning a relation using a bitmap
973  * index-then-heap plan.
974  *
975  * 'baserel' is the relation to be scanned
976  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
977  * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
978  * 'loop_count' is the number of repetitions of the indexscan to factor into
979  * estimates of caching behavior
980  *
981  * Note: the component IndexPaths in bitmapqual should have been costed
982  * using the same loop_count.
983  */
984 void
985 cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
986  ParamPathInfo *param_info,
987  Path *bitmapqual, double loop_count)
988 {
989  Cost startup_cost = 0;
990  Cost run_cost = 0;
991  Cost indexTotalCost;
992  QualCost qpqual_cost;
993  Cost cpu_per_tuple;
994  Cost cost_per_page;
995  Cost cpu_run_cost;
996  double tuples_fetched;
997  double pages_fetched;
998  double spc_seq_page_cost,
999  spc_random_page_cost;
1000  double T;
1001 
1002  /* Should only be applied to base relations */
1003  Assert(IsA(baserel, RelOptInfo));
1004  Assert(baserel->relid > 0);
1005  Assert(baserel->rtekind == RTE_RELATION);
1006 
1007  /* Mark the path with the correct row estimate */
1008  if (param_info)
1009  path->rows = param_info->ppi_rows;
1010  else
1011  path->rows = baserel->rows;
1012 
1013  if (!enable_bitmapscan)
1014  startup_cost += disable_cost;
1015 
1016  pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual,
1017  loop_count, &indexTotalCost,
1018  &tuples_fetched);
1019 
1020  startup_cost += indexTotalCost;
1021  T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
1022 
1023  /* Fetch estimated page costs for tablespace containing table. */
1024  get_tablespace_page_costs(baserel->reltablespace,
1025  &spc_random_page_cost,
1026  &spc_seq_page_cost);
1027 
1028  /*
1029  * For small numbers of pages we should charge spc_random_page_cost
1030  * apiece, while if nearly all the table's pages are being read, it's more
1031  * appropriate to charge spc_seq_page_cost apiece. The effect is
1032  * nonlinear, too. For lack of a better idea, interpolate like this to
1033  * determine the cost per page.
1034  */
1035  if (pages_fetched >= 2.0)
1036  cost_per_page = spc_random_page_cost -
1037  (spc_random_page_cost - spc_seq_page_cost)
1038  * sqrt(pages_fetched / T);
1039  else
1040  cost_per_page = spc_random_page_cost;
1041 
1042  run_cost += pages_fetched * cost_per_page;
1043 
1044  /*
1045  * Estimate CPU costs per tuple.
1046  *
1047  * Often the indexquals don't need to be rechecked at each tuple ... but
1048  * not always, especially not if there are enough tuples involved that the
1049  * bitmaps become lossy. For the moment, just assume they will be
1050  * rechecked always. This means we charge the full freight for all the
1051  * scan clauses.
1052  */
1053  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1054 
1055  startup_cost += qpqual_cost.startup;
1056  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1057  cpu_run_cost = cpu_per_tuple * tuples_fetched;
1058 
1059  /* Adjust costing for parallelism, if used. */
1060  if (path->parallel_workers > 0)
1061  {
1062  double parallel_divisor = get_parallel_divisor(path);
1063 
1064  /* The CPU cost is divided among all the workers. */
1065  cpu_run_cost /= parallel_divisor;
1066 
1067  path->rows = clamp_row_est(path->rows / parallel_divisor);
1068  }
1069 
1070 
1071  run_cost += cpu_run_cost;
1072 
1073  /* tlist eval costs are paid per output row, not per tuple scanned */
1074  startup_cost += path->pathtarget->cost.startup;
1075  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1076 
1077  path->startup_cost = startup_cost;
1078  path->total_cost = startup_cost + run_cost;
1079 }
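/*
 * Illustration of the per-page interpolation above, assuming the default
 * random_page_cost = 4.0 and seq_page_cost = 1.0: fetching 25 of a table's
 * 100 pages gives cost_per_page = 4.0 - (4.0 - 1.0) * sqrt(25/100) = 2.5,
 * halfway between the random and sequential charges.
 */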
1080 
1081 /*
1082  * cost_bitmap_tree_node
1083  * Extract cost and selectivity from a bitmap tree node (index/and/or)
1084  */
1085 void
1086 cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
1087 {
1088  if (IsA(path, IndexPath))
1089  {
1090  *cost = ((IndexPath *) path)->indextotalcost;
1091  *selec = ((IndexPath *) path)->indexselectivity;
1092 
1093  /*
1094  * Charge a small amount per retrieved tuple to reflect the costs of
1095  * manipulating the bitmap. This is mostly to make sure that a bitmap
1096  * scan doesn't look to be the same cost as an indexscan to retrieve a
1097  * single tuple.
1098  */
1099  *cost += 0.1 * cpu_operator_cost * path->rows;
1100  }
1101  else if (IsA(path, BitmapAndPath))
1102  {
1103  *cost = path->total_cost;
1104  *selec = ((BitmapAndPath *) path)->bitmapselectivity;
1105  }
1106  else if (IsA(path, BitmapOrPath))
1107  {
1108  *cost = path->total_cost;
1109  *selec = ((BitmapOrPath *) path)->bitmapselectivity;
1110  }
1111  else
1112  {
1113  elog(ERROR, "unrecognized node type: %d", nodeTag(path));
1114  *cost = *selec = 0; /* keep compiler quiet */
1115  }
1116 }
1117 
1118 /*
1119  * cost_bitmap_and_node
1120  * Estimate the cost of a BitmapAnd node
1121  *
1122  * Note that this considers only the costs of index scanning and bitmap
1123  * creation, not the eventual heap access. In that sense the object isn't
1124  * truly a Path, but it has enough path-like properties (costs in particular)
1125  * to warrant treating it as one. We don't bother to set the path rows field,
1126  * however.
1127  */
1128 void
1129 cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
1130 {
1131  Cost totalCost;
1132  Selectivity selec;
1133  ListCell *l;
1134 
1135  /*
1136  * We estimate AND selectivity on the assumption that the inputs are
1137  * independent. This is probably often wrong, but we don't have the info
1138  * to do better.
1139  *
1140  * The runtime cost of the BitmapAnd itself is estimated at 100x
1141  * cpu_operator_cost for each tbm_intersect needed. Probably too small,
1142  * definitely too simplistic?
1143  */
1144  totalCost = 0.0;
1145  selec = 1.0;
1146  foreach(l, path->bitmapquals)
1147  {
1148  Path *subpath = (Path *) lfirst(l);
1149  Cost subCost;
1150  Selectivity subselec;
1151 
1152  cost_bitmap_tree_node(subpath, &subCost, &subselec);
1153 
1154  selec *= subselec;
1155 
1156  totalCost += subCost;
1157  if (l != list_head(path->bitmapquals))
1158  totalCost += 100.0 * cpu_operator_cost;
1159  }
1160  path->bitmapselectivity = selec;
1161  path->path.rows = 0; /* per above, not used */
1162  path->path.startup_cost = totalCost;
1163  path->path.total_cost = totalCost;
1164 }
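/*
 * For example, ANDing two bitmap inputs with selectivities 0.1 and 0.2 yields
 * an assumed combined selectivity of 0.1 * 0.2 = 0.02 under the independence
 * assumption described above.
 */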
1165 
1166 /*
1167  * cost_bitmap_or_node
1168  * Estimate the cost of a BitmapOr node
1169  *
1170  * See comments for cost_bitmap_and_node.
1171  */
1172 void
1173 cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
1174 {
1175  Cost totalCost;
1176  Selectivity selec;
1177  ListCell *l;
1178 
1179  /*
1180  * We estimate OR selectivity on the assumption that the inputs are
1181  * non-overlapping, since that's often the case in "x IN (list)" type
1182  * situations. Of course, we clamp to 1.0 at the end.
1183  *
1184  * The runtime cost of the BitmapOr itself is estimated at 100x
1185  * cpu_operator_cost for each tbm_union needed. Probably too small,
1186  * definitely too simplistic? We are aware that the tbm_unions are
1187  * optimized out when the inputs are BitmapIndexScans.
1188  */
1189  totalCost = 0.0;
1190  selec = 0.0;
1191  foreach(l, path->bitmapquals)
1192  {
1193  Path *subpath = (Path *) lfirst(l);
1194  Cost subCost;
1195  Selectivity subselec;
1196 
1197  cost_bitmap_tree_node(subpath, &subCost, &subselec);
1198 
1199  selec += subselec;
1200 
1201  totalCost += subCost;
1202  if (l != list_head(path->bitmapquals) &&
1203  !IsA(subpath, IndexPath))
1204  totalCost += 100.0 * cpu_operator_cost;
1205  }
1206  path->bitmapselectivity = Min(selec, 1.0);
1207  path->path.rows = 0; /* per above, not used */
1208  path->path.startup_cost = totalCost;
1209  path->path.total_cost = totalCost;
1210 }
1211 
1212 /*
1213  * cost_tidscan
1214  * Determines and returns the cost of scanning a relation using TIDs.
1215  *
1216  * 'baserel' is the relation to be scanned
1217  * 'tidquals' is the list of TID-checkable quals
1218  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1219  */
1220 void
1221 cost_tidscan(Path *path, PlannerInfo *root,
1222  RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info)
1223 {
1224  Cost startup_cost = 0;
1225  Cost run_cost = 0;
1226  bool isCurrentOf = false;
1227  QualCost qpqual_cost;
1228  Cost cpu_per_tuple;
1229  QualCost tid_qual_cost;
1230  int ntuples;
1231  ListCell *l;
1232  double spc_random_page_cost;
1233 
1234  /* Should only be applied to base relations */
1235  Assert(baserel->relid > 0);
1236  Assert(baserel->rtekind == RTE_RELATION);
1237 
1238  /* Mark the path with the correct row estimate */
1239  if (param_info)
1240  path->rows = param_info->ppi_rows;
1241  else
1242  path->rows = baserel->rows;
1243 
1244  /* Count how many tuples we expect to retrieve */
1245  ntuples = 0;
1246  foreach(l, tidquals)
1247  {
1248  RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
1249  Expr *qual = rinfo->clause;
1250 
1251  if (IsA(qual, ScalarArrayOpExpr))
1252  {
1253  /* Each element of the array yields 1 tuple */
1254  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
1255  Node *arraynode = (Node *) lsecond(saop->args);
1256 
1257  ntuples += estimate_array_length(arraynode);
1258  }
1259  else if (IsA(qual, CurrentOfExpr))
1260  {
1261  /* CURRENT OF yields 1 tuple */
1262  isCurrentOf = true;
1263  ntuples++;
1264  }
1265  else
1266  {
1267  /* It's just CTID = something, count 1 tuple */
1268  ntuples++;
1269  }
1270  }
1271 
1272  /*
1273  * We must force TID scan for WHERE CURRENT OF, because only nodeTidscan.c
1274  * understands how to do it correctly. Therefore, honor enable_tidscan
1275  * only when CURRENT OF isn't present. Also note that cost_qual_eval
1276  * counts a CurrentOfExpr as having startup cost disable_cost, which we
1277  * subtract off here; that's to prevent other plan types such as seqscan
1278  * from winning.
1279  */
1280  if (isCurrentOf)
1281  {
1282  Assert(baserel->baserestrictcost.startup >= disable_cost);
1283  startup_cost -= disable_cost;
1284  }
1285  else if (!enable_tidscan)
1286  startup_cost += disable_cost;
1287 
1288  /*
1289  * The TID qual expressions will be computed once, any other baserestrict
1290  * quals once per retrieved tuple.
1291  */
1292  cost_qual_eval(&tid_qual_cost, tidquals, root);
1293 
1294  /* fetch estimated page cost for tablespace containing table */
1295  get_tablespace_page_costs(baserel->reltablespace,
1296  &spc_random_page_cost,
1297  NULL);
1298 
1299  /* disk costs --- assume each tuple on a different page */
1300  run_cost += spc_random_page_cost * ntuples;
1301 
1302  /* Add scanning CPU costs */
1303  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1304 
1305  /* XXX currently we assume TID quals are a subset of qpquals */
1306  startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
1307  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
1308  tid_qual_cost.per_tuple;
1309  run_cost += cpu_per_tuple * ntuples;
1310 
1311  /* tlist eval costs are paid per output row, not per tuple scanned */
1312  startup_cost += path->pathtarget->cost.startup;
1313  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1314 
1315  path->startup_cost = startup_cost;
1316  path->total_cost = startup_cost + run_cost;
1317 }
1318 
1319 /*
1320  * cost_tidrangescan
1321  * Determines and sets the costs of scanning a relation using a range of
1322  * TIDs for 'path'
1323  *
1324  * 'baserel' is the relation to be scanned
1325  * 'tidrangequals' is the list of TID-checkable range quals
1326  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1327  */
1328 void
1329 cost_tidrangescan(Path *path, PlannerInfo *root,
1330  RelOptInfo *baserel, List *tidrangequals,
1331  ParamPathInfo *param_info)
1332 {
1333  Selectivity selectivity;
1334  double pages;
1335  Cost startup_cost = 0;
1336  Cost run_cost = 0;
1337  QualCost qpqual_cost;
1338  Cost cpu_per_tuple;
1339  QualCost tid_qual_cost;
1340  double ntuples;
1341  double nseqpages;
1342  double spc_random_page_cost;
1343  double spc_seq_page_cost;
1344 
1345  /* Should only be applied to base relations */
1346  Assert(baserel->relid > 0);
1347  Assert(baserel->rtekind == RTE_RELATION);
1348 
1349  /* Mark the path with the correct row estimate */
1350  if (param_info)
1351  path->rows = param_info->ppi_rows;
1352  else
1353  path->rows = baserel->rows;
1354 
1355  /* Count how many tuples and pages we expect to scan */
1356  selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid,
1357  JOIN_INNER, NULL);
1358  pages = ceil(selectivity * baserel->pages);
1359 
1360  if (pages <= 0.0)
1361  pages = 1.0;
1362 
1363  /*
1364  * The first page in a range requires a random seek, but each subsequent
1365  * page is just a normal sequential page read. NOTE: it's desirable for
1366  * TID Range Scans to cost more than the equivalent Sequential Scans,
1367  * because Seq Scans have some performance advantages such as scan
1368  * synchronization and parallelizability, and we'd prefer one of them to
1369  * be picked unless a TID Range Scan really is better.
1370  */
1371  ntuples = selectivity * baserel->tuples;
1372  nseqpages = pages - 1.0;
1373 
1374  if (!enable_tidscan)
1375  startup_cost += disable_cost;
1376 
1377  /*
1378  * The TID qual expressions will be computed once, any other baserestrict
1379  * quals once per retrieved tuple.
1380  */
1381  cost_qual_eval(&tid_qual_cost, tidrangequals, root);
1382 
1383  /* fetch estimated page cost for tablespace containing table */
1384  get_tablespace_page_costs(baserel->reltablespace,
1385  &spc_random_page_cost,
1386  &spc_seq_page_cost);
1387 
1388  /* disk costs; 1 random page and the remainder as seq pages */
1389  run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
1390 
1391  /* Add scanning CPU costs */
1392  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1393 
1394  /*
1395  * XXX currently we assume TID quals are a subset of qpquals at this
1396  * point; they will be removed (if possible) when we create the plan, so
1397  * we subtract their cost from the total qpqual cost. (If the TID quals
1398  * can't be removed, this is a mistake and we're going to underestimate
1399  * the CPU cost a bit.)
1400  */
1401  startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
1402  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
1403  tid_qual_cost.per_tuple;
1404  run_cost += cpu_per_tuple * ntuples;
1405 
1406  /* tlist eval costs are paid per output row, not per tuple scanned */
1407  startup_cost += path->pathtarget->cost.startup;
1408  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1409 
1410  path->startup_cost = startup_cost;
1411  path->total_cost = startup_cost + run_cost;
1412 }
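/*
 * For instance, a TID range expected to touch 10 pages is charged one
 * random_page_cost plus 9 * seq_page_cost of disk cost; with the assumed
 * defaults (4.0 and 1.0) that is 13.0 before CPU costs are added.
 */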
1413 
1414 /*
1415  * cost_subqueryscan
1416  * Determines and returns the cost of scanning a subquery RTE.
1417  *
1418  * 'baserel' is the relation to be scanned
1419  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1420  * 'trivial_pathtarget' is true if the pathtarget is believed to be trivial.
1421  */
1422 void
1423 cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
1424  RelOptInfo *baserel, ParamPathInfo *param_info,
1425  bool trivial_pathtarget)
1426 {
1427  Cost startup_cost;
1428  Cost run_cost;
1429  List *qpquals;
1430  QualCost qpqual_cost;
1431  Cost cpu_per_tuple;
1432 
1433  /* Should only be applied to base relations that are subqueries */
1434  Assert(baserel->relid > 0);
1435  Assert(baserel->rtekind == RTE_SUBQUERY);
1436 
1437  /*
1438  * We compute the rowcount estimate as the subplan's estimate times the
1439  * selectivity of relevant restriction clauses. In simple cases this will
1440  * come out the same as baserel->rows; but when dealing with parallelized
1441  * paths we must do it like this to get the right answer.
1442  */
1443  if (param_info)
1444  qpquals = list_concat_copy(param_info->ppi_clauses,
1445  baserel->baserestrictinfo);
1446  else
1447  qpquals = baserel->baserestrictinfo;
1448 
1449  path->path.rows = clamp_row_est(path->subpath->rows *
1450  clauselist_selectivity(root,
1451  qpquals,
1452  0,
1453  JOIN_INNER,
1454  NULL));
1455 
1456  /*
1457  * Cost of path is cost of evaluating the subplan, plus cost of evaluating
1458  * any restriction clauses and tlist that will be attached to the
1459  * SubqueryScan node, plus cpu_tuple_cost to account for selection and
1460  * projection overhead.
1461  */
1462  path->path.startup_cost = path->subpath->startup_cost;
1463  path->path.total_cost = path->subpath->total_cost;
1464 
1465  /*
1466  * However, if there are no relevant restriction clauses and the
1467  * pathtarget is trivial, then we expect that setrefs.c will optimize away
1468  * the SubqueryScan plan node altogether, so we should just make its cost
1469  * and rowcount equal to the input path's.
1470  *
1471  * Note: there are some edge cases where createplan.c will apply a
1472  * different targetlist to the SubqueryScan node, thus falsifying our
1473  * current estimate of whether the target is trivial, and making the cost
1474  * estimate (though not the rowcount) wrong. It does not seem worth the
1475  * extra complication to try to account for that exactly, especially since
1476  * that behavior falsifies other cost estimates as well.
1477  */
1478  if (qpquals == NIL && trivial_pathtarget)
1479  return;
1480 
1481  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1482 
1483  startup_cost = qpqual_cost.startup;
1484  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1485  run_cost = cpu_per_tuple * path->subpath->rows;
1486 
1487  /* tlist eval costs are paid per output row, not per tuple scanned */
1488  startup_cost += path->path.pathtarget->cost.startup;
1489  run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
1490 
1491  path->path.startup_cost += startup_cost;
1492  path->path.total_cost += startup_cost + run_cost;
1493 }
1494 
1495 /*
1496  * cost_functionscan
1497  * Determines and returns the cost of scanning a function RTE.
1498  *
1499  * 'baserel' is the relation to be scanned
1500  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1501  */
1502 void
1503 cost_functionscan(Path *path, PlannerInfo *root,
1504  RelOptInfo *baserel, ParamPathInfo *param_info)
1505 {
1506  Cost startup_cost = 0;
1507  Cost run_cost = 0;
1508  QualCost qpqual_cost;
1509  Cost cpu_per_tuple;
1510  RangeTblEntry *rte;
1511  QualCost exprcost;
1512 
1513  /* Should only be applied to base relations that are functions */
1514  Assert(baserel->relid > 0);
1515  rte = planner_rt_fetch(baserel->relid, root);
1516  Assert(rte->rtekind == RTE_FUNCTION);
1517 
1518  /* Mark the path with the correct row estimate */
1519  if (param_info)
1520  path->rows = param_info->ppi_rows;
1521  else
1522  path->rows = baserel->rows;
1523 
1524  /*
1525  * Estimate costs of executing the function expression(s).
1526  *
1527  * Currently, nodeFunctionscan.c always executes the functions to
1528  * completion before returning any rows, and caches the results in a
1529  * tuplestore. So the function eval cost is all startup cost, and per-row
1530  * costs are minimal.
1531  *
1532  * XXX in principle we ought to charge tuplestore spill costs if the
1533  * number of rows is large. However, given how phony our rowcount
1534  * estimates for functions tend to be, there's not a lot of point in that
1535  * refinement right now.
1536  */
1537  cost_qual_eval_node(&exprcost, (Node *) rte->functions, root);
1538 
1539  startup_cost += exprcost.startup + exprcost.per_tuple;
1540 
1541  /* Add scanning CPU costs */
1542  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1543 
1544  startup_cost += qpqual_cost.startup;
1545  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1546  run_cost += cpu_per_tuple * baserel->tuples;
1547 
1548  /* tlist eval costs are paid per output row, not per tuple scanned */
1549  startup_cost += path->pathtarget->cost.startup;
1550  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1551 
1552  path->startup_cost = startup_cost;
1553  path->total_cost = startup_cost + run_cost;
1554 }
1555 
1556 /*
1557  * cost_tablefuncscan
1558  * Determines and returns the cost of scanning a table function.
1559  *
1560  * 'baserel' is the relation to be scanned
1561  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1562  */
1563 void
1564 cost_tablefuncscan(Path *path, PlannerInfo *root,
1565  RelOptInfo *baserel, ParamPathInfo *param_info)
1566 {
1567  Cost startup_cost = 0;
1568  Cost run_cost = 0;
1569  QualCost qpqual_cost;
1570  Cost cpu_per_tuple;
1571  RangeTblEntry *rte;
1572  QualCost exprcost;
1573 
1574  /* Should only be applied to base relations that are functions */
1575  Assert(baserel->relid > 0);
1576  rte = planner_rt_fetch(baserel->relid, root);
1577  Assert(rte->rtekind == RTE_TABLEFUNC);
1578 
1579  /* Mark the path with the correct row estimate */
1580  if (param_info)
1581  path->rows = param_info->ppi_rows;
1582  else
1583  path->rows = baserel->rows;
1584 
1585  /*
1586  * Estimate costs of executing the table func expression(s).
1587  *
1588  * XXX in principle we ought to charge tuplestore spill costs if the
1589  * number of rows is large. However, given how phony our rowcount
1590  * estimates for tablefuncs tend to be, there's not a lot of point in that
1591  * refinement right now.
1592  */
1593  cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root);
1594 
1595  startup_cost += exprcost.startup + exprcost.per_tuple;
1596 
1597  /* Add scanning CPU costs */
1598  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1599 
1600  startup_cost += qpqual_cost.startup;
1601  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1602  run_cost += cpu_per_tuple * baserel->tuples;
1603 
1604  /* tlist eval costs are paid per output row, not per tuple scanned */
1605  startup_cost += path->pathtarget->cost.startup;
1606  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1607 
1608  path->startup_cost = startup_cost;
1609  path->total_cost = startup_cost + run_cost;
1610 }
1611 
1612 /*
1613  * cost_valuesscan
1614  * Determines and returns the cost of scanning a VALUES RTE.
1615  *
1616  * 'baserel' is the relation to be scanned
1617  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1618  */
1619 void
1620 cost_valuesscan(Path *path, PlannerInfo *root,
1621  RelOptInfo *baserel, ParamPathInfo *param_info)
1622 {
1623  Cost startup_cost = 0;
1624  Cost run_cost = 0;
1625  QualCost qpqual_cost;
1626  Cost cpu_per_tuple;
1627 
1628  /* Should only be applied to base relations that are values lists */
1629  Assert(baserel->relid > 0);
1630  Assert(baserel->rtekind == RTE_VALUES);
1631 
1632  /* Mark the path with the correct row estimate */
1633  if (param_info)
1634  path->rows = param_info->ppi_rows;
1635  else
1636  path->rows = baserel->rows;
1637 
1638  /*
1639  * For now, estimate list evaluation cost at one operator eval per list
1640  * (probably pretty bogus, but is it worth being smarter?)
1641  */
1642  cpu_per_tuple = cpu_operator_cost;
1643 
1644  /* Add scanning CPU costs */
1645  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1646 
1647  startup_cost += qpqual_cost.startup;
1648  cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
1649  run_cost += cpu_per_tuple * baserel->tuples;
1650 
1651  /* tlist eval costs are paid per output row, not per tuple scanned */
1652  startup_cost += path->pathtarget->cost.startup;
1653  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1654 
1655  path->startup_cost = startup_cost;
1656  path->total_cost = startup_cost + run_cost;
1657 }
1658 
1659 /*
1660  * cost_ctescan
1661  * Determines and returns the cost of scanning a CTE RTE.
1662  *
1663  * Note: this is used for both self-reference and regular CTEs; the
1664  * possible cost differences are below the threshold of what we could
1665  * estimate accurately anyway. Note that the costs of evaluating the
1666  * referenced CTE query are added into the final plan as initplan costs,
1667  * and should NOT be counted here.
1668  */
1669 void
1670 cost_ctescan(Path *path, PlannerInfo *root,
1671  RelOptInfo *baserel, ParamPathInfo *param_info)
1672 {
1673  Cost startup_cost = 0;
1674  Cost run_cost = 0;
1675  QualCost qpqual_cost;
1676  Cost cpu_per_tuple;
1677 
1678  /* Should only be applied to base relations that are CTEs */
1679  Assert(baserel->relid > 0);
1680  Assert(baserel->rtekind == RTE_CTE);
1681 
1682  /* Mark the path with the correct row estimate */
1683  if (param_info)
1684  path->rows = param_info->ppi_rows;
1685  else
1686  path->rows = baserel->rows;
1687 
1688  /* Charge one CPU tuple cost per row for tuplestore manipulation */
1689  cpu_per_tuple = cpu_tuple_cost;
1690 
1691  /* Add scanning CPU costs */
1692  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1693 
1694  startup_cost += qpqual_cost.startup;
1695  cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
1696  run_cost += cpu_per_tuple * baserel->tuples;
1697 
1698  /* tlist eval costs are paid per output row, not per tuple scanned */
1699  startup_cost += path->pathtarget->cost.startup;
1700  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1701 
1702  path->startup_cost = startup_cost;
1703  path->total_cost = startup_cost + run_cost;
1704 }
1705 
1706 /*
1707  * cost_namedtuplestorescan
1708  * Determines and returns the cost of scanning a named tuplestore.
1709  */
1710 void
1711 cost_namedtuplestorescan(Path *path, PlannerInfo *root,
1712  RelOptInfo *baserel, ParamPathInfo *param_info)
1713 {
1714  Cost startup_cost = 0;
1715  Cost run_cost = 0;
1716  QualCost qpqual_cost;
1717  Cost cpu_per_tuple;
1718 
1719  /* Should only be applied to base relations that are Tuplestores */
1720  Assert(baserel->relid > 0);
1721  Assert(baserel->rtekind == RTE_NAMEDTUPLESTORE);
1722 
1723  /* Mark the path with the correct row estimate */
1724  if (param_info)
1725  path->rows = param_info->ppi_rows;
1726  else
1727  path->rows = baserel->rows;
1728 
1729  /* Charge one CPU tuple cost per row for tuplestore manipulation */
1730  cpu_per_tuple = cpu_tuple_cost;
1731 
1732  /* Add scanning CPU costs */
1733  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1734 
1735  startup_cost += qpqual_cost.startup;
1736  cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
1737  run_cost += cpu_per_tuple * baserel->tuples;
1738 
1739  path->startup_cost = startup_cost;
1740  path->total_cost = startup_cost + run_cost;
1741 }
1742 
1743 /*
1744  * cost_resultscan
1745  * Determines and returns the cost of scanning an RTE_RESULT relation.
1746  */
1747 void
1748 cost_resultscan(Path *path, PlannerInfo *root,
1749  RelOptInfo *baserel, ParamPathInfo *param_info)
1750 {
1751  Cost startup_cost = 0;
1752  Cost run_cost = 0;
1753  QualCost qpqual_cost;
1754  Cost cpu_per_tuple;
1755 
1756  /* Should only be applied to RTE_RESULT base relations */
1757  Assert(baserel->relid > 0);
1758  Assert(baserel->rtekind == RTE_RESULT);
1759 
1760  /* Mark the path with the correct row estimate */
1761  if (param_info)
1762  path->rows = param_info->ppi_rows;
1763  else
1764  path->rows = baserel->rows;
1765 
1766  /* We charge qual cost plus cpu_tuple_cost */
1767  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1768 
1769  startup_cost += qpqual_cost.startup;
1770  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1771  run_cost += cpu_per_tuple * baserel->tuples;
1772 
1773  path->startup_cost = startup_cost;
1774  path->total_cost = startup_cost + run_cost;
1775 }
1776 
1777 /*
1778  * cost_recursive_union
1779  * Determines and returns the cost of performing a recursive union,
1780  * and also the estimated output size.
1781  *
1782  * We are given Paths for the nonrecursive and recursive terms.
1783  */
1784 void
1785 cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
1786 {
1787  Cost startup_cost;
1788  Cost total_cost;
1789  double total_rows;
1790 
1791  /* We probably have decent estimates for the non-recursive term */
1792  startup_cost = nrterm->startup_cost;
1793  total_cost = nrterm->total_cost;
1794  total_rows = nrterm->rows;
1795 
1796  /*
1797  * We arbitrarily assume that about 10 recursive iterations will be
1798  * needed, and that we've managed to get a good fix on the cost and output
1799  * size of each one of them. These are mighty shaky assumptions but it's
1800  * hard to see how to do better.
1801  */
1802  total_cost += 10 * rterm->total_cost;
1803  total_rows += 10 * rterm->rows;
1804 
1805  /*
1806  * Also charge cpu_tuple_cost per row to account for the costs of
1807  * manipulating the tuplestores. (We don't worry about possible
1808  * spill-to-disk costs.)
1809  */
1810  total_cost += cpu_tuple_cost * total_rows;
1811 
1812  runion->startup_cost = startup_cost;
1813  runion->total_cost = total_cost;
1814  runion->rows = total_rows;
1815  runion->pathtarget->width = Max(nrterm->pathtarget->width,
1816  rterm->pathtarget->width);
1817 }
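
/*
 * Illustrative sketch (not part of costsize.c): the recursive-union estimate
 * above, worked with made-up inputs.  With a nonrecursive term of cost 100
 * returning 50 rows and a recursive term of cost 20 returning 10 rows, the
 * assumed 10 iterations give 100 + 10*20 = 300 total cost and 50 + 10*10 =
 * 150 rows, plus cpu_tuple_cost per row for tuplestore handling.
 */
#include <stdio.h>

int
main(void)
{
	double	cpu_tuple_cost = 0.01;	/* assumed GUC setting */
	double	total_cost = 100.0 + 10 * 20.0;		/* nrterm + 10 * rterm */
	double	total_rows = 50.0 + 10 * 10.0;

	total_cost += cpu_tuple_cost * total_rows;
	printf("rows=%.0f total=%.2f\n", total_rows, total_cost);
	return 0;
}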
1818 
1819 /*
1820  * cost_tuplesort
1821  * Determines and returns the cost of sorting a relation using tuplesort,
1822  * not including the cost of reading the input data.
1823  *
1824  * If the total volume of data to sort is less than sort_mem, we will do
1825  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
1826  * comparisons for t tuples.
1827  *
1828  * If the total volume exceeds sort_mem, we switch to a tape-style merge
1829  * algorithm. There will still be about t*log2(t) tuple comparisons in
1830  * total, but we will also need to write and read each tuple once per
1831  * merge pass. We expect about ceil(logM(r)) merge passes where r is the
1832  * number of initial runs formed and M is the merge order used by tuplesort.c.
1833  * Since the average initial run should be about sort_mem, we have
1834  * disk traffic = 2 * relsize * ceil(logM(relsize / sort_mem))
1835  * cpu = comparison_cost * t * log2(t)
1836  *
1837  * If the sort is bounded (i.e., only the first k result tuples are needed)
1838  * and k tuples can fit into sort_mem, we use a heap method that keeps only
1839  * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
1840  *
1841  * The disk traffic is assumed to be 3/4ths sequential and 1/4th random
1842  * accesses (XXX can't we refine that guess?)
1843  *
1844  * By default, we charge two operator evals per tuple comparison, which should
1845  * be in the right ballpark in most cases. The caller can tweak this by
1846  * specifying nonzero comparison_cost; typically that's used for any extra
1847  * work that has to be done to prepare the inputs to the comparison operators.
1848  *
1849  * 'tuples' is the number of tuples in the relation
1850  * 'width' is the average tuple width in bytes
1851  * 'comparison_cost' is the extra cost per comparison, if any
1852  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
1853  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
1854  */
1855 static void
1856 cost_tuplesort(Cost *startup_cost, Cost *run_cost,
1857  double tuples, int width,
1858  Cost comparison_cost, int sort_mem,
1859  double limit_tuples)
1860 {
1861  double input_bytes = relation_byte_size(tuples, width);
1862  double output_bytes;
1863  double output_tuples;
1864  long sort_mem_bytes = sort_mem * 1024L;
1865 
1866  /*
1867  * We want to be sure the cost of a sort is never estimated as zero, even
1868  * if passed-in tuple count is zero. Besides, mustn't do log(0)...
1869  */
1870  if (tuples < 2.0)
1871  tuples = 2.0;
1872 
1873  /* Include the default cost-per-comparison */
1874  comparison_cost += 2.0 * cpu_operator_cost;
1875 
1876  /* Do we have a useful LIMIT? */
1877  if (limit_tuples > 0 && limit_tuples < tuples)
1878  {
1879  output_tuples = limit_tuples;
1880  output_bytes = relation_byte_size(output_tuples, width);
1881  }
1882  else
1883  {
1884  output_tuples = tuples;
1885  output_bytes = input_bytes;
1886  }
1887 
1888  if (output_bytes > sort_mem_bytes)
1889  {
1890  /*
1891  * We'll have to use a disk-based sort of all the tuples
1892  */
1893  double npages = ceil(input_bytes / BLCKSZ);
1894  double nruns = input_bytes / sort_mem_bytes;
1895  double mergeorder = tuplesort_merge_order(sort_mem_bytes);
1896  double log_runs;
1897  double npageaccesses;
1898 
1899  /*
1900  * CPU costs
1901  *
1902  * Assume about N log2 N comparisons
1903  */
1904  *startup_cost = comparison_cost * tuples * LOG2(tuples);
1905 
1906  /* Disk costs */
1907 
1908  /* Compute logM(r) as log(r) / log(M) */
1909  if (nruns > mergeorder)
1910  log_runs = ceil(log(nruns) / log(mergeorder));
1911  else
1912  log_runs = 1.0;
1913  npageaccesses = 2.0 * npages * log_runs;
1914  /* Assume 3/4ths of accesses are sequential, 1/4th are not */
1915  *startup_cost += npageaccesses *
1916  (seq_page_cost * 0.75 + random_page_cost * 0.25);
1917  }
1918  else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
1919  {
1920  /*
1921  * We'll use a bounded heap-sort keeping just K tuples in memory, for
1922  * a total number of tuple comparisons of N log2 K; but the constant
1923  * factor is a bit higher than for quicksort. Tweak it so that the
1924  * cost curve is continuous at the crossover point.
1925  */
1926  *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
1927  }
1928  else
1929  {
1930  /* We'll use plain quicksort on all the input tuples */
1931  *startup_cost = comparison_cost * tuples * LOG2(tuples);
1932  }
1933 
1934  /*
1935  * Also charge a small amount (arbitrarily set equal to operator cost) per
1936  * extracted tuple. We don't charge cpu_tuple_cost because a Sort node
1937  * doesn't do qual-checking or projection, so it has less overhead than
1938  * most plan nodes. Note it's correct to use tuples not output_tuples
1939  * here --- the upper LIMIT will pro-rate the run cost so we'd be double
1940  * counting the LIMIT otherwise.
1941  */
1942  *run_cost = cpu_operator_cost * tuples;
1943 }
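
/*
 * Illustrative sketch (not part of costsize.c): the external-sort branch of
 * cost_tuplesort() with made-up inputs.  Assume 1e6 tuples of 100 bytes,
 * 4 MB of sort_mem, an 8 kB block size, and a merge order of 6; the real
 * merge order comes from tuplesort_merge_order(), which is not reproduced
 * here.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double	cpu_operator_cost = 0.0025;		/* assumed GUC settings */
	double	seq_page_cost = 1.0, random_page_cost = 4.0;
	double	tuples = 1e6;
	double	input_bytes = tuples * 100.0;
	double	sort_mem_bytes = 4096.0 * 1024.0;
	double	blcksz = 8192.0;
	double	mergeorder = 6.0;				/* assumed, see note above */

	double	comparison_cost = 2.0 * cpu_operator_cost;
	double	npages = ceil(input_bytes / blcksz);
	double	nruns = input_bytes / sort_mem_bytes;
	double	log_runs = (nruns > mergeorder) ?
		ceil(log(nruns) / log(mergeorder)) : 1.0;
	double	npageaccesses = 2.0 * npages * log_runs;

	double	startup_cost = comparison_cost * tuples * log2(tuples) +
		npageaccesses * (seq_page_cost * 0.75 + random_page_cost * 0.25);
	double	run_cost = cpu_operator_cost * tuples;

	printf("startup=%.0f total=%.0f\n", startup_cost, startup_cost + run_cost);
	return 0;
}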
1944 
1945 /*
1946  * cost_incremental_sort
1947  * Determines and returns the cost of sorting a relation incrementally, when
1948  * the input path is presorted by a prefix of the pathkeys.
1949  *
1950  * 'presorted_keys' is the number of leading pathkeys by which the input path
1951  * is sorted.
1952  *
1953  * We estimate the number of groups into which the relation is divided by the
1954  * leading pathkeys, and then calculate the cost of sorting a single group
1955  * with tuplesort using cost_tuplesort().
1956  */
1957 void
1958 cost_incremental_sort(Path *path,
1959  PlannerInfo *root, List *pathkeys, int presorted_keys,
1960  Cost input_startup_cost, Cost input_total_cost,
1961  double input_tuples, int width, Cost comparison_cost, int sort_mem,
1962  double limit_tuples)
1963 {
1964  Cost startup_cost,
1965  run_cost,
1966  input_run_cost = input_total_cost - input_startup_cost;
1967  double group_tuples,
1968  input_groups;
1969  Cost group_startup_cost,
1970  group_run_cost,
1971  group_input_run_cost;
1972  List *presortedExprs = NIL;
1973  ListCell *l;
1974  bool unknown_varno = false;
1975 
1976  Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys));
1977 
1978  /*
1979  * We want to be sure the cost of a sort is never estimated as zero, even
1980  * if passed-in tuple count is zero. Besides, mustn't do log(0)...
1981  */
1982  if (input_tuples < 2.0)
1983  input_tuples = 2.0;
1984 
1985  /* Default estimate of number of groups, capped to one group per row. */
1986  input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
1987 
1988  /*
1989  * Extract presorted keys as list of expressions.
1990  *
1991  * We need to be careful about Vars containing "varno 0" which might have
1992  * been introduced by generate_append_tlist, which would confuse
1993  * estimate_num_groups (in fact it'd fail for such expressions). See
1994  * recurse_set_operations which has to deal with the same issue.
1995  *
1996  * Unlike recurse_set_operations we can't access the original target list
1997  * here, and even if we could it's not very clear how useful that would be
1998  * for a set operation combining multiple tables. So we simply detect if
1999  * there are any expressions with "varno 0" and use the default
2000  * DEFAULT_NUM_DISTINCT in that case.
2001  *
2002  * We might also use either 1.0 (a single group) or input_tuples (each row
2003  * being a separate group), pretty much the worst and best case for
2004  * incremental sort. But those are extreme cases and using something in
2005  * between seems reasonable. Furthermore, generate_append_tlist is used
2006  * for set operations, which are likely to produce mostly unique output
2007  * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive
2008  * while maintaining lower startup cost.
2009  */
2010  foreach(l, pathkeys)
2011  {
2012  PathKey *key = (PathKey *) lfirst(l);
2013  EquivalenceMember *member = (EquivalenceMember *)
2014  linitial(key->pk_eclass->ec_members);
2015 
2016  /*
2017  * Check if the expression contains Var with "varno 0" so that we
2018  * don't call estimate_num_groups in that case.
2019  */
2020  if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr)))
2021  {
2022  unknown_varno = true;
2023  break;
2024  }
2025 
2026  /* expression not containing any Vars with "varno 0" */
2027  presortedExprs = lappend(presortedExprs, member->em_expr);
2028 
2029  if (foreach_current_index(l) + 1 >= presorted_keys)
2030  break;
2031  }
2032 
2033  /* Estimate the number of groups with equal presorted keys. */
2034  if (!unknown_varno)
2035  input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
2036  NULL, NULL);
2037 
2038  group_tuples = input_tuples / input_groups;
2039  group_input_run_cost = input_run_cost / input_groups;
2040 
2041  /*
2042  * Estimate the average cost of sorting of one group where presorted keys
2043  * are equal.
2044  */
2045  cost_tuplesort(&group_startup_cost, &group_run_cost,
2046  group_tuples, width, comparison_cost, sort_mem,
2047  limit_tuples);
2048 
2049  /*
2050  * Startup cost of incremental sort is the startup cost of its first group
2051  * plus the cost of its input.
2052  */
2053  startup_cost = group_startup_cost + input_startup_cost +
2054  group_input_run_cost;
2055 
2056  /*
2057  * After we started producing tuples from the first group, the cost of
2058  * producing all the tuples is given by the cost to finish processing this
2059  * group, plus the total cost to process the remaining groups, plus the
2060  * remaining cost of input.
2061  */
2062  run_cost = group_run_cost + (group_run_cost + group_startup_cost) *
2063  (input_groups - 1) + group_input_run_cost * (input_groups - 1);
2064 
2065  /*
2066  * Incremental sort adds some overhead by itself. Firstly, it has to
2067  * detect the sort groups. This is roughly equal to one extra copy and
2068  * comparison per tuple.
2069  */
2070  run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples;
2071 
2072  /*
2073  * Additionally, we charge double cpu_tuple_cost for each input group to
2074  * account for the tuplesort_reset that's performed after each group.
2075  */
2076  run_cost += 2.0 * cpu_tuple_cost * input_groups;
2077 
2078  path->rows = input_tuples;
2079  path->startup_cost = startup_cost;
2080  path->total_cost = startup_cost + run_cost;
2081 }
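
/*
 * Illustrative sketch (not part of costsize.c): how the incremental-sort
 * formula above composes the per-group tuplesort costs.  group_startup_cost
 * and group_run_cost stand for the outputs of cost_tuplesort() for a single
 * group; all numbers are made up.
 */
#include <stdio.h>

int
main(void)
{
	double	cpu_tuple_cost = 0.01;			/* assumed GUC settings */
	double	comparison_cost = 2.0 * 0.0025;
	double	input_tuples = 10000.0;
	double	input_groups = 100.0;
	double	input_startup_cost = 0.0;
	double	input_run_cost = 100.0;
	double	group_startup_cost = 1.5;		/* assumed tuplesort outputs */
	double	group_run_cost = 0.25;
	double	group_input_run_cost = input_run_cost / input_groups;

	double	startup_cost = group_startup_cost + input_startup_cost +
		group_input_run_cost;
	double	run_cost = group_run_cost +
		(group_run_cost + group_startup_cost) * (input_groups - 1) +
		group_input_run_cost * (input_groups - 1);

	/* per-tuple group detection plus per-group tuplesort reset overhead */
	run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples;
	run_cost += 2.0 * cpu_tuple_cost * input_groups;

	printf("startup=%.2f total=%.2f\n", startup_cost, startup_cost + run_cost);
	return 0;
}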
2082 
2083 /*
2084  * cost_sort
2085  * Determines and returns the cost of sorting a relation, including
2086  * the cost of reading the input data.
2087  *
2088  * NOTE: some callers currently pass NIL for pathkeys because they
2089  * can't conveniently supply the sort keys. Since this routine doesn't
2090  * currently do anything with pathkeys anyway, that doesn't matter...
2091  * but if it ever does, it should react gracefully to lack of key data.
2092  * (Actually, the thing we'd most likely be interested in is just the number
2093  * of sort keys, which all callers *could* supply.)
2094  */
2095 void
2096 cost_sort(Path *path, PlannerInfo *root,
2097  List *pathkeys, Cost input_cost, double tuples, int width,
2098  Cost comparison_cost, int sort_mem,
2099  double limit_tuples)
2100 
2101 {
2102  Cost startup_cost;
2103  Cost run_cost;
2104 
2105  cost_tuplesort(&startup_cost, &run_cost,
2106  tuples, width,
2107  comparison_cost, sort_mem,
2108  limit_tuples);
2109 
2110  if (!enable_sort)
2111  startup_cost += disable_cost;
2112 
2113  startup_cost += input_cost;
2114 
2115  path->rows = tuples;
2116  path->startup_cost = startup_cost;
2117  path->total_cost = startup_cost + run_cost;
2118 }
2119 
2120 /*
2121  * append_nonpartial_cost
2122  * Estimate the cost of the non-partial paths in a Parallel Append.
2123  * The non-partial paths are assumed to be the first "numpaths" paths
2124  * from the subpaths list, and to be in order of decreasing cost.
2125  */
2126 static Cost
2127 append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
2128 {
2129  Cost *costarr;
2130  int arrlen;
2131  ListCell *l;
2132  ListCell *cell;
2133  int path_index;
2134  int min_index;
2135  int max_index;
2136 
2137  if (numpaths == 0)
2138  return 0;
2139 
2140  /*
2141  * Array length is number of workers or number of relevant paths,
2142  * whichever is less.
2143  */
2144  arrlen = Min(parallel_workers, numpaths);
2145  costarr = (Cost *) palloc(sizeof(Cost) * arrlen);
2146 
2147  /* The first few paths will each be claimed by a different worker. */
2148  path_index = 0;
2149  foreach(cell, subpaths)
2150  {
2151  Path *subpath = (Path *) lfirst(cell);
2152 
2153  if (path_index == arrlen)
2154  break;
2155  costarr[path_index++] = subpath->total_cost;
2156  }
2157 
2158  /*
2159  * Since subpaths are sorted by decreasing cost, the last one will have
2160  * the minimum cost.
2161  */
2162  min_index = arrlen - 1;
2163 
2164  /*
2165  * For each of the remaining subpaths, add its cost to the array element
2166  * with minimum cost.
2167  */
2168  for_each_cell(l, subpaths, cell)
2169  {
2170  Path *subpath = (Path *) lfirst(l);
2171 
2172  /* Consider only the non-partial paths */
2173  if (path_index++ == numpaths)
2174  break;
2175 
2176  costarr[min_index] += subpath->total_cost;
2177 
2178  /* Update the new min cost array index */
2179  min_index = 0;
2180  for (int i = 0; i < arrlen; i++)
2181  {
2182  if (costarr[i] < costarr[min_index])
2183  min_index = i;
2184  }
2185  }
2186 
2187  /* Return the highest cost from the array */
2188  max_index = 0;
2189  for (int i = 0; i < arrlen; i++)
2190  {
2191  if (costarr[i] > costarr[max_index])
2192  max_index = i;
2193  }
2194 
2195  return costarr[max_index];
2196 }
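
/*
 * Illustrative sketch (not part of costsize.c): the scheduling model behind
 * append_nonpartial_cost(), with made-up subpath costs.  With costs
 * {20, 15, 10, 8, 5} (already sorted by decreasing cost) and 2 workers,
 * always adding the next path to the least-loaded worker gives
 * {20,15} -> {20,25} -> {28,25} -> {28,30}; the busiest worker, 30, is the
 * returned estimate.
 */
#include <stdio.h>

int
main(void)
{
	double	costs[] = {20.0, 15.0, 10.0, 8.0, 5.0};
	int		numpaths = 5;
	int		workers = 2;
	double	worker_load[2] = {0.0, 0.0};

	for (int i = 0; i < numpaths; i++)
	{
		int		min_w = 0;

		/* find the currently least-loaded worker */
		for (int w = 1; w < workers; w++)
			if (worker_load[w] < worker_load[min_w])
				min_w = w;
		worker_load[min_w] += costs[i];
	}

	/* the estimate is the load of the busiest worker */
	double	result = worker_load[0];

	for (int w = 1; w < workers; w++)
		if (worker_load[w] > result)
			result = worker_load[w];
	printf("estimated non-partial cost = %.0f\n", result);
	return 0;
}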
2197 
2198 /*
2199  * cost_append
2200  * Determines and returns the cost of an Append node.
2201  */
2202 void
2203 cost_append(AppendPath *apath)
2204 {
2205  ListCell *l;
2206 
2207  apath->path.startup_cost = 0;
2208  apath->path.total_cost = 0;
2209  apath->path.rows = 0;
2210 
2211  if (apath->subpaths == NIL)
2212  return;
2213 
2214  if (!apath->path.parallel_aware)
2215  {
2216  List *pathkeys = apath->path.pathkeys;
2217 
2218  if (pathkeys == NIL)
2219  {
2220  Path *firstsubpath = (Path *) linitial(apath->subpaths);
2221 
2222  /*
2223  * For an unordered, non-parallel-aware Append we take the startup
2224  * cost as the startup cost of the first subpath.
2225  */
2226  apath->path.startup_cost = firstsubpath->startup_cost;
2227 
2228  /* Compute rows and costs as sums of subplan rows and costs. */
2229  foreach(l, apath->subpaths)
2230  {
2231  Path *subpath = (Path *) lfirst(l);
2232 
2233  apath->path.rows += subpath->rows;
2234  apath->path.total_cost += subpath->total_cost;
2235  }
2236  }
2237  else
2238  {
2239  /*
2240  * For an ordered, non-parallel-aware Append we take the startup
2241  * cost as the sum of the subpath startup costs. This ensures
2242  * that we don't underestimate the startup cost when a query's
2243  * LIMIT is such that several of the children have to be run to
2244  * satisfy it. This might be overkill --- another plausible hack
2245  * would be to take the Append's startup cost as the maximum of
2246  * the child startup costs. But we don't want to risk believing
2247  * that an ORDER BY LIMIT query can be satisfied at small cost
2248  * when the first child has small startup cost but later ones
2249  * don't. (If we had the ability to deal with nonlinear cost
2250  * interpolation for partial retrievals, we would not need to be
2251  * so conservative about this.)
2252  *
2253  * This case is also different from the above in that we have to
2254  * account for possibly injecting sorts into subpaths that aren't
2255  * natively ordered.
2256  */
2257  foreach(l, apath->subpaths)
2258  {
2259  Path *subpath = (Path *) lfirst(l);
2260  Path sort_path; /* dummy for result of cost_sort */
2261 
2262  if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
2263  {
2264  /*
2265  * We'll need to insert a Sort node, so include costs for
2266  * that. We can use the parent's LIMIT if any, since we
2267  * certainly won't pull more than that many tuples from
2268  * any child.
2269  */
2270  cost_sort(&sort_path,
2271  NULL, /* doesn't currently need root */
2272  pathkeys,
2273  subpath->total_cost,
2274  subpath->rows,
2275  subpath->pathtarget->width,
2276  0.0,
2277  work_mem,
2278  apath->limit_tuples);
2279  subpath = &sort_path;
2280  }
2281 
2282  apath->path.rows += subpath->rows;
2283  apath->path.startup_cost += subpath->startup_cost;
2284  apath->path.total_cost += subpath->total_cost;
2285  }
2286  }
2287  }
2288  else /* parallel-aware */
2289  {
2290  int i = 0;
2291  double parallel_divisor = get_parallel_divisor(&apath->path);
2292 
2293  /* Parallel-aware Append never produces ordered output. */
2294  Assert(apath->path.pathkeys == NIL);
2295 
2296  /* Calculate startup cost. */
2297  foreach(l, apath->subpaths)
2298  {
2299  Path *subpath = (Path *) lfirst(l);
2300 
2301  /*
2302  * Append will start returning tuples when the child node having
2303  * lowest startup cost is done setting up. We consider only the
2304  * first few subplans that immediately get a worker assigned.
2305  */
2306  if (i == 0)
2307  apath->path.startup_cost = subpath->startup_cost;
2308  else if (i < apath->path.parallel_workers)
2309  apath->path.startup_cost = Min(apath->path.startup_cost,
2310  subpath->startup_cost);
2311 
2312  /*
2313  * Apply parallel divisor to subpaths. Scale the number of rows
2314  * for each partial subpath based on the ratio of the parallel
2315  * divisor originally used for the subpath to the one we adopted.
2316  * Also add the cost of partial paths to the total cost, but
2317  * ignore non-partial paths for now.
2318  */
2319  if (i < apath->first_partial_path)
2320  apath->path.rows += subpath->rows / parallel_divisor;
2321  else
2322  {
2323  double subpath_parallel_divisor;
2324 
2325  subpath_parallel_divisor = get_parallel_divisor(subpath);
2326  apath->path.rows += subpath->rows * (subpath_parallel_divisor /
2327  parallel_divisor);
2328  apath->path.total_cost += subpath->total_cost;
2329  }
2330 
2331  apath->path.rows = clamp_row_est(apath->path.rows);
2332 
2333  i++;
2334  }
2335 
2336  /* Add cost for non-partial subpaths. */
2337  apath->path.total_cost +=
2338  append_nonpartial_cost(apath->subpaths,
2339  apath->first_partial_path,
2340  apath->path.parallel_workers);
2341  }
2342 
2343  /*
2344  * Although Append does not do any selection or projection, it's not free;
2345  * add a small per-tuple overhead.
2346  */
2347  apath->path.total_cost +=
2348  cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * apath->path.rows;
2349 }
2350 
2351 /*
2352  * cost_merge_append
2353  * Determines and returns the cost of a MergeAppend node.
2354  *
2355  * MergeAppend merges several pre-sorted input streams, using a heap that
2356  * at any given instant holds the next tuple from each stream. If there
2357  * are N streams, we need about N*log2(N) tuple comparisons to construct
2358  * the heap at startup, and then for each output tuple, about log2(N)
2359  * comparisons to replace the top entry.
2360  *
2361  * (The effective value of N will drop once some of the input streams are
2362  * exhausted, but it seems unlikely to be worth trying to account for that.)
2363  *
2364  * The heap is never spilled to disk, since we assume N is not very large.
2365  * So this is much simpler than cost_sort.
2366  *
2367  * As in cost_sort, we charge two operator evals per tuple comparison.
2368  *
2369  * 'pathkeys' is a list of sort keys
2370  * 'n_streams' is the number of input streams
2371  * 'input_startup_cost' is the sum of the input streams' startup costs
2372  * 'input_total_cost' is the sum of the input streams' total costs
2373  * 'tuples' is the number of tuples in all the streams
2374  */
2375 void
2376 cost_merge_append(Path *path, PlannerInfo *root,
2377  List *pathkeys, int n_streams,
2378  Cost input_startup_cost, Cost input_total_cost,
2379  double tuples)
2380 {
2381  Cost startup_cost = 0;
2382  Cost run_cost = 0;
2383  Cost comparison_cost;
2384  double N;
2385  double logN;
2386 
2387  /*
2388  * Avoid log(0)...
2389  */
2390  N = (n_streams < 2) ? 2.0 : (double) n_streams;
2391  logN = LOG2(N);
2392 
2393  /* Assumed cost per tuple comparison */
2394  comparison_cost = 2.0 * cpu_operator_cost;
2395 
2396  /* Heap creation cost */
2397  startup_cost += comparison_cost * N * logN;
2398 
2399  /* Per-tuple heap maintenance cost */
2400  run_cost += tuples * comparison_cost * logN;
2401 
2402  /*
2403  * Although MergeAppend does not do any selection or projection, it's not
2404  * free; add a small per-tuple overhead.
2405  */
2406  run_cost += cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * tuples;
2407 
2408  path->startup_cost = startup_cost + input_startup_cost;
2409  path->total_cost = startup_cost + run_cost + input_total_cost;
2410 }
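
/*
 * Illustrative sketch (not part of costsize.c): the MergeAppend heap costs
 * above with made-up inputs: 4 input streams and 100000 total tuples.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double	cpu_operator_cost = 0.0025;		/* assumed GUC settings */
	double	cpu_tuple_cost = 0.01;
	double	append_cpu_cost_multiplier = 0.5;	/* assumed to match the macro */
	double	N = 4.0, tuples = 100000.0;
	double	logN = log2(N);
	double	comparison_cost = 2.0 * cpu_operator_cost;

	double	startup_cost = comparison_cost * N * logN;
	double	run_cost = tuples * comparison_cost * logN +
		cpu_tuple_cost * append_cpu_cost_multiplier * tuples;

	printf("startup=%.3f run=%.1f\n", startup_cost, run_cost);
	return 0;
}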
2411 
2412 /*
2413  * cost_material
2414  * Determines and returns the cost of materializing a relation, including
2415  * the cost of reading the input data.
2416  *
2417  * If the total volume of data to materialize exceeds work_mem, we will need
2418  * to write it to disk, so the cost is much higher in that case.
2419  *
2420  * Note that here we are estimating the costs for the first scan of the
2421  * relation, so the materialization is all overhead --- any savings will
2422  * occur only on rescan, which is estimated in cost_rescan.
2423  */
2424 void
2425 cost_material(Path *path,
2426  Cost input_startup_cost, Cost input_total_cost,
2427  double tuples, int width)
2428 {
2429  Cost startup_cost = input_startup_cost;
2430  Cost run_cost = input_total_cost - input_startup_cost;
2431  double nbytes = relation_byte_size(tuples, width);
2432  long work_mem_bytes = work_mem * 1024L;
2433 
2434  path->rows = tuples;
2435 
2436  /*
2437  * Whether spilling or not, charge 2x cpu_operator_cost per tuple to
2438  * reflect bookkeeping overhead. (This rate must be more than what
2439  * cost_rescan charges for materialize, ie, cpu_operator_cost per tuple;
2440  * if it is exactly the same then there will be a cost tie between
2441  * nestloop with A outer, materialized B inner and nestloop with B outer,
2442  * materialized A inner. The extra cost ensures we'll prefer
2443  * materializing the smaller rel.) Note that this is normally a good deal
2444  * less than cpu_tuple_cost, which is OK because a Material plan node
2445  * doesn't do qual-checking or projection, so it's got less overhead than
2446  * most plan nodes.
2447  */
2448  run_cost += 2 * cpu_operator_cost * tuples;
2449 
2450  /*
2451  * If we will spill to disk, charge at the rate of seq_page_cost per page.
2452  * This cost is assumed to be evenly spread through the plan run phase,
2453  * which isn't exactly accurate but our cost model doesn't allow for
2454  * nonuniform costs within the run phase.
2455  */
2456  if (nbytes > work_mem_bytes)
2457  {
2458  double npages = ceil(nbytes / BLCKSZ);
2459 
2460  run_cost += seq_page_cost * npages;
2461  }
2462 
2463  path->startup_cost = startup_cost;
2464  path->total_cost = startup_cost + run_cost;
2465 }
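
/*
 * Illustrative sketch (not part of costsize.c): the spill test in
 * cost_material() with made-up inputs.  1e6 rows of 64 bytes against 4 MB of
 * work_mem spills, and the spilled volume is charged at seq_page_cost per
 * 8 kB page on top of the per-tuple bookkeeping charge.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double	cpu_operator_cost = 0.0025;	/* assumed GUC settings */
	double	seq_page_cost = 1.0;
	double	tuples = 1e6;
	double	nbytes = tuples * 64.0;		/* stand-in for relation_byte_size() */
	double	work_mem_bytes = 4096.0 * 1024.0;
	double	run_cost = 2 * cpu_operator_cost * tuples;

	if (nbytes > work_mem_bytes)
		run_cost += seq_page_cost * ceil(nbytes / 8192.0);
	printf("materialize run cost = %.0f\n", run_cost);
	return 0;
}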
2466 
2467 /*
2468  * cost_memoize_rescan
2469  * Determines the estimated cost of rescanning a Memoize node.
2470  *
2471  * In order to estimate this, we must gain knowledge of how often we expect to
2472  * be called and how many distinct sets of parameters we are likely to be
2473  * called with. If we expect a good cache hit ratio, then we can set our
2474  * costs to account for that hit ratio, plus a little bit of cost for the
2475  * caching itself. Caching will not work out well if we expect to be called
2476  * with too many distinct parameter values. The worst case here is that we
2477  * never see any parameter value twice, in which case we'd never get a cache
2478  * hit and caching would be a complete waste of effort.
2479  */
2480 static void
2481 cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
2482  Cost *rescan_startup_cost, Cost *rescan_total_cost)
2483 {
2484  EstimationInfo estinfo;
2485  ListCell *lc;
2486  Cost input_startup_cost = mpath->subpath->startup_cost;
2487  Cost input_total_cost = mpath->subpath->total_cost;
2488  double tuples = mpath->subpath->rows;
2489  double calls = mpath->calls;
2490  int width = mpath->subpath->pathtarget->width;
2491 
2492  double hash_mem_bytes;
2493  double est_entry_bytes;
2494  double est_cache_entries;
2495  double ndistinct;
2496  double evict_ratio;
2497  double hit_ratio;
2498  Cost startup_cost;
2499  Cost total_cost;
2500 
2501  /* available cache space */
2502  hash_mem_bytes = get_hash_memory_limit();
2503 
2504  /*
2505  * Set the number of bytes each cache entry should consume in the cache.
2506  * To provide us with better estimations on how many cache entries we can
2507  * store at once, we make a call to the executor here to ask it what
2508  * memory overheads there are for a single cache entry.
2509  */
2510  est_entry_bytes = relation_byte_size(tuples, width) +
2511  ExecEstimateCacheEntryOverheadBytes(tuples);
2512 
2513  /* include the estimated width for the cache keys */
2514  foreach(lc, mpath->param_exprs)
2515  est_entry_bytes += get_expr_width(root, (Node *) lfirst(lc));
2516 
2517  /* estimate on the upper limit of cache entries we can hold at once */
2518  est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
2519 
2520  /* estimate on the distinct number of parameter values */
2521  ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL,
2522  &estinfo);
2523 
2524  /*
2525  * When the estimation fell back on using a default value, it's a bit too
2526  * risky to assume that it's ok to use a Memoize node. The use of a
2527  * default could cause us to use a Memoize node when it's really
2528  * inappropriate to do so. If we see that this has been done, then we'll
2529  * assume that every call will have unique parameters, which will almost
2530  * certainly mean a MemoizePath will never survive add_path().
2531  */
2532  if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0)
2533  ndistinct = calls;
2534 
2535  /*
2536  * Since we've already estimated the maximum number of entries we can
2537  * store at once and know the estimated number of distinct values we'll be
2538  * called with, we'll take this opportunity to set the path's est_entries.
2539  * This will ultimately determine the hash table size that the executor
2540  * will use. If we leave this at zero, the executor will just choose the
2541  * size itself. Really this is not the right place to do this, but it's
2542  * convenient since everything is already calculated.
2543  */
2544  mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
2545  PG_UINT32_MAX);
2546 
2547  /*
2548  * When the number of distinct parameter values is above the amount we can
2549  * store in the cache, then we'll have to evict some entries from the
2550  * cache. This is not free. Here we estimate how often we'll incur the
2551  * cost of that eviction.
2552  */
2553  evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct;
2554 
2555  /*
2556  * In order to estimate how costly a single scan will be, we need to
2557  * attempt to estimate what the cache hit ratio will be. To do that we
2558  * must look at how many scans are estimated in total for this node and
2559  * how many of those scans we expect to get a cache hit.
2560  */
2561  hit_ratio = ((calls - ndistinct) / calls) *
2562  (est_cache_entries / Max(ndistinct, est_cache_entries));
2563 
2564  Assert(hit_ratio >= 0 && hit_ratio <= 1.0);
2565 
2566  /*
2567  * Set the total_cost accounting for the expected cache hit ratio. We
2568  * also add on a cpu_operator_cost to account for a cache lookup. This
2569  * will happen regardless of whether it's a cache hit or not.
2570  */
2571  total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost;
2572 
2573  /* Now adjust the total cost to account for cache evictions */
2574 
2575  /* Charge a cpu_tuple_cost for evicting the actual cache entry */
2576  total_cost += cpu_tuple_cost * evict_ratio;
2577 
2578  /*
2579  * Charge a 10th of cpu_operator_cost to evict every tuple in that entry.
2580  * The per-tuple eviction is really just a pfree, so charging a whole
2581  * cpu_operator_cost seems a little excessive.
2582  */
2583  total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples;
2584 
2585  /*
2586  * Now adjust for storing things in the cache, since that's not free
2587  * either. Everything must go in the cache. We don't proportion this
2588  * over any ratio, just apply it once for the scan. We charge a
2589  * cpu_tuple_cost for the creation of the cache entry and also a
2590  * cpu_operator_cost for each tuple we expect to cache.
2591  */
2592  total_cost += cpu_tuple_cost + cpu_operator_cost * tuples;
2593 
2594  /*
2595  * Getting the first row must also be proportioned according to the
2596  * expected cache hit ratio.
2597  */
2598  startup_cost = input_startup_cost * (1.0 - hit_ratio);
2599 
2600  /*
2601  * Additionally we charge a cpu_tuple_cost to account for cache lookups,
2602  * which we'll do regardless of whether it was a cache hit or not.
2603  */
2604  startup_cost += cpu_tuple_cost;
2605 
2606  *rescan_startup_cost = startup_cost;
2607  *rescan_total_cost = total_cost;
2608 }
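
/*
 * Illustrative sketch (not part of costsize.c): the cache hit and eviction
 * ratios used above, with made-up inputs.  With 10000 expected calls, 500
 * distinct parameter sets, and room for 400 cache entries, the expected hit
 * ratio is ((calls - ndistinct) / calls) * (entries / Max(ndistinct, entries)).
 */
#include <stdio.h>

static double
dmax(double a, double b)
{
	return (a > b) ? a : b;
}

static double
dmin(double a, double b)
{
	return (a < b) ? a : b;
}

int
main(void)
{
	double	calls = 10000.0;
	double	ndistinct = 500.0;
	double	est_cache_entries = 400.0;

	double	evict_ratio = 1.0 - dmin(est_cache_entries, ndistinct) / ndistinct;
	double	hit_ratio = ((calls - ndistinct) / calls) *
		(est_cache_entries / dmax(ndistinct, est_cache_entries));

	printf("evict_ratio=%.2f hit_ratio=%.2f\n", evict_ratio, hit_ratio);
	return 0;
}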
2609 
2610 /*
2611  * cost_agg
2612  * Determines and returns the cost of performing an Agg plan node,
2613  * including the cost of its input.
2614  *
2615  * aggcosts can be NULL when there are no actual aggregate functions (i.e.,
2616  * we are using a hashed Agg node just to do grouping).
2617  *
2618  * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
2619  * are for appropriately-sorted input.
2620  */
2621 void
2622 cost_agg(Path *path, PlannerInfo *root,
2623  AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
2624  int numGroupCols, double numGroups,
2625  List *quals,
2626  Cost input_startup_cost, Cost input_total_cost,
2627  double input_tuples, double input_width)
2628 {
2629  double output_tuples;
2630  Cost startup_cost;
2631  Cost total_cost;
2632  AggClauseCosts dummy_aggcosts;
2633 
2634  /* Use all-zero per-aggregate costs if NULL is passed */
2635  if (aggcosts == NULL)
2636  {
2637  Assert(aggstrategy == AGG_HASHED);
2638  MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts));
2639  aggcosts = &dummy_aggcosts;
2640  }
2641 
2642  /*
2643  * The transCost.per_tuple component of aggcosts should be charged once
2644  * per input tuple, corresponding to the costs of evaluating the aggregate
2645  * transfns and their input expressions. The finalCost.per_tuple component
2646  * is charged once per output tuple, corresponding to the costs of
2647  * evaluating the finalfns. Startup costs are of course charged but once.
2648  *
2649  * If we are grouping, we charge an additional cpu_operator_cost per
2650  * grouping column per input tuple for grouping comparisons.
2651  *
2652  * We will produce a single output tuple if not grouping, and a tuple per
2653  * group otherwise. We charge cpu_tuple_cost for each output tuple.
2654  *
2655  * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
2656  * same total CPU cost, but AGG_SORTED has lower startup cost. If the
2657  * input path is already sorted appropriately, AGG_SORTED should be
2658  * preferred (since it has no risk of memory overflow). This will happen
2659  * as long as the computed total costs are indeed exactly equal --- but if
2660  * there's roundoff error we might do the wrong thing. So be sure that
2661  * the computations below form the same intermediate values in the same
2662  * order.
2663  */
2664  if (aggstrategy == AGG_PLAIN)
2665  {
2666  startup_cost = input_total_cost;
2667  startup_cost += aggcosts->transCost.startup;
2668  startup_cost += aggcosts->transCost.per_tuple * input_tuples;
2669  startup_cost += aggcosts->finalCost.startup;
2670  startup_cost += aggcosts->finalCost.per_tuple;
2671  /* we aren't grouping */
2672  total_cost = startup_cost + cpu_tuple_cost;
2673  output_tuples = 1;
2674  }
2675  else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
2676  {
2677  /* Here we are able to deliver output on-the-fly */
2678  startup_cost = input_startup_cost;
2679  total_cost = input_total_cost;
2680  if (aggstrategy == AGG_MIXED && !enable_hashagg)
2681  {
2682  startup_cost += disable_cost;
2683  total_cost += disable_cost;
2684  }
2685  /* calcs phrased this way to match HASHED case, see note above */
2686  total_cost += aggcosts->transCost.startup;
2687  total_cost += aggcosts->transCost.per_tuple * input_tuples;
2688  total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
2689  total_cost += aggcosts->finalCost.startup;
2690  total_cost += aggcosts->finalCost.per_tuple * numGroups;
2691  total_cost += cpu_tuple_cost * numGroups;
2692  output_tuples = numGroups;
2693  }
2694  else
2695  {
2696  /* must be AGG_HASHED */
2697  startup_cost = input_total_cost;
2698  if (!enable_hashagg)
2699  startup_cost += disable_cost;
2700  startup_cost += aggcosts->transCost.startup;
2701  startup_cost += aggcosts->transCost.per_tuple * input_tuples;
2702  /* cost of computing hash value */
2703  startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
2704  startup_cost += aggcosts->finalCost.startup;
2705 
2706  total_cost = startup_cost;
2707  total_cost += aggcosts->finalCost.per_tuple * numGroups;
2708  /* cost of retrieving from hash table */
2709  total_cost += cpu_tuple_cost * numGroups;
2710  output_tuples = numGroups;
2711  }
2712 
2713  /*
2714  * Add the disk costs of hash aggregation that spills to disk.
2715  *
2716  * Groups that go into the hash table stay in memory until finalized, so
2717  * spilling and reprocessing tuples doesn't incur additional invocations
2718  * of transCost or finalCost. Furthermore, the computed hash value is
2719  * stored with the spilled tuples, so we don't incur extra invocations of
2720  * the hash function.
2721  *
2722  * Hash Agg begins returning tuples after the first batch is complete.
2723  * Accrue writes (spilled tuples) to startup_cost and to total_cost;
2724  * accrue reads only to total_cost.
2725  */
2726  if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
2727  {
2728  double pages;
2729  double pages_written = 0.0;
2730  double pages_read = 0.0;
2731  double spill_cost;
2732  double hashentrysize;
2733  double nbatches;
2734  Size mem_limit;
2735  uint64 ngroups_limit;
2736  int num_partitions;
2737  int depth;
2738 
2739  /*
2740  * Estimate number of batches based on the computed limits. If less
2741  * than or equal to one, all groups are expected to fit in memory;
2742  * otherwise we expect to spill.
2743  */
2744  hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
2745  input_width,
2746  aggcosts->transitionSpace);
2747  hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit,
2748  &ngroups_limit, &num_partitions);
2749 
2750  nbatches = Max((numGroups * hashentrysize) / mem_limit,
2751  numGroups / ngroups_limit);
2752 
2753  nbatches = Max(ceil(nbatches), 1.0);
2754  num_partitions = Max(num_partitions, 2);
2755 
2756  /*
2757  * The number of partitions can change at different levels of
2758  * recursion; but for the purposes of this calculation assume it stays
2759  * constant.
2760  */
2761  depth = ceil(log(nbatches) / log(num_partitions));
2762 
2763  /*
2764  * Estimate number of pages read and written. For each level of
2765  * recursion, a tuple must be written and then later read.
2766  */
2767  pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
2768  pages_written = pages_read = pages * depth;
2769 
2770  /*
2771  * HashAgg has somewhat worse IO behavior than Sort on typical
2772  * hardware/OS combinations. Account for this with a generic penalty.
2773  */
2774  pages_read *= 2.0;
2775  pages_written *= 2.0;
2776 
2777  startup_cost += pages_written * random_page_cost;
2778  total_cost += pages_written * random_page_cost;
2779  total_cost += pages_read * seq_page_cost;
2780 
2781  /* account for CPU cost of spilling a tuple and reading it back */
2782  spill_cost = depth * input_tuples * 2.0 * cpu_tuple_cost;
2783  startup_cost += spill_cost;
2784  total_cost += spill_cost;
2785  }
2786 
2787  /*
2788  * If there are quals (HAVING quals), account for their cost and
2789  * selectivity.
2790  */
2791  if (quals)
2792  {
2793  QualCost qual_cost;
2794 
2795  cost_qual_eval(&qual_cost, quals, root);
2796  startup_cost += qual_cost.startup;
2797  total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
2798 
2799  output_tuples = clamp_row_est(output_tuples *
2800  clauselist_selectivity(root,
2801  quals,
2802  0,
2803  JOIN_INNER,
2804  NULL));
2805  }
2806 
2807  path->rows = output_tuples;
2808  path->startup_cost = startup_cost;
2809  path->total_cost = total_cost;
2810 }
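
/*
 * Illustrative sketch (not part of costsize.c): the HashAgg spill estimate
 * above, with made-up limits.  Assume 1e6 groups, a 200-byte hash entry, a
 * 4 MB memory limit (so roughly 20000 groups fit in memory) and 32 spill
 * partitions; the recursion depth is then ceil(log(nbatches) /
 * log(num_partitions)).  The real limits come from hash_agg_set_limits(),
 * which is not reproduced here.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double	numGroups = 1e6;
	double	hashentrysize = 200.0;
	double	mem_limit = 4096.0 * 1024.0;	/* assumed limits */
	double	ngroups_limit = mem_limit / hashentrysize;
	int		num_partitions = 32;

	double	nbatches = fmax((numGroups * hashentrysize) / mem_limit,
							numGroups / ngroups_limit);

	nbatches = fmax(ceil(nbatches), 1.0);

	int		depth = (int) ceil(log(nbatches) / log(num_partitions));

	printf("nbatches=%.0f depth=%d\n", nbatches, depth);
	return 0;
}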
2811 
2812 /*
2813  * cost_windowagg
2814  * Determines and returns the cost of performing a WindowAgg plan node,
2815  * including the cost of its input.
2816  *
2817  * Input is assumed already properly sorted.
2818  */
2819 void
2820 cost_windowagg(Path *path, PlannerInfo *root,
2821  List *windowFuncs, int numPartCols, int numOrderCols,
2822  Cost input_startup_cost, Cost input_total_cost,
2823  double input_tuples)
2824 {
2825  Cost startup_cost;
2826  Cost total_cost;
2827  ListCell *lc;
2828 
2829  startup_cost = input_startup_cost;
2830  total_cost = input_total_cost;
2831 
2832  /*
2833  * Window functions are assumed to cost their stated execution cost, plus
2834  * the cost of evaluating their input expressions, per tuple. Since they
2835  * may in fact evaluate their inputs at multiple rows during each cycle,
2836  * this could be a drastic underestimate; but without a way to know how
2837  * many rows the window function will fetch, it's hard to do better. In
2838  * any case, it's a good estimate for all the built-in window functions,
2839  * so we'll just do this for now.
2840  */
2841  foreach(lc, windowFuncs)
2842  {
2843  WindowFunc *wfunc = lfirst_node(WindowFunc, lc);
2844  Cost wfunccost;
2845  QualCost argcosts;
2846 
2847  argcosts.startup = argcosts.per_tuple = 0;
2848  add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
2849  &argcosts);
2850  startup_cost += argcosts.startup;
2851  wfunccost = argcosts.per_tuple;
2852 
2853  /* also add the input expressions' cost to per-input-row costs */
2854  cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
2855  startup_cost += argcosts.startup;
2856  wfunccost += argcosts.per_tuple;
2857 
2858  /*
2859  * Add the filter's cost to per-input-row costs. XXX We should reduce
2860  * input expression costs according to filter selectivity.
2861  */
2862  cost_qual_eval_node(&argcosts, (Node *) wfunc->aggfilter, root);
2863  startup_cost += argcosts.startup;
2864  wfunccost += argcosts.per_tuple;
2865 
2866  total_cost += wfunccost * input_tuples;
2867  }
2868 
2869  /*
2870  * We also charge cpu_operator_cost per grouping column per tuple for
2871  * grouping comparisons, plus cpu_tuple_cost per tuple for general
2872  * overhead.
2873  *
2874  * XXX this neglects costs of spooling the data to disk when it overflows
2875  * work_mem. Sooner or later that should get accounted for.
2876  */
2877  total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
2878  total_cost += cpu_tuple_cost * input_tuples;
2879 
2880  path->rows = input_tuples;
2881  path->startup_cost = startup_cost;
2882  path->total_cost = total_cost;
2883 }
2884 
2885 /*
2886  * cost_group
2887  * Determines and returns the cost of performing a Group plan node,
2888  * including the cost of its input.
2889  *
2890  * Note: caller must ensure that input costs are for appropriately-sorted
2891  * input.
2892  */
2893 void
2894 cost_group(Path *path, PlannerInfo *root,
2895  int numGroupCols, double numGroups,
2896  List *quals,
2897  Cost input_startup_cost, Cost input_total_cost,
2898  double input_tuples)
2899 {
2900  double output_tuples;
2901  Cost startup_cost;
2902  Cost total_cost;
2903 
2904  output_tuples = numGroups;
2905  startup_cost = input_startup_cost;
2906  total_cost = input_total_cost;
2907 
2908  /*
2909  * Charge one cpu_operator_cost per comparison per input tuple. We assume
2910  * all columns get compared for most of the tuples.
2911  */
2912  total_cost += cpu_operator_cost * input_tuples * numGroupCols;
2913 
2914  /*
2915  * If there are quals (HAVING quals), account for their cost and
2916  * selectivity.
2917  */
2918  if (quals)
2919  {
2920  QualCost qual_cost;
2921 
2922  cost_qual_eval(&qual_cost, quals, root);
2923  startup_cost += qual_cost.startup;
2924  total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
2925 
2926  output_tuples = clamp_row_est(output_tuples *
2927  clauselist_selectivity(root,
2928  quals,
2929  0,
2930  JOIN_INNER,
2931  NULL));
2932  }
2933 
2934  path->rows = output_tuples;
2935  path->startup_cost = startup_cost;
2936  path->total_cost = total_cost;
2937 }
2938 
2939 /*
2940  * initial_cost_nestloop
2941  * Preliminary estimate of the cost of a nestloop join path.
2942  *
2943  * This must quickly produce lower-bound estimates of the path's startup and
2944  * total costs. If we are unable to eliminate the proposed path from
2945  * consideration using the lower bounds, final_cost_nestloop will be called
2946  * to obtain the final estimates.
2947  *
2948  * The exact division of labor between this function and final_cost_nestloop
2949  * is private to them, and represents a tradeoff between speed of the initial
2950  * estimate and getting a tight lower bound. We choose to not examine the
2951  * join quals here, since that's by far the most expensive part of the
2952  * calculations. The end result is that CPU-cost considerations must be
2953  * left for the second phase; and for SEMI/ANTI joins, we must also postpone
2954  * incorporation of the inner path's run cost.
2955  *
2956  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
2957  * other data to be used by final_cost_nestloop
2958  * 'jointype' is the type of join to be performed
2959  * 'outer_path' is the outer input to the join
2960  * 'inner_path' is the inner input to the join
2961  * 'extra' contains miscellaneous information about the join
2962  */
2963 void
2964 initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
2965  JoinType jointype,
2966  Path *outer_path, Path *inner_path,
2967  JoinPathExtraData *extra)
2968 {
2969  Cost startup_cost = 0;
2970  Cost run_cost = 0;
2971  double outer_path_rows = outer_path->rows;
2972  Cost inner_rescan_start_cost;
2973  Cost inner_rescan_total_cost;
2974  Cost inner_run_cost;
2975  Cost inner_rescan_run_cost;
2976 
2977  /* estimate costs to rescan the inner relation */
2978  cost_rescan(root, inner_path,
2979  &inner_rescan_start_cost,
2980  &inner_rescan_total_cost);
2981 
2982  /* cost of source data */
2983 
2984  /*
2985  * NOTE: clearly, we must pay both outer and inner paths' startup_cost
2986  * before we can start returning tuples, so the join's startup cost is
2987  * their sum. We'll also pay the inner path's rescan startup cost
2988  * multiple times.
2989  */
2990  startup_cost += outer_path->startup_cost + inner_path->startup_cost;
2991  run_cost += outer_path->total_cost - outer_path->startup_cost;
2992  if (outer_path_rows > 1)
2993  run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
2994 
2995  inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
2996  inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
2997 
2998  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI ||
2999  extra->inner_unique)
3000  {
3001  /*
3002  * With a SEMI or ANTI join, or if the innerrel is known unique, the
3003  * executor will stop after the first match.
3004  *
3005  * Getting decent estimates requires inspection of the join quals,
3006  * which we choose to postpone to final_cost_nestloop.
3007  */
3008 
3009  /* Save private data for final_cost_nestloop */
3010  workspace->inner_run_cost = inner_run_cost;
3011  workspace->inner_rescan_run_cost = inner_rescan_run_cost;
3012  }
3013  else
3014  {
3015  /* Normal case; we'll scan whole input rel for each outer row */
3016  run_cost += inner_run_cost;
3017  if (outer_path_rows > 1)
3018  run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
3019  }
3020 
3021  /* CPU costs left for later */
3022 
3023  /* Public result fields */
3024  workspace->startup_cost = startup_cost;
3025  workspace->total_cost = startup_cost + run_cost;
3026  /* Save private data for final_cost_nestloop */
3027  workspace->run_cost = run_cost;
3028 }
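
/*
 * Illustrative sketch (not part of costsize.c): the source-data portion of
 * the nestloop estimate above (the normal, non-SEMI/ANTI case), with made-up
 * path costs.  CPU costs for the join quals are deliberately left out, as in
 * initial_cost_nestloop() itself.
 */
#include <stdio.h>

int
main(void)
{
	double	outer_startup = 0.0, outer_total = 100.0, outer_rows = 50.0;
	double	inner_startup = 5.0, inner_total = 25.0;
	double	rescan_start = 0.0, rescan_total = 20.0;	/* assumed cost_rescan outputs */

	double	startup_cost = outer_startup + inner_startup;
	double	run_cost = (outer_total - outer_startup) +
		(outer_rows - 1) * rescan_start +
		(inner_total - inner_startup) +
		(outer_rows - 1) * (rescan_total - rescan_start);

	printf("startup=%.1f total=%.1f\n", startup_cost, startup_cost + run_cost);
	return 0;
}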
3029 
3030 /*
3031  * final_cost_nestloop
3032  * Final estimate of the cost and result size of a nestloop join path.
3033  *
3034  * 'path' is already filled in except for the rows and cost fields
3035  * 'workspace' is the result from initial_cost_nestloop
3036  * 'extra' contains miscellaneous information about the join
3037  */
3038 void
3039 final_cost_nestloop(PlannerInfo *root, NestPath *path,
3040  JoinCostWorkspace *workspace,
3041  JoinPathExtraData *extra)
3042 {
3043  Path *outer_path = path->jpath.outerjoinpath;
3044  Path *inner_path = path->jpath.innerjoinpath;
3045  double outer_path_rows = outer_path->rows;
3046  double inner_path_rows = inner_path->rows;
3047  Cost startup_cost = workspace->startup_cost;
3048  Cost run_cost = workspace->run_cost;
3049  Cost cpu_per_tuple;
3050  QualCost restrict_qual_cost;
3051  double ntuples;
3052 
3053  /* Protect some assumptions below that rowcounts aren't zero */
3054  if (outer_path_rows <= 0)
3055  outer_path_rows = 1;
3056  if (inner_path_rows <= 0)
3057  inner_path_rows = 1;
3058  /* Mark the path with the correct row estimate */
3059  if (path->jpath.path.param_info)
3060  path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
3061  else
3062  path->jpath.path.rows = path->jpath.path.parent->rows;
3063 
3064  /* For partial paths, scale row estimate. */
3065  if (path->jpath.path.parallel_workers > 0)
3066  {
3067  double parallel_divisor = get_parallel_divisor(&path->jpath.path);
3068 
3069  path->jpath.path.rows =
3070  clamp_row_est(path->jpath.path.rows / parallel_divisor);
3071  }
3072 
3073  /*
3074  * We could include disable_cost in the preliminary estimate, but that
3075  * would amount to optimizing for the case where the join method is
3076  * disabled, which doesn't seem like the way to bet.
3077  */
3078  if (!enable_nestloop)
3079  startup_cost += disable_cost;
3080 
3081  /* cost of inner-relation source data (we already dealt with outer rel) */
3082 
3083  if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI ||
3084  extra->inner_unique)
3085  {
3086  /*
3087  * With a SEMI or ANTI join, or if the innerrel is known unique, the
3088  * executor will stop after the first match.
3089  */
3090  Cost inner_run_cost = workspace->inner_run_cost;
3091  Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
3092  double outer_matched_rows;
3093  double outer_unmatched_rows;
3094  Selectivity inner_scan_frac;
3095 
3096  /*
3097  * For an outer-rel row that has at least one match, we can expect the
3098  * inner scan to stop after a fraction 1/(match_count+1) of the inner
3099  * rows, if the matches are evenly distributed. Since they probably
3100  * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
3101  * that fraction. (If we used a larger fuzz factor, we'd have to
3102  * clamp inner_scan_frac to at most 1.0; but since match_count is at
3103  * least 1, no such clamp is needed now.)
3104  */
3105  outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
3106  outer_unmatched_rows = outer_path_rows - outer_matched_rows;
3107  inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
3108 
3109  /*
3110  * Compute number of tuples processed (not number emitted!). First,
3111  * account for successfully-matched outer rows.
3112  */
3113  ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
3114 
3115  /*
3116  * Now we need to estimate the actual costs of scanning the inner
3117  * relation, which may be quite a bit less than N times inner_run_cost
3118  * due to early scan stops. We consider two cases. If the inner path
3119  * is an indexscan using all the joinquals as indexquals, then an
3120  * unmatched outer row results in an indexscan returning no rows,
3121  * which is probably quite cheap. Otherwise, the executor will have
3122  * to scan the whole inner rel for an unmatched row; not so cheap.
3123  */
3124  if (has_indexed_join_quals(path))
3125  {
3126  /*
3127  * Successfully-matched outer rows will only require scanning
3128  * inner_scan_frac of the inner relation. In this case, we don't
3129  * need to charge the full inner_run_cost even when that's more
3130  * than inner_rescan_run_cost, because we can assume that none of
3131  * the inner scans ever scan the whole inner relation. So it's
3132  * okay to assume that all the inner scan executions can be
3133  * fractions of the full cost, even if materialization is reducing
3134  * the rescan cost. At this writing, it's impossible to get here
3135  * for a materialized inner scan, so inner_run_cost and
3136  * inner_rescan_run_cost will be the same anyway; but just in
3137  * case, use inner_run_cost for the first matched tuple and
3138  * inner_rescan_run_cost for additional ones.
3139  */
3140  run_cost += inner_run_cost * inner_scan_frac;
3141  if (outer_matched_rows > 1)
3142  run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
3143 
3144  /*
3145  * Add the cost of inner-scan executions for unmatched outer rows.
3146  * We estimate this as the same cost as returning the first tuple
3147  * of a nonempty scan. We consider that these are all rescans,
3148  * since we used inner_run_cost once already.
3149  */
3150  run_cost += outer_unmatched_rows *
3151  inner_rescan_run_cost / inner_path_rows;
3152 
3153  /*
3154  * We won't be evaluating any quals at all for unmatched rows, so
3155  * don't add them to ntuples.
3156  */
3157  }
3158  else
3159  {
3160  /*
3161  * Here, a complicating factor is that rescans may be cheaper than
3162  * first scans. If we never scan all the way to the end of the
3163  * inner rel, it might be (depending on the plan type) that we'd
3164  * never pay the whole inner first-scan run cost. However it is
3165  * difficult to estimate whether that will happen (and it could
3166  * not happen if there are any unmatched outer rows!), so be
3167  * conservative and always charge the whole first-scan cost once.
3168  * We consider this charge to correspond to the first unmatched
3169  * outer row, unless there isn't one in our estimate, in which
3170  * case blame it on the first matched row.
3171  */
3172 
3173  /* First, count all unmatched join tuples as being processed */
3174  ntuples += outer_unmatched_rows * inner_path_rows;
3175 
3176  /* Now add the forced full scan, and decrement appropriate count */
3177  run_cost += inner_run_cost;
3178  if (outer_unmatched_rows >= 1)
3179  outer_unmatched_rows -= 1;
3180  else
3181  outer_matched_rows -= 1;
3182 
3183  /* Add inner run cost for additional outer tuples having matches */
3184  if (outer_matched_rows > 0)
3185  run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac;
3186 
3187  /* Add inner run cost for additional unmatched outer tuples */
3188  if (outer_unmatched_rows > 0)
3189  run_cost += outer_unmatched_rows * inner_rescan_run_cost;
3190  }
3191  }
3192  else
3193  {
3194  /* Normal-case source costs were included in preliminary estimate */
3195 
3196  /* Compute number of tuples processed (not number emitted!) */
3197  ntuples = outer_path_rows * inner_path_rows;
3198  }
3199 
3200  /* CPU costs */
3201  cost_qual_eval(&restrict_qual_cost, path->jpath.joinrestrictinfo, root);
3202  startup_cost += restrict_qual_cost.startup;
3203  cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
3204  run_cost += cpu_per_tuple * ntuples;
3205 
3206  /* tlist eval costs are paid per output row, not per tuple scanned */
3207  startup_cost += path->jpath.path.pathtarget->cost.startup;
3208  run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
3209 
3210  path->jpath.path.startup_cost = startup_cost;
3211  path->jpath.path.total_cost = startup_cost + run_cost;
3212 }
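/*
 * Editor's illustration (not part of costsize.c): a minimal sketch of the
 * SEMI/ANTI charging above for the has_indexed_join_quals() branch, using
 * plain doubles for the inputs.  The helper name and standalone form are
 * hypothetical; it only restates the arithmetic of the indexed-quals branch.
 */
static double
example_semi_inner_run_cost(double inner_run_cost,
							double inner_rescan_run_cost,
							double inner_scan_frac,
							double outer_matched_rows,
							double outer_unmatched_rows,
							double inner_path_rows)
{
	/* the first matched outer row pays a fractional first scan */
	double		cost = inner_run_cost * inner_scan_frac;

	/* additional matched outer rows pay fractional rescans */
	if (outer_matched_rows > 1)
		cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;

	/* unmatched outer rows are charged as "first tuple of a rescan" */
	cost += outer_unmatched_rows * inner_rescan_run_cost / inner_path_rows;

	return cost;
}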
3213 
3214 /*
3215  * initial_cost_mergejoin
3216  * Preliminary estimate of the cost of a mergejoin path.
3217  *
3218  * This must quickly produce lower-bound estimates of the path's startup and
3219  * total costs. If we are unable to eliminate the proposed path from
3220  * consideration using the lower bounds, final_cost_mergejoin will be called
3221  * to obtain the final estimates.
3222  *
3223  * The exact division of labor between this function and final_cost_mergejoin
3224  * is private to them, and represents a tradeoff between speed of the initial
3225  * estimate and getting a tight lower bound. We choose to not examine the
3226  * join quals here, except for obtaining the scan selectivity estimate which
3227  * is really essential (but fortunately, use of caching keeps the cost of
3228  * getting that down to something reasonable).
3229  * We also assume that cost_sort is cheap enough to use here.
3230  *
3231  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
3232  * other data to be used by final_cost_mergejoin
3233  * 'jointype' is the type of join to be performed
3234  * 'mergeclauses' is the list of joinclauses to be used as merge clauses
3235  * 'outer_path' is the outer input to the join
3236  * 'inner_path' is the inner input to the join
3237  * 'outersortkeys' is the list of sort keys for the outer path
3238  * 'innersortkeys' is the list of sort keys for the inner path
3239  * 'extra' contains miscellaneous information about the join
3240  *
3241  * Note: outersortkeys and innersortkeys should be NIL if no explicit
3242  * sort is needed because the respective source path is already ordered.
3243  */
3244 void
3245 initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
3246  JoinType jointype,
3247  List *mergeclauses,
3248  Path *outer_path, Path *inner_path,
3249  List *outersortkeys, List *innersortkeys,
3250  JoinPathExtraData *extra)
3251 {
3252  Cost startup_cost = 0;
3253  Cost run_cost = 0;
3254  double outer_path_rows = outer_path->rows;
3255  double inner_path_rows = inner_path->rows;
3256  Cost inner_run_cost;
3257  double outer_rows,
3258  inner_rows,
3259  outer_skip_rows,
3260  inner_skip_rows;
3261  Selectivity outerstartsel,
3262  outerendsel,
3263  innerstartsel,
3264  innerendsel;
3265  Path sort_path; /* dummy for result of cost_sort */
3266 
3267  /* Protect some assumptions below that rowcounts aren't zero */
3268  if (outer_path_rows <= 0)
3269  outer_path_rows = 1;
3270  if (inner_path_rows <= 0)
3271  inner_path_rows = 1;
3272 
3273  /*
3274  * A merge join will stop as soon as it exhausts either input stream
3275  * (unless it's an outer join, in which case the outer side has to be
3276  * scanned all the way anyway). Estimate fraction of the left and right
3277  * inputs that will actually need to be scanned. Likewise, we can
3278  * estimate the number of rows that will be skipped before the first join
3279  * pair is found, which should be factored into startup cost. We use only
3280  * the first (most significant) merge clause for this purpose. Since
3281  * mergejoinscansel() is a fairly expensive computation, we cache the
3282  * results in the merge clause RestrictInfo.
3283  */
3284  if (mergeclauses && jointype != JOIN_FULL)
3285  {
3286  RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
3287  List *opathkeys;
3288  List *ipathkeys;
3289  PathKey *opathkey;
3290  PathKey *ipathkey;
3291  MergeScanSelCache *cache;
3292 
3293  /* Get the input pathkeys to determine the sort-order details */
3294  opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
3295  ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys;
3296  Assert(opathkeys);
3297  Assert(ipathkeys);
3298  opathkey = (PathKey *) linitial(opathkeys);
3299  ipathkey = (PathKey *) linitial(ipathkeys);
3300  /* debugging check */
3301  if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
3302  opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation ||
3303  opathkey->pk_strategy != ipathkey->pk_strategy ||
3304  opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
3305  elog(ERROR, "left and right pathkeys do not match in mergejoin");
3306 
3307  /* Get the selectivity with caching */
3308  cache = cached_scansel(root, firstclause, opathkey);
3309 
3310  if (bms_is_subset(firstclause->left_relids,
3311  outer_path->parent->relids))
3312  {
3313  /* left side of clause is outer */
3314  outerstartsel = cache->leftstartsel;
3315  outerendsel = cache->leftendsel;
3316  innerstartsel = cache->rightstartsel;
3317  innerendsel = cache->rightendsel;
3318  }
3319  else
3320  {
3321  /* left side of clause is inner */
3322  outerstartsel = cache->rightstartsel;
3323  outerendsel = cache->rightendsel;
3324  innerstartsel = cache->leftstartsel;
3325  innerendsel = cache->leftendsel;
3326  }
3327  if (jointype == JOIN_LEFT ||
3328  jointype == JOIN_ANTI)
3329  {
3330  outerstartsel = 0.0;
3331  outerendsel = 1.0;
3332  }
3333  else if (jointype == JOIN_RIGHT)
3334  {
3335  innerstartsel = 0.0;
3336  innerendsel = 1.0;
3337  }
3338  }
3339  else
3340  {
3341  /* cope with clauseless or full mergejoin */
3342  outerstartsel = innerstartsel = 0.0;
3343  outerendsel = innerendsel = 1.0;
3344  }
3345 
3346  /*
3347  * Convert selectivities to row counts. We force outer_rows and
3348  * inner_rows to be at least 1, but the skip_rows estimates can be zero.
3349  */
3350  outer_skip_rows = rint(outer_path_rows * outerstartsel);
3351  inner_skip_rows = rint(inner_path_rows * innerstartsel);
3352  outer_rows = clamp_row_est(outer_path_rows * outerendsel);
3353  inner_rows = clamp_row_est(inner_path_rows * innerendsel);
3354 
3355  Assert(outer_skip_rows <= outer_rows);
3356  Assert(inner_skip_rows <= inner_rows);
3357 
3358  /*
3359  * Readjust scan selectivities to account for above rounding. This is
3360  * normally an insignificant effect, but when there are only a few rows in
3361  * the inputs, failing to do this makes for a large percentage error.
3362  */
3363  outerstartsel = outer_skip_rows / outer_path_rows;
3364  innerstartsel = inner_skip_rows / inner_path_rows;
3365  outerendsel = outer_rows / outer_path_rows;
3366  innerendsel = inner_rows / inner_path_rows;
3367 
3368  Assert(outerstartsel <= outerendsel);
3369  Assert(innerstartsel <= innerendsel);
3370 
3371  /* cost of source data */
3372 
3373  if (outersortkeys) /* do we need to sort outer? */
3374  {
3375  cost_sort(&sort_path,
3376  root,
3377  outersortkeys,
3378  outer_path->total_cost,
3379  outer_path_rows,
3380  outer_path->pathtarget->width,
3381  0.0,
3382  work_mem,
3383  -1.0);
3384  startup_cost += sort_path.startup_cost;
3385  startup_cost += (sort_path.total_cost - sort_path.startup_cost)
3386  * outerstartsel;
3387  run_cost += (sort_path.total_cost - sort_path.startup_cost)
3388  * (outerendsel - outerstartsel);
3389  }
3390  else
3391  {
3392  startup_cost += outer_path->startup_cost;
3393  startup_cost += (outer_path->total_cost - outer_path->startup_cost)
3394  * outerstartsel;
3395  run_cost += (outer_path->total_cost - outer_path->startup_cost)
3396  * (outerendsel - outerstartsel);
3397  }
3398 
3399  if (innersortkeys) /* do we need to sort inner? */
3400  {
3401  cost_sort(&sort_path,
3402  root,
3403  innersortkeys,
3404  inner_path->total_cost,
3405  inner_path_rows,
3406  inner_path->pathtarget->width,
3407  0.0,
3408  work_mem,
3409  -1.0);
3410  startup_cost += sort_path.startup_cost;
3411  startup_cost += (sort_path.total_cost - sort_path.startup_cost)
3412  * innerstartsel;
3413  inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
3414  * (innerendsel - innerstartsel);
3415  }
3416  else
3417  {
3418  startup_cost += inner_path->startup_cost;
3419  startup_cost += (inner_path->total_cost - inner_path->startup_cost)
3420  * innerstartsel;
3421  inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
3422  * (innerendsel - innerstartsel);
3423  }
3424 
3425  /*
3426  * We can't yet determine whether rescanning occurs, or whether
3427  * materialization of the inner input should be done. The minimum
3428  * possible inner input cost, regardless of rescan and materialization
3429  * considerations, is inner_run_cost. We include that in
3430  * workspace->total_cost, but not yet in run_cost.
3431  */
3432 
3433  /* CPU costs left for later */
3434 
3435  /* Public result fields */
3436  workspace->startup_cost = startup_cost;
3437  workspace->total_cost = startup_cost + run_cost + inner_run_cost;
3438  /* Save private data for final_cost_mergejoin */
3439  workspace->run_cost = run_cost;
3440  workspace->inner_run_cost = inner_run_cost;
3441  workspace->outer_rows = outer_rows;
3442  workspace->inner_rows = inner_rows;
3443  workspace->outer_skip_rows = outer_skip_rows;
3444  workspace->inner_skip_rows = inner_skip_rows;
3445 }
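/*
 * Editor's illustration (not part of costsize.c): how the start/end
 * selectivities computed above prorate an input path's cost.  A hypothetical
 * sketch, not planner API.  For example, with startup_cost = 10,
 * total_cost = 110, startsel = 0.1 and endsel = 0.6, the join is charged
 * 10 + 100 * 0.1 = 20 at startup and 100 * 0.5 = 50 at run time; the last
 * 40% of the input is assumed never to be scanned.
 */
static void
example_prorate_input_cost(double input_startup_cost, double input_total_cost,
						   double startsel, double endsel,
						   double *startup_charge, double *run_charge)
{
	double		input_run_cost = input_total_cost - input_startup_cost;

	/* pay the input's full startup, plus the run fraction spent skipping */
	*startup_charge = input_startup_cost + input_run_cost * startsel;
	/* pay only the run fraction between the first and last joinable rows */
	*run_charge = input_run_cost * (endsel - startsel);
}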
3446 
3447 /*
3448  * final_cost_mergejoin
3449  * Final estimate of the cost and result size of a mergejoin path.
3450  *
3451  * Unlike other costsize functions, this routine makes two actual decisions:
3452  * whether the executor will need to do mark/restore, and whether we should
3453  * materialize the inner path. It would be logically cleaner to build
3454  * separate paths testing these alternatives, but that would require repeating
3455  * most of the cost calculations, which are not all that cheap. Since the
3456  * choice will not affect output pathkeys or startup cost, only total cost,
3457  * there is no possibility of wanting to keep more than one path. So it seems
3458  * best to make the decisions here and record them in the path's
3459  * skip_mark_restore and materialize_inner fields.
3460  *
3461  * Mark/restore overhead is usually required, but can be skipped if we know
3462  * that the executor need find only one match per outer tuple, and that the
3463  * mergeclauses are sufficient to identify a match.
3464  *
3465  * We materialize the inner path if we need mark/restore and either the inner
3466  * path can't support mark/restore, or it's cheaper to use an interposed
3467  * Material node to handle mark/restore.
3468  *
3469  * 'path' is already filled in except for the rows and cost fields and
3470  * skip_mark_restore and materialize_inner
3471  * 'workspace' is the result from initial_cost_mergejoin
3472  * 'extra' contains miscellaneous information about the join
3473  */
3474 void
3475 final_cost_mergejoin(PlannerInfo *root, MergePath *path,
3476  JoinCostWorkspace *workspace,
3477  JoinPathExtraData *extra)
3478 {
3479  Path *outer_path = path->jpath.outerjoinpath;
3480  Path *inner_path = path->jpath.innerjoinpath;
3481  double inner_path_rows = inner_path->rows;
3482  List *mergeclauses = path->path_mergeclauses;
3483  List *innersortkeys = path->innersortkeys;
3484  Cost startup_cost = workspace->startup_cost;
3485  Cost run_cost = workspace->run_cost;
3486  Cost inner_run_cost = workspace->inner_run_cost;
3487  double outer_rows = workspace->outer_rows;
3488  double inner_rows = workspace->inner_rows;
3489  double outer_skip_rows = workspace->outer_skip_rows;
3490  double inner_skip_rows = workspace->inner_skip_rows;
3491  Cost cpu_per_tuple,
3492  bare_inner_cost,
3493  mat_inner_cost;
3494  QualCost merge_qual_cost;
3495  QualCost qp_qual_cost;
3496  double mergejointuples,
3497  rescannedtuples;
3498  double rescanratio;
3499 
3500  /* Protect some assumptions below that rowcounts aren't zero */
3501  if (inner_path_rows <= 0)
3502  inner_path_rows = 1;
3503 
3504  /* Mark the path with the correct row estimate */
3505  if (path->jpath.path.param_info)
3506  path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
3507  else
3508  path->jpath.path.rows = path->jpath.path.parent->rows;
3509 
3510  /* For partial paths, scale row estimate. */
3511  if (path->jpath.path.parallel_workers > 0)
3512  {
3513  double parallel_divisor = get_parallel_divisor(&path->jpath.path);
3514 
3515  path->jpath.path.rows =
3516  clamp_row_est(path->jpath.path.rows / parallel_divisor);
3517  }
3518 
3519  /*
3520  * We could include disable_cost in the preliminary estimate, but that
3521  * would amount to optimizing for the case where the join method is
3522  * disabled, which doesn't seem like the way to bet.
3523  */
3524  if (!enable_mergejoin)
3525  startup_cost += disable_cost;
3526 
3527  /*
3528  * Compute cost of the mergequals and qpquals (other restriction clauses)
3529  * separately.
3530  */
3531  cost_qual_eval(&merge_qual_cost, mergeclauses, root);
3532  cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
3533  qp_qual_cost.startup -= merge_qual_cost.startup;
3534  qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
3535 
3536  /*
3537  * With a SEMI or ANTI join, or if the innerrel is known unique, the
3538  * executor will stop scanning for matches after the first match. When
3539  * all the joinclauses are merge clauses, this means we don't ever need to
3540  * back up the merge, and so we can skip mark/restore overhead.
3541  */
3542  if ((path->jpath.jointype == JOIN_SEMI ||
3543  path->jpath.jointype == JOIN_ANTI ||
3544  extra->inner_unique) &&
3545  (list_length(path->jpath.joinrestrictinfo) ==
3546  list_length(path->path_mergeclauses)))
3547  path->skip_mark_restore = true;
3548  else
3549  path->skip_mark_restore = false;
3550 
3551  /*
3552  * Get approx # tuples passing the mergequals. We use approx_tuple_count
3553  * here because we need an estimate done with JOIN_INNER semantics.
3554  */
3555  mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
3556 
3557  /*
3558  * When there are equal merge keys in the outer relation, the mergejoin
3559  * must rescan any matching tuples in the inner relation. This means
3560  * re-fetching inner tuples; we have to estimate how often that happens.
3561  *
3562  * For regular inner and outer joins, the number of re-fetches can be
3563  * estimated approximately as size of merge join output minus size of
3564  * inner relation. Assume that the distinct key values are 1, 2, ..., and
3565  * denote the number of values of each key in the outer relation as m1,
3566  * m2, ...; in the inner relation, n1, n2, ... Then we have
3567  *
3568  * size of join = m1 * n1 + m2 * n2 + ...
3569  *
3570  * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
3571  * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
3572  * relation
3573  *
3574  * This equation works correctly for outer tuples having no inner match
3575  * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
3576  * are effectively subtracting those from the number of rescanned tuples,
3577  * when we should not. Can we do better without expensive selectivity
3578  * computations?
3579  *
3580  * The whole issue is moot if we are working from a unique-ified outer
3581  * input, or if we know we don't need to mark/restore at all.
3582  */
3583  if (IsA(outer_path, UniquePath) || path->skip_mark_restore)
3584  rescannedtuples = 0;
3585  else
3586  {
3587  rescannedtuples = mergejointuples - inner_path_rows;
3588  /* Must clamp because of possible underestimate */
3589  if (rescannedtuples < 0)
3590  rescannedtuples = 0;
3591  }
3592 
3593  /*
3594  * We'll inflate various costs this much to account for rescanning. Note
3595  * that this is to be multiplied by something involving inner_rows, or
3596  * another number related to the portion of the inner rel we'll scan.
3597  */
3598  rescanratio = 1.0 + (rescannedtuples / inner_rows);
3599 
3600  /*
3601  * Decide whether we want to materialize the inner input to shield it from
3602  * mark/restore and performing re-fetches. Our cost model for regular
3603  * re-fetches is that a re-fetch costs the same as an original fetch,
3604  * which is probably an overestimate; but on the other hand we ignore the
3605  * bookkeeping costs of mark/restore. Not clear if it's worth developing
3606  * a more refined model. So we just need to inflate the inner run cost by
3607  * rescanratio.
3608  */
3609  bare_inner_cost = inner_run_cost * rescanratio;
3610 
3611  /*
3612  * When we interpose a Material node the re-fetch cost is assumed to be
3613  * just cpu_operator_cost per tuple, independently of the underlying
3614  * plan's cost; and we charge an extra cpu_operator_cost per original
3615  * fetch as well. Note that we're assuming the materialize node will
3616  * never spill to disk, since it only has to remember tuples back to the
3617  * last mark. (If there are a huge number of duplicates, our other cost
3618  * factors will make the path so expensive that it probably won't get
3619  * chosen anyway.) So we don't use cost_rescan here.
3620  *
3621  * Note: keep this estimate in sync with create_mergejoin_plan's labeling
3622  * of the generated Material node.
3623  */
3624  mat_inner_cost = inner_run_cost +
3625  cpu_operator_cost * inner_rows * rescanratio;
3626 
3627  /*
3628  * If we don't need mark/restore at all, we don't need materialization.
3629  */
3630  if (path->skip_mark_restore)
3631  path->materialize_inner = false;
3632 
3633  /*
3634  * Prefer materializing if it looks cheaper, unless the user has asked to
3635  * suppress materialization.
3636  */
3637  else if (enable_material && mat_inner_cost < bare_inner_cost)
3638  path->materialize_inner = true;
3639 
3640  /*
3641  * Even if materializing doesn't look cheaper, we *must* do it if the
3642  * inner path is to be used directly (without sorting) and it doesn't
3643  * support mark/restore.
3644  *
3645  * Since the inner side must be ordered, and only Sorts and IndexScans can
3646  * create order to begin with, and they both support mark/restore, you
3647  * might think there's no problem --- but you'd be wrong. Nestloop and
3648  * merge joins can *preserve* the order of their inputs, so they can be
3649  * selected as the input of a mergejoin, and they don't support
3650  * mark/restore at present.
3651  *
3652  * We don't test the value of enable_material here, because
3653  * materialization is required for correctness in this case, and turning
3654  * it off does not entitle us to deliver an invalid plan.
3655  */
3656  else if (innersortkeys == NIL &&
3657  !ExecSupportsMarkRestore(inner_path))
3658  path->materialize_inner = true;
3659 
3660  /*
3661  * Also, force materializing if the inner path is to be sorted and the
3662  * sort is expected to spill to disk. This is because the final merge
3663  * pass can be done on-the-fly if it doesn't have to support mark/restore.
3664  * We don't try to adjust the cost estimates for this consideration,
3665  * though.
3666  *
3667  * Since materialization is a performance optimization in this case,
3668  * rather than necessary for correctness, we skip it if enable_material is
3669  * off.
3670  */
3671  else if (enable_material && innersortkeys != NIL &&
3672  relation_byte_size(inner_path_rows,
3673  inner_path->pathtarget->width) >
3674  (work_mem * 1024L))
3675  path->materialize_inner = true;
3676  else
3677  path->materialize_inner = false;
3678 
3679  /* Charge the right incremental cost for the chosen case */
3680  if (path->materialize_inner)
3681  run_cost += mat_inner_cost;
3682  else
3683  run_cost += bare_inner_cost;
3684 
3685  /* CPU costs */
3686 
3687  /*
3688  * The number of tuple comparisons needed is approximately number of outer
3689  * rows plus number of inner rows plus number of rescanned tuples (can we
3690  * refine this?). At each one, we need to evaluate the mergejoin quals.
3691  */
3692  startup_cost += merge_qual_cost.startup;
3693  startup_cost += merge_qual_cost.per_tuple *
3694  (outer_skip_rows + inner_skip_rows * rescanratio);
3695  run_cost += merge_qual_cost.per_tuple *
3696  ((outer_rows - outer_skip_rows) +
3697  (inner_rows - inner_skip_rows) * rescanratio);
3698 
3699  /*
3700  * For each tuple that gets through the mergejoin proper, we charge
3701  * cpu_tuple_cost plus the cost of evaluating additional restriction
3702  * clauses that are to be applied at the join. (This is pessimistic since
3703  * not all of the quals may get evaluated at each tuple.)
3704  *
3705  * Note: we could adjust for SEMI/ANTI joins skipping some qual
3706  * evaluations here, but it's probably not worth the trouble.
3707  */
3708  startup_cost += qp_qual_cost.startup;
3709  cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
3710  run_cost += cpu_per_tuple * mergejointuples;
3711 
3712  /* tlist eval costs are paid per output row, not per tuple scanned */
3713  startup_cost += path->jpath.path.pathtarget->cost.startup;
3714  run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
3715 
3716  path->jpath.path.startup_cost = startup_cost;
3717  path->jpath.path.total_cost = startup_cost + run_cost;
3718 }
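/*
 * Editor's illustration (not part of costsize.c): the rescan-ratio estimate
 * used above, as a standalone (hypothetical) helper.  With outer key counts
 * m = (2, 1) and inner key counts n = (3, 2), the join has 2*3 + 1*2 = 8
 * rows and the inner rel has 5, so 3 inner tuples are re-fetched and the
 * ratio is 1 + 3/5 = 1.6.
 */
static double
example_mergejoin_rescan_ratio(double mergejointuples,
							   double inner_path_rows,
							   double inner_rows)
{
	/* re-fetches ~= size of join output minus size of inner relation */
	double		rescannedtuples = mergejointuples - inner_path_rows;

	/* clamp, since the approximation can come out negative */
	if (rescannedtuples < 0)
		rescannedtuples = 0;

	/* per-inner-row charges get inflated by this factor */
	return 1.0 + (rescannedtuples / inner_rows);
}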
3719 
3720 /*
3721  * run mergejoinscansel() with caching
3722  */
3723 static MergeScanSelCache *
3724 cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
3725 {
3726  MergeScanSelCache *cache;
3727  ListCell *lc;
3728  Selectivity leftstartsel,
3729  leftendsel,
3730  rightstartsel,
3731  rightendsel;
3732  MemoryContext oldcontext;
3733 
3734  /* Do we have this result already? */
3735  foreach(lc, rinfo->scansel_cache)
3736  {
3737  cache = (MergeScanSelCache *) lfirst(lc);
3738  if (cache->opfamily == pathkey->pk_opfamily &&
3739  cache->collation == pathkey->pk_eclass->ec_collation &&
3740  cache->strategy == pathkey->pk_strategy &&
3741  cache->nulls_first == pathkey->pk_nulls_first)
3742  return cache;
3743  }
3744 
3745  /* Nope, do the computation */
3746  mergejoinscansel(root,
3747  (Node *) rinfo->clause,
3748  pathkey->pk_opfamily,
3749  pathkey->pk_strategy,
3750  pathkey->pk_nulls_first,
3751  &leftstartsel,
3752  &leftendsel,
3753  &rightstartsel,
3754  &rightendsel);
3755 
3756  /* Cache the result in suitably long-lived workspace */
3757  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
3758 
3759  cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache));
3760  cache->opfamily = pathkey->pk_opfamily;
3761  cache->collation = pathkey->pk_eclass->ec_collation;
3762  cache->strategy = pathkey->pk_strategy;
3763  cache->nulls_first = pathkey->pk_nulls_first;
3764  cache->leftstartsel = leftstartsel;
3765  cache->leftendsel = leftendsel;
3766  cache->rightstartsel = rightstartsel;
3767  cache->rightendsel = rightendsel;
3768 
3769  rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
3770 
3771  MemoryContextSwitchTo(oldcontext);
3772 
3773  return cache;
3774 }
3775 
3776 /*
3777  * initial_cost_hashjoin
3778  * Preliminary estimate of the cost of a hashjoin path.
3779  *
3780  * This must quickly produce lower-bound estimates of the path's startup and
3781  * total costs. If we are unable to eliminate the proposed path from
3782  * consideration using the lower bounds, final_cost_hashjoin will be called
3783  * to obtain the final estimates.
3784  *
3785  * The exact division of labor between this function and final_cost_hashjoin
3786  * is private to them, and represents a tradeoff between speed of the initial
3787  * estimate and getting a tight lower bound. We choose to not examine the
3788  * join quals here (other than by counting the number of hash clauses),
3789  * so we can't do much with CPU costs. We do assume that
3790  * ExecChooseHashTableSize is cheap enough to use here.
3791  *
3792  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
3793  * other data to be used by final_cost_hashjoin
3794  * 'jointype' is the type of join to be performed
3795  * 'hashclauses' is the list of joinclauses to be used as hash clauses
3796  * 'outer_path' is the outer input to the join
3797  * 'inner_path' is the inner input to the join
3798  * 'extra' contains miscellaneous information about the join
3799  * 'parallel_hash' indicates that inner_path is partial and that a shared
3800  * hash table will be built in parallel
3801  */
3802 void
3803 initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
3804  JoinType jointype,
3805  List *hashclauses,
3806  Path *outer_path, Path *inner_path,
3807  JoinPathExtraData *extra,
3808  bool parallel_hash)
3809 {
3810  Cost startup_cost = 0;
3811  Cost run_cost = 0;
3812  double outer_path_rows = outer_path->rows;
3813  double inner_path_rows = inner_path->rows;
3814  double inner_path_rows_total = inner_path_rows;
3815  int num_hashclauses = list_length(hashclauses);
3816  int numbuckets;
3817  int numbatches;
3818  int num_skew_mcvs;
3819  size_t space_allowed; /* unused */
3820 
3821  /* cost of source data */
3822  startup_cost += outer_path->startup_cost;
3823  run_cost += outer_path->total_cost - outer_path->startup_cost;
3824  startup_cost += inner_path->total_cost;
3825 
3826  /*
3827  * Cost of computing hash function: must do it once per input tuple. We
3828  * charge one cpu_operator_cost for each column's hash function. Also,
3829  * tack on one cpu_tuple_cost per inner row, to model the costs of
3830  * inserting the row into the hashtable.
3831  *
3832  * XXX when a hashclause is more complex than a single operator, we really
3833  * should charge the extra eval costs of the left or right side, as
3834  * appropriate, here. This seems more work than it's worth at the moment.
3835  */
3836  startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
3837  * inner_path_rows;
3838  run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
3839 
3840  /*
3841  * If this is a parallel hash build, then the value we have for
3842  * inner_rows_total currently refers only to the rows returned by each
3843  * participant. For shared hash table size estimation, we need the total
3844  * number, so we need to undo the division.
3845  */
3846  if (parallel_hash)
3847  inner_path_rows_total *= get_parallel_divisor(inner_path);
3848 
3849  /*
3850  * Get hash table size that executor would use for inner relation.
3851  *
3852  * XXX for the moment, always assume that skew optimization will be
3853  * performed. As long as SKEW_HASH_MEM_PERCENT is small, it's not worth
3854  * trying to determine that for sure.
3855  *
3856  * XXX at some point it might be interesting to try to account for skew
3857  * optimization in the cost estimate, but for now, we don't.
3858  */
3859  ExecChooseHashTableSize(inner_path_rows_total,
3860  inner_path->pathtarget->width,
3861  true, /* useskew */
3862  parallel_hash, /* try_combined_hash_mem */
3863  outer_path->parallel_workers,
3864  &space_allowed,
3865  &numbuckets,
3866  &numbatches,
3867  &num_skew_mcvs);
3868 
3869  /*
3870  * If inner relation is too big then we will need to "batch" the join,
3871  * which implies writing and reading most of the tuples to disk an extra
3872  * time. Charge seq_page_cost per page, since the I/O should be nice and
3873  * sequential. Writing the inner rel counts as startup cost, all the rest
3874  * as run cost.
3875  */
3876  if (numbatches > 1)
3877  {
3878  double outerpages = page_size(outer_path_rows,
3879  outer_path->pathtarget->width);
3880  double innerpages = page_size(inner_path_rows,
3881  inner_path->pathtarget->width);
3882 
3883  startup_cost += seq_page_cost * innerpages;
3884  run_cost += seq_page_cost * (innerpages + 2 * outerpages);
3885  }
3886 
3887  /* CPU costs left for later */
3888 
3889  /* Public result fields */
3890  workspace->startup_cost = startup_cost;
3891  workspace->total_cost = startup_cost + run_cost;
3892  /* Save private data for final_cost_hashjoin */
3893  workspace->run_cost = run_cost;
3894  workspace->numbuckets = numbuckets;
3895  workspace->numbatches = numbatches;
3896  workspace->inner_rows_total = inner_path_rows_total;
3897 }
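/*
 * Editor's illustration (not part of costsize.c): one way to read the
 * batching I/O charge above.  When numbatches > 1, the inner rel is written
 * to batch files before any output is produced (startup) and read back once
 * during the run, while the outer rel is both written and re-read during the
 * run, hence the 2 * outerpages term.  A hypothetical sketch only.
 */
static void
example_hash_batching_io(double innerpages, double outerpages,
						 double page_cost,
						 double *startup_io, double *run_io)
{
	/* write inner batch files before returning the first row */
	*startup_io = page_cost * innerpages;
	/* re-read inner batches; write and re-read outer batches */
	*run_io = page_cost * (innerpages + 2 * outerpages);
}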
3898 
3899 /*
3900  * final_cost_hashjoin
3901  * Final estimate of the cost and result size of a hashjoin path.
3902  *
3903  * Note: the numbatches estimate is also saved into 'path' for use later
3904  *
3905  * 'path' is already filled in except for the rows and cost fields and
3906  * num_batches
3907  * 'workspace' is the result from initial_cost_hashjoin
3908  * 'extra' contains miscellaneous information about the join
3909  */
3910 void
3911 final_cost_hashjoin(PlannerInfo *root, HashPath *path,
3912  JoinCostWorkspace *workspace,
3913  JoinPathExtraData *extra)
3914 {
3915  Path *outer_path = path->jpath.outerjoinpath;
3916  Path *inner_path = path->jpath.innerjoinpath;
3917  double outer_path_rows = outer_path->rows;
3918  double inner_path_rows = inner_path->rows;
3919  double inner_path_rows_total = workspace->inner_rows_total;
3920  List *hashclauses = path->path_hashclauses;
3921  Cost startup_cost = workspace->startup_cost;
3922  Cost run_cost = workspace->run_cost;
3923  int numbuckets = workspace->numbuckets;
3924  int numbatches = workspace->numbatches;
3925  Cost cpu_per_tuple;
3926  QualCost hash_qual_cost;
3927  QualCost qp_qual_cost;
3928  double hashjointuples;
3929  double virtualbuckets;
3930  Selectivity innerbucketsize;
3931  Selectivity innermcvfreq;
3932  ListCell *hcl;
3933 
3934  /* Mark the path with the correct row estimate */
3935  if (path->jpath.path.param_info)
3936  path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
3937  else
3938  path->jpath.path.rows = path->jpath.path.parent->rows;
3939 
3940  /* For partial paths, scale row estimate. */
3941  if (path->jpath.path.parallel_workers > 0)
3942  {
3943  double parallel_divisor = get_parallel_divisor(&path->jpath.path);
3944 
3945  path->jpath.path.rows =
3946  clamp_row_est(path->jpath.path.rows / parallel_divisor);
3947  }
3948 
3949  /*
3950  * We could include disable_cost in the preliminary estimate, but that
3951  * would amount to optimizing for the case where the join method is
3952  * disabled, which doesn't seem like the way to bet.
3953  */
3954  if (!enable_hashjoin)
3955  startup_cost += disable_cost;
3956 
3957  /* mark the path with estimated # of batches */
3958  path->num_batches = numbatches;
3959 
3960  /* store the total number of tuples (sum of partial row estimates) */
3961  path->inner_rows_total = inner_path_rows_total;
3962 
3963  /* and compute the number of "virtual" buckets in the whole join */
3964  virtualbuckets = (double) numbuckets * (double) numbatches;
3965 
3966  /*
3967  * Determine bucketsize fraction and MCV frequency for the inner relation.
3968  * We use the smallest bucketsize or MCV frequency estimated for any
3969  * individual hashclause; this is undoubtedly conservative.
3970  *
3971  * BUT: if inner relation has been unique-ified, we can assume it's good
3972  * for hashing. This is important both because it's the right answer, and
3973  * because we avoid contaminating the cache with a value that's wrong for
3974  * non-unique-ified paths.
3975  */
3976  if (IsA(inner_path, UniquePath))
3977  {
3978  innerbucketsize = 1.0 / virtualbuckets;
3979  innermcvfreq = 0.0;
3980  }
3981  else
3982  {
3983  innerbucketsize = 1.0;
3984  innermcvfreq = 1.0;
3985  foreach(hcl, hashclauses)
3986  {
3987  RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl);
3988  Selectivity thisbucketsize;
3989  Selectivity thismcvfreq;
3990 
3991  /*
3992  * First we have to figure out which side of the hashjoin clause
3993  * is the inner side.
3994  *
3995  * Since we tend to visit the same clauses over and over when
3996  * planning a large query, we cache the bucket stats estimates in
3997  * the RestrictInfo node to avoid repeated lookups of statistics.
3998  */
3999  if (bms_is_subset(restrictinfo->right_relids,
4000  inner_path->parent->relids))
4001  {
4002  /* righthand side is inner */
4003  thisbucketsize = restrictinfo->right_bucketsize;
4004  if (thisbucketsize < 0)
4005  {
4006  /* not cached yet */
4007  estimate_hash_bucket_stats(root,
4008  get_rightop(restrictinfo->clause),
4009  virtualbuckets,
4010  &restrictinfo->right_mcvfreq,
4011  &restrictinfo->right_bucketsize);
4012  thisbucketsize = restrictinfo->right_bucketsize;
4013  }
4014  thismcvfreq = restrictinfo->right_mcvfreq;
4015  }
4016  else
4017  {
4018  Assert(bms_is_subset(restrictinfo->left_relids,
4019  inner_path->parent->relids));
4020  /* lefthand side is inner */
4021  thisbucketsize = restrictinfo->left_bucketsize;
4022  if (thisbucketsize < 0)
4023  {
4024  /* not cached yet */
4025  estimate_hash_bucket_stats(root,
4026  get_leftop(restrictinfo->clause),
4027  virtualbuckets,
4028  &restrictinfo->left_mcvfreq,
4029  &restrictinfo->left_bucketsize);
4030  thisbucketsize = restrictinfo->left_bucketsize;
4031  }
4032  thismcvfreq = restrictinfo->left_mcvfreq;
4033  }
4034 
4035  if (innerbucketsize > thisbucketsize)
4036  innerbucketsize = thisbucketsize;
4037  if (innermcvfreq > thismcvfreq)
4038  innermcvfreq = thismcvfreq;
4039  }
4040  }
4041 
4042  /*
4043  * If the bucket holding the inner MCV would exceed hash_mem, we don't
4044  * want to hash unless there is really no other alternative, so apply
4045  * disable_cost. (The executor normally copes with excessive memory usage
4046  * by splitting batches, but obviously it cannot separate equal values
4047  * that way, so it will be unable to drive the batch size below hash_mem
4048  * when this is true.)
4049  */
4050  if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
4051  inner_path->pathtarget->width) > get_hash_memory_limit())
4052  startup_cost += disable_cost;
4053 
4054  /*
4055  * Compute cost of the hashquals and qpquals (other restriction clauses)
4056  * separately.
4057  */
4058  cost_qual_eval(&hash_qual_cost, hashclauses, root);
4059  cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
4060  qp_qual_cost.startup -= hash_qual_cost.startup;
4061  qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
4062 
4063  /* CPU costs */
4064 
4065  if (path->jpath.jointype == JOIN_SEMI ||
4066  path->jpath.jointype == JOIN_ANTI ||
4067  extra->inner_unique)
4068  {
4069  double outer_matched_rows;
4070  Selectivity inner_scan_frac;
4071 
4072  /*
4073  * With a SEMI or ANTI join, or if the innerrel is known unique, the
4074  * executor will stop after the first match.
4075  *
4076  * For an outer-rel row that has at least one match, we can expect the
4077  * bucket scan to stop after a fraction 1/(match_count+1) of the
4078  * bucket's rows, if the matches are evenly distributed. Since they
4079  * probably aren't quite evenly distributed, we apply a fuzz factor of
4080  * 2.0 to that fraction. (If we used a larger fuzz factor, we'd have
4081  * to clamp inner_scan_frac to at most 1.0; but since match_count is
4082  * at least 1, no such clamp is needed now.)
4083  */
4084  outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
4085  inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
4086 
4087  startup_cost += hash_qual_cost.startup;
4088  run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
4089  clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;
4090 
4091  /*
4092  * For unmatched outer-rel rows, the picture is quite a lot different.
4093  * In the first place, there is no reason to assume that these rows
4094  * preferentially hit heavily-populated buckets; instead assume they
4095  * are uncorrelated with the inner distribution and so they see an
4096  * average bucket size of inner_path_rows / virtualbuckets. In the
4097  * second place, it seems likely that they will have few if any exact
4098  * hash-code matches and so very few of the tuples in the bucket will
4099  * actually require eval of the hash quals. We don't have any good
4100  * way to estimate how many will, but for the moment assume that the
4101  * effective cost per bucket entry is one-tenth what it is for
4102  * matchable tuples.
4103  */
4104  run_cost += hash_qual_cost.per_tuple *
4105  (outer_path_rows - outer_matched_rows) *
4106  clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
4107 
4108  /* Get # of tuples that will pass the basic join */
4109  if (path->jpath.jointype == JOIN_ANTI)
4110  hashjointuples = outer_path_rows - outer_matched_rows;
4111  else
4112  hashjointuples = outer_matched_rows;
4113  }
4114  else
4115  {
4116  /*
4117  * The number of tuple comparisons needed is the number of outer
4118  * tuples times the typical number of tuples in a hash bucket, which
4119  * is the inner relation size times its bucketsize fraction. At each
4120  * one, we need to evaluate the hashjoin quals. But actually,
4121  * charging the full qual eval cost at each tuple is pessimistic,
4122  * since we don't evaluate the quals unless the hash values match
4123  * exactly. For lack of a better idea, halve the cost estimate to
4124  * allow for that.
4125  */
4126  startup_cost += hash_qual_cost.startup;
4127  run_cost += hash_qual_cost.per_tuple * outer_path_rows *
4128  clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
4129 
4130  /*
4131  * Get approx # tuples passing the hashquals. We use
4132  * approx_tuple_count here because we need an estimate done with
4133  * JOIN_INNER semantics.
4134  */
4135  hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
4136  }
4137 
4138  /*
4139  * For each tuple that gets through the hashjoin proper, we charge
4140  * cpu_tuple_cost plus the cost of evaluating additional restriction
4141  * clauses that are to be applied at the join. (This is pessimistic since
4142  * not all of the quals may get evaluated at each tuple.)
4143  */
4144  startup_cost += qp_qual_cost.startup;
4145  cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
4146  run_cost += cpu_per_tuple * hashjointuples;
4147 
4148  /* tlist eval costs are paid per output row, not per tuple scanned */
4149  startup_cost += path->jpath.path.pathtarget->cost.startup;
4150  run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
4151 
4152  path->jpath.path.startup_cost = startup_cost;
4153  path->jpath.path.total_cost = startup_cost + run_cost;
4154 }
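/*
 * Editor's illustration (not part of costsize.c): the plain (non-SEMI/ANTI)
 * hash-probe CPU charge above, as a hypothetical standalone helper.  Each
 * outer tuple visits one bucket holding about inner_path_rows *
 * innerbucketsize entries, and the qual cost is halved because the quals are
 * only evaluated when the hash values match exactly.
 */
static double
example_hash_probe_cost(double hash_qual_per_tuple,
						double outer_path_rows,
						double inner_path_rows,
						double innerbucketsize)
{
	double		bucket_entries = inner_path_rows * innerbucketsize;

	return hash_qual_per_tuple * outer_path_rows * bucket_entries * 0.5;
}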
4155 
4156 
4157 /*
4158  * cost_subplan
4159  * Figure the costs for a SubPlan (or initplan).
4160  *
4161  * Note: we could dig the subplan's Plan out of the root list, but in practice
4162  * all callers have it handy already, so we make them pass it.
4163  */
4164 void
4165 cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
4166 {
4167  QualCost sp_cost;
4168 
4169  /* Figure any cost for evaluating the testexpr */
4170  cost_qual_eval(&sp_cost,
4171  make_ands_implicit((Expr *) subplan->testexpr),
4172  root);
4173 
4174  if (subplan->useHashTable)
4175  {
4176  /*
4177  * If we are using a hash table for the subquery outputs, then the
4178  * cost of evaluating the query is a one-time cost. We charge one
4179  * cpu_operator_cost per tuple for the work of loading the hashtable,
4180  * too.
4181  */
4182  sp_cost.startup += plan->total_cost +
4183  cpu_operator_cost * plan->plan_rows;
4184 
4185  /*
4186  * The per-tuple costs include the cost of evaluating the lefthand
4187  * expressions, plus the cost of probing the hashtable. We already
4188  * accounted for the lefthand expressions as part of the testexpr, and
4189  * will also have counted one cpu_operator_cost for each comparison
4190  * operator. That is probably too low for the probing cost, but it's
4191  * hard to make a better estimate, so live with it for now.
4192  */
4193  }
4194  else
4195  {
4196  /*
4197  * Otherwise we will be rescanning the subplan output on each
4198  * evaluation. We need to estimate how much of the output we will
4199  * actually need to scan. NOTE: this logic should agree with the
4200  * tuple_fraction estimates used by make_subplan() in
4201  * plan/subselect.c.
4202  */
4203  Cost plan_run_cost = plan->total_cost - plan->startup_cost;
4204 
4205  if (subplan->subLinkType == EXISTS_SUBLINK)
4206  {
4207  /* we only need to fetch 1 tuple; clamp to avoid zero divide */
4208  sp_cost.per_tuple += plan_run_cost / clamp_row_est(plan->plan_rows);
4209  }
4210  else if (subplan->subLinkType == ALL_SUBLINK ||
4211  subplan->subLinkType == ANY_SUBLINK)
4212  {
4213  /* assume we need 50% of the tuples */
4214  sp_cost.per_tuple += 0.50 * plan_run_cost;
4215  /* also charge a cpu_operator_cost per row examined */
4216  sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
4217  }
4218  else
4219  {
4220  /* assume we need all tuples */
4221  sp_cost.per_tuple += plan_run_cost;
4222  }
4223 
4224  /*
4225  * Also account for subplan's startup cost. If the subplan is
4226  * uncorrelated or undirect correlated, AND its topmost node is one
4227  * that materializes its output, assume that we'll only need to pay
4228  * its startup cost once; otherwise assume we pay the startup cost
4229  * every time.
4230  */
4231  if (subplan->parParam == NIL &&
4232  ExecMaterializesOutput(nodeTag(plan)))
4233  sp_cost.startup += plan->startup_cost;
4234  else
4235  sp_cost.per_tuple += plan->startup_cost;
4236  }
4237 
4238  subplan->startup_cost = sp_cost.startup;
4239  subplan->per_call_cost = sp_cost.per_tuple;
4240 }
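/*
 * Editor's illustration (not part of costsize.c): the per-call run-cost
 * fractions used above for non-hashed subplans, as a hypothetical helper.
 * EXISTS needs only one tuple, ANY/ALL are assumed to scan half the output
 * (plus one operator evaluation per row examined), and everything else
 * scans it all.
 */
static double
example_subplan_per_call_cost(double plan_run_cost, double plan_rows,
							  bool is_exists, bool is_any_or_all,
							  double operator_cost)
{
	if (is_exists)
		return plan_run_cost / (plan_rows < 1.0 ? 1.0 : plan_rows);
	if (is_any_or_all)
		return 0.50 * plan_run_cost + 0.50 * plan_rows * operator_cost;
	return plan_run_cost;
}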
4241 
4242 
4243 /*
4244  * cost_rescan
4245  * Given a finished Path, estimate the costs of rescanning it after
4246  * having done so the first time. For some Path types a rescan is
4247  * cheaper than an original scan (if no parameters change), and this
4248  * function embodies knowledge about that. The default is to return
4249  * the same costs stored in the Path. (Note that the cost estimates
4250  * actually stored in Paths are always for first scans.)
4251  *
4252  * This function is not currently intended to model effects such as rescans
4253  * being cheaper due to disk block caching; what we are concerned with is
4254  * plan types wherein the executor caches results explicitly, or doesn't
4255  * redo startup calculations, etc.
4256  */
4257 static void
4258 cost_rescan(PlannerInfo *root, Path *path,
4259  Cost *rescan_startup_cost, /* output parameters */
4260  Cost *rescan_total_cost)
4261 {
4262  switch (path->pathtype)
4263  {
4264  case T_FunctionScan:
4265 
4266  /*
4267  * Currently, nodeFunctionscan.c always executes the function to
4268  * completion before returning any rows, and caches the results in
4269  * a tuplestore. So the function eval cost is all startup cost
4270  * and isn't paid over again on rescans. However, all run costs
4271  * will be paid over again.
4272  */
4273  *rescan_startup_cost = 0;
4274  *rescan_total_cost = path->total_cost - path->startup_cost;
4275  break;
4276  case T_HashJoin:
4277 
4278  /*
4279  * If it's a single-batch join, we don't need to rebuild the hash
4280  * table during a rescan.
4281  */
4282  if (((HashPath *) path)->num_batches == 1)
4283  {
4284  /* Startup cost is exactly the cost of hash table building */
4285  *rescan_startup_cost = 0;
4286  *rescan_total_cost = path->total_cost - path->startup_cost;
4287  }
4288  else
4289  {
4290  /* Otherwise, no special treatment */
4291  *rescan_startup_cost = path->startup_cost;
4292  *rescan_total_cost = path->total_cost;
4293  }
4294  break;
4295  case T_CteScan:
4296  case T_WorkTableScan:
4297  {
4298  /*
4299  * These plan types materialize their final result in a
4300  * tuplestore or tuplesort object. So the rescan cost is only
4301  * cpu_tuple_cost per tuple, unless the result is large enough
4302  * to spill to disk.
4303  */
4304  Cost run_cost = cpu_tuple_cost * path->rows;
4305  double nbytes = relation_byte_size(path->rows,
4306  path->pathtarget->width);
4307  long work_mem_bytes = work_mem * 1024L;
4308 
4309  if (nbytes > work_mem_bytes)
4310  {
4311  /* It will spill, so account for re-read cost */
4312  double npages = ceil(nbytes / BLCKSZ);
4313 
4314  run_cost += seq_page_cost * npages;
4315  }
4316  *rescan_startup_cost = 0;
4317  *rescan_total_cost = run_cost;
4318  }
4319  break;
4320  case T_Material:
4321  case T_Sort:
4322  {
4323  /*
4324  * These plan types not only materialize their results, but do
4325  * not implement qual filtering or projection. So they are
4326  * even cheaper to rescan than the ones above. We charge only
4327  * cpu_operator_cost per tuple. (Note: keep that in sync with
4328  * the run_cost charge in cost_sort, and also see comments in
4329  * cost_material before you change it.)
4330  */
4331  Cost run_cost = cpu_operator_cost * path->rows;
4332  double nbytes = relation_byte_size(path->rows,
4333  path->pathtarget->width);
4334  long work_mem_bytes = work_mem * 1024L;
4335 
4336  if (nbytes > work_mem_bytes)
4337  {
4338  /* It will spill, so account for re-read cost */
4339  double npages = ceil(nbytes / BLCKSZ);
4340 
4341  run_cost += seq_page_cost * npages;
4342  }
4343  *rescan_startup_cost = 0;
4344  *rescan_total_cost = run_cost;
4345  }
4346  break;
4347  case T_Memoize:
4348  /* All the hard work is done by cost_memoize_rescan */
4349  cost_memoize_rescan(root, (MemoizePath *) path,
4350  rescan_startup_cost, rescan_total_cost);
4351  break;
4352  default:
4353  *rescan_startup_cost = path->startup_cost;
4354  *rescan_total_cost = path->total_cost;
4355  break;
4356  }
4357 }
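/*
 * Editor's illustration (not part of costsize.c): the rescan charge shared
 * by the materializing cases above (CteScan/WorkTableScan at cpu_tuple_cost,
 * Material/Sort at cpu_operator_cost), as a hypothetical helper.  costsize.c
 * already includes <math.h>, so ceil() is available.
 */
static double
example_materialized_rescan_cost(double rows, double per_tuple_cost,
								 double nbytes, double work_mem_bytes,
								 double page_cost, double block_size)
{
	double		run_cost = per_tuple_cost * rows;

	/* if the stored result won't fit in work_mem, add the re-read I/O */
	if (nbytes > work_mem_bytes)
		run_cost += page_cost * ceil(nbytes / block_size);

	return run_cost;
}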
4358 
4359 
4360 /*
4361  * cost_qual_eval
4362  * Estimate the CPU costs of evaluating a WHERE clause.
4363  * The input can be either an implicitly-ANDed list of boolean
4364  * expressions, or a list of RestrictInfo nodes. (The latter is
4365  * preferred since it allows caching of the results.)
4366  * The result includes both a one-time (startup) component,
4367  * and a per-evaluation component.
4368  */
4369 void
4370 cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)
4371 {
4372  cost_qual_eval_context context;
4373  ListCell *l;
4374 
4375  context.root = root;
4376  context.total.startup = 0;
4377  context.total.per_tuple = 0;
4378 
4379  /* We don't charge any cost for the implicit ANDing at top level ... */
4380 
4381  foreach(l, quals)
4382  {
4383  Node *qual = (Node *) lfirst(l);
4384 
4385  cost_qual_eval_walker(qual, &context);
4386  }
4387 
4388  *cost = context.total;
4389 }
4390 
4391 /*
4392  * cost_qual_eval_node
4393  * As above, for a single RestrictInfo or expression.
4394  */
4395 void
4396 cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
4397 {
4398  cost_qual_eval_context context;
4399 
4400  context.root = root;
4401  context.total.startup = 0;
4402  context.total.per_tuple = 0;
4403 
4404  cost_qual_eval_walker(qual, &context);
4405 
4406  *cost = context.total;
4407 }
4408 
4409 static bool
4410 cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
4411 {
4412  if (node == NULL)
4413  return false;
4414 
4415  /*
4416  * RestrictInfo nodes contain an eval_cost field reserved for this
4417  * routine's use, so that it's not necessary to evaluate the qual clause's
4418  * cost more than once. If the clause's cost hasn't been computed yet,
4419  * the field's startup value will contain -1.
4420  */
4421  if (IsA(node, RestrictInfo))
4422  {
4423  RestrictInfo *rinfo = (RestrictInfo *) node;
4424 
4425  if (rinfo->eval_cost.startup < 0)
4426  {
4427  cost_qual_eval_context locContext;
4428 
4429  locContext.root = context->root;
4430  locContext.total.startup = 0;
4431  locContext.total.per_tuple = 0;
4432 
4433  /*
4434  * For an OR clause, recurse into the marked-up tree so that we
4435  * set the eval_cost for contained RestrictInfos too.
4436  */
4437  if (rinfo->orclause)
4438  cost_qual_eval_walker((Node *) rinfo->orclause, &locContext);
4439  else
4440  cost_qual_eval_walker((Node *) rinfo->clause, &locContext);
4441 
4442  /*
4443  * If the RestrictInfo is marked pseudoconstant, it will be tested
4444  * only once, so treat its cost as all startup cost.
4445  */
4446  if (rinfo->pseudoconstant)
4447  {
4448  /* count one execution during startup */
4449  locContext.total.startup += locContext.total.per_tuple;
4450  locContext.total.per_tuple = 0;
4451  }
4452  rinfo->eval_cost = locContext.total;
4453  }
4454  context->total.startup += rinfo->eval_cost.startup;
4455  context->total.per_tuple += rinfo->eval_cost.per_tuple;
4456  /* do NOT recurse into children */
4457  return false;
4458  }
4459 
4460  /*
4461  * For each operator or function node in the given tree, we charge the
4462  * estimated execution cost given by pg_proc.procost (remember to multiply
4463  * this by cpu_operator_cost).
4464  *
4465  * Vars and Consts are charged zero, and so are boolean operators (AND,
4466  * OR, NOT). Simplistic, but a lot better than no model at all.
4467  *
4468  * Should we try to account for the possibility of short-circuit
4469  * evaluation of AND/OR? Probably *not*, because that would make the
4470  * results depend on the clause ordering, and we are not in any position
4471  * to expect that the current ordering of the clauses is the one that's
4472  * going to end up being used. The above per-RestrictInfo caching would
4473  * not mix well with trying to re-order clauses anyway.
4474  *
4475  * Another issue that is entirely ignored here is that if a set-returning
4476  * function is below top level in the tree, the functions/operators above
4477  * it will need to be evaluated multiple times. In practical use, such
4478  * cases arise so seldom as to not be worth the added complexity needed;
4479  * moreover, since our rowcount estimates for functions tend to be pretty
4480  * phony, the results would also be pretty phony.
4481  */
4482  if (IsA(node, FuncExpr))
4483  {
4484  add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
4485  &context->total);
4486  }
4487  else if (IsA(node, OpExpr) ||
4488  IsA(node, DistinctExpr) ||
4489  IsA(node, NullIfExpr))
4490  {
4491  /* rely on struct equivalence to treat these all alike */
4492  set_opfuncid((OpExpr *) node);
4493  add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
4494  &context->total);
4495  }
4496  else if (IsA(node, ScalarArrayOpExpr))
4497  {
4498  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
4499  Node *arraynode = (Node *) lsecond(saop->args);
4500  QualCost sacosts;
4501  QualCost hcosts;
4502  int estarraylen = estimate_array_length(arraynode);
4503 
4504  set_sa_opfuncid(saop);
4505  sacosts.startup = sacosts.per_tuple = 0;
4506  add_function_cost(context->root, saop->opfuncid, NULL,
4507  &sacosts);
4508 
4509  if (OidIsValid(saop->hashfuncid))
4510  {
4511  /* Handle costs for hashed ScalarArrayOpExpr */
4512  hcosts.startup = hcosts.per_tuple = 0;
4513 
4514  add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts);
4515  context->total.startup += sacosts.startup + hcosts.startup;
4516 
4517  /* Estimate the cost of building the hashtable. */
4518  context->total.startup += estarraylen * hcosts.per_tuple;
4519 
4520  /*
4521  * XXX should we charge a little bit for sacosts.per_tuple when
4522  * building the table, or is it ok to assume there will be zero
4523  * hash collision?
4524  */
4525 
4526  /*
4527  * Charge for hashtable lookups. Charge a single hash and a
4528  * single comparison.
4529  */
4530  context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple;
4531  }
4532  else
4533  {
4534  /*
4535  * Estimate that the operator will be applied to about half of the
4536  * array elements before the answer is determined.
4537  */
4538  context->total.startup += sacosts.startup;
4539  context->total.per_tuple += sacosts.per_tuple *
4540  estimate_array_length(arraynode) * 0.5;
4541  }
4542  }
4543  else if (IsA(node, Aggref) ||
4544  IsA(node, WindowFunc))
4545  {
4546  /*
4547  * Aggref and WindowFunc nodes are (and should be) treated like Vars,
4548  * ie, zero execution cost in the current model, because they behave
4549  * essentially like Vars at execution. We disregard the costs of
4550  * their input expressions for the same reason. The actual execution
4551  * costs of the aggregate/window functions and their arguments have to
4552  * be factored into plan-node-specific costing of the Agg or WindowAgg
4553  * plan node.
4554  */
4555  return false; /* don't recurse into children */
4556  }
4557  else if (IsA(node, GroupingFunc))
4558  {
4559  /* Treat this as having cost 1 */
4560  context->total.per_tuple += cpu_operator_cost;
4561  return false; /* don't recurse into children */
4562  }
4563  else if (IsA(node, CoerceViaIO))
4564  {
4565  CoerceViaIO *iocoerce = (CoerceViaIO *) node;
4566  Oid iofunc;
4567  Oid typioparam;
4568  bool typisvarlena;
4569 
4570  /* check the result type's input function */
4571  getTypeInputInfo(iocoerce->resulttype,
4572  &iofunc, &typioparam);
4573  add_function_cost(context->root, iofunc, NULL,
4574  &context->total);
4575  /* check the input type's output function */
4576  getTypeOutputInfo(exprType((Node *) iocoerce->arg),
4577  &iofunc, &typisvarlena);
4578  add_function_cost(context->root, iofunc, NULL,
4579  &context->total);
4580  }
4581  else if (IsA(node, ArrayCoerceExpr))
4582  {
4583  ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
4584  QualCost perelemcost;
4585 
4586  cost_qual_eval_node(&perelemcost, (Node *) acoerce->elemexpr,
4587  context->root);
4588  context->total.startup += perelemcost.startup;
4589  if (perelemcost.per_tuple > 0)
4590  context->total.per_tuple += perelemcost.per_tuple *
4591  estimate_array_length((Node *) acoerce->arg);
4592  }
4593  else if (IsA(node, RowCompareExpr))
4594  {
4595  /* Conservatively assume we will check all the columns */
4596  RowCompareExpr *rcexpr = (RowCompareExpr *) node;
4597  ListCell *lc;
4598 
4599  foreach(lc, rcexpr->opnos)
4600  {
4601  Oid opid = lfirst_oid(lc);
4602 
4603  add_function_cost(context->root, get_opcode(opid), NULL,
4604  &context->total);
4605  }
4606  }
4607  else if (IsA(node, MinMaxExpr) ||
4608  IsA(node, XmlExpr) ||
4609  IsA(node, CoerceToDomain) ||
4610  IsA(node, NextValueExpr))
4611  {
4612  /* Treat all these as having cost 1 */
4613  context->total.per_tuple += cpu_operator_cost;
4614  }
4615  else if (IsA(node, CurrentOfExpr))
4616  {
4617  /* Report high cost to prevent selection of anything but TID scan */
4618  context->total.startup += disable_cost;
4619  }
4620  else if (IsA(node, SubLink))
4621  {
4622  /* This routine should not be applied to un-planned expressions */
4623  elog(ERROR, "cannot handle unplanned sub-select");
4624  }
4625  else if (IsA(node, SubPlan))
4626  {
4627  /*
4628  * A subplan node in an expression typically indicates that the
4629  * subplan will be executed on each evaluation, so charge accordingly.
4630  * (Sub-selects that can be executed as InitPlans have already been
4631  * removed from the expression.)
4632  */
4633  SubPlan *subplan = (SubPlan *) node;
4634 
4635  context->total.startup += subplan->startup_cost;
4636  context->total.per_tuple += subplan->per_call_cost;
4637 
4638  /*
4639  * We don't want to recurse into the testexpr, because it was already
4640  * counted in the SubPlan node's costs. So we're done.
4641  */
4642  return false;
4643  }
4644  else if (IsA(node, AlternativeSubPlan))
4645  {
4646  /*
4647  * Arbitrarily use the first alternative plan for costing. (We should
4648  * certainly only include one alternative, and we don't yet have
4649  * enough information to know which one the executor is most likely to
4650  * use.)
4651  */
4652  AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
4653 
4654  return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
4655  context);
4656  }
4657  else if (IsA(node, PlaceHolderVar))
4658  {
4659  /*
4660  * A PlaceHolderVar should be given cost zero when considering general
4661  * expression evaluation costs. The expense of doing the contained
4662  * expression is charged as part of the tlist eval costs of the scan
4663  * or join where the PHV is first computed (see set_rel_width and
4664  * add_placeholders_to_joinrel). If we charged it again here, we'd be
4665  * double-counting the cost for each level of plan that the PHV
4666  * bubbles up through. Hence, return without recursing into the
4667  * phexpr.
4668  */
4669  return false;
4670  }
4671 
4672  /* recurse into children */
4673  return expression_tree_walker(node, cost_qual_eval_walker,
4674  (void *) context);
4675 }
4676 
4677 /*
4678  * get_restriction_qual_cost
4679  * Compute evaluation costs of a baserel's restriction quals, plus any
4680  * movable join quals that have been pushed down to the scan.
4681  * Results are returned into *qpqual_cost.
4682  *
4683  * This is a convenience subroutine that works for seqscans and other cases
4684  * where all the given quals will be evaluated the hard way. It's not useful
4685  * for cost_index(), for example, where the index machinery takes care of
4686  * some of the quals. We assume baserestrictcost was previously set by
4687  * set_baserel_size_estimates().
4688  */
4689 static void
4690 get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
4691  ParamPathInfo *param_info,
4692  QualCost *qpqual_cost)
4693 {
4694  if (param_info)
4695  {
4696  /* Include costs of pushed-down clauses */
4697  cost_qual_eval(qpqual_cost, param_info->ppi_clauses, root);
4698 
4699  qpqual_cost->startup += baserel->baserestrictcost.startup;
4700  qpqual_cost->per_tuple += baserel->baserestrictcost.per_tuple;
4701  }
4702  else
4703  *qpqual_cost = baserel->baserestrictcost;
4704 }
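As an editorial illustration of how the two cost components combine here, consider the minimal stand-alone sketch below. The QualCostSketch struct and the numeric values are hypothetical (three baserestrictinfo operators plus one pushed-down clause at the default cpu_operator_cost of 0.0025), not part of costsize.c.

#include <stdio.h>

typedef struct { double startup; double per_tuple; } QualCostSketch;

int main(void)
{
    QualCostSketch base = {0.0, 0.0075};    /* hypothetical baserestrictcost */
    QualCostSketch pushed = {0.0, 0.0025};  /* hypothetical pushed-down clause cost */
    QualCostSketch qpqual = pushed;         /* start from the pushed-down clauses */

    /* then add the rel's own restriction-clause cost, as the code above does */
    qpqual.startup += base.startup;
    qpqual.per_tuple += base.per_tuple;

    printf("qpqual cost: startup=%.4f per_tuple=%.4f\n",
           qpqual.startup, qpqual.per_tuple);
    return 0;
}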
4705 
4706 
4707 /*
4708  * compute_semi_anti_join_factors
4709  * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join
4710  * can be expected to scan.
4711  *
4712  * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
4713  * inner rows as soon as it finds a match to the current outer row.
4714  * The same happens if we have detected the inner rel is unique.
4715  * We should therefore adjust some of the cost components for this effect.
4716  * This function computes some estimates needed for these adjustments.
4717  * These estimates will be the same regardless of the particular paths used
4718  * for the outer and inner relation, so we compute these once and then pass
4719  * them to all the join cost estimation functions.
4720  *
4721  * Input parameters:
4722  * joinrel: join relation under consideration
4723  * outerrel: outer relation under consideration
4724  * innerrel: inner relation under consideration
4725  * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique
4726  * sjinfo: SpecialJoinInfo relevant to this join
4727  * restrictlist: join quals
4728  * Output parameters:
4729  * *semifactors is filled in (see pathnodes.h for field definitions)
4730  */
4731 void
4732 compute_semi_anti_join_factors(PlannerInfo *root,
4733  RelOptInfo *joinrel,
4734  RelOptInfo *outerrel,
4735  RelOptInfo *innerrel,
4736  JoinType jointype,
4737  SpecialJoinInfo *sjinfo,
4738  List *restrictlist,
4739  SemiAntiJoinFactors *semifactors)
4740 {
4741  Selectivity jselec;
4742  Selectivity nselec;
4743  Selectivity avgmatch;
4744  SpecialJoinInfo norm_sjinfo;
4745  List *joinquals;
4746  ListCell *l;
4747 
4748  /*
4749  * In an ANTI join, we must ignore clauses that are "pushed down", since
4750  * those won't affect the match logic. In a SEMI join, we do not
4751  * distinguish joinquals from "pushed down" quals, so just use the whole
4752  * restrictinfo list. For other outer join types, we should consider only
4753  * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check.
4754  */
4755  if (IS_OUTER_JOIN(jointype))
4756  {
4757  joinquals = NIL;
4758  foreach(l, restrictlist)
4759  {
4760  RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
4761 
4762  if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
4763  joinquals = lappend(joinquals, rinfo);
4764  }
4765  }
4766  else
4767  joinquals = restrictlist;
4768 
4769  /*
4770  * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
4771  */
4772  jselec = clauselist_selectivity(root,
4773  joinquals,
4774  0,
4775  (jointype == JOIN_ANTI) ? JOIN_ANTI : JOIN_SEMI,
4776  sjinfo);
4777 
4778  /*
4779  * Also get the normal inner-join selectivity of the join clauses.
4780  */
4781  norm_sjinfo.type = T_SpecialJoinInfo;
4782  norm_sjinfo.min_lefthand = outerrel->relids;
4783  norm_sjinfo.min_righthand = innerrel->relids;
4784  norm_sjinfo.syn_lefthand = outerrel->relids;
4785  norm_sjinfo.syn_righthand = innerrel->relids;
4786  norm_sjinfo.jointype = JOIN_INNER;
4787  norm_sjinfo.ojrelid = 0;
4788  norm_sjinfo.commute_above_l = NULL;
4789  norm_sjinfo.commute_above_r = NULL;
4790  norm_sjinfo.commute_below = NULL;
4791  /* we don't bother trying to make the remaining fields valid */
4792  norm_sjinfo.lhs_strict = false;
4793  norm_sjinfo.semi_can_btree = false;
4794  norm_sjinfo.semi_can_hash = false;
4795  norm_sjinfo.semi_operators = NIL;
4796  norm_sjinfo.semi_rhs_exprs = NIL;
4797 
4798  nselec = clauselist_selectivity(root,
4799  joinquals,
4800  0,
4801  JOIN_INNER,
4802  &norm_sjinfo);
4803 
4804  /* Avoid leaking a lot of ListCells */
4805  if (IS_OUTER_JOIN(jointype))
4806  list_free(joinquals);
4807 
4808  /*
4809  * jselec can be interpreted as the fraction of outer-rel rows that have
4810  * any matches (this is true for both SEMI and ANTI cases). And nselec is
4811  * the fraction of the Cartesian product that matches. So, the average
4812  * number of matches for each outer-rel row that has at least one match is
4813  * nselec * inner_rows / jselec.
4814  *
4815  * Note: it is correct to use the inner rel's "rows" count here, even
4816  * though we might later be considering a parameterized inner path with
4817  * fewer rows. This is because we have included all the join clauses in
4818  * the selectivity estimate.
4819  */
4820  if (jselec > 0) /* protect against zero divide */
4821  {
4822  avgmatch = nselec * innerrel->rows / jselec;
4823  /* Clamp to sane range */
4824  avgmatch = Max(1.0, avgmatch);
4825  }
4826  else
4827  avgmatch = 1.0;
4828 
4829  semifactors->outer_match_frac = jselec;
4830  semifactors->match_count = avgmatch;
4831 }
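The avgmatch arithmetic described in the comment above can be checked with a small stand-alone sketch; the selectivities and row count below are hypothetical, chosen only to make the numbers easy to follow.

#include <stdio.h>

int main(void)
{
    double inner_rows = 1000.0;
    double jselec = 0.20;   /* fraction of outer rows with at least one match */
    double nselec = 0.005;  /* inner-join selectivity of the same quals */

    /* average matches per outer row that has at least one match */
    double avgmatch = nselec * inner_rows / jselec;
    if (avgmatch < 1.0)
        avgmatch = 1.0;     /* clamp to a sane range, as the code above does */

    printf("outer_match_frac = %.2f, match_count = %.1f\n", jselec, avgmatch);
    return 0;
}

With these inputs, 0.005 * 1000 / 0.20 gives a match_count of 25 for each outer row that matches at all.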
4832 
4833 /*
4834  * has_indexed_join_quals
4835  * Check whether all the joinquals of a nestloop join are used as
4836  * inner index quals.
4837  *
4838  * If the inner path of a SEMI/ANTI join is an indexscan (including bitmap
4839  * indexscan) that uses all the joinquals as indexquals, we can assume that an
4840  * unmatched outer tuple is cheap to process, whereas otherwise it's probably
4841  * expensive.
4842  */
4843 static bool
4844 has_indexed_join_quals(NestPath *path)
4845 {
4846  JoinPath *joinpath = &path->jpath;
4847  Relids joinrelids = joinpath->path.parent->relids;
4848  Path *innerpath = joinpath->innerjoinpath;
4849  List *indexclauses;
4850  bool found_one;
4851  ListCell *lc;
4852 
4853  /* If join still has quals to evaluate, it's not fast */
4854  if (joinpath->joinrestrictinfo != NIL)
4855  return false;
4856  /* Nor if the inner path isn't parameterized at all */
4857  if (innerpath->param_info == NULL)
4858  return false;
4859 
4860  /* Find the indexclauses list for the inner scan */
4861  switch (innerpath->pathtype)
4862  {
4863  case T_IndexScan:
4864  case T_IndexOnlyScan:
4865  indexclauses = ((IndexPath *) innerpath)->indexclauses;
4866  break;
4867  case T_BitmapHeapScan:
4868  {
4869  /* Accept only a simple bitmap scan, not AND/OR cases */
4870  Path *bmqual = ((BitmapHeapPath *) innerpath)->bitmapqual;
4871 
4872  if (IsA(bmqual, IndexPath))
4873  indexclauses = ((IndexPath *) bmqual)->indexclauses;
4874  else
4875  return false;
4876  break;
4877  }
4878  default:
4879 
4880  /*
4881  * If it's not a simple indexscan, it probably doesn't run quickly
4882  * for zero rows out, even if it's a parameterized path using all
4883  * the joinquals.
4884  */
4885  return false;
4886  }
4887 
4888  /*
4889  * Examine the inner path's param clauses. Any that are from the outer
4890  * path must be found in the indexclauses list, either exactly or in an
4891  * equivalent form generated by equivclass.c. Also, we must find at least
4892  * one such clause, else it's a clauseless join which isn't fast.
4893  */
4894  found_one = false;
4895  foreach(lc, innerpath->param_info->ppi_clauses)
4896  {
4897  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
4898 
4899  if (join_clause_is_movable_into(rinfo,
4900  innerpath->parent->relids,
4901  joinrelids))
4902  {
4903  if (!is_redundant_with_indexclauses(rinfo, indexclauses))
4904  return false;
4905  found_one = true;
4906  }
4907  }
4908  return found_one;
4909 }
4910 
4911 
4912 /*
4913  * approx_tuple_count
4914  * Quick-and-dirty estimation of the number of join rows passing
4915  * a set of qual conditions.
4916  *
4917  * The quals can be either an implicitly-ANDed list of boolean expressions,
4918  * or a list of RestrictInfo nodes (typically the latter).
4919  *
4920  * We intentionally compute the selectivity under JOIN_INNER rules, even
4921  * if it's some type of outer join. This is appropriate because we are
4922  * trying to figure out how many tuples pass the initial merge or hash
4923  * join step.
4924  *
4925  * This is quick-and-dirty because we bypass clauselist_selectivity, and
4926  * simply multiply the independent clause selectivities together. Now
4927  * clauselist_selectivity often can't do any better than that anyhow, but
4928  * for some situations (such as range constraints) it is smarter. However,
4929  * we can't effectively cache the results of clauselist_selectivity, whereas
4930  * the individual clause selectivities can be and are cached.
4931  *
4932  * Since we are only using the results to estimate how many potential
4933  * output tuples are generated and passed through qpqual checking, it
4934  * seems OK to live with the approximation.
4935  */
4936 static double
4937 approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
4938 {
4939  double tuples;
4940  double outer_tuples = path->outerjoinpath->rows;
4941  double inner_tuples = path->innerjoinpath->rows;
4942  SpecialJoinInfo sjinfo;
4943  Selectivity selec = 1.0;
4944  ListCell *l;
4945 
4946  /*
4947  * Make up a SpecialJoinInfo for JOIN_INNER semantics.
4948  */
4949  sjinfo.type = T_SpecialJoinInfo;
4950  sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
4951  sjinfo.min_righthand = path->innerjoinpath->parent->relids;
4952  sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
4953  sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
4954  sjinfo.jointype = JOIN_INNER;
4955  sjinfo.ojrelid = 0;
4956  sjinfo.commute_above_l = NULL;
4957  sjinfo.commute_above_r = NULL;
4958  sjinfo.commute_below = NULL;
4959  /* we don't bother trying to make the remaining fields valid */
4960  sjinfo.lhs_strict = false;
4961  sjinfo.semi_can_btree = false;
4962  sjinfo.semi_can_hash = false;
4963  sjinfo.semi_operators = NIL;
4964  sjinfo.semi_rhs_exprs = NIL;
4965 
4966  /* Get the approximate selectivity */
4967  foreach(l, quals)
4968  {
4969  Node *qual = (Node *) lfirst(l);
4970 
4971  /* Note that clause_selectivity will be able to cache its result */
4972  selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
4973  }
4974 
4975  /* Apply it to the input relation sizes */
4976  tuples = selec * outer_tuples * inner_tuples;
4977 
4978  return clamp_row_est(tuples);
4979 }
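A stand-alone sketch of the "multiply the independent clause selectivities" shortcut described above; the input sizes and per-clause selectivities are hypothetical.

#include <stdio.h>

int main(void)
{
    double outer_tuples = 10000.0;
    double inner_tuples = 5000.0;
    double clause_selec[] = {0.1, 0.02};    /* hypothetical per-clause selectivities */
    double selec = 1.0;

    /* multiply the independent clause selectivities together */
    for (int i = 0; i < 2; i++)
        selec *= clause_selec[i];

    /* apply the combined selectivity to the Cartesian product size */
    printf("approx join rows = %.0f\n", selec * outer_tuples * inner_tuples);
    return 0;
}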
4980 
4981 
4982 /*
4983  * set_baserel_size_estimates
4984  * Set the size estimates for the given base relation.
4985  *
4986  * The rel's targetlist and restrictinfo list must have been constructed
4987  * already, and rel->tuples must be set.
4988  *
4989  * We set the following fields of the rel node:
4990  * rows: the estimated number of output tuples (after applying
4991  * restriction clauses).
4992  * width: the estimated average output tuple width in bytes.
4993  * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
4994  */
4995 void
4996 set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
4997 {
4998  double nrows;
4999 
5000  /* Should only be applied to base relations */
5001  Assert(rel->relid > 0);
5002 
5003  nrows = rel->tuples *
5004  clauselist_selectivity(root,
5005  rel->baserestrictinfo,
5006  0,
5007  JOIN_INNER,
5008  NULL);
5009 
5010  rel->rows = clamp_row_est(nrows);
5011 
5012  cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
5013 
5014  set_rel_width(root, rel);
5015 }
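The row estimate is simply raw tuple count times restriction selectivity, clamped to a sane value. Below is a minimal sketch of that arithmetic; clamp_rows is a simplified stand-in for clamp_row_est (it only rounds and forces at least one row), and the input numbers are hypothetical.

#include <math.h>
#include <stdio.h>

/* simplified stand-in for clamp_row_est(): round and force at least one row */
static double clamp_rows(double nrows)
{
    return (nrows <= 1.0) ? 1.0 : rint(nrows);
}

int main(void)
{
    double tuples = 250000.0;   /* hypothetical rel->tuples */
    double selec = 0.0133;      /* hypothetical restriction-clause selectivity */

    printf("estimated rows = %.0f\n", clamp_rows(tuples * selec));
    return 0;
}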
5016 
5017 /*
5018  * get_parameterized_baserel_size
5019  * Make a size estimate for a parameterized scan of a base relation.
5020  *
5021  * 'param_clauses' lists the additional join clauses to be used.
5022  *
5023  * set_baserel_size_estimates must have been applied already.
5024  */
5025 double
5026 get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
5027  List *param_clauses)
5028 {
5029  List *allclauses;
5030  double nrows;
5031 
5032  /*
5033  * Estimate the number of rows returned by the parameterized scan, knowing
5034  * that it will apply all the extra join clauses as well as the rel's own
5035  * restriction clauses. Note that we force the clauses to be treated as
5036  * non-join clauses during selectivity estimation.
5037  */
5038  allclauses = list_concat_copy(param_clauses, rel->baserestrictinfo);
5039  nrows = rel->tuples *
5040  clauselist_selectivity(root,
5041  allclauses,
5042  rel->relid, /* do not use 0! */
5043  JOIN_INNER,
5044  NULL);
5045  nrows = clamp_row_est(nrows);
5046  /* For safety, make sure result is not more than the base estimate */
5047  if (nrows > rel->rows)
5048  nrows = rel->rows;
5049  return nrows;
5050 }
5051 
5052 /*
5053  * set_joinrel_size_estimates
5054  * Set the size estimates for the given join relation.
5055  *
5056  * The rel's targetlist must have been constructed already, and a
5057  * restriction clause list that matches the given component rels must
5058  * be provided.
5059  *
5060  * Since there is more than one way to make a joinrel for more than two
5061  * base relations, the results we get here could depend on which component
5062  * rel pair is provided. In theory we should get the same answers no matter
5063  * which pair is provided; in practice, since the selectivity estimation
5064  * routines don't handle all cases equally well, we might not. But there's
5065  * not much to be done about it. (Would it make sense to repeat the
5066  * calculations for each pair of input rels that's encountered, and somehow
5067  * average the results? Probably way more trouble than it's worth, and
5068  * anyway we must keep the rowcount estimate the same for all paths for the
5069  * joinrel.)
5070  *
5071  * We set only the rows field here. The reltarget field was already set by
5072  * build_joinrel_tlist, and baserestrictcost is not used for join rels.
5073  */
5074 void
5075 set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
5076  RelOptInfo *outer_rel,
5077  RelOptInfo *inner_rel,
5078  SpecialJoinInfo *sjinfo,
5079  List *restrictlist)
5080 {
5081  rel->rows = calc_joinrel_size_estimate(root,
5082  rel,
5083  outer_rel,
5084  inner_rel,
5085  outer_rel->rows,
5086  inner_rel->rows,
5087  sjinfo,
5088  restrictlist);
5089 }
5090 
5091 /*
5092  * get_parameterized_joinrel_size
5093  * Make a size estimate for a parameterized scan of a join relation.
5094  *
5095  * 'rel' is the joinrel under consideration.
5096  * 'outer_path', 'inner_path' are (probably also parameterized) Paths that
5097  * produce the relations being joined.
5098  * 'sjinfo' is any SpecialJoinInfo relevant to this join.
5099  * 'restrict_clauses' lists the join clauses that need to be applied at the
5100  * join node (including any movable clauses that were moved down to this join,
5101  * and not including any movable clauses that were pushed down into the
5102  * child paths).
5103  *
5104  * set_joinrel_size_estimates must have been applied already.
5105  */
5106 double
5107 get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
5108  Path *outer_path,
5109  Path *inner_path,
5110  SpecialJoinInfo *sjinfo,
5111  List *restrict_clauses)
5112 {
5113  double nrows;
5114 
5115  /*
5116  * Estimate the number of rows returned by the parameterized join as the
5117  * sizes of the input paths times the selectivity of the clauses that have
5118  * ended up at this join node.
5119  *
5120  * As with set_joinrel_size_estimates, the rowcount estimate could depend
5121  * on the pair of input paths provided, though ideally we'd get the same
5122  * estimate for any pair with the same parameterization.
5123  */
5124  nrows = calc_joinrel_size_estimate(root,
5125  rel,
5126  outer_path->parent,
5127  inner_path->parent,
5128  outer_path->rows,
5129  inner_path->rows,
5130  sjinfo,
5131  restrict_clauses);
5132  /* For safety, make sure result is not more than the base estimate */
5133  if (nrows > rel->rows)
5134  nrows = rel->rows;
5135  return nrows;
5136 }
5137 
5138 /*
5139  * calc_joinrel_size_estimate
5140  * Workhorse for set_joinrel_size_estimates and
5141  * get_parameterized_joinrel_size.
5142  *
5143  * outer_rel/inner_rel are the relations being joined, but they should be
5144  * assumed to have sizes outer_rows/inner_rows; those numbers might be less
5145  * than what rel->rows says, when we are considering parameterized paths.
5146  */
5147 static double
5148 calc_joinrel_size_estimate(PlannerInfo *root,
5149  RelOptInfo *joinrel,
5150  RelOptInfo *outer_rel,
5151  RelOptInfo *inner_rel,
5152  double outer_rows,
5153  double inner_rows,
5154  SpecialJoinInfo *sjinfo,
5155  List *restrictlist)
5156 {
5157  JoinType jointype = sjinfo->jointype;
5158  Selectivity fkselec;
5159  Selectivity jselec;
5160  Selectivity pselec;
5161  double nrows;
5162 
5163  /*
5164  * Compute joinclause selectivity. Note that we are only considering
5165  * clauses that become restriction clauses at this join level; we are not
5166  * double-counting them because they were not considered in estimating the
5167  * sizes of the component rels.
5168  *
5169  * First, see whether any of the joinclauses can be matched to known FK
5170  * constraints. If so, drop those clauses from the restrictlist, and
5171  * instead estimate their selectivity using FK semantics. (We do this
5172  * without regard to whether said clauses are local or "pushed down".
5173  * Probably, an FK-matching clause could never be seen as pushed down at
5174  * an outer join, since it would be strict and hence would be grounds for
5175  * join strength reduction.) fkselec gets the net selectivity for
5176  * FK-matching clauses, or 1.0 if there are none.
5177  */
5178  fkselec = get_foreign_key_join_selectivity(root,
5179  outer_rel->relids,
5180  inner_rel->relids,
5181  sjinfo,
5182  &restrictlist);
5183 
5184  /*
5185  * For an outer join, we have to distinguish the selectivity of the join's
5186  * own clauses (JOIN/ON conditions) from any clauses that were "pushed
5187  * down". For inner joins we just count them all as joinclauses.
5188  */
5189  if (IS_OUTER_JOIN(jointype))
5190  {
5191  List *joinquals = NIL;
5192  List *pushedquals = NIL;
5193  ListCell *l;
5194 
5195  /* Grovel through the clauses to separate into two lists */
5196  foreach(l, restrictlist)
5197  {
5198  RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
5199 
5200  if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
5201  pushedquals = lappend(pushedquals, rinfo);
5202  else
5203  joinquals = lappend(joinquals, rinfo);
5204  }
5205 
5206  /* Get the separate selectivities */
5207  jselec = clauselist_selectivity(root,
5208  joinquals,
5209  0,
5210  jointype,
5211  sjinfo);
5212  pselec = clauselist_selectivity(root,
5213  pushedquals,
5214  0,
5215  jointype,
5216  sjinfo);
5217 
5218  /* Avoid leaking a lot of ListCells */
5219  list_free(joinquals);
5220  list_free(pushedquals);
5221  }
5222  else
5223  {
5224  jselec = clauselist_selectivity(root,
5225  restrictlist,
5226  0,
5227  jointype,
5228  sjinfo);
5229  pselec = 0.0; /* not used, keep compiler quiet */
5230  }
5231 
5232  /*
5233  * Basically, we multiply size of Cartesian product by selectivity.
5234  *
5235  * If we are doing an outer join, take that into account: the joinqual
5236  * selectivity has to be clamped using the knowledge that the output must
5237  * be at least as large as the non-nullable input. However, any
5238  * pushed-down quals are applied after the outer join, so their
5239  * selectivity applies fully.
5240  *
5241  * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
5242  * of LHS rows that have matches, and we apply that straightforwardly.
5243  */
5244  switch (jointype)
5245  {
5246  case JOIN_INNER:
5247  nrows = outer_rows * inner_rows * fkselec * jselec;
5248  /* pselec not used */
5249  break;
5250  case JOIN_LEFT:
5251  nrows = outer_rows * inner_rows * fkselec * jselec;
5252  if (nrows < outer_rows)
5253  nrows = outer_rows;
5254  nrows *= pselec;
5255  break;
5256  case JOIN_FULL:
5257  nrows = outer_rows * inner_rows * fkselec * jselec;
5258  if (nrows < outer_rows)
5259  nrows = outer_rows;
5260  if (nrows < inner_rows)
5261  nrows = inner_rows;
5262  nrows *= pselec;
5263  break;
5264  case JOIN_SEMI:
5265  nrows = outer_rows * fkselec * jselec;
5266  /* pselec not used */
5267  break;
5268  case JOIN_ANTI:
5269  nrows = outer_rows * (1.0 - fkselec * jselec);
5270  nrows *= pselec;
5271  break;
5272  default:
5273  /* other values not expected here */
5274  elog(ERROR, "unrecognized join type: %d", (int) jointype);
5275  nrows = 0; /* keep compiler quiet */
5276  break;
5277  }
5278 
5279  return clamp_row_est(nrows);
5280 }
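A worked example of the outer-join clamping in the switch above, as a stand-alone sketch for the JOIN_LEFT case; all selectivities and row counts are hypothetical.

#include <stdio.h>

int main(void)
{
    double outer_rows = 1000.0;
    double inner_rows = 200.0;
    double fkselec = 1.0;   /* no FK-matched clauses in this example */
    double jselec = 0.002;  /* selectivity of the JOIN/ON clauses */
    double pselec = 0.5;    /* selectivity of pushed-down (WHERE) clauses */

    double nrows = outer_rows * inner_rows * fkselec * jselec;  /* 400 */
    if (nrows < outer_rows)
        nrows = outer_rows; /* a LEFT join emits at least one row per outer row */
    nrows *= pselec;        /* pushed-down quals filter after the outer join */

    printf("estimated LEFT join rows = %.0f\n", nrows);
    return 0;
}

Here the joinqual product (400) is clamped up to the outer size (1000) before the pushed-down selectivity is applied, giving 500.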
5281 
5282 /*
5283  * get_foreign_key_join_selectivity
5284  * Estimate join selectivity for foreign-key-related clauses.
5285  *
5286  * Remove any clauses that can be matched to FK constraints from *restrictlist,
5287  * and return a substitute estimate of their selectivity. 1.0 is returned
5288  * when there are no such clauses.
5289  *
5290  * The reason for treating such clauses specially is that we can get better
5291  * estimates this way than by relying on clauselist_selectivity(), especially
5292  * for multi-column FKs where that function's assumption that the clauses are
5293  * independent falls down badly. But even with single-column FKs, we may be
5294  * able to get a better answer when the pg_statistic stats are missing or out
5295  * of date.
5296  */
5297 static Selectivity
5298 get_foreign_key_join_selectivity(PlannerInfo *root,
5299  Relids outer_relids,
5300  Relids inner_relids,
5301  SpecialJoinInfo *sjinfo,
5302  List **restrictlist)
5303 {
5304  Selectivity fkselec = 1.0;
5305  JoinType jointype = sjinfo->jointype;
5306  List *worklist = *restrictlist;
5307  ListCell *lc;
5308 
5309  /* Consider each FK constraint that is known to match the query */
5310  foreach(lc, root->fkey_list)
5311  {
5312  ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
5313  bool ref_is_outer;
5314  List *removedlist;
5315  ListCell *cell;
5316 
5317  /*
5318  * This FK is not relevant unless it connects a baserel on one side of
5319  * this join to a baserel on the other side.
5320  */
5321  if (bms_is_member(fkinfo->con_relid, outer_relids) &&
5322  bms_is_member(fkinfo->ref_relid, inner_relids))
5323  ref_is_outer = false;
5324  else if (bms_is_member(fkinfo->ref_relid, outer_relids) &&
5325  bms_is_member(fkinfo->con_relid, inner_relids))
5326  ref_is_outer = true;
5327  else
5328  continue;
5329 
5330  /*
5331  * If we're dealing with a semi/anti join, and the FK's referenced
5332  * relation is on the outside, then knowledge of the FK doesn't help
5333  * us figure out what we need to know (which is the fraction of outer
5334  * rows that have matches). On the other hand, if the referenced rel
5335  * is on the inside, then all outer rows must have matches in the
5336  * referenced table (ignoring nulls). But any restriction or join
5337  * clauses that filter that table will reduce the fraction of matches.
5338  * We can account for restriction clauses, but it's too hard to guess
5339  * how many table rows would get through a join that's inside the RHS.
5340  * Hence, if either case applies, punt and ignore the FK.
5341  */
5342  if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
5343  (ref_is_outer || bms_membership(inner_relids) != BMS_SINGLETON))
5344  continue;
5345 
5346  /*
5347  * Modify the restrictlist by removing clauses that match the FK (and
5348  * putting them into removedlist instead). It seems unsafe to modify
5349  * the originally-passed List structure, so we make a shallow copy the
5350  * first time through.
5351  */
5352  if (worklist == *restrictlist)
5353  worklist = list_copy(worklist);
5354 
5355  removedlist = NIL;
5356  foreach(cell, worklist)
5357  {
5358  RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
5359  bool remove_it = false;
5360  int i;
5361 
5362  /* Drop this clause if it matches any column of the FK */
5363  for (i = 0; i < fkinfo->nkeys; i++)
5364  {
5365  if (rinfo->parent_ec)
5366  {
5367  /*
5368  * EC-derived clauses can only match by EC. It is okay to
5369  * consider any clause derived from the same EC as
5370  * matching the FK: even if equivclass.c chose to generate
5371  * a clause equating some other pair of Vars, it could
5372  * have generated one equating the FK's Vars. So for
5373  * purposes of estimation, we can act as though it did so.
5374  *
5375  * Note: checking parent_ec is a bit of a cheat because
5376  * there are EC-derived clauses that don't have parent_ec
5377  * set; but such clauses must compare expressions that
5378  * aren't just Vars, so they cannot match the FK anyway.
5379  */
5380  if (fkinfo->eclass[i] == rinfo->parent_ec)
5381  {
5382  remove_it = true;
5383  break;
5384  }
5385  }
5386  else
5387  {
5388  /*
5389  * Otherwise, see if rinfo was previously matched to FK as
5390  * a "loose" clause.
5391  */
5392  if (list_member_ptr(fkinfo->rinfos[i], rinfo))
5393  {
5394  remove_it = true;
5395  break;
5396  }
5397  }
5398  }
5399  if (remove_it)
5400  {
5401  worklist = foreach_delete_current(worklist, cell);
5402  removedlist = lappend(removedlist, rinfo);
5403  }
5404  }
5405 
5406  /*
5407  * If we failed to remove all the matching clauses we expected to
5408  * find, chicken out and ignore this FK; applying its selectivity
5409  * might result in double-counting. Put any clauses we did manage to
5410  * remove back into the worklist.
5411  *
5412  * Since the matching clauses are known not outerjoin-delayed, they
5413  * would normally have appeared in the initial joinclause list. If we
5414  * didn't find them, there are two possibilities:
5415  *
5416  * 1. If the FK match is based on an EC that is ec_has_const, it won't
5417  * have generated any join clauses at all. We discount such ECs while
5418  * checking to see if we have "all" the clauses. (Below, we'll adjust
5419  * the selectivity estimate for this case.)
5420  *
5421  * 2. The clauses were matched to some other FK in a previous
5422  * iteration of this loop, and thus removed from worklist. (A likely
5423  * case is that two FKs are matched to the same EC; there will be only
5424  * one EC-derived clause in the initial list, so the first FK will
5425  * consume it.) Applying both FKs' selectivity independently risks
5426  * underestimating the join size; in particular, this would undo one
5427  * of the main things that ECs were invented for, namely to avoid
5428  * double-counting the selectivity of redundant equality conditions.
5429  * Later we might think of a reasonable way to combine the estimates,
5430  * but for now, just punt, since this is a fairly uncommon situation.
5431  */
5432  if (removedlist == NIL ||
5433  list_length(removedlist) !=
5434  (fkinfo->nmatched_ec - fkinfo->nconst_ec + fkinfo->nmatched_ri))
5435  {
5436  worklist = list_concat(worklist, removedlist);
5437  continue;
5438  }
5439 
5440  /*
5441  * Finally we get to the payoff: estimate selectivity using the
5442  * knowledge that each referencing row will match exactly one row in
5443  * the referenced table.
5444  *
5445  * XXX that's not true in the presence of nulls in the referencing
5446  * column(s), so in principle we should derate the estimate for those.
5447  * However (1) if there are any strict restriction clauses for the
5448  * referencing column(s) elsewhere in the query, derating here would
5449  * be double-counting the null fraction, and (2) it's not very clear
5450  * how to combine null fractions for multiple referencing columns. So
5451  * we do nothing for now about correcting for nulls.
5452  *
5453  * XXX another point here is that if either side of an FK constraint
5454  * is an inheritance parent, we estimate as though the constraint
5455  * covers all its children as well. This is not an unreasonable
5456  * assumption for a referencing table, ie the user probably applied
5457  * identical constraints to all child tables (though perhaps we ought
5458  * to check that). But it's not possible to have done that for a
5459  * referenced table. Fortunately, precisely because that doesn't
5460  * work, it is uncommon in practice to have an FK referencing a parent
5461  * table. So, at least for now, disregard inheritance here.
5462  */
5463  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
5464  {
5465  /*
5466  * For JOIN_SEMI and JOIN_ANTI, we only get here when the FK's
5467  * referenced table is exactly the inside of the join. The join
5468  * selectivity is defined as the fraction of LHS rows that have
5469  * matches. The FK implies that every LHS row has a match *in the
5470  * referenced table*; but any restriction clauses on it will
5471  * reduce the number of matches. Hence we take the join
5472  * selectivity as equal to the selectivity of the table's
5473  * restriction clauses, which is rows / tuples; but we must guard
5474  * against tuples == 0.
5475  */
5476  RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
5477  double ref_tuples = Max(ref_rel->tuples, 1.0);
5478 
5479  fkselec *= ref_rel->rows / ref_tuples;
5480  }
5481  else
5482  {
5483  /*
5484  * Otherwise, selectivity is exactly 1/referenced-table-size; but
5485  * guard against tuples == 0. Note we should use the raw table
5486  * tuple count, not any estimate of its filtered or joined size.
5487  */
5488  RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
5489  double ref_tuples = Max(ref_rel->tuples, 1.0);
5490 
5491  fkselec *= 1.0 / ref_tuples;
5492  }
5493 
5494  /*
5495  * If any of the FK columns participated in ec_has_const ECs, then
5496  * equivclass.c will have generated "var = const" restrictions for
5497  * each side of the join, thus reducing the sizes of both input
5498  * relations. Taking the fkselec at face value would amount to
5499  * double-counting the selectivity of the constant restriction for the
5500  * referencing Var. Hence, look for the restriction clause(s) that
5501  * were applied to the referencing Var(s), and divide out their
5502  * selectivity to correct for this.
5503  */
5504  if (fkinfo->nconst_ec > 0)
5505  {
5506  for (int i = 0; i < fkinfo->nkeys; i++)
5507  {
5508  EquivalenceClass *ec = fkinfo->eclass[i];
5509 
5510  if (ec && ec->ec_has_const)
5511  {
5512  EquivalenceMember *em = fkinfo->fk_eclass_member[i];
5513  RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec,
5514  em);
5515 
5516  if (rinfo)
5517  {
5518  Selectivity s0;
5519 
5520  s0 = clause_selectivity(root,
5521  (Node *) rinfo,
5522  0,
5523  jointype,
5524  sjinfo);
5525  if (s0 > 0)
5526  fkselec /= s0;
5527  }
5528  }
5529  }
5530  }
5531  }
5532 
5533  *restrictlist = worklist;
5534  CLAMP_PROBABILITY(fkselec);
5535  return fkselec;
5536 }
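The two selectivity formulas used above (1/referenced-table-size for plain joins, rows/tuples for SEMI/ANTI with the referenced rel inside) are easy to sanity-check in isolation; the table sizes below are hypothetical.

#include <stdio.h>

int main(void)
{
    double ref_tuples = 50000.0;    /* raw tuple count of the referenced table */
    double ref_rows = 1250.0;       /* its row estimate after restriction clauses */

    /* plain join: each referencing row matches exactly one referenced row */
    double fkselec_join = 1.0 / ref_tuples;

    /* SEMI/ANTI with the referenced rel on the inside: fraction of referenced
     * rows that survive its restriction clauses */
    double fkselec_semi = ref_rows / ref_tuples;

    printf("join fkselec = %g, semi/anti fkselec = %g\n",
           fkselec_join, fkselec_semi);
    return 0;
}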
5537 
5538 /*
5539  * set_subquery_size_estimates
5540  * Set the size estimates for a base relation that is a subquery.
5541  *
5542  * The rel's targetlist and restrictinfo list must have been constructed
5543  * already, and the Paths for the subquery must have been completed.
5544  * We look at the subquery's PlannerInfo to extract data.
5545  *
5546  * We set the same fields as set_baserel_size_estimates.
5547  */
5548 void
5549 set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5550 {
5551  PlannerInfo *subroot = rel->subroot;
5552  RelOptInfo *sub_final_rel;
5553  ListCell *lc;
5554 
5555  /* Should only be applied to base relations that are subqueries */
5556  Assert(rel->relid > 0);
5557  Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_SUBQUERY);
5558 
5559  /*
5560  * Copy raw number of output rows from subquery. All of its paths should
5561  * have the same output rowcount, so just look at cheapest-total.
5562  */
5563  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
5564  rel->tuples = sub_final_rel->cheapest_total_path->rows;
5565 
5566  /*
5567  * Compute per-output-column width estimates by examining the subquery's
5568  * targetlist. For any output that is a plain Var, get the width estimate
5569  * that was made while planning the subquery. Otherwise, we leave it to
5570  * set_rel_width to fill in a datatype-based default estimate.
5571  */
5572  foreach(lc, subroot->parse->targetList)
5573  {
5574  TargetEntry *te = lfirst_node(TargetEntry, lc);
5575  Node *texpr = (Node *) te->expr;
5576  int32 item_width = 0;
5577 
5578  /* junk columns aren't visible to upper query */
5579  if (te->resjunk)
5580  continue;
5581 
5582  /*
5583  * The subquery could be an expansion of a view that's had columns
5584  * added to it since the current query was parsed, so that there are
5585  * non-junk tlist columns in it that don't correspond to any column
5586  * visible at our query level. Ignore such columns.
5587  */
5588  if (te->resno < rel->min_attr || te->resno > rel->max_attr)
5589  continue;
5590 
5591  /*
5592  * XXX This currently doesn't work for subqueries containing set
5593  * operations, because the Vars in their tlists are bogus references
5594  * to the first leaf subquery, which wouldn't give the right answer
5595  * even if we could still get to its PlannerInfo.
5596  *
5597  * Also, the subquery could be an appendrel for which all branches are
5598  * known empty due to constraint exclusion, in which case
5599  * set_append_rel_pathlist will have left the attr_widths set to zero.
5600  *
5601  * In either case, we just leave the width estimate zero until
5602  * set_rel_width fixes it.
5603  */
5604  if (IsA(texpr, Var) &&
5605  subroot->parse->setOperations == NULL)
5606  {
5607  Var *var = (Var *) texpr;
5608  RelOptInfo *subrel = find_base_rel(subroot, var->varno);
5609 
5610  item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
5611  }
5612  rel->attr_widths[te->resno - rel->min_attr] = item_width;
5613  }
5614 
5615  /* Now estimate number of output rows, etc */
5616  set_baserel_size_estimates(root, rel);
5617 }
5618 
5619 /*
5620  * set_function_size_estimates
5621  * Set the size estimates for a base relation that is a function call.
5622  *
5623  * The rel's targetlist and restrictinfo list must have been constructed
5624  * already.
5625  *
5626  * We set the same fields as set_baserel_size_estimates.
5627  */
5628 void
5629 set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5630 {
5631  RangeTblEntry *rte;
5632  ListCell *lc;
5633 
5634  /* Should only be applied to base relations that are functions */
5635  Assert(rel->relid > 0);
5636  rte = planner_rt_fetch(rel->relid, root);
5637  Assert(rte->rtekind == RTE_FUNCTION);
5638 
5639  /*
5640  * Estimate number of rows the functions will return. The rowcount of the
5641  * node is that of the largest function result.
5642  */
5643  rel->tuples = 0;
5644  foreach(lc, rte->functions)
5645  {
5646  RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
5647  double ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
5648 
5649  if (ntup > rel->tuples)
5650  rel->tuples = ntup;
5651  }
5652 
5653  /* Now estimate number of output rows, etc */
5654  set_baserel_size_estimates(root, rel);
5655 }
5656 
5657 /*
5658  * set_tablefunc_size_estimates
5659  * Set the size estimates for a base relation that is a table function call.
5660  *
5661  * The rel's targetlist and restrictinfo list must have been constructed
5662  * already.
5663  *
5664  * We set the same fields as set_baserel_size_estimates.
5665  */
5666 void
5667 set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5668 {
5669  /* Should only be applied to base relations that are functions */
5670  Assert(rel->relid > 0);
5671  Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_TABLEFUNC);
5672 
5673  rel->tuples = 100;
5674 
5675  /* Now estimate number of output rows, etc */
5676  set_baserel_size_estimates(root, rel);
5677 }
5678 
5679 /*
5680  * set_values_size_estimates
5681  * Set the size estimates for a base relation that is a values list.
5682  *
5683  * The rel's targetlist and restrictinfo list must have been constructed
5684  * already.
5685  *
5686  * We set the same fields as set_baserel_size_estimates.
5687  */
5688 void
5689 set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5690 {
5691  RangeTblEntry *rte;
5692 
5693  /* Should only be applied to base relations that are values lists */
5694  Assert(rel->relid > 0);
5695  rte = planner_rt_fetch(rel->relid, root);
5696  Assert(rte->rtekind == RTE_VALUES);
5697 
5698  /*
5699  * Estimate number of rows the values list will return. We know this
5700  * precisely based on the list length (well, barring set-returning
5701  * functions in list items, but that's a refinement not catered for
5702  * anywhere else either).
5703  */
5704  rel->tuples = list_length(rte->values_lists);
5705 
5706  /* Now estimate number of output rows, etc */
5707  set_baserel_size_estimates(root, rel);
5708 }
5709 
5710 /*
5711  * set_cte_size_estimates
5712  * Set the size estimates for a base relation that is a CTE reference.
5713  *
5714  * The rel's targetlist and restrictinfo list must have been constructed
5715  * already, and we need an estimate of the number of rows returned by the CTE
5716  * (if a regular CTE) or the non-recursive term (if a self-reference).
5717  *
5718  * We set the same fields as set_baserel_size_estimates.
5719  */
5720 void
5721 set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, double cte_rows)
5722 {
5723  RangeTblEntry *rte;
5724 
5725  /* Should only be applied to base relations that are CTE references */
5726  Assert(rel->relid > 0);
5727  rte = planner_rt_fetch(rel->relid, root);
5728  Assert(rte->rtekind == RTE_CTE);
5729 
5730  if (rte->self_reference)
5731  {
5732  /*
5733  * In a self-reference, we assume the average worktable size is a
5734  * multiple of the nonrecursive term's size. The best multiplier will
5735  * vary depending on query "fan-out", so make its value adjustable.
5736  */
5737  rel->tuples = clamp_row_est(recursive_worktable_factor * cte_rows);
5738  }
5739  else
5740  {
5741  /* Otherwise just believe the CTE's rowcount estimate */
5742  rel->tuples = cte_rows;
5743  }
5744 
5745  /* Now estimate number of output rows, etc */
5746  set_baserel_size_estimates(root, rel);
5747 }
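For the self-reference branch above, the worktable estimate is just the non-recursive term's estimate times a configurable multiplier. A minimal sketch, assuming a non-recursive term of 100 rows and a recursive_worktable_factor setting of 10 (both values hypothetical here):

#include <math.h>
#include <stdio.h>

int main(void)
{
    double cte_rows = 100.0;            /* estimate for the non-recursive term */
    double worktable_factor = 10.0;     /* assumed recursive_worktable_factor setting */

    /* self-referencing CTE: assume the average worktable size is a multiple
     * of the non-recursive term's size */
    printf("worktable tuples = %.0f\n", rint(worktable_factor * cte_rows));
    return 0;
}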
5748 
5749 /*
5750  * set_namedtuplestore_size_estimates
5751  * Set the size estimates for a base relation that is a tuplestore reference.
5752  *
5753  * The rel's targetlist and restrictinfo list must have been constructed
5754  * already.
5755  *
5756  * We set the same fields as set_baserel_size_estimates.
5757  */
5758 void
5759 set_namedtuplestore_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5760 {
5761  RangeTblEntry *rte;
5762 
5763  /* Should only be applied to base relations that are tuplestore references */
5764  Assert(rel->relid > 0);
5765  rte = planner_rt_fetch(rel->relid, root);
5766  Assert(rte->rtekind == RTE_NAMEDTUPLESTORE);
5767 
5768  /*
5769  * Use the estimate provided by the code which is generating the named
5770  * tuplestore. In some cases, the actual number might be available; in
5771  * others the same plan will be re-used, so a "typical" value might be
5772  * estimated and used.
5773  */
5774  rel->tuples = rte->enrtuples;
5775  if (rel->tuples < 0)
5776  rel->tuples = 1000;
5777 
5778  /* Now estimate number of output rows, etc */
5779  set_baserel_size_estimates(root, rel);
5780 }
5781 
5782 /*
5783  * set_result_size_estimates
5784  * Set the size estimates for an RTE_RESULT base relation
5785  *
5786  * The rel's targetlist and restrictinfo list must have been constructed
5787  * already.
5788  *
5789  * We set the same fields as set_baserel_size_estimates.
5790  */
5791 void
5792 set_result_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5793 {
5794  /* Should only be applied to RTE_RESULT base relations */
5795  Assert(rel->relid > 0);
5796  Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_RESULT);
5797 
5798  /* RTE_RESULT always generates a single row, natively */
5799  rel->tuples = 1;
5800 
5801  /* Now estimate number of output rows, etc */
5802  set_baserel_size_estimates(root, rel);
5803 }
5804 
5805 /*
5806  * set_foreign_size_estimates
5807  * Set the size estimates for a base relation that is a foreign table.
5808  *
5809  * There is not a whole lot that we can do here; the foreign-data wrapper
5810  * is responsible for producing useful estimates. We can do a decent job
5811  * of estimating baserestrictcost, so we set that, and we also set up width
5812  * using what will be purely datatype-driven estimates from the targetlist.
5813  * There is no way to do anything sane with the rows value, so we just put
5814  * a default estimate and hope that the wrapper can improve on it. The
5815  * wrapper's GetForeignRelSize function will be called momentarily.
5816  *
5817  * The rel's targetlist and restrictinfo list must have been constructed
5818  * already.
5819  */
5820 void
5821 set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5822 {
5823  /* Should only be applied to base relations */
5824  Assert(rel->relid > 0);
5825 
5826  rel->rows = 1000; /* entirely bogus default estimate */
5827 
5828  cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
5829 
5830  set_rel_width(root, rel);
5831 }
5832 
5833 
5834 /*
5835  * set_rel_width
5836  * Set the estimated output width of a base relation.
5837  *
5838  * The estimated output width is the sum of the per-attribute width estimates
5839  * for the actually-referenced columns, plus any PHVs or other expressions
5840  * that have to be calculated at this relation. This is the amount of data
5841  * we'd need to pass upwards in case of a sort, hash, etc.
5842  *
5843  * This function also sets reltarget->cost, so it's a bit misnamed now.
5844  *
5845  * NB: this works best on plain relations because it prefers to look at
5846  * real Vars. For subqueries, set_subquery_size_estimates will already have
5847  * copied up whatever per-column estimates were made within the subquery,
5848  * and for other types of rels there isn't much we can do anyway. We fall
5849  * back on (fairly stupid) datatype-based width estimates if we can't get
5850  * any better number.
5851  *
5852  * The per-attribute width estimates are cached for possible re-use while
5853  * building join relations or post-scan/join pathtargets.
5854  */
5855 static void
5856 set_rel_width(PlannerInfo *root, RelOptInfo *rel)
5857 {
5858  Oid reloid = planner_rt_fetch(rel->relid, root)->relid;
5859  int32 tuple_width = 0;
5860  bool have_wholerow_var = false;
5861  ListCell *lc;
5862 
5863  /* Vars are assumed to have cost zero, but other exprs do not */
5864  rel->reltarget->cost.startup = 0;
5865  rel->reltarget->cost.per_tuple = 0;
5866 
5867  foreach(lc, rel->reltarget->exprs)
5868  {
5869  Node *node = (Node *) lfirst(lc);
5870 
5871  /*
5872  * Ordinarily, a Var in a rel's targetlist must belong to that rel;
5873  * but there are corner cases involving LATERAL references where that
5874  * isn't so. If the Var has the wrong varno, fall through to the
5875  * generic case (it doesn't seem worth the trouble to be any smarter).
5876  */
5877  if (IsA(node, Var) &&
5878  ((Var *) node)->varno == rel->relid)
5879  {
5880  Var *var = (Var *) node;
5881  int ndx;
5882  int32 item_width;
5883 
5884  Assert(var->varattno >= rel->min_attr);
5885  Assert(var->varattno <= rel->max_attr);
5886 
5887  ndx = var->varattno - rel->min_attr;
5888 
5889  /*
5890  * If it's a whole-row Var, we'll deal with it below after we have
5891  * already cached as many attr widths as possible.
5892  */
5893  if (var->varattno == 0)
5894  {
5895  have_wholerow_var = true;
5896  continue;
5897  }
5898 
5899  /*
5900  * The width may have been cached already (especially if it's a
5901  * subquery), so don't duplicate effort.
5902  */
5903  if (rel->attr_widths[ndx] > 0)
5904  {
5905  tuple_width += rel->attr_widths[ndx];
5906  continue;
5907  }
5908 
5909  /* Try to get column width from statistics */
5910  if (reloid != InvalidOid && var->varattno > 0)
5911  {
5912  item_width = get_attavgwidth(reloid, var->varattno);
5913  if (item_width > 0)
5914  {
5915  rel->attr_widths[ndx] = item_width;
5916  tuple_width += item_width;
5917  continue;
5918  }
5919  }
5920 
5921  /*
5922  * Not a plain relation, or can't find statistics for it. Estimate
5923  * using just the type info.
5924  */
5925  item_width = get_typavgwidth(var->vartype, var->vartypmod);
5926  Assert(item_width > 0);
5927  rel->attr_widths[ndx] = item_width;
5928  tuple_width += item_width;
5929  }
5930  else if (IsA(node, PlaceHolderVar))
5931  {
5932  /*
5933  * We will need to evaluate the PHV's contained expression while
5934  * scanning this rel, so be sure to include it in reltarget->cost.
5935  */
5936  PlaceHolderVar *phv = (PlaceHolderVar *) node;
5937  PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
5938  QualCost cost;
5939 
5940  tuple_width += phinfo->ph_width;
5941  cost_qual_eval_node(&cost, (Node *) phv->phexpr, root);
5942  rel->reltarget->cost.startup += cost.startup;
5943  rel->reltarget->cost.per_tuple += cost.per_tuple;
5944  }
5945  else
5946  {
5947  /*
5948  * We could be looking at an expression pulled up from a subquery,
5949  * or a ROW() representing a whole-row child Var, etc. Do what we
5950  * can using the expression type information.
5951  */
5952  int32 item_width;
5953  QualCost cost;
5954 
5955  item_width = get_typavgwidth(exprType(node), exprTypmod(node));
5956  Assert(item_width > 0);
5957  tuple_width += item_width;
5958  /* Not entirely clear if we need to account for cost, but do so */
5959  cost_qual_eval_node(&cost, node, root);
5960  rel->reltarget->cost.startup += cost.startup;
5961  rel->reltarget->cost.per_tuple += cost.per_tuple;
5962  }
5963  }
5964 
5965  /*
5966  * If we have a whole-row reference, estimate its width as the sum of
5967  * per-column widths plus heap tuple header overhead.
5968  */
5969  if (have_wholerow_var)
5970  {
5971  int32 wholerow_width = MAXALIGN(SizeofHeapTupleHeader);
5972 
5973  if (reloid != InvalidOid)
5974  {
5975  /* Real relation, so estimate true tuple width */
5976  wholerow_width += get_relation_data_width(reloid,
5977  rel->attr_widths - rel->min_attr);
5978  }
5979  else
5980  {
5981  /* Do what we can with info for a phony rel */
5982  AttrNumber i;
5983 
5984  for (i = 1; i <= rel->max_attr; i++)
5985  wholerow_width += rel->attr_widths[i - rel->min_attr];
5986  }
5987 
5988  rel->attr_widths[0 - rel->min_attr] = wholerow_width;
5989 
5990  /*
5991  * Include the whole-row Var as part of the output tuple. Yes, that
5992  * really is what happens at runtime.
5993  */
5994  tuple_width += wholerow_width;
5995  }
5996 
5997  Assert(tuple_width >= 0);
5998  rel->reltarget->width = tuple_width;
5999 }
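At its core the width estimate above is a sum of per-attribute average widths over the referenced columns (plus header overhead for a whole-row Var). A stand-alone sketch with hypothetical column widths:

#include <stdio.h>

int main(void)
{
    /* hypothetical per-attribute average widths, in bytes */
    int attr_widths[] = {4, 8, 32};     /* e.g. an int4, an int8, a short text */
    int tuple_width = 0;

    /* sum only the actually-referenced columns, as set_rel_width does */
    for (int i = 0; i < 3; i++)
        tuple_width += attr_widths[i];

    printf("estimated output width = %d bytes\n", tuple_width);
    return 0;
}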
6000 
6001 /*
6002  * set_pathtarget_cost_width
6003  * Set the estimated eval cost and output width of a PathTarget tlist.
6004  *
6005  * As a notational convenience, returns the same PathTarget pointer passed in.
6006  *
6007  * Most, though not quite all, uses of this function occur after we've run
6008  * set_rel_width() for base relations; so we can usually obtain cached width
6009  * estimates for Vars. If we can't, fall back on datatype-based width
6010  * estimates. Present early-planning uses of PathTargets don't need accurate
6011  * widths badly enough to justify going to the catalogs for better data.
6012  */
6013 PathTarget *
6014 set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
6015 {
6016  int32 tuple_width = 0;