costsize.c (PostgreSQL Source Code, git master)
1 /*-------------------------------------------------------------------------
2  *
3  * costsize.c
4  * Routines to compute (and set) relation sizes and path costs
5  *
6  * Path costs are measured in arbitrary units established by these basic
7  * parameters:
8  *
9  * seq_page_cost Cost of a sequential page fetch
10  * random_page_cost Cost of a non-sequential page fetch
11  * cpu_tuple_cost Cost of typical CPU time to process a tuple
12  * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple
13  * cpu_operator_cost Cost of CPU time to execute an operator or function
14  * parallel_tuple_cost Cost of CPU time to pass a tuple from worker to leader backend
15  * parallel_setup_cost Cost of setting up shared memory for parallelism
16  *
17  * We expect that the kernel will typically do some amount of read-ahead
18  * optimization; this in conjunction with seek costs means that seq_page_cost
19  * is normally considerably less than random_page_cost. (However, if the
20  * database is fully cached in RAM, it is reasonable to set them equal.)
21  *
22  * We also use a rough estimate "effective_cache_size" of the number of
23  * disk pages in Postgres + OS-level disk cache. (We can't simply use
24  * NBuffers for this purpose because that would ignore the effects of
25  * the kernel's disk cache.)
26  *
27  * Obviously, taking constants for these values is an oversimplification,
28  * but it's tough enough to get any useful estimates even at this level of
29  * detail. Note that all of these parameters are user-settable, in case
30  * the default values are drastically off for a particular platform.
31  *
32  * seq_page_cost and random_page_cost can also be overridden for an individual
33  * tablespace, in case some data is on a fast disk and other data is on a slow
34  * disk. Per-tablespace overrides never apply to temporary work files such as
35  * an external sort or a materialize node that overflows work_mem.
36  *
37  * We compute two separate costs for each path:
38  * total_cost: total estimated cost to fetch all tuples
39  * startup_cost: cost that is expended before first tuple is fetched
40  * In some scenarios, such as when there is a LIMIT or we are implementing
41  * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
42  * path's result. A caller can estimate the cost of fetching a partial
43  * result by interpolating between startup_cost and total_cost. In detail:
44  * actual_cost = startup_cost +
45  * (total_cost - startup_cost) * tuples_to_fetch / path->rows;
46  * Note that a base relation's rows count (and, by extension, plan_rows for
47  * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
48  * that this equation works properly. (Note: while path->rows is never zero
49  * for ordinary relations, it is zero for paths for provably-empty relations,
50  * so beware of division-by-zero.) The LIMIT is applied as a top-level
51  * plan node.
52  *
53  * Each path stores the total number of disabled nodes that exist at or
54  * below that point in the plan tree. This is regarded as a component of
55  * the cost, and paths with fewer disabled nodes should be regarded as
56  * cheaper than those with more. Disabled nodes occur when the user sets
57  * a GUC like enable_seqscan=false. We can't necessarily respect such a
57  * setting in every part of the plan tree, but we want to respect it in as many
59  * parts of the plan tree as possible. Simpler schemes like storing a Boolean
60  * here rather than a count fail to do that. We used to disable nodes by
61  * adding a large constant to the startup cost, but that distorted planning
62  * in other ways.
63  *
64  * For largely historical reasons, most of the routines in this module use
65  * the passed result Path only to store their results (rows, startup_cost and
66  * total_cost) into. All the input data they need is passed as separate
67  * parameters, even though much of it could be extracted from the Path.
68  * An exception is made for the cost_XXXjoin() routines, which expect all
69  * the other fields of the passed XXXPath to be filled in, and similarly
70  * cost_index() assumes the passed IndexPath is valid except for its output
71  * values.
72  *
73  *
74  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
75  * Portions Copyright (c) 1994, Regents of the University of California
76  *
77  * IDENTIFICATION
78  * src/backend/optimizer/path/costsize.c
79  *
80  *-------------------------------------------------------------------------
81  */
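/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * A minimal sketch of the partial-fetch interpolation described above,
 * assuming the caller already has the path's startup_cost, total_cost and
 * row estimate plus the number of tuples it expects to fetch.  The guard
 * mirrors the division-by-zero caveat for provably-empty relations; the
 * helper name is hypothetical.
 */
double
example_partial_fetch_cost(double startup_cost, double total_cost,
						   double path_rows, double tuples_to_fetch)
{
	if (path_rows <= 0.0)		/* provably-empty relation: charge startup only */
		return startup_cost;
	if (tuples_to_fetch > path_rows)
		tuples_to_fetch = path_rows;
	return startup_cost +
		(total_cost - startup_cost) * tuples_to_fetch / path_rows;
}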
82 
83 #include "postgres.h"
84 
85 #include <limits.h>
86 #include <math.h>
87 
88 #include "access/amapi.h"
89 #include "access/htup_details.h"
90 #include "access/tsmapi.h"
91 #include "executor/executor.h"
92 #include "executor/nodeAgg.h"
93 #include "executor/nodeHash.h"
94 #include "executor/nodeMemoize.h"
95 #include "miscadmin.h"
96 #include "nodes/makefuncs.h"
97 #include "nodes/nodeFuncs.h"
98 #include "optimizer/clauses.h"
99 #include "optimizer/cost.h"
100 #include "optimizer/optimizer.h"
101 #include "optimizer/pathnode.h"
102 #include "optimizer/paths.h"
103 #include "optimizer/placeholder.h"
104 #include "optimizer/plancat.h"
105 #include "optimizer/restrictinfo.h"
106 #include "parser/parsetree.h"
107 #include "utils/lsyscache.h"
108 #include "utils/selfuncs.h"
109 #include "utils/spccache.h"
110 #include "utils/tuplesort.h"
111 
112 
113 #define LOG2(x) (log(x) / 0.693147180559945)
114 
115 /*
116  * Append and MergeAppend nodes are less expensive than some other operations
117  * which use cpu_tuple_cost; instead of adding a separate GUC, estimate the
118  * per-tuple cost as cpu_tuple_cost multiplied by this value.
119  */
120 #define APPEND_CPU_COST_MULTIPLIER 0.5
121 
122 /*
123  * Maximum value for row estimates. We cap row estimates to this to help
124  * ensure that costs based on these estimates remain within the range of what
125  * double can represent. add_path() wouldn't act sanely given infinite or NaN
126  * cost values.
127  */
128 #define MAXIMUM_ROWCOUNT 1e100
129 
138 
140 
142 
144 
145 bool enable_seqscan = true;
146 bool enable_indexscan = true;
148 bool enable_bitmapscan = true;
149 bool enable_tidscan = true;
150 bool enable_sort = true;
152 bool enable_hashagg = true;
153 bool enable_nestloop = true;
154 bool enable_material = true;
155 bool enable_memoize = true;
156 bool enable_mergejoin = true;
157 bool enable_hashjoin = true;
158 bool enable_gathermerge = true;
166 
167 typedef struct
168 {
169  PlannerInfo *root;
170  QualCost total;
171 } cost_qual_eval_context;
172 
173 static List *extract_nonindex_conditions(List *qual_clauses, List *indexclauses);
174 static MergeScanSelCache *cached_scansel(PlannerInfo *root,
175  RestrictInfo *rinfo,
176  PathKey *pathkey);
177 static void cost_rescan(PlannerInfo *root, Path *path,
178  Cost *rescan_startup_cost, Cost *rescan_total_cost);
179 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
180 static void get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
181  ParamPathInfo *param_info,
182  QualCost *qpqual_cost);
183 static bool has_indexed_join_quals(NestPath *path);
184 static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
185  List *quals);
186 static double calc_joinrel_size_estimate(PlannerInfo *root,
187  RelOptInfo *joinrel,
188  RelOptInfo *outer_rel,
189  RelOptInfo *inner_rel,
190  double outer_rows,
191  double inner_rows,
192  SpecialJoinInfo *sjinfo,
193  List *restrictlist);
194 static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
195  Relids outer_relids,
196  Relids inner_relids,
197  SpecialJoinInfo *sjinfo,
198  List **restrictlist);
199 static Cost append_nonpartial_cost(List *subpaths, int numpaths,
200  int parallel_workers);
201 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
202 static int32 get_expr_width(PlannerInfo *root, const Node *expr);
203 static double relation_byte_size(double tuples, int width);
204 static double page_size(double tuples, int width);
205 static double get_parallel_divisor(Path *path);
206 
207 
208 /*
209  * clamp_row_est
210  * Force a row-count estimate to a sane value.
211  */
212 double
213 clamp_row_est(double nrows)
214 {
215  /*
216  * Avoid infinite and NaN row estimates. Costs derived from such values
217  * are going to be useless. Also force the estimate to be at least one
218  * row, to make explain output look better and to avoid possible
219  * divide-by-zero when interpolating costs. Make it an integer, too.
220  */
221  if (nrows > MAXIMUM_ROWCOUNT || isnan(nrows))
222  nrows = MAXIMUM_ROWCOUNT;
223  else if (nrows <= 1.0)
224  nrows = 1.0;
225  else
226  nrows = rint(nrows);
227 
228  return nrows;
229 }
230 
231 /*
232  * clamp_width_est
233  * Force a tuple-width estimate to a sane value.
234  *
235  * The planner represents datatype width and tuple width estimates as int32.
236  * When summing column width estimates to create a tuple width estimate,
237  * it's possible to reach integer overflow in edge cases. To ensure sane
238  * behavior, we form such sums in int64 arithmetic and then apply this routine
239  * to clamp to int32 range.
240  */
241 int32
242 clamp_width_est(int64 tuple_width)
243 {
244  /*
245  * Anything more than MaxAllocSize is clearly bogus, since we could not
246  * create a tuple that large.
247  */
248  if (tuple_width > MaxAllocSize)
249  return (int32) MaxAllocSize;
250 
251  /*
252  * Unlike clamp_row_est, we just Assert that the value isn't negative,
253  * rather than masking such errors.
254  */
255  Assert(tuple_width >= 0);
256 
257  return (int32) tuple_width;
258 }
259 
260 /*
261  * clamp_cardinality_to_long
262  * Cast a Cardinality value to a sane long value.
263  */
264 long
265 clamp_cardinality_to_long(Cardinality x)
266 {
267  /*
268  * Just for paranoia's sake, ensure we do something sane with negative or
269  * NaN values.
270  */
271  if (isnan(x))
272  return LONG_MAX;
273  if (x <= 0)
274  return 0;
275 
276  /*
277  * If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a
278  * double. Casting it to double and back may well result in overflow due
279  * to rounding, so avoid doing that. We trust that any double value that
280  * compares strictly less than "(double) LONG_MAX" will cast to a
281  * representable "long" value.
282  */
283  return (x < (double) LONG_MAX) ? (long) x : LONG_MAX;
284 }
285 
286 
287 /*
288  * cost_seqscan
289  * Determines and returns the cost of scanning a relation sequentially.
290  *
291  * 'baserel' is the relation to be scanned
292  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
293  */
294 void
295 cost_seqscan(Path *path, PlannerInfo *root,
296  RelOptInfo *baserel, ParamPathInfo *param_info)
297 {
298  Cost startup_cost = 0;
299  Cost cpu_run_cost;
300  Cost disk_run_cost;
301  double spc_seq_page_cost;
302  QualCost qpqual_cost;
303  Cost cpu_per_tuple;
304 
305  /* Should only be applied to base relations */
306  Assert(baserel->relid > 0);
307  Assert(baserel->rtekind == RTE_RELATION);
308 
309  /* Mark the path with the correct row estimate */
310  if (param_info)
311  path->rows = param_info->ppi_rows;
312  else
313  path->rows = baserel->rows;
314 
315  /* fetch estimated page cost for tablespace containing table */
316  get_tablespace_page_costs(baserel->reltablespace,
317  NULL,
318  &spc_seq_page_cost);
319 
320  /*
321  * disk costs
322  */
323  disk_run_cost = spc_seq_page_cost * baserel->pages;
324 
325  /* CPU costs */
326  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
327 
328  startup_cost += qpqual_cost.startup;
329  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
330  cpu_run_cost = cpu_per_tuple * baserel->tuples;
331  /* tlist eval costs are paid per output row, not per tuple scanned */
332  startup_cost += path->pathtarget->cost.startup;
333  cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;
334 
335  /* Adjust costing for parallelism, if used. */
336  if (path->parallel_workers > 0)
337  {
338  double parallel_divisor = get_parallel_divisor(path);
339 
340  /* The CPU cost is divided among all the workers. */
341  cpu_run_cost /= parallel_divisor;
342 
343  /*
344  * It may be possible to amortize some of the I/O cost, but probably
345  * not very much, because most operating systems already do aggressive
346  * prefetching. For now, we assume that the disk run cost can't be
347  * amortized at all.
348  */
349 
350  /*
351  * In the case of a parallel plan, the row count needs to represent
352  * the number of tuples processed per worker.
353  */
354  path->rows = clamp_row_est(path->rows / parallel_divisor);
355  }
356 
357  path->disabled_nodes = enable_seqscan ? 0 : 1;
358  path->startup_cost = startup_cost;
359  path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
360 }
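/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * The core arithmetic of cost_seqscan() above, restated with plain doubles:
 * disk cost is one sequential page fetch per page, CPU cost is
 * (cpu_tuple_cost + per-tuple qual cost) per tuple.  Startup components and
 * pathtarget evaluation are omitted; the helper name is hypothetical.
 */
double
example_seqscan_run_cost(double pages, double tuples,
						 double seq_page_cost_val,
						 double cpu_tuple_cost_val,
						 double qual_cost_per_tuple)
{
	double		disk_run_cost = seq_page_cost_val * pages;
	double		cpu_run_cost = (cpu_tuple_cost_val + qual_cost_per_tuple) * tuples;

	return disk_run_cost + cpu_run_cost;
}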
361 
362 /*
363  * cost_samplescan
364  * Determines and returns the cost of scanning a relation using sampling.
365  *
366  * 'baserel' is the relation to be scanned
367  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
368  */
369 void
370 cost_samplescan(Path *path, PlannerInfo *root,
371  RelOptInfo *baserel, ParamPathInfo *param_info)
372 {
373  Cost startup_cost = 0;
374  Cost run_cost = 0;
375  RangeTblEntry *rte;
376  TableSampleClause *tsc;
377  TsmRoutine *tsm;
378  double spc_seq_page_cost,
379  spc_random_page_cost,
380  spc_page_cost;
381  QualCost qpqual_cost;
382  Cost cpu_per_tuple;
383 
384  /* Should only be applied to base relations with tablesample clauses */
385  Assert(baserel->relid > 0);
386  rte = planner_rt_fetch(baserel->relid, root);
387  Assert(rte->rtekind == RTE_RELATION);
388  tsc = rte->tablesample;
389  Assert(tsc != NULL);
390  tsm = GetTsmRoutine(tsc->tsmhandler);
391 
392  /* Mark the path with the correct row estimate */
393  if (param_info)
394  path->rows = param_info->ppi_rows;
395  else
396  path->rows = baserel->rows;
397 
398  /* fetch estimated page cost for tablespace containing table */
399  get_tablespace_page_costs(baserel->reltablespace,
400  &spc_random_page_cost,
401  &spc_seq_page_cost);
402 
403  /* if NextSampleBlock is used, assume random access, else sequential */
404  spc_page_cost = (tsm->NextSampleBlock != NULL) ?
405  spc_random_page_cost : spc_seq_page_cost;
406 
407  /*
408  * disk costs (recall that baserel->pages has already been set to the
409  * number of pages the sampling method will visit)
410  */
411  run_cost += spc_page_cost * baserel->pages;
412 
413  /*
414  * CPU costs (recall that baserel->tuples has already been set to the
415  * number of tuples the sampling method will select). Note that we ignore
416  * execution cost of the TABLESAMPLE parameter expressions; they will be
417  * evaluated only once per scan, and in most usages they'll likely be
418  * simple constants anyway. We also don't charge anything for the
419  * calculations the sampling method might do internally.
420  */
421  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
422 
423  startup_cost += qpqual_cost.startup;
424  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
425  run_cost += cpu_per_tuple * baserel->tuples;
426  /* tlist eval costs are paid per output row, not per tuple scanned */
427  startup_cost += path->pathtarget->cost.startup;
428  run_cost += path->pathtarget->cost.per_tuple * path->rows;
429 
430  path->disabled_nodes = 0;
431  path->startup_cost = startup_cost;
432  path->total_cost = startup_cost + run_cost;
433 }
434 
435 /*
436  * cost_gather
437  * Determines and returns the cost of gather path.
438  *
439  * 'rel' is the relation to be operated upon
440  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
441  * 'rows' may be used to point to a row estimate; if non-NULL, it overrides
442  * both 'rel' and 'param_info'. This is useful when the path doesn't exactly
443  * correspond to any particular RelOptInfo.
444  */
445 void
446 cost_gather(GatherPath *path, PlannerInfo *root,
447  RelOptInfo *rel, ParamPathInfo *param_info,
448  double *rows)
449 {
450  Cost startup_cost = 0;
451  Cost run_cost = 0;
452 
453  /* Mark the path with the correct row estimate */
454  if (rows)
455  path->path.rows = *rows;
456  else if (param_info)
457  path->path.rows = param_info->ppi_rows;
458  else
459  path->path.rows = rel->rows;
460 
461  startup_cost = path->subpath->startup_cost;
462 
463  run_cost = path->subpath->total_cost - path->subpath->startup_cost;
464 
465  /* Parallel setup and communication cost. */
466  startup_cost += parallel_setup_cost;
467  run_cost += parallel_tuple_cost * path->path.rows;
468 
469  path->path.disabled_nodes = path->subpath->disabled_nodes;
470  path->path.startup_cost = startup_cost;
471  path->path.total_cost = (startup_cost + run_cost);
472 }
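/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * Gather costing, as computed above, is just the subpath's cost plus the
 * one-time parallel_setup_cost and a parallel_tuple_cost charge per row
 * passed from the workers to the leader.  The helper name is hypothetical.
 */
double
example_gather_total_cost(double subpath_startup_cost,
						  double subpath_total_cost,
						  double rows,
						  double parallel_setup_cost_val,
						  double parallel_tuple_cost_val)
{
	double		startup_cost = subpath_startup_cost + parallel_setup_cost_val;
	double		run_cost = (subpath_total_cost - subpath_startup_cost) +
		parallel_tuple_cost_val * rows;

	return startup_cost + run_cost;
}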
473 
474 /*
475  * cost_gather_merge
476  * Determines and returns the cost of gather merge path.
477  *
478  * GatherMerge merges several pre-sorted input streams, using a heap that at
479  * any given instant holds the next tuple from each stream. If there are N
480  * streams, we need about N*log2(N) tuple comparisons to construct the heap at
481  * startup, and then for each output tuple, about log2(N) comparisons to
482  * replace the top heap entry with the next tuple from the same stream.
483  */
484 void
485 cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
486  RelOptInfo *rel, ParamPathInfo *param_info,
487  int input_disabled_nodes,
488  Cost input_startup_cost, Cost input_total_cost,
489  double *rows)
490 {
491  Cost startup_cost = 0;
492  Cost run_cost = 0;
493  Cost comparison_cost;
494  double N;
495  double logN;
496 
497  /* Mark the path with the correct row estimate */
498  if (rows)
499  path->path.rows = *rows;
500  else if (param_info)
501  path->path.rows = param_info->ppi_rows;
502  else
503  path->path.rows = rel->rows;
504 
505  /*
506  * Add one to the number of workers to account for the leader. This might
507  * be overgenerous since the leader will do less work than other workers
508  * in typical cases, but we'll go with it for now.
509  */
510  Assert(path->num_workers > 0);
511  N = (double) path->num_workers + 1;
512  logN = LOG2(N);
513 
514  /* Assumed cost per tuple comparison */
515  comparison_cost = 2.0 * cpu_operator_cost;
516 
517  /* Heap creation cost */
518  startup_cost += comparison_cost * N * logN;
519 
520  /* Per-tuple heap maintenance cost */
521  run_cost += path->path.rows * comparison_cost * logN;
522 
523  /* small cost for heap management, like cost_merge_append */
524  run_cost += cpu_operator_cost * path->path.rows;
525 
526  /*
527  * Parallel setup and communication cost. Since Gather Merge, unlike
528  * Gather, requires us to block until a tuple is available from every
529  * worker, we bump the IPC cost up a little bit as compared with Gather.
530  * For lack of a better idea, charge an extra 5%.
531  */
532  startup_cost += parallel_setup_cost;
533  run_cost += parallel_tuple_cost * path->path.rows * 1.05;
534 
535  path->path.disabled_nodes = input_disabled_nodes
536  + (enable_gathermerge ? 0 : 1);
537  path->path.startup_cost = startup_cost + input_startup_cost;
538  path->path.total_cost = (startup_cost + run_cost + input_total_cost);
539 }
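/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * The heap-maintenance arithmetic charged by cost_gather_merge() above:
 * with N = num_workers + 1 input streams, building the heap costs about
 * N*log2(N) comparisons and each output row costs about log2(N) comparisons,
 * a comparison being charged at 2 * cpu_operator_cost.  Uses the LOG2 macro
 * defined earlier in this file; the helper name is hypothetical.
 */
double
example_gather_merge_heap_cost(int num_workers, double rows,
							   double cpu_operator_cost_val,
							   double *startup_cost, double *run_cost)
{
	double		N = (double) num_workers + 1.0;
	double		logN = LOG2(N);
	double		comparison_cost = 2.0 * cpu_operator_cost_val;

	*startup_cost = comparison_cost * N * logN; /* heap creation */
	*run_cost = rows * comparison_cost * logN	/* per-row heap maintenance */
		+ cpu_operator_cost_val * rows; /* small heap-management charge */
	return *startup_cost + *run_cost;
}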
540 
541 /*
542  * cost_index
543  * Determines and returns the cost of scanning a relation using an index.
544  *
545  * 'path' describes the indexscan under consideration, and is complete
546  * except for the fields to be set by this routine
547  * 'loop_count' is the number of repetitions of the indexscan to factor into
548  * estimates of caching behavior
549  *
550  * In addition to rows, startup_cost and total_cost, cost_index() sets the
551  * path's indextotalcost and indexselectivity fields. These values will be
552  * needed if the IndexPath is used in a BitmapIndexScan.
553  *
554  * NOTE: path->indexquals must contain only clauses usable as index
555  * restrictions. Any additional quals evaluated as qpquals may reduce the
556  * number of returned tuples, but they won't reduce the number of tuples
557  * we have to fetch from the table, so they don't reduce the scan cost.
558  */
559 void
560 cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
561  bool partial_path)
562 {
563  IndexOptInfo *index = path->indexinfo;
564  RelOptInfo *baserel = index->rel;
565  bool indexonly = (path->path.pathtype == T_IndexOnlyScan);
566  amcostestimate_function amcostestimate;
567  List *qpquals;
568  Cost startup_cost = 0;
569  Cost run_cost = 0;
570  Cost cpu_run_cost = 0;
571  Cost indexStartupCost;
572  Cost indexTotalCost;
573  Selectivity indexSelectivity;
574  double indexCorrelation,
575  csquared;
576  double spc_seq_page_cost,
577  spc_random_page_cost;
578  Cost min_IO_cost,
579  max_IO_cost;
580  QualCost qpqual_cost;
581  Cost cpu_per_tuple;
582  double tuples_fetched;
583  double pages_fetched;
584  double rand_heap_pages;
585  double index_pages;
586 
587  /* Should only be applied to base relations */
588  Assert(IsA(baserel, RelOptInfo) &&
589  IsA(index, IndexOptInfo));
590  Assert(baserel->relid > 0);
591  Assert(baserel->rtekind == RTE_RELATION);
592 
593  /*
594  * Mark the path with the correct row estimate, and identify which quals
595  * will need to be enforced as qpquals. We need not check any quals that
596  * are implied by the index's predicate, so we can use indrestrictinfo not
597  * baserestrictinfo as the list of relevant restriction clauses for the
598  * rel.
599  */
600  if (path->path.param_info)
601  {
602  path->path.rows = path->path.param_info->ppi_rows;
603  /* qpquals come from the rel's restriction clauses and ppi_clauses */
604  qpquals = list_concat(extract_nonindex_conditions(path->indexinfo->indrestrictinfo,
605  path->indexclauses),
606  extract_nonindex_conditions(path->path.param_info->ppi_clauses,
607  path->indexclauses));
608  }
609  else
610  {
611  path->path.rows = baserel->rows;
612  /* qpquals come from just the rel's restriction clauses */
613  qpquals = extract_nonindex_conditions(path->indexinfo->indrestrictinfo,
614  path->indexclauses);
615  }
616 
617  /* we don't need to check enable_indexonlyscan; indxpath.c does that */
618  path->path.disabled_nodes = enable_indexscan ? 0 : 1;
619 
620  /*
621  * Call index-access-method-specific code to estimate the processing cost
622  * for scanning the index, as well as the selectivity of the index (ie,
623  * the fraction of main-table tuples we will have to retrieve) and its
624  * correlation to the main-table tuple order. We need a cast here because
625  * pathnodes.h uses a weak function type to avoid including amapi.h.
626  */
627  amcostestimate = (amcostestimate_function) index->amcostestimate;
628  amcostestimate(root, path, loop_count,
629  &indexStartupCost, &indexTotalCost,
630  &indexSelectivity, &indexCorrelation,
631  &index_pages);
632 
633  /*
634  * Save amcostestimate's results for possible use in bitmap scan planning.
635  * We don't bother to save indexStartupCost or indexCorrelation, because a
636  * bitmap scan doesn't care about either.
637  */
638  path->indextotalcost = indexTotalCost;
639  path->indexselectivity = indexSelectivity;
640 
641  /* all costs for touching index itself included here */
642  startup_cost += indexStartupCost;
643  run_cost += indexTotalCost - indexStartupCost;
644 
645  /* estimate number of main-table tuples fetched */
646  tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
647 
648  /* fetch estimated page costs for tablespace containing table */
649  get_tablespace_page_costs(baserel->reltablespace,
650  &spc_random_page_cost,
651  &spc_seq_page_cost);
652 
653  /*----------
654  * Estimate number of main-table pages fetched, and compute I/O cost.
655  *
656  * When the index ordering is uncorrelated with the table ordering,
657  * we use an approximation proposed by Mackert and Lohman (see
658  * index_pages_fetched() for details) to compute the number of pages
659  * fetched, and then charge spc_random_page_cost per page fetched.
660  *
661  * When the index ordering is exactly correlated with the table ordering
662  * (just after a CLUSTER, for example), the number of pages fetched should
663  * be exactly selectivity * table_size. What's more, all but the first
664  * will be sequential fetches, not the random fetches that occur in the
665  * uncorrelated case. So if the number of pages is more than 1, we
666  * ought to charge
667  * spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost
668  * For partially-correlated indexes, we ought to charge somewhere between
669  * these two estimates. We currently interpolate linearly between the
670  * estimates based on the correlation squared (XXX is that appropriate?).
671  *
672  * If it's an index-only scan, then we will not need to fetch any heap
673  * pages for which the visibility map shows all tuples are visible.
674  * Hence, reduce the estimated number of heap fetches accordingly.
675  * We use the measured fraction of the entire heap that is all-visible,
676  * which might not be particularly relevant to the subset of the heap
677  * that this query will fetch; but it's not clear how to do better.
678  *----------
679  */
680  if (loop_count > 1)
681  {
682  /*
683  * For repeated indexscans, the appropriate estimate for the
684  * uncorrelated case is to scale up the number of tuples fetched in
685  * the Mackert and Lohman formula by the number of scans, so that we
686  * estimate the number of pages fetched by all the scans; then
687  * pro-rate the costs for one scan. In this case we assume all the
688  * fetches are random accesses.
689  */
690  pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
691  baserel->pages,
692  (double) index->pages,
693  root);
694 
695  if (indexonly)
696  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
697 
698  rand_heap_pages = pages_fetched;
699 
700  max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
701 
702  /*
703  * In the perfectly correlated case, the number of pages touched by
704  * each scan is selectivity * table_size, and we can use the Mackert
705  * and Lohman formula at the page level to estimate how much work is
706  * saved by caching across scans. We still assume all the fetches are
707  * random, though, which is an overestimate that's hard to correct for
708  * without double-counting the cache effects. (But in most cases
709  * where such a plan is actually interesting, only one page would get
710  * fetched per scan anyway, so it shouldn't matter much.)
711  */
712  pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
713 
714  pages_fetched = index_pages_fetched(pages_fetched * loop_count,
715  baserel->pages,
716  (double) index->pages,
717  root);
718 
719  if (indexonly)
720  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
721 
722  min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
723  }
724  else
725  {
726  /*
727  * Normal case: apply the Mackert and Lohman formula, and then
728  * interpolate between that and the correlation-derived result.
729  */
730  pages_fetched = index_pages_fetched(tuples_fetched,
731  baserel->pages,
732  (double) index->pages,
733  root);
734 
735  if (indexonly)
736  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
737 
738  rand_heap_pages = pages_fetched;
739 
740  /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
741  max_IO_cost = pages_fetched * spc_random_page_cost;
742 
743  /* min_IO_cost is for the perfectly correlated case (csquared=1) */
744  pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
745 
746  if (indexonly)
747  pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
748 
749  if (pages_fetched > 0)
750  {
751  min_IO_cost = spc_random_page_cost;
752  if (pages_fetched > 1)
753  min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
754  }
755  else
756  min_IO_cost = 0;
757  }
758 
759  if (partial_path)
760  {
761  /*
762  * For index only scans compute workers based on number of index pages
763  * fetched; the number of heap pages we fetch might be so small as to
764  * effectively rule out parallelism, which we don't want to do.
765  */
766  if (indexonly)
767  rand_heap_pages = -1;
768 
769  /*
770  * Estimate the number of parallel workers required to scan index. Use
771  * the number of heap pages computed considering heap fetches won't be
772  * sequential as for parallel scans the pages are accessed in random
773  * order.
774  */
775  path->path.parallel_workers = compute_parallel_worker(baserel,
776  rand_heap_pages,
777  index_pages,
778  max_parallel_workers_per_gather);
779 
780  /*
781  * Fall out if workers can't be assigned for parallel scan, because in
782  * such a case this path will be rejected. So there is no benefit in
783  * doing extra computation.
784  */
785  if (path->path.parallel_workers <= 0)
786  return;
787 
788  path->path.parallel_aware = true;
789  }
790 
791  /*
792  * Now interpolate based on estimated index order correlation to get total
793  * disk I/O cost for main table accesses.
794  */
795  csquared = indexCorrelation * indexCorrelation;
796 
797  run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
798 
799  /*
800  * Estimate CPU costs per tuple.
801  *
802  * What we want here is cpu_tuple_cost plus the evaluation costs of any
803  * qual clauses that we have to evaluate as qpquals.
804  */
805  cost_qual_eval(&qpqual_cost, qpquals, root);
806 
807  startup_cost += qpqual_cost.startup;
808  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
809 
810  cpu_run_cost += cpu_per_tuple * tuples_fetched;
811 
812  /* tlist eval costs are paid per output row, not per tuple scanned */
813  startup_cost += path->path.pathtarget->cost.startup;
814  cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
815 
816  /* Adjust costing for parallelism, if used. */
817  if (path->path.parallel_workers > 0)
818  {
819  double parallel_divisor = get_parallel_divisor(&path->path);
820 
821  path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
822 
823  /* The CPU cost is divided among all the workers. */
824  cpu_run_cost /= parallel_divisor;
825  }
826 
827  run_cost += cpu_run_cost;
828 
829  path->path.startup_cost = startup_cost;
830  path->path.total_cost = startup_cost + run_cost;
831 }
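/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * The correlation-based interpolation used by cost_index() above: the heap
 * I/O charge slides from the perfectly-uncorrelated estimate (max_IO_cost,
 * csquared = 0) to the perfectly-correlated estimate (min_IO_cost,
 * csquared = 1) with the square of the index correlation.  The helper name
 * is hypothetical.
 */
double
example_index_heap_io_cost(double min_IO_cost, double max_IO_cost,
						   double indexCorrelation)
{
	double		csquared = indexCorrelation * indexCorrelation;

	return max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
}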
832 
833 /*
834  * extract_nonindex_conditions
835  *
836  * Given a list of quals to be enforced in an indexscan, extract the ones that
837  * will have to be applied as qpquals (ie, the index machinery won't handle
838  * them). Here we detect only whether a qual clause is directly redundant
839  * with some indexclause. If the index path is chosen for use, createplan.c
840  * will try a bit harder to get rid of redundant qual conditions; specifically
841  * it will see if quals can be proven to be implied by the indexquals. But
842  * it does not seem worth the cycles to try to factor that in at this stage,
843  * since we're only trying to estimate qual eval costs. Otherwise this must
844  * match the logic in create_indexscan_plan().
845  *
846  * qual_clauses, and the result, are lists of RestrictInfos.
847  * indexclauses is a list of IndexClauses.
848  */
849 static List *
850 extract_nonindex_conditions(List *qual_clauses, List *indexclauses)
851 {
852  List *result = NIL;
853  ListCell *lc;
854 
855  foreach(lc, qual_clauses)
856  {
857  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
858 
859  if (rinfo->pseudoconstant)
860  continue; /* we may drop pseudoconstants here */
861  if (is_redundant_with_indexclauses(rinfo, indexclauses))
862  continue; /* dup or derived from same EquivalenceClass */
863  /* ... skip the predicate proof attempt createplan.c will try ... */
864  result = lappend(result, rinfo);
865  }
866  return result;
867 }
868 
869 /*
870  * index_pages_fetched
871  * Estimate the number of pages actually fetched after accounting for
872  * cache effects.
873  *
874  * We use an approximation proposed by Mackert and Lohman, "Index Scans
875  * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
876  * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
877  * The Mackert and Lohman approximation is that the number of pages
878  * fetched is
879  * PF =
880  * min(2TNs/(2T+Ns), T) when T <= b
881  * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
882  * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
883  * where
884  * T = # pages in table
885  * N = # tuples in table
886  * s = selectivity = fraction of table to be scanned
887  * b = # buffer pages available (we include kernel space here)
888  *
889  * We assume that effective_cache_size is the total number of buffer pages
890  * available for the whole query, and pro-rate that space across all the
891  * tables in the query and the index currently under consideration. (This
892  * ignores space needed for other indexes used by the query, but since we
893  * don't know which indexes will get used, we can't estimate that very well;
894  * and in any case counting all the tables may well be an overestimate, since
895  * depending on the join plan not all the tables may be scanned concurrently.)
896  *
897  * The product Ns is the number of tuples fetched; we pass in that
898  * product rather than calculating it here. "pages" is the number of pages
899  * in the object under consideration (either an index or a table).
900  * "index_pages" is the amount to add to the total table space, which was
901  * computed for us by make_one_rel.
902  *
903  * Caller is expected to have ensured that tuples_fetched is greater than zero
904  * and rounded to integer (see clamp_row_est). The result will likewise be
905  * greater than zero and integral.
906  */
907 double
908 index_pages_fetched(double tuples_fetched, BlockNumber pages,
909  double index_pages, PlannerInfo *root)
910 {
911  double pages_fetched;
912  double total_pages;
913  double T,
914  b;
915 
916  /* T is # pages in table, but don't allow it to be zero */
917  T = (pages > 1) ? (double) pages : 1.0;
918 
919  /* Compute number of pages assumed to be competing for cache space */
920  total_pages = root->total_table_pages + index_pages;
921  total_pages = Max(total_pages, 1.0);
922  Assert(T <= total_pages);
923 
924  /* b is pro-rated share of effective_cache_size */
925  b = (double) effective_cache_size * T / total_pages;
926 
927  /* force it positive and integral */
928  if (b <= 1.0)
929  b = 1.0;
930  else
931  b = ceil(b);
932 
933  /* This part is the Mackert and Lohman formula */
934  if (T <= b)
935  {
936  pages_fetched =
937  (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
938  if (pages_fetched >= T)
939  pages_fetched = T;
940  else
941  pages_fetched = ceil(pages_fetched);
942  }
943  else
944  {
945  double lim;
946 
947  lim = (2.0 * T * b) / (2.0 * T - b);
948  if (tuples_fetched <= lim)
949  {
950  pages_fetched =
951  (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
952  }
953  else
954  {
955  pages_fetched =
956  b + (tuples_fetched - lim) * (T - b) / T;
957  }
958  pages_fetched = ceil(pages_fetched);
959  }
960  return pages_fetched;
961 }
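/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * The Mackert-Lohman estimate above restated with plain doubles.  As a
 * worked example with T = 10000 pages, b = 2000 buffer pages and Ns = 2000
 * tuples fetched: T > b and Ns is below the 2Tb/(2T-b) threshold (about
 * 2222), so PF = 2TNs/(2T+Ns) = 40000000/22000, about 1819 pages after
 * rounding up.  ceil() comes from <math.h>, already included above; the
 * helper name is hypothetical.
 */
double
example_mackert_lohman_pages(double T, double b, double Ns)
{
	double		pages_fetched;

	if (T <= b)
	{
		pages_fetched = (2.0 * T * Ns) / (2.0 * T + Ns);
		if (pages_fetched > T)
			pages_fetched = T;
	}
	else
	{
		double		lim = (2.0 * T * b) / (2.0 * T - b);

		if (Ns <= lim)
			pages_fetched = (2.0 * T * Ns) / (2.0 * T + Ns);
		else
			pages_fetched = b + (Ns - lim) * (T - b) / T;
	}
	return ceil(pages_fetched);
}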
962 
963 /*
964  * get_indexpath_pages
965  * Determine the total size of the indexes used in a bitmap index path.
966  *
967  * Note: if the same index is used more than once in a bitmap tree, we will
968  * count it multiple times, which perhaps is the wrong thing ... but it's
969  * not completely clear, and detecting duplicates is difficult, so ignore it
970  * for now.
971  */
972 static double
973 get_indexpath_pages(Path *bitmapqual)
974 {
975  double result = 0;
976  ListCell *l;
977 
978  if (IsA(bitmapqual, BitmapAndPath))
979  {
980  BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
981 
982  foreach(l, apath->bitmapquals)
983  {
984  result += get_indexpath_pages((Path *) lfirst(l));
985  }
986  }
987  else if (IsA(bitmapqual, BitmapOrPath))
988  {
989  BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
990 
991  foreach(l, opath->bitmapquals)
992  {
993  result += get_indexpath_pages((Path *) lfirst(l));
994  }
995  }
996  else if (IsA(bitmapqual, IndexPath))
997  {
998  IndexPath *ipath = (IndexPath *) bitmapqual;
999 
1000  result = (double) ipath->indexinfo->pages;
1001  }
1002  else
1003  elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
1004 
1005  return result;
1006 }
1007 
1008 /*
1009  * cost_bitmap_heap_scan
1010  * Determines and returns the cost of scanning a relation using a bitmap
1011  * index-then-heap plan.
1012  *
1013  * 'baserel' is the relation to be scanned
1014  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1015  * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
1016  * 'loop_count' is the number of repetitions of the indexscan to factor into
1017  * estimates of caching behavior
1018  *
1019  * Note: the component IndexPaths in bitmapqual should have been costed
1020  * using the same loop_count.
1021  */
1022 void
1023 cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
1024  ParamPathInfo *param_info,
1025  Path *bitmapqual, double loop_count)
1026 {
1027  Cost startup_cost = 0;
1028  Cost run_cost = 0;
1029  Cost indexTotalCost;
1030  QualCost qpqual_cost;
1031  Cost cpu_per_tuple;
1032  Cost cost_per_page;
1033  Cost cpu_run_cost;
1034  double tuples_fetched;
1035  double pages_fetched;
1036  double spc_seq_page_cost,
1037  spc_random_page_cost;
1038  double T;
1039 
1040  /* Should only be applied to base relations */
1041  Assert(IsA(baserel, RelOptInfo));
1042  Assert(baserel->relid > 0);
1043  Assert(baserel->rtekind == RTE_RELATION);
1044 
1045  /* Mark the path with the correct row estimate */
1046  if (param_info)
1047  path->rows = param_info->ppi_rows;
1048  else
1049  path->rows = baserel->rows;
1050 
1051  pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual,
1052  loop_count, &indexTotalCost,
1053  &tuples_fetched);
1054 
1055  startup_cost += indexTotalCost;
1056  T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
1057 
1058  /* Fetch estimated page costs for tablespace containing table. */
1059  get_tablespace_page_costs(baserel->reltablespace,
1060  &spc_random_page_cost,
1061  &spc_seq_page_cost);
1062 
1063  /*
1064  * For small numbers of pages we should charge spc_random_page_cost
1065  * apiece, while if nearly all the table's pages are being read, it's more
1066  * appropriate to charge spc_seq_page_cost apiece. The effect is
1067  * nonlinear, too. For lack of a better idea, interpolate like this to
1068  * determine the cost per page.
1069  */
1070  if (pages_fetched >= 2.0)
1071  cost_per_page = spc_random_page_cost -
1072  (spc_random_page_cost - spc_seq_page_cost)
1073  * sqrt(pages_fetched / T);
1074  else
1075  cost_per_page = spc_random_page_cost;
1076 
1077  run_cost += pages_fetched * cost_per_page;
1078 
1079  /*
1080  * Estimate CPU costs per tuple.
1081  *
1082  * Often the indexquals don't need to be rechecked at each tuple ... but
1083  * not always, especially not if there are enough tuples involved that the
1084  * bitmaps become lossy. For the moment, just assume they will be
1085  * rechecked always. This means we charge the full freight for all the
1086  * scan clauses.
1087  */
1088  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1089 
1090  startup_cost += qpqual_cost.startup;
1091  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1092  cpu_run_cost = cpu_per_tuple * tuples_fetched;
1093 
1094  /* Adjust costing for parallelism, if used. */
1095  if (path->parallel_workers > 0)
1096  {
1097  double parallel_divisor = get_parallel_divisor(path);
1098 
1099  /* The CPU cost is divided among all the workers. */
1100  cpu_run_cost /= parallel_divisor;
1101 
1102  path->rows = clamp_row_est(path->rows / parallel_divisor);
1103  }
1104 
1105 
1106  run_cost += cpu_run_cost;
1107 
1108  /* tlist eval costs are paid per output row, not per tuple scanned */
1109  startup_cost += path->pathtarget->cost.startup;
1110  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1111 
1112  path->disabled_nodes = enable_bitmapscan ? 0 : 1;
1113  path->startup_cost = startup_cost;
1114  path->total_cost = startup_cost + run_cost;
1115 }
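/*
 * [Editorial illustration, not part of the numbered costsize.c listing.]
 * The per-page charge interpolation used by cost_bitmap_heap_scan() above:
 * a handful of fetched pages are charged at the random rate, and the charge
 * slides toward the sequential rate (via a square root of the fraction of
 * the table read) as more of the table is fetched.  sqrt() comes from
 * <math.h>, already included above; the helper name is hypothetical.
 */
double
example_bitmap_cost_per_page(double pages_fetched, double T,
							 double random_page_cost_val,
							 double seq_page_cost_val)
{
	if (pages_fetched >= 2.0)
		return random_page_cost_val -
			(random_page_cost_val - seq_page_cost_val) *
			sqrt(pages_fetched / T);
	return random_page_cost_val;
}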
1116 
1117 /*
1118  * cost_bitmap_tree_node
1119  * Extract cost and selectivity from a bitmap tree node (index/and/or)
1120  */
1121 void
1122 cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
1123 {
1124  if (IsA(path, IndexPath))
1125  {
1126  *cost = ((IndexPath *) path)->indextotalcost;
1127  *selec = ((IndexPath *) path)->indexselectivity;
1128 
1129  /*
1130  * Charge a small amount per retrieved tuple to reflect the costs of
1131  * manipulating the bitmap. This is mostly to make sure that a bitmap
1132  * scan doesn't look to be the same cost as an indexscan to retrieve a
1133  * single tuple.
1134  */
1135  *cost += 0.1 * cpu_operator_cost * path->rows;
1136  }
1137  else if (IsA(path, BitmapAndPath))
1138  {
1139  *cost = path->total_cost;
1140  *selec = ((BitmapAndPath *) path)->bitmapselectivity;
1141  }
1142  else if (IsA(path, BitmapOrPath))
1143  {
1144  *cost = path->total_cost;
1145  *selec = ((BitmapOrPath *) path)->bitmapselectivity;
1146  }
1147  else
1148  {
1149  elog(ERROR, "unrecognized node type: %d", nodeTag(path));
1150  *cost = *selec = 0; /* keep compiler quiet */
1151  }
1152 }
1153 
1154 /*
1155  * cost_bitmap_and_node
1156  * Estimate the cost of a BitmapAnd node
1157  *
1158  * Note that this considers only the costs of index scanning and bitmap
1159  * creation, not the eventual heap access. In that sense the object isn't
1160  * truly a Path, but it has enough path-like properties (costs in particular)
1161  * to warrant treating it as one. We don't bother to set the path rows field,
1162  * however.
1163  */
1164 void
1165 cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
1166 {
1167  Cost totalCost;
1168  Selectivity selec;
1169  ListCell *l;
1170 
1171  /*
1172  * We estimate AND selectivity on the assumption that the inputs are
1173  * independent. This is probably often wrong, but we don't have the info
1174  * to do better.
1175  *
1176  * The runtime cost of the BitmapAnd itself is estimated at 100x
1177  * cpu_operator_cost for each tbm_intersect needed. Probably too small,
1178  * definitely too simplistic?
1179  */
1180  totalCost = 0.0;
1181  selec = 1.0;
1182  foreach(l, path->bitmapquals)
1183  {
1184  Path *subpath = (Path *) lfirst(l);
1185  Cost subCost;
1186  Selectivity subselec;
1187 
1188  cost_bitmap_tree_node(subpath, &subCost, &subselec);
1189 
1190  selec *= subselec;
1191 
1192  totalCost += subCost;
1193  if (l != list_head(path->bitmapquals))
1194  totalCost += 100.0 * cpu_operator_cost;
1195  }
1196  path->bitmapselectivity = selec;
1197  path->path.rows = 0; /* per above, not used */
1198  path->path.disabled_nodes = 0;
1199  path->path.startup_cost = totalCost;
1200  path->path.total_cost = totalCost;
1201 }
1202 
1203 /*
1204  * cost_bitmap_or_node
1205  * Estimate the cost of a BitmapOr node
1206  *
1207  * See comments for cost_bitmap_and_node.
1208  */
1209 void
1210 cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
1211 {
1212  Cost totalCost;
1213  Selectivity selec;
1214  ListCell *l;
1215 
1216  /*
1217  * We estimate OR selectivity on the assumption that the inputs are
1218  * non-overlapping, since that's often the case in "x IN (list)" type
1219  * situations. Of course, we clamp to 1.0 at the end.
1220  *
1221  * The runtime cost of the BitmapOr itself is estimated at 100x
1222  * cpu_operator_cost for each tbm_union needed. Probably too small,
1223  * definitely too simplistic? We are aware that the tbm_unions are
1224  * optimized out when the inputs are BitmapIndexScans.
1225  */
1226  totalCost = 0.0;
1227  selec = 0.0;
1228  foreach(l, path->bitmapquals)
1229  {
1230  Path *subpath = (Path *) lfirst(l);
1231  Cost subCost;
1232  Selectivity subselec;
1233 
1234  cost_bitmap_tree_node(subpath, &subCost, &subselec);
1235 
1236  selec += subselec;
1237 
1238  totalCost += subCost;
1239  if (l != list_head(path->bitmapquals) &&
1240  !IsA(subpath, IndexPath))
1241  totalCost += 100.0 * cpu_operator_cost;
1242  }
1243  path->bitmapselectivity = Min(selec, 1.0);
1244  path->path.rows = 0; /* per above, not used */
1245  path->path.startup_cost = totalCost;
1246  path->path.total_cost = totalCost;
1247 }
1248 
1249 /*
1250  * cost_tidscan
1251  * Determines and returns the cost of scanning a relation using TIDs.
1252  *
1253  * 'baserel' is the relation to be scanned
1254  * 'tidquals' is the list of TID-checkable quals
1255  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1256  */
1257 void
1258 cost_tidscan(Path *path, PlannerInfo *root,
1259  RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info)
1260 {
1261  Cost startup_cost = 0;
1262  Cost run_cost = 0;
1263  QualCost qpqual_cost;
1264  Cost cpu_per_tuple;
1265  QualCost tid_qual_cost;
1266  double ntuples;
1267  ListCell *l;
1268  double spc_random_page_cost;
1269 
1270  /* Should only be applied to base relations */
1271  Assert(baserel->relid > 0);
1272  Assert(baserel->rtekind == RTE_RELATION);
1273  Assert(tidquals != NIL);
1274 
1275  /* Mark the path with the correct row estimate */
1276  if (param_info)
1277  path->rows = param_info->ppi_rows;
1278  else
1279  path->rows = baserel->rows;
1280 
1281  /* Count how many tuples we expect to retrieve */
1282  ntuples = 0;
1283  foreach(l, tidquals)
1284  {
1285  RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
1286  Expr *qual = rinfo->clause;
1287 
1288  /*
1289  * We must use a TID scan for CurrentOfExpr; in any other case, we
1290  * should be generating a TID scan only if enable_tidscan=true. Also,
1291  * if CurrentOfExpr is the qual, there should be only one.
1292  */
1293  Assert(enable_tidscan || IsA(qual, CurrentOfExpr));
1294  Assert(list_length(tidquals) == 1 || !IsA(qual, CurrentOfExpr));
1295 
1296  if (IsA(qual, ScalarArrayOpExpr))
1297  {
1298  /* Each element of the array yields 1 tuple */
1299  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
1300  Node *arraynode = (Node *) lsecond(saop->args);
1301 
1302  ntuples += estimate_array_length(root, arraynode);
1303  }
1304  else if (IsA(qual, CurrentOfExpr))
1305  {
1306  /* CURRENT OF yields 1 tuple */
1307  ntuples++;
1308  }
1309  else
1310  {
1311  /* It's just CTID = something, count 1 tuple */
1312  ntuples++;
1313  }
1314  }
1315 
1316  /*
1317  * The TID qual expressions will be computed once, any other baserestrict
1318  * quals once per retrieved tuple.
1319  */
1320  cost_qual_eval(&tid_qual_cost, tidquals, root);
1321 
1322  /* fetch estimated page cost for tablespace containing table */
1323  get_tablespace_page_costs(baserel->reltablespace,
1324  &spc_random_page_cost,
1325  NULL);
1326 
1327  /* disk costs --- assume each tuple on a different page */
1328  run_cost += spc_random_page_cost * ntuples;
1329 
1330  /* Add scanning CPU costs */
1331  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1332 
1333  /* XXX currently we assume TID quals are a subset of qpquals */
1334  startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
1335  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
1336  tid_qual_cost.per_tuple;
1337  run_cost += cpu_per_tuple * ntuples;
1338 
1339  /* tlist eval costs are paid per output row, not per tuple scanned */
1340  startup_cost += path->pathtarget->cost.startup;
1341  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1342 
1343  /*
1344  * There are assertions above verifying that we only reach this function
1345  * either when enable_tidscan=true or when the TID scan is the only legal
1346  * path, so it's safe to set disabled_nodes to zero here.
1347  */
1348  path->disabled_nodes = 0;
1349  path->startup_cost = startup_cost;
1350  path->total_cost = startup_cost + run_cost;
1351 }
1352 
1353 /*
1354  * cost_tidrangescan
1355  * Determines and sets the costs of scanning a relation using a range of
1356  * TIDs for 'path'
1357  *
1358  * 'baserel' is the relation to be scanned
1359  * 'tidrangequals' is the list of TID-checkable range quals
1360  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1361  */
1362 void
1363 cost_tidrangescan(Path *path, PlannerInfo *root,
1364  RelOptInfo *baserel, List *tidrangequals,
1365  ParamPathInfo *param_info)
1366 {
1367  Selectivity selectivity;
1368  double pages;
1369  Cost startup_cost = 0;
1370  Cost run_cost = 0;
1371  QualCost qpqual_cost;
1372  Cost cpu_per_tuple;
1373  QualCost tid_qual_cost;
1374  double ntuples;
1375  double nseqpages;
1376  double spc_random_page_cost;
1377  double spc_seq_page_cost;
1378 
1379  /* Should only be applied to base relations */
1380  Assert(baserel->relid > 0);
1381  Assert(baserel->rtekind == RTE_RELATION);
1382 
1383  /* Mark the path with the correct row estimate */
1384  if (param_info)
1385  path->rows = param_info->ppi_rows;
1386  else
1387  path->rows = baserel->rows;
1388 
1389  /* Count how many tuples and pages we expect to scan */
1390  selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid,
1391  JOIN_INNER, NULL);
1392  pages = ceil(selectivity * baserel->pages);
1393 
1394  if (pages <= 0.0)
1395  pages = 1.0;
1396 
1397  /*
1398  * The first page in a range requires a random seek, but each subsequent
1399  * page is just a normal sequential page read. NOTE: it's desirable for
1400  * TID Range Scans to cost more than the equivalent Sequential Scans,
1401  * because Seq Scans have some performance advantages such as scan
1402  * synchronization and parallelizability, and we'd prefer one of them to
1403  * be picked unless a TID Range Scan really is better.
1404  */
1405  ntuples = selectivity * baserel->tuples;
1406  nseqpages = pages - 1.0;
1407 
1408  /*
1409  * The TID qual expressions will be computed once, any other baserestrict
1410  * quals once per retrieved tuple.
1411  */
1412  cost_qual_eval(&tid_qual_cost, tidrangequals, root);
1413 
1414  /* fetch estimated page cost for tablespace containing table */
1415  get_tablespace_page_costs(baserel->reltablespace,
1416  &spc_random_page_cost,
1417  &spc_seq_page_cost);
1418 
1419  /* disk costs; 1 random page and the remainder as seq pages */
1420  run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
1421 
1422  /* Add scanning CPU costs */
1423  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1424 
1425  /*
1426  * XXX currently we assume TID quals are a subset of qpquals at this
1427  * point; they will be removed (if possible) when we create the plan, so
1428  * we subtract their cost from the total qpqual cost. (If the TID quals
1429  * can't be removed, this is a mistake and we're going to underestimate
1430  * the CPU cost a bit.)
1431  */
1432  startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
1433  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
1434  tid_qual_cost.per_tuple;
1435  run_cost += cpu_per_tuple * ntuples;
1436 
1437  /* tlist eval costs are paid per output row, not per tuple scanned */
1438  startup_cost += path->pathtarget->cost.startup;
1439  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1440 
1441  /* we should not generate this path type when enable_tidscan=false */
1442  Assert(enable_tidscan);
1443  path->disabled_nodes = 0;
1444  path->startup_cost = startup_cost;
1445  path->total_cost = startup_cost + run_cost;
1446 }
1447 
1448 /*
1449  * cost_subqueryscan
1450  * Determines and returns the cost of scanning a subquery RTE.
1451  *
1452  * 'baserel' is the relation to be scanned
1453  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1454  * 'trivial_pathtarget' is true if the pathtarget is believed to be trivial.
1455  */
1456 void
1457 cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
1458  RelOptInfo *baserel, ParamPathInfo *param_info,
1459  bool trivial_pathtarget)
1460 {
1461  Cost startup_cost;
1462  Cost run_cost;
1463  List *qpquals;
1464  QualCost qpqual_cost;
1465  Cost cpu_per_tuple;
1466 
1467  /* Should only be applied to base relations that are subqueries */
1468  Assert(baserel->relid > 0);
1469  Assert(baserel->rtekind == RTE_SUBQUERY);
1470 
1471  /*
1472  * We compute the rowcount estimate as the subplan's estimate times the
1473  * selectivity of relevant restriction clauses. In simple cases this will
1474  * come out the same as baserel->rows; but when dealing with parallelized
1475  * paths we must do it like this to get the right answer.
1476  */
1477  if (param_info)
1478  qpquals = list_concat_copy(param_info->ppi_clauses,
1479  baserel->baserestrictinfo);
1480  else
1481  qpquals = baserel->baserestrictinfo;
1482 
1483  path->path.rows = clamp_row_est(path->subpath->rows *
1484  clauselist_selectivity(root,
1485  qpquals,
1486  0,
1487  JOIN_INNER,
1488  NULL));
1489 
1490  /*
1491  * Cost of path is cost of evaluating the subplan, plus cost of evaluating
1492  * any restriction clauses and tlist that will be attached to the
1493  * SubqueryScan node, plus cpu_tuple_cost to account for selection and
1494  * projection overhead.
1495  */
1496  path->path.disabled_nodes = path->subpath->disabled_nodes;
1497  path->path.startup_cost = path->subpath->startup_cost;
1498  path->path.total_cost = path->subpath->total_cost;
1499 
1500  /*
1501  * However, if there are no relevant restriction clauses and the
1502  * pathtarget is trivial, then we expect that setrefs.c will optimize away
1503  * the SubqueryScan plan node altogether, so we should just make its cost
1504  * and rowcount equal to the input path's.
1505  *
1506  * Note: there are some edge cases where createplan.c will apply a
1507  * different targetlist to the SubqueryScan node, thus falsifying our
1508  * current estimate of whether the target is trivial, and making the cost
1509  * estimate (though not the rowcount) wrong. It does not seem worth the
1510  * extra complication to try to account for that exactly, especially since
1511  * that behavior falsifies other cost estimates as well.
1512  */
1513  if (qpquals == NIL && trivial_pathtarget)
1514  return;
1515 
1516  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1517 
1518  startup_cost = qpqual_cost.startup;
1519  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1520  run_cost = cpu_per_tuple * path->subpath->rows;
1521 
1522  /* tlist eval costs are paid per output row, not per tuple scanned */
1523  startup_cost += path->path.pathtarget->cost.startup;
1524  run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
1525 
1526  path->path.startup_cost += startup_cost;
1527  path->path.total_cost += startup_cost + run_cost;
1528 }
1529 
1530 /*
1531  * cost_functionscan
1532  * Determines and returns the cost of scanning a function RTE.
1533  *
1534  * 'baserel' is the relation to be scanned
1535  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1536  */
1537 void
1538 cost_functionscan(Path *path, PlannerInfo *root,
1539  RelOptInfo *baserel, ParamPathInfo *param_info)
1540 {
1541  Cost startup_cost = 0;
1542  Cost run_cost = 0;
1543  QualCost qpqual_cost;
1544  Cost cpu_per_tuple;
1545  RangeTblEntry *rte;
1546  QualCost exprcost;
1547 
1548  /* Should only be applied to base relations that are functions */
1549  Assert(baserel->relid > 0);
1550  rte = planner_rt_fetch(baserel->relid, root);
1551  Assert(rte->rtekind == RTE_FUNCTION);
1552 
1553  /* Mark the path with the correct row estimate */
1554  if (param_info)
1555  path->rows = param_info->ppi_rows;
1556  else
1557  path->rows = baserel->rows;
1558 
1559  /*
1560  * Estimate costs of executing the function expression(s).
1561  *
1562  * Currently, nodeFunctionscan.c always executes the functions to
1563  * completion before returning any rows, and caches the results in a
1564  * tuplestore. So the function eval cost is all startup cost, and per-row
1565  * costs are minimal.
1566  *
1567  * XXX in principle we ought to charge tuplestore spill costs if the
1568  * number of rows is large. However, given how phony our rowcount
1569  * estimates for functions tend to be, there's not a lot of point in that
1570  * refinement right now.
1571  */
1572  cost_qual_eval_node(&exprcost, (Node *) rte->functions, root);
1573 
1574  startup_cost += exprcost.startup + exprcost.per_tuple;
1575 
1576  /* Add scanning CPU costs */
1577  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1578 
1579  startup_cost += qpqual_cost.startup;
1580  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1581  run_cost += cpu_per_tuple * baserel->tuples;
1582 
1583  /* tlist eval costs are paid per output row, not per tuple scanned */
1584  startup_cost += path->pathtarget->cost.startup;
1585  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1586 
1587  path->disabled_nodes = 0;
1588  path->startup_cost = startup_cost;
1589  path->total_cost = startup_cost + run_cost;
1590 }
1591 
1592 /*
1593  * cost_tablefuncscan
1594  * Determines and returns the cost of scanning a table function.
1595  *
1596  * 'baserel' is the relation to be scanned
1597  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1598  */
1599 void
1600 cost_tablefuncscan(Path *path, PlannerInfo *root,
1601  RelOptInfo *baserel, ParamPathInfo *param_info)
1602 {
1603  Cost startup_cost = 0;
1604  Cost run_cost = 0;
1605  QualCost qpqual_cost;
1606  Cost cpu_per_tuple;
1607  RangeTblEntry *rte;
1608  QualCost exprcost;
1609 
1610  /* Should only be applied to base relations that are functions */
1611  Assert(baserel->relid > 0);
1612  rte = planner_rt_fetch(baserel->relid, root);
1613  Assert(rte->rtekind == RTE_TABLEFUNC);
1614 
1615  /* Mark the path with the correct row estimate */
1616  if (param_info)
1617  path->rows = param_info->ppi_rows;
1618  else
1619  path->rows = baserel->rows;
1620 
1621  /*
1622  * Estimate costs of executing the table func expression(s).
1623  *
1624  * XXX in principle we ought to charge tuplestore spill costs if the
1625  * number of rows is large. However, given how phony our rowcount
1626  * estimates for tablefuncs tend to be, there's not a lot of point in that
1627  * refinement right now.
1628  */
1629  cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root);
1630 
1631  startup_cost += exprcost.startup + exprcost.per_tuple;
1632 
1633  /* Add scanning CPU costs */
1634  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1635 
1636  startup_cost += qpqual_cost.startup;
1637  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1638  run_cost += cpu_per_tuple * baserel->tuples;
1639 
1640  /* tlist eval costs are paid per output row, not per tuple scanned */
1641  startup_cost += path->pathtarget->cost.startup;
1642  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1643 
1644  path->disabled_nodes = 0;
1645  path->startup_cost = startup_cost;
1646  path->total_cost = startup_cost + run_cost;
1647 }
1648 
1649 /*
1650  * cost_valuesscan
1651  * Determines and returns the cost of scanning a VALUES RTE.
1652  *
1653  * 'baserel' is the relation to be scanned
1654  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
1655  */
1656 void
1657 cost_valuesscan(Path *path, PlannerInfo *root,
1658  RelOptInfo *baserel, ParamPathInfo *param_info)
1659 {
1660  Cost startup_cost = 0;
1661  Cost run_cost = 0;
1662  QualCost qpqual_cost;
1663  Cost cpu_per_tuple;
1664 
1665  /* Should only be applied to base relations that are values lists */
1666  Assert(baserel->relid > 0);
1667  Assert(baserel->rtekind == RTE_VALUES);
1668 
1669  /* Mark the path with the correct row estimate */
1670  if (param_info)
1671  path->rows = param_info->ppi_rows;
1672  else
1673  path->rows = baserel->rows;
1674 
1675  /*
1676  * For now, estimate list evaluation cost at one operator eval per list
1677  * (probably pretty bogus, but is it worth being smarter?)
1678  */
1679  cpu_per_tuple = cpu_operator_cost;
1680 
1681  /* Add scanning CPU costs */
1682  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1683 
1684  startup_cost += qpqual_cost.startup;
1685  cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
1686  run_cost += cpu_per_tuple * baserel->tuples;
1687 
1688  /* tlist eval costs are paid per output row, not per tuple scanned */
1689  startup_cost += path->pathtarget->cost.startup;
1690  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1691 
1692  path->disabled_nodes = 0;
1693  path->startup_cost = startup_cost;
1694  path->total_cost = startup_cost + run_cost;
1695 }
1696 
1697 /*
1698  * cost_ctescan
1699  * Determines and returns the cost of scanning a CTE RTE.
1700  *
1701  * Note: this is used for both self-reference and regular CTEs; the
1702  * possible cost differences are below the threshold of what we could
1703  * estimate accurately anyway. Note that the costs of evaluating the
1704  * referenced CTE query are added into the final plan as initplan costs,
1705  * and should NOT be counted here.
1706  */
1707 void
1708 cost_ctescan(Path *path, PlannerInfo *root,
1709  RelOptInfo *baserel, ParamPathInfo *param_info)
1710 {
1711  Cost startup_cost = 0;
1712  Cost run_cost = 0;
1713  QualCost qpqual_cost;
1714  Cost cpu_per_tuple;
1715 
1716  /* Should only be applied to base relations that are CTEs */
1717  Assert(baserel->relid > 0);
1718  Assert(baserel->rtekind == RTE_CTE);
1719 
1720  /* Mark the path with the correct row estimate */
1721  if (param_info)
1722  path->rows = param_info->ppi_rows;
1723  else
1724  path->rows = baserel->rows;
1725 
1726  /* Charge one CPU tuple cost per row for tuplestore manipulation */
1727  cpu_per_tuple = cpu_tuple_cost;
1728 
1729  /* Add scanning CPU costs */
1730  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1731 
1732  startup_cost += qpqual_cost.startup;
1733  cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
1734  run_cost += cpu_per_tuple * baserel->tuples;
1735 
1736  /* tlist eval costs are paid per output row, not per tuple scanned */
1737  startup_cost += path->pathtarget->cost.startup;
1738  run_cost += path->pathtarget->cost.per_tuple * path->rows;
1739 
1740  path->disabled_nodes = 0;
1741  path->startup_cost = startup_cost;
1742  path->total_cost = startup_cost + run_cost;
1743 }
1744 
1745 /*
1746  * cost_namedtuplestorescan
1747  * Determines and returns the cost of scanning a named tuplestore.
1748  */
1749 void
1750 cost_namedtuplestorescan(Path *path, PlannerInfo *root,
1751  RelOptInfo *baserel, ParamPathInfo *param_info)
1752 {
1753  Cost startup_cost = 0;
1754  Cost run_cost = 0;
1755  QualCost qpqual_cost;
1756  Cost cpu_per_tuple;
1757 
1758  /* Should only be applied to base relations that are Tuplestores */
1759  Assert(baserel->relid > 0);
1760  Assert(baserel->rtekind == RTE_NAMEDTUPLESTORE);
1761 
1762  /* Mark the path with the correct row estimate */
1763  if (param_info)
1764  path->rows = param_info->ppi_rows;
1765  else
1766  path->rows = baserel->rows;
1767 
1768  /* Charge one CPU tuple cost per row for tuplestore manipulation */
1769  cpu_per_tuple = cpu_tuple_cost;
1770 
1771  /* Add scanning CPU costs */
1772  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1773 
1774  startup_cost += qpqual_cost.startup;
1775  cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple;
1776  run_cost += cpu_per_tuple * baserel->tuples;
1777 
1778  path->disabled_nodes = 0;
1779  path->startup_cost = startup_cost;
1780  path->total_cost = startup_cost + run_cost;
1781 }
1782 
1783 /*
1784  * cost_resultscan
1785  * Determines and returns the cost of scanning an RTE_RESULT relation.
1786  */
1787 void
1788 cost_resultscan(Path *path, PlannerInfo *root,
1789  RelOptInfo *baserel, ParamPathInfo *param_info)
1790 {
1791  Cost startup_cost = 0;
1792  Cost run_cost = 0;
1793  QualCost qpqual_cost;
1794  Cost cpu_per_tuple;
1795 
1796  /* Should only be applied to RTE_RESULT base relations */
1797  Assert(baserel->relid > 0);
1798  Assert(baserel->rtekind == RTE_RESULT);
1799 
1800  /* Mark the path with the correct row estimate */
1801  if (param_info)
1802  path->rows = param_info->ppi_rows;
1803  else
1804  path->rows = baserel->rows;
1805 
1806  /* We charge qual cost plus cpu_tuple_cost */
1807  get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
1808 
1809  startup_cost += qpqual_cost.startup;
1810  cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
1811  run_cost += cpu_per_tuple * baserel->tuples;
1812 
1813  path->disabled_nodes = 0;
1814  path->startup_cost = startup_cost;
1815  path->total_cost = startup_cost + run_cost;
1816 }
1817 
1818 /*
1819  * cost_recursive_union
1820  * Determines and returns the cost of performing a recursive union,
1821  * and also the estimated output size.
1822  *
1823  * We are given Paths for the nonrecursive and recursive terms.
1824  */
1825 void
1826 cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
1827 {
1828  Cost startup_cost;
1829  Cost total_cost;
1830  double total_rows;
1831 
1832  /* We probably have decent estimates for the non-recursive term */
1833  startup_cost = nrterm->startup_cost;
1834  total_cost = nrterm->total_cost;
1835  total_rows = nrterm->rows;
1836 
1837  /*
1838  * We arbitrarily assume that about 10 recursive iterations will be
1839  * needed, and that we've managed to get a good fix on the cost and output
1840  * size of each one of them. These are mighty shaky assumptions but it's
1841  * hard to see how to do better.
1842  */
1843  total_cost += 10 * rterm->total_cost;
1844  total_rows += 10 * rterm->rows;
1845 
1846  /*
1847  * Also charge cpu_tuple_cost per row to account for the costs of
1848  * manipulating the tuplestores. (We don't worry about possible
1849  * spill-to-disk costs.)
1850  */
1851  total_cost += cpu_tuple_cost * total_rows;
1852 
1853  runion->disabled_nodes = nrterm->disabled_nodes + rterm->disabled_nodes;
1854  runion->startup_cost = startup_cost;
1855  runion->total_cost = total_cost;
1856  runion->rows = total_rows;
1857  runion->pathtarget->width = Max(nrterm->pathtarget->width,
1858  rterm->pathtarget->width);
1859 }
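
A quick numeric sketch of the ten-iteration heuristic above, using made-up term estimates (illustrative only, and the stock cpu_tuple_cost is assumed):

#include <stdio.h>

int
main(void)
{
	double	cpu_tuple_cost = 0.01;	/* stock default, assumed unchanged */

	/* illustrative estimates for the two terms */
	double	nrterm_total = 50.0, nrterm_rows = 100.0;
	double	rterm_total = 80.0, rterm_rows = 100.0;

	/* ten assumed recursive iterations, plus tuplestore handling per row */
	double	total_rows = nrterm_rows + 10 * rterm_rows;
	double	total_cost = nrterm_total + 10 * rterm_total +
		cpu_tuple_cost * total_rows;

	printf("rows=%.0f total_cost=%.2f\n", total_rows, total_cost);
	return 0;
}
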
1860 
1861 /*
1862  * cost_tuplesort
1863  * Determines and returns the cost of sorting a relation using tuplesort,
1864  * not including the cost of reading the input data.
1865  *
1866  * If the total volume of data to sort is less than sort_mem, we will do
1867  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
1868  * comparisons for t tuples.
1869  *
1870  * If the total volume exceeds sort_mem, we switch to a tape-style merge
1871  * algorithm. There will still be about t*log2(t) tuple comparisons in
1872  * total, but we will also need to write and read each tuple once per
1873  * merge pass. We expect about ceil(logM(r)) merge passes where r is the
1874  * number of initial runs formed and M is the merge order used by tuplesort.c.
1875  * Since the average initial run should be about sort_mem, we have
1876  * disk traffic = 2 * relsize * ceil(logM(relsize / sort_mem))
1877  * cpu = comparison_cost * t * log2(t)
1878  *
1879  * If the sort is bounded (i.e., only the first k result tuples are needed)
1880  * and k tuples can fit into sort_mem, we use a heap method that keeps only
1881  * k tuples in the heap; this will require about t*log2(k) tuple comparisons.
1882  *
1883  * The disk traffic is assumed to be 3/4ths sequential and 1/4th random
1884  * accesses (XXX can't we refine that guess?)
1885  *
1886  * By default, we charge two operator evals per tuple comparison, which should
1887  * be in the right ballpark in most cases. The caller can tweak this by
1888  * specifying nonzero comparison_cost; typically that's used for any extra
1889  * work that has to be done to prepare the inputs to the comparison operators.
1890  *
1891  * 'tuples' is the number of tuples in the relation
1892  * 'width' is the average tuple width in bytes
1893  * 'comparison_cost' is the extra cost per comparison, if any
1894  * 'sort_mem' is the number of kilobytes of work memory allowed for the sort
1895  * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
1896  */
1897 static void
1898 cost_tuplesort(Cost *startup_cost, Cost *run_cost,
1899  double tuples, int width,
1900  Cost comparison_cost, int sort_mem,
1901  double limit_tuples)
1902 {
1903  double input_bytes = relation_byte_size(tuples, width);
1904  double output_bytes;
1905  double output_tuples;
1906  long sort_mem_bytes = sort_mem * 1024L;
1907 
1908  /*
1909  * We want to be sure the cost of a sort is never estimated as zero, even
1910  * if passed-in tuple count is zero. Besides, mustn't do log(0)...
1911  */
1912  if (tuples < 2.0)
1913  tuples = 2.0;
1914 
1915  /* Include the default cost-per-comparison */
1916  comparison_cost += 2.0 * cpu_operator_cost;
1917 
1918  /* Do we have a useful LIMIT? */
1919  if (limit_tuples > 0 && limit_tuples < tuples)
1920  {
1921  output_tuples = limit_tuples;
1922  output_bytes = relation_byte_size(output_tuples, width);
1923  }
1924  else
1925  {
1926  output_tuples = tuples;
1927  output_bytes = input_bytes;
1928  }
1929 
1930  if (output_bytes > sort_mem_bytes)
1931  {
1932  /*
1933  * We'll have to use a disk-based sort of all the tuples
1934  */
1935  double npages = ceil(input_bytes / BLCKSZ);
1936  double nruns = input_bytes / sort_mem_bytes;
1937  double mergeorder = tuplesort_merge_order(sort_mem_bytes);
1938  double log_runs;
1939  double npageaccesses;
1940 
1941  /*
1942  * CPU costs
1943  *
1944  * Assume about N log2 N comparisons
1945  */
1946  *startup_cost = comparison_cost * tuples * LOG2(tuples);
1947 
1948  /* Disk costs */
1949 
1950  /* Compute logM(r) as log(r) / log(M) */
1951  if (nruns > mergeorder)
1952  log_runs = ceil(log(nruns) / log(mergeorder));
1953  else
1954  log_runs = 1.0;
1955  npageaccesses = 2.0 * npages * log_runs;
1956  /* Assume 3/4ths of accesses are sequential, 1/4th are not */
1957  *startup_cost += npageaccesses *
1958  (seq_page_cost * 0.75 + random_page_cost * 0.25);
1959  }
1960  else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
1961  {
1962  /*
1963  * We'll use a bounded heap-sort keeping just K tuples in memory, for
1964  * a total number of tuple comparisons of N log2 K; but the constant
1965  * factor is a bit higher than for quicksort. Tweak it so that the
1966  * cost curve is continuous at the crossover point.
1967  */
1968  *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
1969  }
1970  else
1971  {
1972  /* We'll use plain quicksort on all the input tuples */
1973  *startup_cost = comparison_cost * tuples * LOG2(tuples);
1974  }
1975 
1976  /*
1977  * Also charge a small amount (arbitrarily set equal to operator cost) per
1978  * extracted tuple. We don't charge cpu_tuple_cost because a Sort node
1979  * doesn't do qual-checking or projection, so it has less overhead than
1980  * most plan nodes. Note it's correct to use tuples not output_tuples
1981  * here --- the upper LIMIT will pro-rate the run cost so we'd be double
1982  * counting the LIMIT otherwise.
1983  */
1984  *run_cost = cpu_operator_cost * tuples;
1985 }
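
The disk-sort branch is the least obvious part, so here is a standalone sketch of it, assuming the stock cost defaults, an 8kB block size, and a guessed merge order (tuplesort_merge_order() depends on the actual memory budget; 16 here is only an assumption). The byte count also ignores the per-tuple overhead that relation_byte_size() adds:

#include <stdio.h>
#include <math.h>

int
main(void)
{
	/* stock cost defaults, assumed unchanged */
	double	seq_page_cost = 1.0;
	double	random_page_cost = 4.0;
	double	cpu_operator_cost = 0.0025;
	double	block_size = 8192.0;

	/* illustrative sort: 1 million 100-byte tuples with 1MB of sort memory */
	double	tuples = 1e6;
	double	width = 100.0;
	double	input_bytes = tuples * width;
	double	sort_mem_bytes = 1024.0 * 1024.0;
	double	mergeorder = 16.0;		/* assumed tuplesort_merge_order() result */

	double	comparison_cost = 2.0 * cpu_operator_cost;
	double	startup_cost = comparison_cost * tuples * log2(tuples);
	double	run_cost = cpu_operator_cost * tuples;

	if (input_bytes > sort_mem_bytes)
	{
		double	npages = ceil(input_bytes / block_size);
		double	nruns = input_bytes / sort_mem_bytes;
		double	log_runs = (nruns > mergeorder) ?
			ceil(log(nruns) / log(mergeorder)) : 1.0;
		double	npageaccesses = 2.0 * npages * log_runs;

		/* 3/4 sequential, 1/4 random page accesses, as in the code above */
		startup_cost += npageaccesses *
			(seq_page_cost * 0.75 + random_page_cost * 0.25);
	}

	printf("startup=%.0f run=%.0f\n", startup_cost, run_cost);
	return 0;
}
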
1986 
1987 /*
1988  * cost_incremental_sort
1989  * Determines and returns the cost of sorting a relation incrementally, when
1990  * the input path is presorted by a prefix of the pathkeys.
1991  *
1992  * 'presorted_keys' is the number of leading pathkeys by which the input path
1993  * is sorted.
1994  *
1995  * We estimate the number of groups into which the relation is divided by the
1996  * leading pathkeys, and then calculate the cost of sorting a single group
1997  * with tuplesort using cost_tuplesort().
1998  */
1999 void
2000 cost_incremental_sort(Path *path,
2001  PlannerInfo *root, List *pathkeys, int presorted_keys,
2002  int input_disabled_nodes,
2003  Cost input_startup_cost, Cost input_total_cost,
2004  double input_tuples, int width, Cost comparison_cost, int sort_mem,
2005  double limit_tuples)
2006 {
2007  Cost startup_cost,
2008  run_cost,
2009  input_run_cost = input_total_cost - input_startup_cost;
2010  double group_tuples,
2011  input_groups;
2012  Cost group_startup_cost,
2013  group_run_cost,
2014  group_input_run_cost;
2015  List *presortedExprs = NIL;
2016  ListCell *l;
2017  bool unknown_varno = false;
2018 
2019  Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys));
2020 
2021  /*
2022  * We want to be sure the cost of a sort is never estimated as zero, even
2023  * if passed-in tuple count is zero. Besides, mustn't do log(0)...
2024  */
2025  if (input_tuples < 2.0)
2026  input_tuples = 2.0;
2027 
2028  /* Default estimate of number of groups, capped to one group per row. */
2029  input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
2030 
2031  /*
2032  * Extract presorted keys as list of expressions.
2033  *
2034  * We need to be careful about Vars containing "varno 0" which might have
2035  * been introduced by generate_append_tlist, which would confuse
2036  * estimate_num_groups (in fact it'd fail for such expressions). See
2037  * recurse_set_operations which has to deal with the same issue.
2038  *
2039  * Unlike recurse_set_operations we can't access the original target list
2040  * here, and even if we could it's not very clear how useful would that be
2041  * for a set operation combining multiple tables. So we simply detect if
2042  * there are any expressions with "varno 0" and use the default
2043  * DEFAULT_NUM_DISTINCT in that case.
2044  *
2045  * We might also use either 1.0 (a single group) or input_tuples (each row
2046  * being a separate group), pretty much the worst and best case for
2047  * incremental sort. But those are extreme cases and using something in
2048  * between seems reasonable. Furthermore, generate_append_tlist is used
2049  * for set operations, which are likely to produce mostly unique output
2050  * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive
2051  * while maintaining lower startup cost.
2052  */
2053  foreach(l, pathkeys)
2054  {
2055  PathKey *key = (PathKey *) lfirst(l);
2056  EquivalenceMember *member = (EquivalenceMember *)
2057  linitial(key->pk_eclass->ec_members);
2058 
2059  /*
2060  * Check if the expression contains Var with "varno 0" so that we
2061  * don't call estimate_num_groups in that case.
2062  */
2063  if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr)))
2064  {
2065  unknown_varno = true;
2066  break;
2067  }
2068 
2069  /* expression not containing any Vars with "varno 0" */
2070  presortedExprs = lappend(presortedExprs, member->em_expr);
2071 
2072  if (foreach_current_index(l) + 1 >= presorted_keys)
2073  break;
2074  }
2075 
2076  /* Estimate the number of groups with equal presorted keys. */
2077  if (!unknown_varno)
2078  input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
2079  NULL, NULL);
2080 
2081  group_tuples = input_tuples / input_groups;
2082  group_input_run_cost = input_run_cost / input_groups;
2083 
2084  /*
2085  * Estimate the average cost of sorting of one group where presorted keys
2086  * are equal.
2087  */
2088  cost_tuplesort(&group_startup_cost, &group_run_cost,
2089  group_tuples, width, comparison_cost, sort_mem,
2090  limit_tuples);
2091 
2092  /*
2093  * Startup cost of incremental sort is the startup cost of its first group
2094  * plus the cost of its input.
2095  */
2096  startup_cost = group_startup_cost + input_startup_cost +
2097  group_input_run_cost;
2098 
2099  /*
2100  * After we started producing tuples from the first group, the cost of
2101  * producing all the tuples is given by the cost to finish processing this
2102  * group, plus the total cost to process the remaining groups, plus the
2103  * remaining cost of input.
2104  */
2105  run_cost = group_run_cost + (group_run_cost + group_startup_cost) *
2106  (input_groups - 1) + group_input_run_cost * (input_groups - 1);
2107 
2108  /*
2109  * Incremental sort adds some overhead by itself. Firstly, it has to
2110  * detect the sort groups. This is roughly equal to one extra copy and
2111  * comparison per tuple.
2112  */
2113  run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples;
2114 
2115  /*
2116  * Additionally, we charge double cpu_tuple_cost for each input group to
2117  * account for the tuplesort_reset that's performed after each group.
2118  */
2119  run_cost += 2.0 * cpu_tuple_cost * input_groups;
2120 
2121  path->rows = input_tuples;
2122 
2123  /* should not generate these paths when enable_incremental_sort=false */
2124  Assert(enable_incremental_sort);
2125  path->disabled_nodes = input_disabled_nodes;
2126 
2127  path->startup_cost = startup_cost;
2128  path->total_cost = startup_cost + run_cost;
2129 }
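
The composition of the per-group figures into path costs can be followed with a small standalone sketch; the group-level numbers stand in for what cost_tuplesort() would report and are purely illustrative:

#include <stdio.h>

int
main(void)
{
	double	cpu_tuple_cost = 0.01;	/* stock default, assumed unchanged */

	/* illustrative input: 10000 tuples falling into 100 presorted groups */
	double	input_tuples = 10000.0;
	double	input_groups = 100.0;
	double	input_startup = 0.0;
	double	input_run = 200.0;
	double	comparison_cost = 0.0;	/* no extra per-comparison cost */

	/* pretend cost_tuplesort() reported this for one 100-tuple group */
	double	group_startup = 3.3;
	double	group_run = 0.25;
	double	group_input_run = input_run / input_groups;

	double	startup = group_startup + input_startup + group_input_run;
	double	run = group_run +
		(group_run + group_startup) * (input_groups - 1) +
		group_input_run * (input_groups - 1) +
		(cpu_tuple_cost + comparison_cost) * input_tuples +	/* group detection */
		2.0 * cpu_tuple_cost * input_groups;				/* tuplesort_reset */

	printf("startup=%.2f total=%.2f\n", startup, startup + run);
	return 0;
}
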
2130 
2131 /*
2132  * cost_sort
2133  * Determines and returns the cost of sorting a relation, including
2134  * the cost of reading the input data.
2135  *
2136  * NOTE: some callers currently pass NIL for pathkeys because they
2137  * can't conveniently supply the sort keys. Since this routine doesn't
2138  * currently do anything with pathkeys anyway, that doesn't matter...
2139  * but if it ever does, it should react gracefully to lack of key data.
2140  * (Actually, the thing we'd most likely be interested in is just the number
2141  * of sort keys, which all callers *could* supply.)
2142  */
2143 void
2144 cost_sort(Path *path, PlannerInfo *root,
2145  List *pathkeys, int input_disabled_nodes,
2146  Cost input_cost, double tuples, int width,
2147  Cost comparison_cost, int sort_mem,
2148  double limit_tuples)
2149 
2150 {
2151  Cost startup_cost;
2152  Cost run_cost;
2153 
2154  cost_tuplesort(&startup_cost, &run_cost,
2155  tuples, width,
2156  comparison_cost, sort_mem,
2157  limit_tuples);
2158 
2159  startup_cost += input_cost;
2160 
2161  path->rows = tuples;
2162  path->disabled_nodes = input_disabled_nodes + (enable_sort ? 0 : 1);
2163  path->startup_cost = startup_cost;
2164  path->total_cost = startup_cost + run_cost;
2165 }
2166 
2167 /*
2168  * append_nonpartial_cost
2169  * Estimate the cost of the non-partial paths in a Parallel Append.
2170  * The non-partial paths are assumed to be the first "numpaths" paths
2171  * from the subpaths list, and to be in order of decreasing cost.
2172  */
2173 static Cost
2174 append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
2175 {
2176  Cost *costarr;
2177  int arrlen;
2178  ListCell *l;
2179  ListCell *cell;
2180  int path_index;
2181  int min_index;
2182  int max_index;
2183 
2184  if (numpaths == 0)
2185  return 0;
2186 
2187  /*
2188  * Array length is number of workers or number of relevant paths,
2189  * whichever is less.
2190  */
2191  arrlen = Min(parallel_workers, numpaths);
2192  costarr = (Cost *) palloc(sizeof(Cost) * arrlen);
2193 
2194  /* The first few paths will each be claimed by a different worker. */
2195  path_index = 0;
2196  foreach(cell, subpaths)
2197  {
2198  Path *subpath = (Path *) lfirst(cell);
2199 
2200  if (path_index == arrlen)
2201  break;
2202  costarr[path_index++] = subpath->total_cost;
2203  }
2204 
2205  /*
2206  * Since subpaths are sorted by decreasing cost, the last one will have
2207  * the minimum cost.
2208  */
2209  min_index = arrlen - 1;
2210 
2211  /*
2212  * For each of the remaining subpaths, add its cost to the array element
2213  * with minimum cost.
2214  */
2215  for_each_cell(l, subpaths, cell)
2216  {
2217  Path *subpath = (Path *) lfirst(l);
2218 
2219  /* Consider only the non-partial paths */
2220  if (path_index++ == numpaths)
2221  break;
2222 
2223  costarr[min_index] += subpath->total_cost;
2224 
2225  /* Update the new min cost array index */
2226  min_index = 0;
2227  for (int i = 0; i < arrlen; i++)
2228  {
2229  if (costarr[i] < costarr[min_index])
2230  min_index = i;
2231  }
2232  }
2233 
2234  /* Return the highest cost from the array */
2235  max_index = 0;
2236  for (int i = 0; i < arrlen; i++)
2237  {
2238  if (costarr[i] > costarr[max_index])
2239  max_index = i;
2240  }
2241 
2242  return costarr[max_index];
2243 }
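
The effect of this greedy assignment is easiest to see with concrete numbers. A standalone re-implementation over plain arrays (illustrative; it considers only the non-partial prefix of the list and caps the worker array at a fixed size):

#include <stdio.h>

/*
 * Mirror of the scheme above: the first arrlen paths go to distinct
 * workers, every later non-partial path is added to the currently
 * cheapest worker, and the most loaded worker determines the result.
 */
static double
nonpartial_cost(const double *costs, int numpaths, int parallel_workers)
{
	double	costarr[64];		/* sketch assumes at most 64 workers */
	int		arrlen = (parallel_workers < numpaths) ? parallel_workers : numpaths;
	int		min_index, max_index;

	if (numpaths == 0)
		return 0.0;
	if (arrlen > 64)
		arrlen = 64;

	for (int i = 0; i < arrlen; i++)
		costarr[i] = costs[i];

	/* subpaths are sorted by decreasing cost, so the last slot is cheapest */
	min_index = arrlen - 1;
	for (int i = arrlen; i < numpaths; i++)
	{
		costarr[min_index] += costs[i];
		min_index = 0;
		for (int j = 0; j < arrlen; j++)
			if (costarr[j] < costarr[min_index])
				min_index = j;
	}

	max_index = 0;
	for (int i = 0; i < arrlen; i++)
		if (costarr[i] > costarr[max_index])
			max_index = i;
	return costarr[max_index];
}

int
main(void)
{
	/* non-partial subpaths sorted by decreasing total_cost, two workers */
	double	costs[] = {100.0, 80.0, 30.0, 20.0};

	/* worker A gets 100+20=120, worker B gets 80+30=110, so the answer is 120 */
	printf("%.1f\n", nonpartial_cost(costs, 4, 2));
	return 0;
}
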
2244 
2245 /*
2246  * cost_append
2247  * Determines and returns the cost of an Append node.
2248  */
2249 void
2250 cost_append(AppendPath *apath)
2251 {
2252  ListCell *l;
2253 
2254  apath->path.disabled_nodes = 0;
2255  apath->path.startup_cost = 0;
2256  apath->path.total_cost = 0;
2257  apath->path.rows = 0;
2258 
2259  if (apath->subpaths == NIL)
2260  return;
2261 
2262  if (!apath->path.parallel_aware)
2263  {
2264  List *pathkeys = apath->path.pathkeys;
2265 
2266  if (pathkeys == NIL)
2267  {
2268  Path *firstsubpath = (Path *) linitial(apath->subpaths);
2269 
2270  /*
2271  * For an unordered, non-parallel-aware Append we take the startup
2272  * cost as the startup cost of the first subpath.
2273  */
2274  apath->path.startup_cost = firstsubpath->startup_cost;
2275 
2276  /*
2277  * Compute rows, number of disabled nodes, and total cost as sums
2278  * of underlying subplan values.
2279  */
2280  foreach(l, apath->subpaths)
2281  {
2282  Path *subpath = (Path *) lfirst(l);
2283 
2284  apath->path.rows += subpath->rows;
2285  apath->path.disabled_nodes += subpath->disabled_nodes;
2286  apath->path.total_cost += subpath->total_cost;
2287  }
2288  }
2289  else
2290  {
2291  /*
2292  * For an ordered, non-parallel-aware Append we take the startup
2293  * cost as the sum of the subpath startup costs. This ensures
2294  * that we don't underestimate the startup cost when a query's
2295  * LIMIT is such that several of the children have to be run to
2296  * satisfy it. This might be overkill --- another plausible hack
2297  * would be to take the Append's startup cost as the maximum of
2298  * the child startup costs. But we don't want to risk believing
2299  * that an ORDER BY LIMIT query can be satisfied at small cost
2300  * when the first child has small startup cost but later ones
2301  * don't. (If we had the ability to deal with nonlinear cost
2302  * interpolation for partial retrievals, we would not need to be
2303  * so conservative about this.)
2304  *
2305  * This case is also different from the above in that we have to
2306  * account for possibly injecting sorts into subpaths that aren't
2307  * natively ordered.
2308  */
2309  foreach(l, apath->subpaths)
2310  {
2311  Path *subpath = (Path *) lfirst(l);
2312  Path sort_path; /* dummy for result of cost_sort */
2313 
2314  if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
2315  {
2316  /*
2317  * We'll need to insert a Sort node, so include costs for
2318  * that. We can use the parent's LIMIT if any, since we
2319  * certainly won't pull more than that many tuples from
2320  * any child.
2321  */
2322  cost_sort(&sort_path,
2323  NULL, /* doesn't currently need root */
2324  pathkeys,
2325  subpath->disabled_nodes,
2326  subpath->total_cost,
2327  subpath->rows,
2328  subpath->pathtarget->width,
2329  0.0,
2330  work_mem,
2331  apath->limit_tuples);
2332  subpath = &sort_path;
2333  }
2334 
2335  apath->path.rows += subpath->rows;
2336  apath->path.disabled_nodes += subpath->disabled_nodes;
2337  apath->path.startup_cost += subpath->startup_cost;
2338  apath->path.total_cost += subpath->total_cost;
2339  }
2340  }
2341  }
2342  else /* parallel-aware */
2343  {
2344  int i = 0;
2345  double parallel_divisor = get_parallel_divisor(&apath->path);
2346 
2347  /* Parallel-aware Append never produces ordered output. */
2348  Assert(apath->path.pathkeys == NIL);
2349 
2350  /* Calculate startup cost. */
2351  foreach(l, apath->subpaths)
2352  {
2353  Path *subpath = (Path *) lfirst(l);
2354 
2355  /*
2356  * Append will start returning tuples when the child node having
2357  * lowest startup cost is done setting up. We consider only the
2358  * first few subplans that immediately get a worker assigned.
2359  */
2360  if (i == 0)
2361  apath->path.startup_cost = subpath->startup_cost;
2362  else if (i < apath->path.parallel_workers)
2363  apath->path.startup_cost = Min(apath->path.startup_cost,
2364  subpath->startup_cost);
2365 
2366  /*
2367  * Apply parallel divisor to subpaths. Scale the number of rows
2368  * for each partial subpath based on the ratio of the parallel
2369  * divisor originally used for the subpath to the one we adopted.
2370  * Also add the cost of partial paths to the total cost, but
2371  * ignore non-partial paths for now.
2372  */
2373  if (i < apath->first_partial_path)
2374  apath->path.rows += subpath->rows / parallel_divisor;
2375  else
2376  {
2377  double subpath_parallel_divisor;
2378 
2379  subpath_parallel_divisor = get_parallel_divisor(subpath);
2380  apath->path.rows += subpath->rows * (subpath_parallel_divisor /
2381  parallel_divisor);
2382  apath->path.total_cost += subpath->total_cost;
2383  }
2384 
2385  apath->path.disabled_nodes += subpath->disabled_nodes;
2386  apath->path.rows = clamp_row_est(apath->path.rows);
2387 
2388  i++;
2389  }
2390 
2391  /* Add cost for non-partial subpaths. */
2392  apath->path.total_cost +=
2393  append_nonpartial_cost(apath->subpaths,
2394  apath->first_partial_path,
2395  apath->path.parallel_workers);
2396  }
2397 
2398  /*
2399  * Although Append does not do any selection or projection, it's not free;
2400  * add a small per-tuple overhead.
2401  */
2402  apath->path.total_cost +=
2403  cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * apath->path.rows;
2404 }
2405 
2406 /*
2407  * cost_merge_append
2408  * Determines and returns the cost of a MergeAppend node.
2409  *
2410  * MergeAppend merges several pre-sorted input streams, using a heap that
2411  * at any given instant holds the next tuple from each stream. If there
2412  * are N streams, we need about N*log2(N) tuple comparisons to construct
2413  * the heap at startup, and then for each output tuple, about log2(N)
2414  * comparisons to replace the top entry.
2415  *
2416  * (The effective value of N will drop once some of the input streams are
2417  * exhausted, but it seems unlikely to be worth trying to account for that.)
2418  *
2419  * The heap is never spilled to disk, since we assume N is not very large.
2420  * So this is much simpler than cost_sort.
2421  *
2422  * As in cost_sort, we charge two operator evals per tuple comparison.
2423  *
2424  * 'pathkeys' is a list of sort keys
2425  * 'n_streams' is the number of input streams
2426  * 'input_disabled_nodes' is the sum of the input streams' disabled node counts
2427  * 'input_startup_cost' is the sum of the input streams' startup costs
2428  * 'input_total_cost' is the sum of the input streams' total costs
2429  * 'tuples' is the number of tuples in all the streams
2430  */
2431 void
2432 cost_merge_append(Path *path, PlannerInfo *root,
2433  List *pathkeys, int n_streams,
2434  int input_disabled_nodes,
2435  Cost input_startup_cost, Cost input_total_cost,
2436  double tuples)
2437 {
2438  Cost startup_cost = 0;
2439  Cost run_cost = 0;
2440  Cost comparison_cost;
2441  double N;
2442  double logN;
2443 
2444  /*
2445  * Avoid log(0)...
2446  */
2447  N = (n_streams < 2) ? 2.0 : (double) n_streams;
2448  logN = LOG2(N);
2449 
2450  /* Assumed cost per tuple comparison */
2451  comparison_cost = 2.0 * cpu_operator_cost;
2452 
2453  /* Heap creation cost */
2454  startup_cost += comparison_cost * N * logN;
2455 
2456  /* Per-tuple heap maintenance cost */
2457  run_cost += tuples * comparison_cost * logN;
2458 
2459  /*
2460  * Although MergeAppend does not do any selection or projection, it's not
2461  * free; add a small per-tuple overhead.
2462  */
2463  run_cost += cpu_tuple_cost * APPEND_CPU_COST_MULTIPLIER * tuples;
2464 
2465  path->disabled_nodes = input_disabled_nodes;
2466  path->startup_cost = startup_cost + input_startup_cost;
2467  path->total_cost = startup_cost + run_cost + input_total_cost;
2468 }
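
A standalone sketch of the heap arithmetic above, assuming the stock cost defaults and taking APPEND_CPU_COST_MULTIPLIER to be the 0.5 used elsewhere in this file (an assumption in this sketch):

#include <stdio.h>
#include <math.h>

int
main(void)
{
	/* stock defaults, assumed unchanged */
	double	cpu_operator_cost = 0.0025;
	double	cpu_tuple_cost = 0.01;
	double	append_cpu_cost_multiplier = 0.5;	/* assumed APPEND_CPU_COST_MULTIPLIER */

	/* illustrative merge of 8 presorted streams, 100000 tuples in total */
	double	n_streams = 8.0;
	double	tuples = 100000.0;

	double	N = (n_streams < 2) ? 2.0 : n_streams;
	double	logN = log2(N);
	double	comparison_cost = 2.0 * cpu_operator_cost;

	double	startup_cost = comparison_cost * N * logN;		/* heap creation */
	double	run_cost = tuples * comparison_cost * logN		/* heap maintenance */
		+ cpu_tuple_cost * append_cpu_cost_multiplier * tuples;	/* per-tuple overhead */

	printf("startup=%.3f run=%.1f\n", startup_cost, run_cost);
	return 0;
}
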
2469 
2470 /*
2471  * cost_material
2472  * Determines and returns the cost of materializing a relation, including
2473  * the cost of reading the input data.
2474  *
2475  * If the total volume of data to materialize exceeds work_mem, we will need
2476  * to write it to disk, so the cost is much higher in that case.
2477  *
2478  * Note that here we are estimating the costs for the first scan of the
2479  * relation, so the materialization is all overhead --- any savings will
2480  * occur only on rescan, which is estimated in cost_rescan.
2481  */
2482 void
2483 cost_material(Path *path,
2484  int input_disabled_nodes,
2485  Cost input_startup_cost, Cost input_total_cost,
2486  double tuples, int width)
2487 {
2488  Cost startup_cost = input_startup_cost;
2489  Cost run_cost = input_total_cost - input_startup_cost;
2490  double nbytes = relation_byte_size(tuples, width);
2491  long work_mem_bytes = work_mem * 1024L;
2492 
2493  path->rows = tuples;
2494 
2495  /*
2496  * Whether spilling or not, charge 2x cpu_operator_cost per tuple to
2497  * reflect bookkeeping overhead. (This rate must be more than what
2498  * cost_rescan charges for materialize, ie, cpu_operator_cost per tuple;
2499  * if it is exactly the same then there will be a cost tie between
2500  * nestloop with A outer, materialized B inner and nestloop with B outer,
2501  * materialized A inner. The extra cost ensures we'll prefer
2502  * materializing the smaller rel.) Note that this is normally a good deal
2503  * less than cpu_tuple_cost; which is OK because a Material plan node
2504  * doesn't do qual-checking or projection, so it's got less overhead than
2505  * most plan nodes.
2506  */
2507  run_cost += 2 * cpu_operator_cost * tuples;
2508 
2509  /*
2510  * If we will spill to disk, charge at the rate of seq_page_cost per page.
2511  * This cost is assumed to be evenly spread through the plan run phase,
2512  * which isn't exactly accurate but our cost model doesn't allow for
2513  * nonuniform costs within the run phase.
2514  */
2515  if (nbytes > work_mem_bytes)
2516  {
2517  double npages = ceil(nbytes / BLCKSZ);
2518 
2519  run_cost += seq_page_cost * npages;
2520  }
2521 
2522  path->disabled_nodes = input_disabled_nodes + (enable_material ? 0 : 1);
2523  path->startup_cost = startup_cost;
2524  path->total_cost = startup_cost + run_cost;
2525 }
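
A standalone sketch of the spill decision above, assuming stock cost defaults, an 8kB block size, and a byte count that ignores relation_byte_size()'s per-tuple overhead (illustrative values throughout):

#include <stdio.h>
#include <math.h>

int
main(void)
{
	/* stock defaults, assumed unchanged */
	double	cpu_operator_cost = 0.0025;
	double	seq_page_cost = 1.0;
	double	block_size = 8192.0;

	/* illustrative input: 1 million 64-byte rows, 4MB of work memory */
	double	tuples = 1e6;
	double	width = 64.0;
	double	nbytes = tuples * width;
	double	work_mem_bytes = 4096.0 * 1024.0;

	/* bookkeeping overhead charged whether or not we spill */
	double	run_cost = 2 * cpu_operator_cost * tuples;

	/* spill to disk at seq_page_cost per page once work_mem is exceeded */
	if (nbytes > work_mem_bytes)
		run_cost += seq_page_cost * ceil(nbytes / block_size);

	printf("materialization overhead added to run cost: %.1f\n", run_cost);
	return 0;
}
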
2526 
2527 /*
2528  * cost_memoize_rescan
2529  * Determines the estimated cost of rescanning a Memoize node.
2530  *
2531  * In order to estimate this, we must gain knowledge of how often we expect to
2532  * be called and how many distinct sets of parameters we are likely to be
2533  * called with. If we expect a good cache hit ratio, then we can set our
2534  * costs to account for that hit ratio, plus a little bit of cost for the
2535  * caching itself. Caching will not work out well if we expect to be called
2536  * with too many distinct parameter values. The worst-case here is that we
2537  * never see any parameter value twice, in which case we'd never get a cache
2538  * hit and caching would be a complete waste of effort.
2539  */
2540 static void
2541 cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
2542  Cost *rescan_startup_cost, Cost *rescan_total_cost)
2543 {
2544  EstimationInfo estinfo;
2545  ListCell *lc;
2546  Cost input_startup_cost = mpath->subpath->startup_cost;
2547  Cost input_total_cost = mpath->subpath->total_cost;
2548  double tuples = mpath->subpath->rows;
2549  double calls = mpath->calls;
2550  int width = mpath->subpath->pathtarget->width;
2551 
2552  double hash_mem_bytes;
2553  double est_entry_bytes;
2554  double est_cache_entries;
2555  double ndistinct;
2556  double evict_ratio;
2557  double hit_ratio;
2558  Cost startup_cost;
2559  Cost total_cost;
2560 
2561  /* available cache space */
2562  hash_mem_bytes = get_hash_memory_limit();
2563 
2564  /*
2565  * Set the number of bytes each cache entry should consume in the cache.
2566  * To provide us with better estimations on how many cache entries we can
2567  * store at once, we make a call to the executor here to ask it what
2568  * memory overheads there are for a single cache entry.
2569  */
2570  est_entry_bytes = relation_byte_size(tuples, width) +
2571  ExecEstimateCacheEntryOverheadBytes(tuples);
2572
2573  /* include the estimated width for the cache keys */
2574  foreach(lc, mpath->param_exprs)
2575  est_entry_bytes += get_expr_width(root, (Node *) lfirst(lc));
2576 
2577  /* estimate on the upper limit of cache entries we can hold at once */
2578  est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
2579 
2580  /* estimate on the distinct number of parameter values */
2581  ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL,
2582  &estinfo);
2583 
2584  /*
2585  * When the estimation fell back on using a default value, it's a bit too
2586  * risky to assume that it's ok to use a Memoize node. The use of a
2587  * default could cause us to use a Memoize node when it's really
2588  * inappropriate to do so. If we see that this has been done, then we'll
2589  * assume that every call will have unique parameters, which will almost
2590  * certainly mean a MemoizePath will never survive add_path().
2591  */
2592  if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0)
2593  ndistinct = calls;
2594 
2595  /*
2596  * Since we've already estimated the maximum number of entries we can
2597  * store at once and know the estimated number of distinct values we'll be
2598  * called with, we'll take this opportunity to set the path's est_entries.
2599  * This will ultimately determine the hash table size that the executor
2600  * will use. If we leave this at zero, the executor will just choose the
2601  * size itself. Really this is not the right place to do this, but it's
2602  * convenient since everything is already calculated.
2603  */
2604  mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
2605  PG_UINT32_MAX);
2606 
2607  /*
2608  * When the number of distinct parameter values is above the amount we can
2609  * store in the cache, then we'll have to evict some entries from the
2610  * cache. This is not free. Here we estimate how often we'll incur the
2611  * cost of that eviction.
2612  */
2613  evict_ratio = 1.0 - Min(est_cache_entries, ndistinct) / ndistinct;
2614 
2615  /*
2616  * In order to estimate how costly a single scan will be, we need to
2617  * attempt to estimate what the cache hit ratio will be. To do that we
2618  * must look at how many scans are estimated in total for this node and
2619  * how many of those scans we expect to get a cache hit.
2620  */
2621  hit_ratio = ((calls - ndistinct) / calls) *
2622  (est_cache_entries / Max(ndistinct, est_cache_entries));
2623 
2624  Assert(hit_ratio >= 0 && hit_ratio <= 1.0);
2625 
2626  /*
2627  * Set the total_cost accounting for the expected cache hit ratio. We
2628  * also add on a cpu_operator_cost to account for a cache lookup. This
2629  * will happen regardless of whether it's a cache hit or not.
2630  */
2631  total_cost = input_total_cost * (1.0 - hit_ratio) + cpu_operator_cost;
2632 
2633  /* Now adjust the total cost to account for cache evictions */
2634 
2635  /* Charge a cpu_tuple_cost for evicting the actual cache entry */
2636  total_cost += cpu_tuple_cost * evict_ratio;
2637 
2638  /*
2639  * Charge a 10th of cpu_operator_cost to evict every tuple in that entry.
2640  * The per-tuple eviction is really just a pfree, so charging a whole
2641  * cpu_operator_cost seems a little excessive.
2642  */
2643  total_cost += cpu_operator_cost / 10.0 * evict_ratio * tuples;
2644 
2645  /*
2646  * Now adjust for storing things in the cache, since that's not free
2647  * either. Everything must go in the cache. We don't proportion this
2648  * over any ratio, just apply it once for the scan. We charge a
2649  * cpu_tuple_cost for the creation of the cache entry and also a
2650  * cpu_operator_cost for each tuple we expect to cache.
2651  */
2652  total_cost += cpu_tuple_cost + cpu_operator_cost * tuples;
2653 
2654  * Getting the first row must also be proportioned according to the
2655  * Getting the first row must be also be proportioned according to the
2656  * expected cache hit ratio.
2657  */
2658  startup_cost = input_startup_cost * (1.0 - hit_ratio);
2659 
2660  /*
2661  * Additionally we charge a cpu_tuple_cost to account for cache lookups,
2662  * which we'll do regardless of whether it was a cache hit or not.
2663  */
2664  startup_cost += cpu_tuple_cost;
2665 
2666  *rescan_startup_cost = startup_cost;
2667  *rescan_total_cost = total_cost;
2668 }
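
The eviction-ratio and hit-ratio formulas above carry most of the weight of this estimate; a standalone sketch with illustrative numbers:

#include <stdio.h>

int
main(void)
{
	/* illustrative inputs, not taken from any real plan */
	double	calls = 10000.0;			/* expected rescans of the Memoize node */
	double	ndistinct = 200.0;			/* distinct parameter values */
	double	est_cache_entries = 150.0;	/* entries that fit in hash memory */

	double	kept = (est_cache_entries < ndistinct) ? est_cache_entries : ndistinct;
	double	evict_ratio = 1.0 - kept / ndistinct;

	double	denom = (ndistinct > est_cache_entries) ? ndistinct : est_cache_entries;
	double	hit_ratio = ((calls - ndistinct) / calls) * (est_cache_entries / denom);

	/* here: evict_ratio = 0.25, hit_ratio = 0.98 * 0.75 = 0.735 */
	printf("evict_ratio=%.3f hit_ratio=%.3f\n", evict_ratio, hit_ratio);

	/* a rescan then costs roughly input_total_cost * (1 - hit_ratio) plus lookups */
	return 0;
}
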
2669 
2670 /*
2671  * cost_agg
2672  * Determines and returns the cost of performing an Agg plan node,
2673  * including the cost of its input.
2674  *
2675  * aggcosts can be NULL when there are no actual aggregate functions (i.e.,
2676  * we are using a hashed Agg node just to do grouping).
2677  *
2678  * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
2679  * are for appropriately-sorted input.
2680  */
2681 void
2682 cost_agg(Path *path, PlannerInfo *root,
2683  AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
2684  int numGroupCols, double numGroups,
2685  List *quals,
2686  int disabled_nodes,
2687  Cost input_startup_cost, Cost input_total_cost,
2688  double input_tuples, double input_width)
2689 {
2690  double output_tuples;
2691  Cost startup_cost;
2692  Cost total_cost;
2693  AggClauseCosts dummy_aggcosts;
2694 
2695  /* Use all-zero per-aggregate costs if NULL is passed */
2696  if (aggcosts == NULL)
2697  {
2698  Assert(aggstrategy == AGG_HASHED);
2699  MemSet(&dummy_aggcosts, 0, sizeof(AggClauseCosts));
2700  aggcosts = &dummy_aggcosts;
2701  }
2702 
2703  /*
2704  * The transCost.per_tuple component of aggcosts should be charged once
2705  * per input tuple, corresponding to the costs of evaluating the aggregate
2706  * transfns and their input expressions. The finalCost.per_tuple component
2707  * is charged once per output tuple, corresponding to the costs of
2708  * evaluating the finalfns. Startup costs are of course charged but once.
2709  *
2710  * If we are grouping, we charge an additional cpu_operator_cost per
2711  * grouping column per input tuple for grouping comparisons.
2712  *
2713  * We will produce a single output tuple if not grouping, and a tuple per
2714  * group otherwise. We charge cpu_tuple_cost for each output tuple.
2715  *
2716  * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
2717  * same total CPU cost, but AGG_SORTED has lower startup cost. If the
2718  * input path is already sorted appropriately, AGG_SORTED should be
2719  * preferred (since it has no risk of memory overflow). This will happen
2720  * as long as the computed total costs are indeed exactly equal --- but if
2721  * there's roundoff error we might do the wrong thing. So be sure that
2722  * the computations below form the same intermediate values in the same
2723  * order.
2724  */
2725  if (aggstrategy == AGG_PLAIN)
2726  {
2727  startup_cost = input_total_cost;
2728  startup_cost += aggcosts->transCost.startup;
2729  startup_cost += aggcosts->transCost.per_tuple * input_tuples;
2730  startup_cost += aggcosts->finalCost.startup;
2731  startup_cost += aggcosts->finalCost.per_tuple;
2732  /* we aren't grouping */
2733  total_cost = startup_cost + cpu_tuple_cost;
2734  output_tuples = 1;
2735  }
2736  else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
2737  {
2738  /* Here we are able to deliver output on-the-fly */
2739  startup_cost = input_startup_cost;
2740  total_cost = input_total_cost;
2741  if (aggstrategy == AGG_MIXED && !enable_hashagg)
2742  ++disabled_nodes;
2743  /* calcs phrased this way to match HASHED case, see note above */
2744  total_cost += aggcosts->transCost.startup;
2745  total_cost += aggcosts->transCost.per_tuple * input_tuples;
2746  total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
2747  total_cost += aggcosts->finalCost.startup;
2748  total_cost += aggcosts->finalCost.per_tuple * numGroups;
2749  total_cost += cpu_tuple_cost * numGroups;
2750  output_tuples = numGroups;
2751  }
2752  else
2753  {
2754  /* must be AGG_HASHED */
2755  startup_cost = input_total_cost;
2756  if (!enable_hashagg)
2757  ++disabled_nodes;
2758  startup_cost += aggcosts->transCost.startup;
2759  startup_cost += aggcosts->transCost.per_tuple * input_tuples;
2760  /* cost of computing hash value */
2761  startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
2762  startup_cost += aggcosts->finalCost.startup;
2763 
2764  total_cost = startup_cost;
2765  total_cost += aggcosts->finalCost.per_tuple * numGroups;
2766  /* cost of retrieving from hash table */
2767  total_cost += cpu_tuple_cost * numGroups;
2768  output_tuples = numGroups;
2769  }
2770 
2771  /*
2772  * Add the disk costs of hash aggregation that spills to disk.
2773  *
2774  * Groups that go into the hash table stay in memory until finalized, so
2775  * spilling and reprocessing tuples doesn't incur additional invocations
2776  * of transCost or finalCost. Furthermore, the computed hash value is
2777  * stored with the spilled tuples, so we don't incur extra invocations of
2778  * the hash function.
2779  *
2780  * Hash Agg begins returning tuples after the first batch is complete.
2781  * Accrue writes (spilled tuples) to startup_cost and to total_cost;
2782  * accrue reads only to total_cost.
2783  */
2784  if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
2785  {
2786  double pages;
2787  double pages_written = 0.0;
2788  double pages_read = 0.0;
2789  double spill_cost;
2790  double hashentrysize;
2791  double nbatches;
2792  Size mem_limit;
2793  uint64 ngroups_limit;
2794  int num_partitions;
2795  int depth;
2796 
2797  /*
2798  * Estimate number of batches based on the computed limits. If less
2799  * than or equal to one, all groups are expected to fit in memory;
2800  * otherwise we expect to spill.
2801  */
2802  hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
2803  input_width,
2804  aggcosts->transitionSpace);
2805  hash_agg_set_limits(hashentrysize, numGroups, 0, &mem_limit,
2806  &ngroups_limit, &num_partitions);
2807 
2808  nbatches = Max((numGroups * hashentrysize) / mem_limit,
2809  numGroups / ngroups_limit);
2810 
2811  nbatches = Max(ceil(nbatches), 1.0);
2812  num_partitions = Max(num_partitions, 2);
2813 
2814  /*
2815  * The number of partitions can change at different levels of
2816  * recursion; but for the purposes of this calculation assume it stays
2817  * constant.
2818  */
2819  depth = ceil(log(nbatches) / log(num_partitions));
2820 
2821  /*
2822  * Estimate number of pages read and written. For each level of
2823  * recursion, a tuple must be written and then later read.
2824  */
2825  pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
2826  pages_written = pages_read = pages * depth;
2827 
2828  /*
2829  * HashAgg has somewhat worse IO behavior than Sort on typical
2830  * hardware/OS combinations. Account for this with a generic penalty.
2831  */
2832  pages_read *= 2.0;
2833  pages_written *= 2.0;
2834 
2835  startup_cost += pages_written * random_page_cost;
2836  total_cost += pages_written * random_page_cost;
2837  total_cost += pages_read * seq_page_cost;
2838 
2839  /* account for CPU cost of spilling a tuple and reading it back */
2840  spill_cost = depth * input_tuples * 2.0 * cpu_tuple_cost;
2841  startup_cost += spill_cost;
2842  total_cost += spill_cost;
2843  }
2844 
2845  /*
2846  * If there are quals (HAVING quals), account for their cost and
2847  * selectivity.
2848  */
2849  if (quals)
2850  {
2851  QualCost qual_cost;
2852 
2853  cost_qual_eval(&qual_cost, quals, root);
2854  startup_cost += qual_cost.startup;
2855  total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
2856 
2857  output_tuples = clamp_row_est(output_tuples *
2858  clauselist_selectivity(root,
2859  quals,
2860  0,
2861  JOIN_INNER,
2862  NULL));
2863  }
2864 
2865  path->rows = output_tuples;
2866  path->disabled_nodes = disabled_nodes;
2867  path->startup_cost = startup_cost;
2868  path->total_cost = total_cost;
2869 }
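
The spill estimate is the subtle part of the hashed case. A standalone sketch of the batch and recursion-depth arithmetic, with the outputs of hash_agg_set_limits() replaced by assumed values (in the real code they are derived from the memory limit and entry size):

#include <stdio.h>
#include <math.h>

int
main(void)
{
	/* illustrative and assumed values, not computed by hash_agg_set_limits() */
	double	numGroups = 1e6;
	double	hashentrysize = 200.0;			/* bytes per group, assumed */
	double	mem_limit = 4096.0 * 1024.0;	/* assumed memory limit */
	double	ngroups_limit = 20000.0;		/* assumed in-memory group limit */
	int		num_partitions = 4;				/* assumed; floored at 2 above */

	double	nbatches = fmax((numGroups * hashentrysize) / mem_limit,
							numGroups / ngroups_limit);

	nbatches = fmax(ceil(nbatches), 1.0);

	/* assume the partition count stays constant across recursion levels */
	int		depth = (int) ceil(log(nbatches) / log(num_partitions));

	/* here: nbatches = 50, depth = 3 */
	printf("nbatches=%.0f depth=%d\n", nbatches, depth);

	/* each input page is then written and read 'depth' times, doubled as an IO penalty */
	return 0;
}
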
2870 
2871 /*
2872  * get_windowclause_startup_tuples
2873  * Estimate how many tuples we'll need to fetch from a WindowAgg's
2874  * subnode before we can output the first WindowAgg tuple.
2875  *
2876  * How many tuples need to be read depends on the WindowClause. For example,
2877  * a WindowClause with no PARTITION BY and no ORDER BY requires that all
2878  * subnode tuples are read and aggregated before the WindowAgg can output
2879  * anything. If there's a PARTITION BY, then we only need to look at tuples
2880  * in the first partition. Here we attempt to estimate just how many
2881  * 'input_tuples' the WindowAgg will need to read for the given WindowClause
2882  * before the first tuple can be output.
2883  */
2884 static double
2885 get_windowclause_startup_tuples(PlannerInfo *root, WindowClause *wc,
2886  double input_tuples)
2887 {
2888  int frameOptions = wc->frameOptions;
2889  double partition_tuples;
2890  double return_tuples;
2891  double peer_tuples;
2892 
2893  /*
2894  * First, figure out how many partitions there are likely to be and set
2895  * partition_tuples according to that estimate.
2896  */
2897  if (wc->partitionClause != NIL)
2898  {
2899  double num_partitions;
2900  List *partexprs = get_sortgrouplist_exprs(wc->partitionClause,
2901  root->parse->targetList);
2902 
2903  num_partitions = estimate_num_groups(root, partexprs, input_tuples,
2904  NULL, NULL);
2905  list_free(partexprs);
2906 
2907  partition_tuples = input_tuples / num_partitions;
2908  }
2909  else
2910  {
2911  /* all tuples belong to the same partition */
2912  partition_tuples = input_tuples;
2913  }
2914 
2915  /* estimate the number of tuples in each peer group */
2916  if (wc->orderClause != NIL)
2917  {
2918  double num_groups;
2919  List *orderexprs;
2920 
2921  orderexprs = get_sortgrouplist_exprs(wc->orderClause,
2922  root->parse->targetList);
2923 
2924  /* estimate how many peer groups there are in the partition */
2925  num_groups = estimate_num_groups(root, orderexprs,
2926  partition_tuples, NULL,
2927  NULL);
2928  list_free(orderexprs);
2929  peer_tuples = partition_tuples / num_groups;
2930  }
2931  else
2932  {
2933  /* no ORDER BY so only 1 tuple belongs in each peer group */
2934  peer_tuples = 1.0;
2935  }
2936 
2937  if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
2938  {
2939  /* include all partition rows */
2940  return_tuples = partition_tuples;
2941  }
2942  else if (frameOptions & FRAMEOPTION_END_CURRENT_ROW)
2943  {
2944  if (frameOptions & FRAMEOPTION_ROWS)
2945  {
2946  /* just count the current row */
2947  return_tuples = 1.0;
2948  }
2949  else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS))
2950  {
2951  /*
2952  * When in RANGE/GROUPS mode, it's more complex. If there's no
2953  * ORDER BY, then all rows in the partition are peers, otherwise
2954  * we'll need to read the first group of peers.
2955  */
2956  if (wc->orderClause == NIL)
2957  return_tuples = partition_tuples;
2958  else
2959  return_tuples = peer_tuples;
2960  }
2961  else
2962  {
2963  /*
2964  * Something new we don't support yet? This needs attention.
2965  * We'll just return 1.0 in the meantime.
2966  */
2967  Assert(false);
2968  return_tuples = 1.0;
2969  }
2970  }
2971  else if (frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING)
2972  {
2973  /*
2974  * BETWEEN ... AND N PRECEDING will only need to read the WindowAgg's
2975  * subnode after N ROWS/RANGES/GROUPS. N can be 0, but not negative,
2976  * so we'll just assume only the current row needs to be read to fetch
2977  * the first WindowAgg row.
2978  */
2979  return_tuples = 1.0;
2980  }
2981  else if (frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING)
2982  {
2983  Const *endOffset = (Const *) wc->endOffset;
2984  double end_offset_value;
2985 
2986  /* try and figure out the value specified in the endOffset. */
2987  if (IsA(endOffset, Const))
2988  {
2989  if (endOffset->constisnull)
2990  {
2991  /*
2992  * NULLs are not allowed, but currently, there's no code to
2993  * error out if there's a NULL Const. We'll only discover
2994  * this during execution. For now, just pretend everything is
2995  * fine and assume that just the first row/range/group will be
2996  * needed.
2997  */
2998  end_offset_value = 1.0;
2999  }
3000  else
3001  {
3002  switch (endOffset->consttype)
3003  {
3004  case INT2OID:
3005  end_offset_value =
3006  (double) DatumGetInt16(endOffset->constvalue);
3007  break;
3008  case INT4OID:
3009  end_offset_value =
3010  (double) DatumGetInt32(endOffset->constvalue);
3011  break;
3012  case INT8OID:
3013  end_offset_value =
3014  (double) DatumGetInt64(endOffset->constvalue);
3015  break;
3016  default:
3017  end_offset_value =
3018  partition_tuples / peer_tuples *
3019  DEFAULT_INEQ_SEL;
3020  break;
3021  }
3022  }
3023  }
3024  else
3025  {
3026  /*
3027  * When the end bound is not a Const, we'll just need to guess. We
3028  * just make use of DEFAULT_INEQ_SEL.
3029  */
3030  end_offset_value =
3031  partition_tuples / peer_tuples * DEFAULT_INEQ_SEL;
3032  }
3033 
3034  if (frameOptions & FRAMEOPTION_ROWS)
3035  {
3036  /* include the N FOLLOWING and the current row */
3037  return_tuples = end_offset_value + 1.0;
3038  }
3039  else if (frameOptions & (FRAMEOPTION_RANGE | FRAMEOPTION_GROUPS))
3040  {
3041  /* include N FOLLOWING ranges/group and the initial range/group */
3042  return_tuples = peer_tuples * (end_offset_value + 1.0);
3043  }
3044  else
3045  {
3046  /*
3047  * Something new we don't support yet? This needs attention.
3048  * We'll just return 1.0 in the meantime.
3049  */
3050  Assert(false);
3051  return_tuples = 1.0;
3052  }
3053  }
3054  else
3055  {
3056  /*
3057  * Something new we don't support yet? This needs attention. We'll
3058  * just return 1.0 in the meantime.
3059  */
3060  Assert(false);
3061  return_tuples = 1.0;
3062  }
3063 
3064  if (wc->partitionClause != NIL || wc->orderClause != NIL)
3065  {
3066  /*
3067  * Cap the return value to the estimated partition tuples and account
3068  * for the extra tuple WindowAgg will need to read to confirm the next
3069  * tuple does not belong to the same partition or peer group.
3070  */
3071  return_tuples = Min(return_tuples + 1.0, partition_tuples);
3072  }
3073  else
3074  {
3075  /*
3076  * Cap the return value so it's never higher than the expected tuples
3077  * in the partition.
3078  */
3079  return_tuples = Min(return_tuples, partition_tuples);
3080  }
3081 
3082  /*
3083  * We needn't worry about any EXCLUDE options as those only exclude rows
3084  * from being aggregated, not from being read from the WindowAgg's
3085  * subnode.
3086  */
3087 
3088  return clamp_row_est(return_tuples);
3089 }
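
A standalone sketch of how the pieces combine for one representative frame, RANGE BETWEEN UNBOUNDED PRECEDING AND 10 FOLLOWING, with the group-count estimates replaced by assumed values (estimate_num_groups() would supply them in the real code):

#include <stdio.h>
#include <math.h>

int
main(void)
{
	/* illustrative estimates */
	double	input_tuples = 100000.0;
	double	num_partitions = 100.0;		/* assumed PARTITION BY group count */
	double	num_peer_groups = 250.0;	/* assumed ORDER BY group count */

	double	partition_tuples = input_tuples / num_partitions;	/* 1000 */
	double	peer_tuples = partition_tuples / num_peer_groups;	/* 4 */

	/* RANGE frame ending at 10 FOLLOWING: N following peer groups plus the first */
	double	end_offset_value = 10.0;
	double	return_tuples = peer_tuples * (end_offset_value + 1.0);	/* 44 */

	/* plus one extra tuple to detect the partition / peer-group boundary */
	return_tuples = fmin(return_tuples + 1.0, partition_tuples);

	printf("startup tuples ~= %.0f\n", return_tuples);	/* 45 */
	return 0;
}
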
3090 
3091 /*
3092  * cost_windowagg
3093  * Determines and returns the cost of performing a WindowAgg plan node,
3094  * including the cost of its input.
3095  *
3096  * Input is assumed already properly sorted.
3097  */
3098 void
3099 cost_windowagg(Path *path, PlannerInfo *root,
3100  List *windowFuncs, WindowClause *winclause,
3101  int input_disabled_nodes,
3102  Cost input_startup_cost, Cost input_total_cost,
3103  double input_tuples)
3104 {
3105  Cost startup_cost;
3106  Cost total_cost;
3107  double startup_tuples;
3108  int numPartCols;
3109  int numOrderCols;
3110  ListCell *lc;
3111 
3112  numPartCols = list_length(winclause->partitionClause);
3113  numOrderCols = list_length(winclause->orderClause);
3114 
3115  startup_cost = input_startup_cost;
3116  total_cost = input_total_cost;
3117 
3118  /*
3119  * Window functions are assumed to cost their stated execution cost, plus
3120  * the cost of evaluating their input expressions, per tuple. Since they
3121  * may in fact evaluate their inputs at multiple rows during each cycle,
3122  * this could be a drastic underestimate; but without a way to know how
3123  * many rows the window function will fetch, it's hard to do better. In
3124  * any case, it's a good estimate for all the built-in window functions,
3125  * so we'll just do this for now.
3126  */
3127  foreach(lc, windowFuncs)
3128  {
3129  WindowFunc *wfunc = lfirst_node(WindowFunc, lc);
3130  Cost wfunccost;
3131  QualCost argcosts;
3132 
3133  argcosts.startup = argcosts.per_tuple = 0;
3134  add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
3135  &argcosts);
3136  startup_cost += argcosts.startup;
3137  wfunccost = argcosts.per_tuple;
3138 
3139  /* also add the input expressions' cost to per-input-row costs */
3140  cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
3141  startup_cost += argcosts.startup;
3142  wfunccost += argcosts.per_tuple;
3143 
3144  /*
3145  * Add the filter's cost to per-input-row costs. XXX We should reduce
3146  * input expression costs according to filter selectivity.
3147  */
3148  cost_qual_eval_node(&argcosts, (Node *) wfunc->aggfilter, root);
3149  startup_cost += argcosts.startup;
3150  wfunccost += argcosts.per_tuple;
3151 
3152  total_cost += wfunccost * input_tuples;
3153  }
3154 
3155  /*
3156  * We also charge cpu_operator_cost per grouping column per tuple for
3157  * grouping comparisons, plus cpu_tuple_cost per tuple for general
3158  * overhead.
3159  *
3160  * XXX this neglects costs of spooling the data to disk when it overflows
3161  * work_mem. Sooner or later that should get accounted for.
3162  */
3163  total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
3164  total_cost += cpu_tuple_cost * input_tuples;
3165 
3166  path->rows = input_tuples;
3167  path->disabled_nodes = input_disabled_nodes;
3168  path->startup_cost = startup_cost;
3169  path->total_cost = total_cost;
3170 
3171  /*
3172  * Also, take into account how many tuples we need to read from the
3173  * subnode in order to produce the first tuple from the WindowAgg. To do
3174  * this we proportion the run cost (total cost not including startup cost)
3175  * over the estimated startup tuples. We already included the startup
3176  * cost of the subnode, so we only need to do this when the estimated
3177  * startup tuples is above 1.0.
3178  */
3179  startup_tuples = get_windowclause_startup_tuples(root, winclause,
3180  input_tuples);
3181 
3182  if (startup_tuples > 1.0)
3183  path->startup_cost += (total_cost - startup_cost) / input_tuples *
3184  (startup_tuples - 1.0);
3185 }
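
The final startup adjustment simply pro-rates the run cost over the estimated startup tuples; for example (illustrative numbers only):

#include <stdio.h>

int
main(void)
{
	/* illustrative costs standing in for the WindowAgg computation above */
	double	startup_cost = 100.0;
	double	total_cost = 1100.0;
	double	input_tuples = 10000.0;
	double	startup_tuples = 45.0;	/* from get_windowclause_startup_tuples() */

	if (startup_tuples > 1.0)
		startup_cost += (total_cost - startup_cost) / input_tuples *
			(startup_tuples - 1.0);

	/* 100 + (1000 / 10000) * 44 = 104.40 */
	printf("adjusted startup cost = %.2f\n", startup_cost);
	return 0;
}
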
3186 
3187 /*
3188  * cost_group
3189  * Determines and returns the cost of performing a Group plan node,
3190  * including the cost of its input.
3191  *
3192  * Note: caller must ensure that input costs are for appropriately-sorted
3193  * input.
3194  */
3195 void
3196 cost_group(Path *path, PlannerInfo *root,
3197  int numGroupCols, double numGroups,
3198  List *quals,
3199  int input_disabled_nodes,
3200  Cost input_startup_cost, Cost input_total_cost,
3201  double input_tuples)
3202 {
3203  double output_tuples;
3204  Cost startup_cost;
3205  Cost total_cost;
3206 
3207  output_tuples = numGroups;
3208  startup_cost = input_startup_cost;
3209  total_cost = input_total_cost;
3210 
3211  /*
3212  * Charge one cpu_operator_cost per comparison per input tuple. We assume
3213  * all columns get compared for most of the tuples.
3214  */
3215  total_cost += cpu_operator_cost * input_tuples * numGroupCols;
3216 
3217  /*
3218  * If there are quals (HAVING quals), account for their cost and
3219  * selectivity.
3220  */
3221  if (quals)
3222  {
3223  QualCost qual_cost;
3224 
3225  cost_qual_eval(&qual_cost, quals, root);
3226  startup_cost += qual_cost.startup;
3227  total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
3228 
3229  output_tuples = clamp_row_est(output_tuples *
3230  clauselist_selectivity(root,
3231  quals,
3232  0,
3233  JOIN_INNER,
3234  NULL));
3235  }
3236 
3237  path->rows = output_tuples;
3238  path->disabled_nodes = input_disabled_nodes;
3239  path->startup_cost = startup_cost;
3240  path->total_cost = total_cost;
3241 }
3242 
3243 /*
3244  * initial_cost_nestloop
3245  * Preliminary estimate of the cost of a nestloop join path.
3246  *
3247  * This must quickly produce lower-bound estimates of the path's startup and
3248  * total costs. If we are unable to eliminate the proposed path from
3249  * consideration using the lower bounds, final_cost_nestloop will be called
3250  * to obtain the final estimates.
3251  *
3252  * The exact division of labor between this function and final_cost_nestloop
3253  * is private to them, and represents a tradeoff between speed of the initial
3254  * estimate and getting a tight lower bound. We choose to not examine the
3255  * join quals here, since that's by far the most expensive part of the
3256  * calculations. The end result is that CPU-cost considerations must be
3257  * left for the second phase; and for SEMI/ANTI joins, we must also postpone
3258  * incorporation of the inner path's run cost.
3259  *
3260  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
3261  * other data to be used by final_cost_nestloop
3262  * 'jointype' is the type of join to be performed
3263  * 'outer_path' is the outer input to the join
3264  * 'inner_path' is the inner input to the join
3265  * 'extra' contains miscellaneous information about the join
3266  */
3267 void
3268 initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
3269  JoinType jointype,
3270  Path *outer_path, Path *inner_path,
3271  JoinPathExtraData *extra)
3272 {
3273  int disabled_nodes;
3274  Cost startup_cost = 0;
3275  Cost run_cost = 0;
3276  double outer_path_rows = outer_path->rows;
3277  Cost inner_rescan_start_cost;
3278  Cost inner_rescan_total_cost;
3279  Cost inner_run_cost;
3280  Cost inner_rescan_run_cost;
3281 
3282  /* Count up disabled nodes. */
3283  disabled_nodes = enable_nestloop ? 0 : 1;
3284  disabled_nodes += inner_path->disabled_nodes;
3285  disabled_nodes += outer_path->disabled_nodes;
3286 
3287  /* estimate costs to rescan the inner relation */
3288  cost_rescan(root, inner_path,
3289  &inner_rescan_start_cost,
3290  &inner_rescan_total_cost);
3291 
3292  /* cost of source data */
3293 
3294  /*
3295  * NOTE: clearly, we must pay both outer and inner paths' startup_cost
3296  * before we can start returning tuples, so the join's startup cost is
3297  * their sum. We'll also pay the inner path's rescan startup cost
3298  * multiple times.
3299  */
3300  startup_cost += outer_path->startup_cost + inner_path->startup_cost;
3301  run_cost += outer_path->total_cost - outer_path->startup_cost;
3302  if (outer_path_rows > 1)
3303  run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
3304 
3305  inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
3306  inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
3307 
3308  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI ||
3309  extra->inner_unique)
3310  {
3311  /*
3312  * With a SEMI or ANTI join, or if the innerrel is known unique, the
3313  * executor will stop after the first match.
3314  *
3315  * Getting decent estimates requires inspection of the join quals,
3316  * which we choose to postpone to final_cost_nestloop.
3317  */
3318 
3319  /* Save private data for final_cost_nestloop */
3320  workspace->inner_run_cost = inner_run_cost;
3321  workspace->inner_rescan_run_cost = inner_rescan_run_cost;
3322  }
3323  else
3324  {
3325  /* Normal case; we'll scan whole input rel for each outer row */
3326  run_cost += inner_run_cost;
3327  if (outer_path_rows > 1)
3328  run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
3329  }
3330 
3331  /* CPU costs left for later */
3332 
3333  /* Public result fields */
3334  workspace->disabled_nodes = disabled_nodes;
3335  workspace->startup_cost = startup_cost;
3336  workspace->total_cost = startup_cost + run_cost;
3337  /* Save private data for final_cost_nestloop */
3338  workspace->run_cost = run_cost;
3339 }
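/*
 * Editor's sketch (illustrative only, not part of costsize.c): the
 * "normal case" source-data accounting in initial_cost_nestloop.  The inner
 * path is started once per outer row, so we pay its first-scan run cost once
 * plus a (usually cheaper) rescan run cost for each additional outer row.
 * The struct and function names are hypothetical stand-ins for the Path
 * fields used above.
 */
typedef struct ExampleCosts
{
    double      startup;        /* cost before first tuple */
    double      total;          /* cost to fetch all tuples */
} ExampleCosts;

ExampleCosts
example_nestloop_source_cost(ExampleCosts outer, double outer_rows,
                             ExampleCosts inner,
                             double inner_rescan_start, double inner_rescan_total)
{
    ExampleCosts result;

    /* both inputs must start up before the first join row can be returned */
    result.startup = outer.startup + inner.startup;
    result.total = result.startup + (outer.total - outer.startup);

    /* restarting the inner scan for each additional outer row */
    if (outer_rows > 1)
        result.total += (outer_rows - 1) * inner_rescan_start;

    /* run the inner scan to completion once, then rescan it */
    result.total += inner.total - inner.startup;
    if (outer_rows > 1)
        result.total += (outer_rows - 1) * (inner_rescan_total - inner_rescan_start);

    return result;
}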
3340 
3341 /*
3342  * final_cost_nestloop
3343  * Final estimate of the cost and result size of a nestloop join path.
3344  *
3345  * 'path' is already filled in except for the rows and cost fields
3346  * 'workspace' is the result from initial_cost_nestloop
3347  * 'extra' contains miscellaneous information about the join
3348  */
3349 void
3350 final_cost_nestloop(PlannerInfo *root, NestPath *path,
3351  JoinCostWorkspace *workspace,
3352  JoinPathExtraData *extra)
3353 {
3354  Path *outer_path = path->jpath.outerjoinpath;
3355  Path *inner_path = path->jpath.innerjoinpath;
3356  double outer_path_rows = outer_path->rows;
3357  double inner_path_rows = inner_path->rows;
3358  Cost startup_cost = workspace->startup_cost;
3359  Cost run_cost = workspace->run_cost;
3360  Cost cpu_per_tuple;
3361  QualCost restrict_qual_cost;
3362  double ntuples;
3363 
3364  /* Set the number of disabled nodes. */
3365  path->jpath.path.disabled_nodes = workspace->disabled_nodes;
3366 
3367  /* Protect some assumptions below that rowcounts aren't zero */
3368  if (outer_path_rows <= 0)
3369  outer_path_rows = 1;
3370  if (inner_path_rows <= 0)
3371  inner_path_rows = 1;
3372  /* Mark the path with the correct row estimate */
3373  if (path->jpath.path.param_info)
3374  path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
3375  else
3376  path->jpath.path.rows = path->jpath.path.parent->rows;
3377 
3378  /* For partial paths, scale row estimate. */
3379  if (path->jpath.path.parallel_workers > 0)
3380  {
3381  double parallel_divisor = get_parallel_divisor(&path->jpath.path);
3382 
3383  path->jpath.path.rows =
3384  clamp_row_est(path->jpath.path.rows / parallel_divisor);
3385  }
3386 
3387  /* cost of inner-relation source data (we already dealt with outer rel) */
3388 
3389  if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI ||
3390  extra->inner_unique)
3391  {
3392  /*
3393  * With a SEMI or ANTI join, or if the innerrel is known unique, the
3394  * executor will stop after the first match.
3395  */
3396  Cost inner_run_cost = workspace->inner_run_cost;
3397  Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
3398  double outer_matched_rows;
3399  double outer_unmatched_rows;
3400  Selectivity inner_scan_frac;
3401 
3402  /*
3403  * For an outer-rel row that has at least one match, we can expect the
3404  * inner scan to stop after a fraction 1/(match_count+1) of the inner
3405  * rows, if the matches are evenly distributed. Since they probably
3406  * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
3407  * that fraction. (If we used a larger fuzz factor, we'd have to
3408  * clamp inner_scan_frac to at most 1.0; but since match_count is at
3409  * least 1, no such clamp is needed now.)
3410  */
3411  outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
3412  outer_unmatched_rows = outer_path_rows - outer_matched_rows;
3413  inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
3414 
3415  /*
3416  * Compute number of tuples processed (not number emitted!). First,
3417  * account for successfully-matched outer rows.
3418  */
3419  ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
3420 
3421  /*
3422  * Now we need to estimate the actual costs of scanning the inner
3423  * relation, which may be quite a bit less than N times inner_run_cost
3424  * due to early scan stops. We consider two cases. If the inner path
3425  * is an indexscan using all the joinquals as indexquals, then an
3426  * unmatched outer row results in an indexscan returning no rows,
3427  * which is probably quite cheap. Otherwise, the executor will have
3428  * to scan the whole inner rel for an unmatched row; not so cheap.
3429  */
3430  if (has_indexed_join_quals(path))
3431  {
3432  /*
3433  * Successfully-matched outer rows will only require scanning
3434  * inner_scan_frac of the inner relation. In this case, we don't
3435  * need to charge the full inner_run_cost even when that's more
3436  * than inner_rescan_run_cost, because we can assume that none of
3437  * the inner scans ever scan the whole inner relation. So it's
3438  * okay to assume that all the inner scan executions can be
3439  * fractions of the full cost, even if materialization is reducing
3440  * the rescan cost. At this writing, it's impossible to get here
3441  * for a materialized inner scan, so inner_run_cost and
3442  * inner_rescan_run_cost will be the same anyway; but just in
3443  * case, use inner_run_cost for the first matched tuple and
3444  * inner_rescan_run_cost for additional ones.
3445  */
3446  run_cost += inner_run_cost * inner_scan_frac;
3447  if (outer_matched_rows > 1)
3448  run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
3449 
3450  /*
3451  * Add the cost of inner-scan executions for unmatched outer rows.
3452  * We estimate this as the same cost as returning the first tuple
3453  * of a nonempty scan. We consider that these are all rescans,
3454  * since we used inner_run_cost once already.
3455  */
3456  run_cost += outer_unmatched_rows *
3457  inner_rescan_run_cost / inner_path_rows;
3458 
3459  /*
3460  * We won't be evaluating any quals at all for unmatched rows, so
3461  * don't add them to ntuples.
3462  */
3463  }
3464  else
3465  {
3466  /*
3467  * Here, a complicating factor is that rescans may be cheaper than
3468  * first scans. If we never scan all the way to the end of the
3469  * inner rel, it might be (depending on the plan type) that we'd
3470  * never pay the whole inner first-scan run cost. However it is
3471  * difficult to estimate whether that will happen (and it could
3472  * not happen if there are any unmatched outer rows!), so be
3473  * conservative and always charge the whole first-scan cost once.
3474  * We consider this charge to correspond to the first unmatched
3475  * outer row, unless there isn't one in our estimate, in which
3476  * case blame it on the first matched row.
3477  */
3478 
3479  /* First, count all unmatched join tuples as being processed */
3480  ntuples += outer_unmatched_rows * inner_path_rows;
3481 
3482  /* Now add the forced full scan, and decrement appropriate count */
3483  run_cost += inner_run_cost;
3484  if (outer_unmatched_rows >= 1)
3485  outer_unmatched_rows -= 1;
3486  else
3487  outer_matched_rows -= 1;
3488 
3489  /* Add inner run cost for additional outer tuples having matches */
3490  if (outer_matched_rows > 0)
3491  run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac;
3492 
3493  /* Add inner run cost for additional unmatched outer tuples */
3494  if (outer_unmatched_rows > 0)
3495  run_cost += outer_unmatched_rows * inner_rescan_run_cost;
3496  }
3497  }
3498  else
3499  {
3500  /* Normal-case source costs were included in preliminary estimate */
3501 
3502  /* Compute number of tuples processed (not number emitted!) */
3503  ntuples = outer_path_rows * inner_path_rows;
3504  }
3505 
3506  /* CPU costs */
3507  cost_qual_eval(&restrict_qual_cost, path->jpath.joinrestrictinfo, root);
3508  startup_cost += restrict_qual_cost.startup;
3509  cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
3510  run_cost += cpu_per_tuple * ntuples;
3511 
3512  /* tlist eval costs are paid per output row, not per tuple scanned */
3513  startup_cost += path->jpath.path.pathtarget->cost.startup;
3514  run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
3515 
3516  path->jpath.path.startup_cost = startup_cost;
3517  path->jpath.path.total_cost = startup_cost + run_cost;
3518 }
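/*
 * Editor's sketch (illustrative only, not part of costsize.c): the
 * "stop after first match" estimate used above for SEMI/ANTI joins.  With
 * outer_match_frac = 0.4 over 1000 outer rows, about 400 outer rows are
 * matched, and with match_count = 3 the fuzzed scan fraction is
 * 2.0 / (3 + 1) = 0.5, i.e. each matched outer row is expected to read about
 * half of the inner rows before stopping.  The function name is hypothetical.
 */
double
example_semijoin_tuples_processed(double outer_rows, double outer_match_frac,
                                  double match_count, double inner_rows)
{
    double matched = outer_rows * outer_match_frac;     /* real code rounds with rint() */
    double scan_frac = 2.0 / (match_count + 1.0);       /* fuzz factor of 2.0 */

    /* tuples examined on behalf of the successfully-matched outer rows */
    return matched * inner_rows * scan_frac;
}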
3519 
3520 /*
3521  * initial_cost_mergejoin
3522  * Preliminary estimate of the cost of a mergejoin path.
3523  *
3524  * This must quickly produce lower-bound estimates of the path's startup and
3525  * total costs. If we are unable to eliminate the proposed path from
3526  * consideration using the lower bounds, final_cost_mergejoin will be called
3527  * to obtain the final estimates.
3528  *
3529  * The exact division of labor between this function and final_cost_mergejoin
3530  * is private to them, and represents a tradeoff between speed of the initial
3531  * estimate and getting a tight lower bound. We choose to not examine the
3532  * join quals here, except for obtaining the scan selectivity estimate which
3533  * is really essential (but fortunately, use of caching keeps the cost of
3534  * getting that down to something reasonable).
3535  * We also assume that cost_sort is cheap enough to use here.
3536  *
3537  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
3538  * other data to be used by final_cost_mergejoin
3539  * 'jointype' is the type of join to be performed
3540  * 'mergeclauses' is the list of joinclauses to be used as merge clauses
3541  * 'outer_path' is the outer input to the join
3542  * 'inner_path' is the inner input to the join
3543  * 'outersortkeys' is the list of sort keys for the outer path
3544  * 'innersortkeys' is the list of sort keys for the inner path
3545  * 'extra' contains miscellaneous information about the join
3546  *
3547  * Note: outersortkeys and innersortkeys should be NIL if no explicit
3548  * sort is needed because the respective source path is already ordered.
3549  */
3550 void
3551 initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
3552  JoinType jointype,
3553  List *mergeclauses,
3554  Path *outer_path, Path *inner_path,
3555  List *outersortkeys, List *innersortkeys,
3556  JoinPathExtraData *extra)
3557 {
3558  int disabled_nodes;
3559  Cost startup_cost = 0;
3560  Cost run_cost = 0;
3561  double outer_path_rows = outer_path->rows;
3562  double inner_path_rows = inner_path->rows;
3563  Cost inner_run_cost;
3564  double outer_rows,
3565  inner_rows,
3566  outer_skip_rows,
3567  inner_skip_rows;
3568  Selectivity outerstartsel,
3569  outerendsel,
3570  innerstartsel,
3571  innerendsel;
3572  Path sort_path; /* dummy for result of cost_sort */
3573 
3574  /* Protect some assumptions below that rowcounts aren't zero */
3575  if (outer_path_rows <= 0)
3576  outer_path_rows = 1;
3577  if (inner_path_rows <= 0)
3578  inner_path_rows = 1;
3579 
3580  /*
3581  * A merge join will stop as soon as it exhausts either input stream
3582  * (unless it's an outer join, in which case the outer side has to be
3583  * scanned all the way anyway). Estimate fraction of the left and right
3584  * inputs that will actually need to be scanned. Likewise, we can
3585  * estimate the number of rows that will be skipped before the first join
3586  * pair is found, which should be factored into startup cost. We use only
3587  * the first (most significant) merge clause for this purpose. Since
3588  * mergejoinscansel() is a fairly expensive computation, we cache the
3589  * results in the merge clause RestrictInfo.
3590  */
3591  if (mergeclauses && jointype != JOIN_FULL)
3592  {
3593  RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
3594  List *opathkeys;
3595  List *ipathkeys;
3596  PathKey *opathkey;
3597  PathKey *ipathkey;
3598  MergeScanSelCache *cache;
3599 
3600  /* Get the input pathkeys to determine the sort-order details */
3601  opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;
3602  ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys;
3603  Assert(opathkeys);
3604  Assert(ipathkeys);
3605  opathkey = (PathKey *) linitial(opathkeys);
3606  ipathkey = (PathKey *) linitial(ipathkeys);
3607  /* debugging check */
3608  if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
3609  opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation ||
3610  opathkey->pk_strategy != ipathkey->pk_strategy ||
3611  opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
3612  elog(ERROR, "left and right pathkeys do not match in mergejoin");
3613 
3614  /* Get the selectivity with caching */
3615  cache = cached_scansel(root, firstclause, opathkey);
3616 
3617  if (bms_is_subset(firstclause->left_relids,
3618  outer_path->parent->relids))
3619  {
3620  /* left side of clause is outer */
3621  outerstartsel = cache->leftstartsel;
3622  outerendsel = cache->leftendsel;
3623  innerstartsel = cache->rightstartsel;
3624  innerendsel = cache->rightendsel;
3625  }
3626  else
3627  {
3628  /* left side of clause is inner */
3629  outerstartsel = cache->rightstartsel;
3630  outerendsel = cache->rightendsel;
3631  innerstartsel = cache->leftstartsel;
3632  innerendsel = cache->leftendsel;
3633  }
3634  if (jointype == JOIN_LEFT ||
3635  jointype == JOIN_ANTI)
3636  {
3637  outerstartsel = 0.0;
3638  outerendsel = 1.0;
3639  }
3640  else if (jointype == JOIN_RIGHT ||
3641  jointype == JOIN_RIGHT_ANTI)
3642  {
3643  innerstartsel = 0.0;
3644  innerendsel = 1.0;
3645  }
3646  }
3647  else
3648  {
3649  /* cope with clauseless or full mergejoin */
3650  outerstartsel = innerstartsel = 0.0;
3651  outerendsel = innerendsel = 1.0;
3652  }
3653 
3654  /*
3655  * Convert selectivities to row counts. We force outer_rows and
3656  * inner_rows to be at least 1, but the skip_rows estimates can be zero.
3657  */
3658  outer_skip_rows = rint(outer_path_rows * outerstartsel);
3659  inner_skip_rows = rint(inner_path_rows * innerstartsel);
3660  outer_rows = clamp_row_est(outer_path_rows * outerendsel);
3661  inner_rows = clamp_row_est(inner_path_rows * innerendsel);
3662 
3663  Assert(outer_skip_rows <= outer_rows);
3664  Assert(inner_skip_rows <= inner_rows);
3665 
3666  /*
3667  * Readjust scan selectivities to account for above rounding. This is
3668  * normally an insignificant effect, but when there are only a few rows in
3669  * the inputs, failing to do this makes for a large percentage error.
3670  */
3671  outerstartsel = outer_skip_rows / outer_path_rows;
3672  innerstartsel = inner_skip_rows / inner_path_rows;
3673  outerendsel = outer_rows / outer_path_rows;
3674  innerendsel = inner_rows / inner_path_rows;
3675 
3676  Assert(outerstartsel <= outerendsel);
3677  Assert(innerstartsel <= innerendsel);
3678 
3679  disabled_nodes = enable_mergejoin ? 0 : 1;
3680 
3681  /* cost of source data */
3682 
3683  if (outersortkeys) /* do we need to sort outer? */
3684  {
3685  cost_sort(&sort_path,
3686  root,
3687  outersortkeys,
3688  outer_path->disabled_nodes,
3689  outer_path->total_cost,
3690  outer_path_rows,
3691  outer_path->pathtarget->width,
3692  0.0,
3693  work_mem,
3694  -1.0);
3695  disabled_nodes += sort_path.disabled_nodes;
3696  startup_cost += sort_path.startup_cost;
3697  startup_cost += (sort_path.total_cost - sort_path.startup_cost)
3698  * outerstartsel;
3699  run_cost += (sort_path.total_cost - sort_path.startup_cost)
3700  * (outerendsel - outerstartsel);
3701  }
3702  else
3703  {
3704  disabled_nodes += outer_path->disabled_nodes;
3705  startup_cost += outer_path->startup_cost;
3706  startup_cost += (outer_path->total_cost - outer_path->startup_cost)
3707  * outerstartsel;
3708  run_cost += (outer_path->total_cost - outer_path->startup_cost)
3709  * (outerendsel - outerstartsel);
3710  }
3711 
3712  if (innersortkeys) /* do we need to sort inner? */
3713  {
3714  cost_sort(&sort_path,
3715  root,
3716  innersortkeys,
3717  inner_path->disabled_nodes,
3718  inner_path->total_cost,
3719  inner_path_rows,
3720  inner_path->pathtarget->width,
3721  0.0,
3722  work_mem,
3723  -1.0);
3724  disabled_nodes += sort_path.disabled_nodes;
3725  startup_cost += sort_path.startup_cost;
3726  startup_cost += (sort_path.total_cost - sort_path.startup_cost)
3727  * innerstartsel;
3728  inner_run_cost = (sort_path.total_cost - sort_path.startup_cost)
3729  * (innerendsel - innerstartsel);
3730  }
3731  else
3732  {
3733  disabled_nodes += inner_path->disabled_nodes;
3734  startup_cost += inner_path->startup_cost;
3735  startup_cost += (inner_path->total_cost - inner_path->startup_cost)
3736  * innerstartsel;
3737  inner_run_cost = (inner_path->total_cost - inner_path->startup_cost)
3738  * (innerendsel - innerstartsel);
3739  }
3740 
3741  /*
3742  * We can't yet determine whether rescanning occurs, or whether
3743  * materialization of the inner input should be done. The minimum
3744  * possible inner input cost, regardless of rescan and materialization
3745  * considerations, is inner_run_cost. We include that in
3746  * workspace->total_cost, but not yet in run_cost.
3747  */
3748 
3749  /* CPU costs left for later */
3750 
3751  /* Public result fields */
3752  workspace->disabled_nodes = disabled_nodes;
3753  workspace->startup_cost = startup_cost;
3754  workspace->total_cost = startup_cost + run_cost + inner_run_cost;
3755  /* Save private data for final_cost_mergejoin */
3756  workspace->run_cost = run_cost;
3757  workspace->inner_run_cost = inner_run_cost;
3758  workspace->outer_rows = outer_rows;
3759  workspace->inner_rows = inner_rows;
3760  workspace->outer_skip_rows = outer_skip_rows;
3761  workspace->inner_skip_rows = inner_skip_rows;
3762 }
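/*
 * Editor's sketch (illustrative only, not part of costsize.c): the
 * selectivity/rowcount round trip above.  With 7 outer rows and an end
 * selectivity of 0.5, rounding yields 4 rows, so the effective end
 * selectivity becomes 4/7 (about 0.571) rather than 0.5; with tiny inputs
 * this readjustment avoids a large percentage error.  clamp_row_est() is
 * approximated here by rounding to the nearest integer with a floor of one.
 */
double
example_readjusted_endsel(double path_rows, double endsel)
{
    double rows = path_rows * endsel;

    /* crude stand-in for clamp_row_est(): round to nearest, at least 1 row */
    rows = (rows <= 1.0) ? 1.0 : (double) (long) (rows + 0.5);
    return rows / path_rows;
}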
3763 
3764 /*
3765  * final_cost_mergejoin
3766  * Final estimate of the cost and result size of a mergejoin path.
3767  *
3768  * Unlike other costsize functions, this routine makes two actual decisions:
3769  * whether the executor will need to do mark/restore, and whether we should
3770  * materialize the inner path. It would be logically cleaner to build
3771  * separate paths testing these alternatives, but that would require repeating
3772  * most of the cost calculations, which are not all that cheap. Since the
3773  * choice will not affect output pathkeys or startup cost, only total cost,
3774  * there is no possibility of wanting to keep more than one path. So it seems
3775  * best to make the decisions here and record them in the path's
3776  * skip_mark_restore and materialize_inner fields.
3777  *
3778  * Mark/restore overhead is usually required, but can be skipped if we know
3779  * that the executor need find only one match per outer tuple, and that the
3780  * mergeclauses are sufficient to identify a match.
3781  *
3782  * We materialize the inner path if we need mark/restore and either the inner
3783  * path can't support mark/restore, or it's cheaper to use an interposed
3784  * Material node to handle mark/restore.
3785  *
3786  * 'path' is already filled in except for the rows and cost fields and
3787  * skip_mark_restore and materialize_inner
3788  * 'workspace' is the result from initial_cost_mergejoin
3789  * 'extra' contains miscellaneous information about the join
3790  */
3791 void
3792 final_cost_mergejoin(PlannerInfo *root, MergePath *path,
3793  JoinCostWorkspace *workspace,
3794  JoinPathExtraData *extra)
3795 {
3796  Path *outer_path = path->jpath.outerjoinpath;
3797  Path *inner_path = path->jpath.innerjoinpath;
3798  double inner_path_rows = inner_path->rows;
3799  List *mergeclauses = path->path_mergeclauses;
3800  List *innersortkeys = path->innersortkeys;
3801  Cost startup_cost = workspace->startup_cost;
3802  Cost run_cost = workspace->run_cost;
3803  Cost inner_run_cost = workspace->inner_run_cost;
3804  double outer_rows = workspace->outer_rows;
3805  double inner_rows = workspace->inner_rows;
3806  double outer_skip_rows = workspace->outer_skip_rows;
3807  double inner_skip_rows = workspace->inner_skip_rows;
3808  Cost cpu_per_tuple,
3809  bare_inner_cost,
3810  mat_inner_cost;
3811  QualCost merge_qual_cost;
3812  QualCost qp_qual_cost;
3813  double mergejointuples,
3814  rescannedtuples;
3815  double rescanratio;
3816 
3817  /* Set the number of disabled nodes. */
3818  path->jpath.path.disabled_nodes = workspace->disabled_nodes;
3819 
3820  /* Protect some assumptions below that rowcounts aren't zero */
3821  if (inner_path_rows <= 0)
3822  inner_path_rows = 1;
3823 
3824  /* Mark the path with the correct row estimate */
3825  if (path->jpath.path.param_info)
3826  path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
3827  else
3828  path->jpath.path.rows = path->jpath.path.parent->rows;
3829 
3830  /* For partial paths, scale row estimate. */
3831  if (path->jpath.path.parallel_workers > 0)
3832  {
3833  double parallel_divisor = get_parallel_divisor(&path->jpath.path);
3834 
3835  path->jpath.path.rows =
3836  clamp_row_est(path->jpath.path.rows / parallel_divisor);
3837  }
3838 
3839  /*
3840  * Compute cost of the mergequals and qpquals (other restriction clauses)
3841  * separately.
3842  */
3843  cost_qual_eval(&merge_qual_cost, mergeclauses, root);
3844  cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
3845  qp_qual_cost.startup -= merge_qual_cost.startup;
3846  qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
3847 
3848  /*
3849  * With a SEMI or ANTI join, or if the innerrel is known unique, the
3850  * executor will stop scanning for matches after the first match. When
3851  * all the joinclauses are merge clauses, this means we don't ever need to
3852  * back up the merge, and so we can skip mark/restore overhead.
3853  */
3854  if ((path->jpath.jointype == JOIN_SEMI ||
3855  path->jpath.jointype == JOIN_ANTI ||
3856  extra->inner_unique) &&
3857  (list_length(path->jpath.joinrestrictinfo) ==
3858  list_length(path->path_mergeclauses)))
3859  path->skip_mark_restore = true;
3860  else
3861  path->skip_mark_restore = false;
3862 
3863  /*
3864  * Get approx # tuples passing the mergequals. We use approx_tuple_count
3865  * here because we need an estimate done with JOIN_INNER semantics.
3866  */
3867  mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
3868 
3869  /*
3870  * When there are equal merge keys in the outer relation, the mergejoin
3871  * must rescan any matching tuples in the inner relation. This means
3872  * re-fetching inner tuples; we have to estimate how often that happens.
3873  *
3874  * For regular inner and outer joins, the number of re-fetches can be
3875  * estimated approximately as size of merge join output minus size of
3876  * inner relation. Assume that the distinct key values are 1, 2, ..., and
3877  * denote the number of values of each key in the outer relation as m1,
3878  * m2, ...; in the inner relation, n1, n2, ... Then we have
3879  *
3880  * size of join = m1 * n1 + m2 * n2 + ...
3881  *
3882  * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
3883  * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
3884  * relation
3885  *
3886  * This equation works correctly for outer tuples having no inner match
3887  * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
3888  * are effectively subtracting those from the number of rescanned tuples,
3889  * when we should not. Can we do better without expensive selectivity
3890  * computations?
3891  *
3892  * The whole issue is moot if we are working from a unique-ified outer
3893  * input, or if we know we don't need to mark/restore at all.
3894  */
3895  if (IsA(outer_path, UniquePath) || path->skip_mark_restore)
3896  rescannedtuples = 0;
3897  else
3898  {
3899  rescannedtuples = mergejointuples - inner_path_rows;
3900  /* Must clamp because of possible underestimate */
3901  if (rescannedtuples < 0)
3902  rescannedtuples = 0;
3903  }
3904 
3905  /*
3906  * We'll inflate various costs this much to account for rescanning. Note
3907  * that this is to be multiplied by something involving inner_rows, or
3908  * another number related to the portion of the inner rel we'll scan.
3909  */
3910  rescanratio = 1.0 + (rescannedtuples / inner_rows);
3911 
3912  /*
3913  * Decide whether we want to materialize the inner input to shield it from
3914  * mark/restore overhead and re-fetches. Our cost model for regular
3915  * re-fetches is that a re-fetch costs the same as an original fetch,
3916  * which is probably an overestimate; but on the other hand we ignore the
3917  * bookkeeping costs of mark/restore. Not clear if it's worth developing
3918  * a more refined model. So we just need to inflate the inner run cost by
3919  * rescanratio.
3920  */
3921  bare_inner_cost = inner_run_cost * rescanratio;
3922 
3923  /*
3924  * When we interpose a Material node the re-fetch cost is assumed to be
3925  * just cpu_operator_cost per tuple, independently of the underlying
3926  * plan's cost; and we charge an extra cpu_operator_cost per original
3927  * fetch as well. Note that we're assuming the materialize node will
3928  * never spill to disk, since it only has to remember tuples back to the
3929  * last mark. (If there are a huge number of duplicates, our other cost
3930  * factors will make the path so expensive that it probably won't get
3931  * chosen anyway.) So we don't use cost_rescan here.
3932  *
3933  * Note: keep this estimate in sync with create_mergejoin_plan's labeling
3934  * of the generated Material node.
3935  */
3936  mat_inner_cost = inner_run_cost +
3937  cpu_operator_cost * inner_rows * rescanratio;
3938 
3939  /*
3940  * If we don't need mark/restore at all, we don't need materialization.
3941  */
3942  if (path->skip_mark_restore)
3943  path->materialize_inner = false;
3944 
3945  /*
3946  * Prefer materializing if it looks cheaper, unless the user has asked to
3947  * suppress materialization.
3948  */
3949  else if (enable_material && mat_inner_cost < bare_inner_cost)
3950  path->materialize_inner = true;
3951 
3952  /*
3953  * Even if materializing doesn't look cheaper, we *must* do it if the
3954  * inner path is to be used directly (without sorting) and it doesn't
3955  * support mark/restore.
3956  *
3957  * Since the inner side must be ordered, and only Sorts and IndexScans can
3958  * create order to begin with, and they both support mark/restore, you
3959  * might think there's no problem --- but you'd be wrong. Nestloop and
3960  * merge joins can *preserve* the order of their inputs, so they can be
3961  * selected as the input of a mergejoin, and they don't support
3962  * mark/restore at present.
3963  *
3964  * We don't test the value of enable_material here, because
3965  * materialization is required for correctness in this case, and turning
3966  * it off does not entitle us to deliver an invalid plan.
3967  */
3968  else if (innersortkeys == NIL &&
3969  !ExecSupportsMarkRestore(inner_path))
3970  path->materialize_inner = true;
3971 
3972  /*
3973  * Also, force materializing if the inner path is to be sorted and the
3974  * sort is expected to spill to disk. This is because the final merge
3975  * pass can be done on-the-fly if it doesn't have to support mark/restore.
3976  * We don't try to adjust the cost estimates for this consideration,
3977  * though.
3978  *
3979  * Since materialization is a performance optimization in this case,
3980  * rather than necessary for correctness, we skip it if enable_material is
3981  * off.
3982  */
3983  else if (enable_material && innersortkeys != NIL &&
3984  relation_byte_size(inner_path_rows,
3985  inner_path->pathtarget->width) >
3986  (work_mem * 1024L))
3987  path->materialize_inner = true;
3988  else
3989  path->materialize_inner = false;
3990 
3991  /* Charge the right incremental cost for the chosen case */
3992  if (path->materialize_inner)
3993  run_cost += mat_inner_cost;
3994  else
3995  run_cost += bare_inner_cost;
3996 
3997  /* CPU costs */
3998 
3999  /*
4000  * The number of tuple comparisons needed is approximately number of outer
4001  * rows plus number of inner rows plus number of rescanned tuples (can we
4002  * refine this?). At each one, we need to evaluate the mergejoin quals.
4003  */
4004  startup_cost += merge_qual_cost.startup;
4005  startup_cost += merge_qual_cost.per_tuple *
4006  (outer_skip_rows + inner_skip_rows * rescanratio);
4007  run_cost += merge_qual_cost.per_tuple *
4008  ((outer_rows - outer_skip_rows) +
4009  (inner_rows - inner_skip_rows) * rescanratio);
4010 
4011  /*
4012  * For each tuple that gets through the mergejoin proper, we charge
4013  * cpu_tuple_cost plus the cost of evaluating additional restriction
4014  * clauses that are to be applied at the join. (This is pessimistic since
4015  * not all of the quals may get evaluated at each tuple.)
4016  *
4017  * Note: we could adjust for SEMI/ANTI joins skipping some qual
4018  * evaluations here, but it's probably not worth the trouble.
4019  */
4020  startup_cost += qp_qual_cost.startup;
4021  cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
4022  run_cost += cpu_per_tuple * mergejointuples;
4023 
4024  /* tlist eval costs are paid per output row, not per tuple scanned */
4025  startup_cost += path->jpath.path.pathtarget->cost.startup;
4026  run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
4027 
4028  path->jpath.path.startup_cost = startup_cost;
4029  path->jpath.path.total_cost = startup_cost + run_cost;
4030 }
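/*
 * Editor's sketch (illustrative only, not part of costsize.c): the re-fetch
 * accounting above, simplified to a single inner rowcount.  If the mergejoin
 * is expected to emit 1500 tuples from an inner input of 1000 rows, roughly
 * 500 inner tuples are re-fetched, giving rescanratio = 1 + 500/1000 = 1.5.
 * Materializing then looks cheaper when inner_run_cost * 1.5 exceeds
 * inner_run_cost + cpu_operator_cost * 1000 * 1.5.  (The real code also
 * requires enable_material and that mark/restore is needed at all.)
 */
int
example_prefer_materialize(double inner_run_cost, double inner_rows,
                           double mergejoin_tuples, double cpu_operator_cost)
{
    double rescanned = mergejoin_tuples - inner_rows;
    double rescanratio;
    double bare_inner_cost;
    double mat_inner_cost;

    if (rescanned < 0)
        rescanned = 0;          /* clamp a possible underestimate */
    rescanratio = 1.0 + rescanned / inner_rows;

    bare_inner_cost = inner_run_cost * rescanratio;
    mat_inner_cost = inner_run_cost + cpu_operator_cost * inner_rows * rescanratio;

    /* returns 1 when interposing a Material node looks cheaper */
    return mat_inner_cost < bare_inner_cost;
}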
4031 
4032 /*
4033  * run mergejoinscansel() with caching
4034  */
4035 static MergeScanSelCache *
4036 cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
4037 {
4038  MergeScanSelCache *cache;
4039  ListCell *lc;
4040  Selectivity leftstartsel,
4041  leftendsel,
4042  rightstartsel,
4043  rightendsel;
4044  MemoryContext oldcontext;
4045 
4046  /* Do we have this result already? */
4047  foreach(lc, rinfo->scansel_cache)
4048  {
4049  cache = (MergeScanSelCache *) lfirst(lc);
4050  if (cache->opfamily == pathkey->pk_opfamily &&
4051  cache->collation == pathkey->pk_eclass->ec_collation &&
4052  cache->strategy == pathkey->pk_strategy &&
4053  cache->nulls_first == pathkey->pk_nulls_first)
4054  return cache;
4055  }
4056 
4057  /* Nope, do the computation */
4058  mergejoinscansel(root,
4059  (Node *) rinfo->clause,
4060  pathkey->pk_opfamily,
4061  pathkey->pk_strategy,
4062  pathkey->pk_nulls_first,
4063  &leftstartsel,
4064  &leftendsel,
4065  &rightstartsel,
4066  &rightendsel);
4067 
4068  /* Cache the result in suitably long-lived workspace */
4069  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
4070 
4071  cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache));
4072  cache->opfamily = pathkey->pk_opfamily;
4073  cache->collation = pathkey->pk_eclass->ec_collation;
4074  cache->strategy = pathkey->pk_strategy;
4075  cache->nulls_first = pathkey->pk_nulls_first;
4076  cache->leftstartsel = leftstartsel;
4077  cache->leftendsel = leftendsel;
4078  cache->rightstartsel = rightstartsel;
4079  cache->rightendsel = rightendsel;
4080 
4081  rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache);
4082 
4083  MemoryContextSwitchTo(oldcontext);
4084 
4085  return cache;
4086 }
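/*
 * Editor's sketch (illustrative only, not part of costsize.c): the caching
 * pattern used by cached_scansel() above.  Results of an expensive
 * computation are keyed by the sort-order details and remembered on the
 * clause, so later lookups just walk the cached entries.  Below, a tiny
 * array-based analogue with a hypothetical expensive_selectivity() callback.
 */
#define EXAMPLE_CACHE_SIZE 8

typedef struct ExampleScanSelCache
{
    int         sort_key;       /* stands in for opfamily/collation/etc. */
    double      selectivity;    /* cached result */
} ExampleScanSelCache;

double
example_cached_scansel(ExampleScanSelCache *cache, int *ncached,
                       int sort_key, double (*expensive_selectivity) (int))
{
    int         i;

    /* do we have this result already? */
    for (i = 0; i < *ncached; i++)
        if (cache[i].sort_key == sort_key)
            return cache[i].selectivity;

    /* nope: compute once and remember it, if there is room */
    {
        double      sel = expensive_selectivity(sort_key);

        if (*ncached < EXAMPLE_CACHE_SIZE)
        {
            cache[*ncached].sort_key = sort_key;
            cache[*ncached].selectivity = sel;
            (*ncached)++;
        }
        return sel;
    }
}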
4087 
4088 /*
4089  * initial_cost_hashjoin
4090  * Preliminary estimate of the cost of a hashjoin path.
4091  *
4092  * This must quickly produce lower-bound estimates of the path's startup and
4093  * total costs. If we are unable to eliminate the proposed path from
4094  * consideration using the lower bounds, final_cost_hashjoin will be called
4095  * to obtain the final estimates.
4096  *
4097  * The exact division of labor between this function and final_cost_hashjoin
4098  * is private to them, and represents a tradeoff between speed of the initial
4099  * estimate and getting a tight lower bound. We choose to not examine the
4100  * join quals here (other than by counting the number of hash clauses),
4101  * so we can't do much with CPU costs. We do assume that
4102  * ExecChooseHashTableSize is cheap enough to use here.
4103  *
4104  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
4105  * other data to be used by final_cost_hashjoin
4106  * 'jointype' is the type of join to be performed
4107  * 'hashclauses' is the list of joinclauses to be used as hash clauses
4108  * 'outer_path' is the outer input to the join
4109  * 'inner_path' is the inner input to the join
4110  * 'extra' contains miscellaneous information about the join
4111  * 'parallel_hash' indicates that inner_path is partial and that a shared
4112  * hash table will be built in parallel
4113  */
4114 void
4115 initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
4116  JoinType jointype,
4117  List *hashclauses,
4118  Path *outer_path, Path *inner_path,
4119  JoinPathExtraData *extra,
4120  bool parallel_hash)
4121 {
4122  int disabled_nodes;
4123  Cost startup_cost = 0;
4124  Cost run_cost = 0;
4125  double outer_path_rows = outer_path->rows;
4126  double inner_path_rows = inner_path->rows;
4127  double inner_path_rows_total = inner_path_rows;
4128  int num_hashclauses = list_length(hashclauses);
4129  int numbuckets;
4130  int numbatches;
4131  int num_skew_mcvs;
4132  size_t space_allowed; /* unused */
4133 
4134  /* Count up disabled nodes. */
4135  disabled_nodes = enable_hashjoin ? 0 : 1;
4136  disabled_nodes += inner_path->disabled_nodes;
4137  disabled_nodes += outer_path->disabled_nodes;
4138 
4139  /* cost of source data */
4140  startup_cost += outer_path->startup_cost;
4141  run_cost += outer_path->total_cost - outer_path->startup_cost;
4142  startup_cost += inner_path->total_cost;
4143 
4144  /*
4145  * Cost of computing hash function: must do it once per input tuple. We
4146  * charge one cpu_operator_cost for each column's hash function. Also,
4147  * tack on one cpu_tuple_cost per inner row, to model the costs of
4148  * inserting the row into the hashtable.
4149  *
4150  * XXX when a hashclause is more complex than a single operator, we really
4151  * should charge the extra eval costs of the left or right side, as
4152  * appropriate, here. This seems more work than it's worth at the moment.
4153  */
4154  startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
4155  * inner_path_rows;
4156  run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
4157 
4158  /*
4159  * If this is a parallel hash build, then the value we have for
4160  * inner_rows_total currently refers only to the rows returned by each
4161  * participant. For shared hash table size estimation, we need the total
4162  * number, so we need to undo the division.
4163  */
4164  if (parallel_hash)
4165  inner_path_rows_total *= get_parallel_divisor(inner_path);
4166 
4167  /*
4168  * Get hash table size that executor would use for inner relation.
4169  *
4170  * XXX for the moment, always assume that skew optimization will be
4171  * performed. As long as SKEW_HASH_MEM_PERCENT is small, it's not worth
4172  * trying to determine that for sure.
4173  *
4174  * XXX at some point it might be interesting to try to account for skew
4175  * optimization in the cost estimate, but for now, we don't.
4176  */
4177  ExecChooseHashTableSize(inner_path_rows_total,
4178  inner_path->pathtarget->width,
4179  true, /* useskew */
4180  parallel_hash, /* try_combined_hash_mem */
4181  outer_path->parallel_workers,
4182  &space_allowed,
4183  &numbuckets,
4184  &numbatches,
4185  &num_skew_mcvs);
4186 
4187  /*
4188  * If inner relation is too big then we will need to "batch" the join,
4189  * which implies writing and reading most of the tuples to disk an extra
4190  * time. Charge seq_page_cost per page, since the I/O should be nice and
4191  * sequential. Writing the inner rel counts as startup cost, all the rest
4192  * as run cost.
4193  */
4194  if (numbatches > 1)
4195  {
4196  double outerpages = page_size(outer_path_rows,
4197  outer_path->pathtarget->width);
4198  double innerpages = page_size(inner_path_rows,
4199  inner_path->pathtarget->width);
4200 
4201  startup_cost += seq_page_cost * innerpages;
4202  run_cost += seq_page_cost * (innerpages + 2 * outerpages);
4203  }
4204 
4205  /* CPU costs left for later */
4206 
4207  /* Public result fields */
4208  workspace->disabled_nodes = disabled_nodes;
4209  workspace->startup_cost = startup_cost;
4210  workspace->total_cost = startup_cost + run_cost;
4211  /* Save private data for final_cost_hashjoin */
4212  workspace->run_cost = run_cost;
4213  workspace->numbuckets = numbuckets;
4214  workspace->numbatches = numbatches;
4215  workspace->inner_rows_total = inner_path_rows_total;
4216 }
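/*
 * Editor's sketch (illustrative only, not part of costsize.c): the batching
 * I/O charge above.  When numbatches > 1, the inner relation is written out
 * while the hash table is built (startup), and at run time the inner pages
 * are read back while the outer relation is written once and read once,
 * hence innerpages + 2 * outerpages.  Page counts correspond to
 * page_size(), i.e. roughly bytes / BLCKSZ.  The names are hypothetical.
 */
typedef struct ExampleBatchIOCost
{
    double      startup;
    double      run;
} ExampleBatchIOCost;

ExampleBatchIOCost
example_hashjoin_batch_io(int numbatches, double innerpages, double outerpages,
                          double seq_page_cost)
{
    ExampleBatchIOCost cost = {0.0, 0.0};

    if (numbatches > 1)
    {
        cost.startup = seq_page_cost * innerpages;
        cost.run = seq_page_cost * (innerpages + 2 * outerpages);
    }
    return cost;
}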
4217 
4218 /*
4219  * final_cost_hashjoin
4220  * Final estimate of the cost and result size of a hashjoin path.
4221  *
4222  * Note: the numbatches estimate is also saved into 'path' for use later
4223  *
4224  * 'path' is already filled in except for the rows and cost fields and
4225  * num_batches
4226  * 'workspace' is the result from initial_cost_hashjoin
4227  * 'extra' contains miscellaneous information about the join
4228  */
4229 void
4230 final_cost_hashjoin(PlannerInfo *root, HashPath *path,
4231  JoinCostWorkspace *workspace,
4232  JoinPathExtraData *extra)
4233 {
4234  Path *outer_path = path->jpath.outerjoinpath;
4235  Path *inner_path = path->jpath.innerjoinpath;
4236  double outer_path_rows = outer_path->rows;
4237  double inner_path_rows = inner_path->rows;
4238  double inner_path_rows_total = workspace->inner_rows_total;
4239  List *hashclauses = path->path_hashclauses;
4240  Cost startup_cost = workspace->startup_cost;
4241  Cost run_cost = workspace->run_cost;
4242  int numbuckets = workspace->numbuckets;
4243  int numbatches = workspace->numbatches;
4244  Cost cpu_per_tuple;
4245  QualCost hash_qual_cost;
4246  QualCost qp_qual_cost;
4247  double hashjointuples;
4248  double virtualbuckets;
4249  Selectivity innerbucketsize;
4250  Selectivity innermcvfreq;
4251  ListCell *hcl;
4252 
4253  /* Set the number of disabled nodes. */
4254  path->jpath.path.disabled_nodes = workspace->disabled_nodes;
4255 
4256  /* Mark the path with the correct row estimate */
4257  if (path->jpath.path.param_info)
4258  path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
4259  else
4260  path->jpath.path.rows = path->jpath.path.parent->rows;
4261 
4262  /* For partial paths, scale row estimate. */
4263  if (path->jpath.path.parallel_workers > 0)
4264  {
4265  double parallel_divisor = get_parallel_divisor(&path->jpath.path);
4266 
4267  path->jpath.path.rows =
4268  clamp_row_est(path->jpath.path.rows / parallel_divisor);
4269  }
4270 
4271  /* mark the path with estimated # of batches */
4272  path->num_batches = numbatches;
4273 
4274  /* store the total number of tuples (sum of partial row estimates) */
4275  path->inner_rows_total = inner_path_rows_total;
4276 
4277  /* and compute the number of "virtual" buckets in the whole join */
4278  virtualbuckets = (double) numbuckets * (double) numbatches;
4279 
4280  /*
4281  * Determine bucketsize fraction and MCV frequency for the inner relation.
4282  * We use the smallest bucketsize or MCV frequency estimated for any
4283  * individual hashclause; this is undoubtedly conservative.
4284  *
4285  * BUT: if inner relation has been unique-ified, we can assume it's good
4286  * for hashing. This is important both because it's the right answer, and
4287  * because we avoid contaminating the cache with a value that's wrong for
4288  * non-unique-ified paths.
4289  */
4290  if (IsA(inner_path, UniquePath))
4291  {
4292  innerbucketsize = 1.0 / virtualbuckets;
4293  innermcvfreq = 0.0;
4294  }
4295  else
4296  {
4297  innerbucketsize = 1.0;
4298  innermcvfreq = 1.0;
4299  foreach(hcl, hashclauses)
4300  {
4301  RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl);
4302  Selectivity thisbucketsize;
4303  Selectivity thismcvfreq;
4304 
4305  /*
4306  * First we have to figure out which side of the hashjoin clause
4307  * is the inner side.
4308  *
4309  * Since we tend to visit the same clauses over and over when
4310  * planning a large query, we cache the bucket stats estimates in
4311  * the RestrictInfo node to avoid repeated lookups of statistics.
4312  */
4313  if (bms_is_subset(restrictinfo->right_relids,
4314  inner_path->parent->relids))
4315  {
4316  /* righthand side is inner */
4317  thisbucketsize = restrictinfo->right_bucketsize;
4318  if (thisbucketsize < 0)
4319  {
4320  /* not cached yet */
4321  estimate_hash_bucket_stats(root,
4322  get_rightop(restrictinfo->clause),
4323  virtualbuckets,
4324  &restrictinfo->right_mcvfreq,
4325  &restrictinfo->right_bucketsize);
4326  thisbucketsize = restrictinfo->right_bucketsize;
4327  }
4328  thismcvfreq = restrictinfo->right_mcvfreq;
4329  }
4330  else
4331  {
4332  Assert(bms_is_subset(restrictinfo->left_relids,
4333  inner_path->parent->relids));
4334  /* lefthand side is inner */
4335  thisbucketsize = restrictinfo->left_bucketsize;
4336  if (thisbucketsize < 0)
4337  {
4338  /* not cached yet */
4339  estimate_hash_bucket_stats(root,
4340  get_leftop(restrictinfo->clause),
4341  virtualbuckets,
4342  &restrictinfo->left_mcvfreq,
4343  &restrictinfo->left_bucketsize);
4344  thisbucketsize = restrictinfo->left_bucketsize;
4345  }
4346  thismcvfreq = restrictinfo->left_mcvfreq;
4347  }
4348 
4349  if (innerbucketsize > thisbucketsize)
4350  innerbucketsize = thisbucketsize;
4351  if (innermcvfreq > thismcvfreq)
4352  innermcvfreq = thismcvfreq;
4353  }
4354  }
4355 
4356  /*
4357  * If the bucket holding the inner MCV would exceed hash_mem, we don't
4358  * want to hash unless there is really no other alternative, so apply
4359  * disable_cost. (The executor normally copes with excessive memory usage
4360  * by splitting batches, but obviously it cannot separate equal values
4361  * that way, so it will be unable to drive the batch size below hash_mem
4362  * when this is true.)
4363  */
4364  if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
4365  inner_path->pathtarget->width) > get_hash_memory_limit())
4366  startup_cost += disable_cost;
4367 
4368  /*
4369  * Compute cost of the hashquals and qpquals (other restriction clauses)
4370  * separately.
4371  */
4372  cost_qual_eval(&hash_qual_cost, hashclauses, root);
4373  cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
4374  qp_qual_cost.startup -= hash_qual_cost.startup;
4375  qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
4376 
4377  /* CPU costs */
4378 
4379  if (path->jpath.jointype == JOIN_SEMI ||
4380  path->jpath.jointype == JOIN_ANTI ||
4381  extra->inner_unique)
4382  {
4383  double outer_matched_rows;
4384  Selectivity inner_scan_frac;
4385 
4386  /*
4387  * With a SEMI or ANTI join, or if the innerrel is known unique, the
4388  * executor will stop after the first match.
4389  *
4390  * For an outer-rel row that has at least one match, we can expect the
4391  * bucket scan to stop after a fraction 1/(match_count+1) of the
4392  * bucket's rows, if the matches are evenly distributed. Since they
4393  * probably aren't quite evenly distributed, we apply a fuzz factor of
4394  * 2.0 to that fraction. (If we used a larger fuzz factor, we'd have
4395  * to clamp inner_scan_frac to at most 1.0; but since match_count is
4396  * at least 1, no such clamp is needed now.)
4397  */
4398  outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
4399  inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
4400 
4401  startup_cost += hash_qual_cost.startup;
4402  run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
4403  clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;
4404 
4405  /*
4406  * For unmatched outer-rel rows, the picture is quite a lot different.
4407  * In the first place, there is no reason to assume that these rows
4408  * preferentially hit heavily-populated buckets; instead assume they
4409  * are uncorrelated with the inner distribution and so they see an
4410  * average bucket size of inner_path_rows / virtualbuckets. In the
4411  * second place, it seems likely that they will have few if any exact
4412  * hash-code matches and so very few of the tuples in the bucket will
4413  * actually require eval of the hash quals. We don't have any good
4414  * way to estimate how many will, but for the moment assume that the
4415  * effective cost per bucket entry is one-tenth what it is for
4416  * matchable tuples.
4417  */
4418  run_cost += hash_qual_cost.per_tuple *
4419  (outer_path_rows - outer_matched_rows) *
4420  clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
4421 
4422  /* Get # of tuples that will pass the basic join */
4423  if (path->jpath.jointype == JOIN_ANTI)
4424  hashjointuples = outer_path_rows - outer_matched_rows;
4425  else
4426  hashjointuples = outer_matched_rows;
4427  }
4428  else
4429  {
4430  /*
4431  * The number of tuple comparisons needed is the number of outer
4432  * tuples times the typical number of tuples in a hash bucket, which
4433  * is the inner relation size times its bucketsize fraction. At each
4434  * one, we need to evaluate the hashjoin quals. But actually,
4435  * charging the full qual eval cost at each tuple is pessimistic,
4436  * since we don't evaluate the quals unless the hash values match
4437  * exactly. For lack of a better idea, halve the cost estimate to
4438  * allow for that.
4439  */
4440  startup_cost += hash_qual_cost.startup;
4441  run_cost += hash_qual_cost.per_tuple * outer_path_rows *
4442  clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
4443 
4444  /*
4445  * Get approx # tuples passing the hashquals. We use
4446  * approx_tuple_count here because we need an estimate done with
4447  * JOIN_INNER semantics.
4448  */
4449  hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
4450  }
4451 
4452  /*
4453  * For each tuple that gets through the hashjoin proper, we charge
4454  * cpu_tuple_cost plus the cost of evaluating additional restriction
4455  * clauses that are to be applied at the join. (This is pessimistic since
4456  * not all of the quals may get evaluated at each tuple.)
4457  */
4458  startup_cost += qp_qual_cost.startup;
4459  cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
4460  run_cost += cpu_per_tuple * hashjointuples;
4461 
4462  /* tlist eval costs are paid per output row, not per tuple scanned */
4463  startup_cost += path->jpath.path.pathtarget->cost.startup;
4464  run_cost += path->jpath.path.pathtarget->cost.per_tuple * path->jpath.path.rows;
4465 
4466  path->jpath.path.startup_cost = startup_cost;
4467  path->jpath.path.total_cost = startup_cost + run_cost;
4468 }
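/*
 * Editor's sketch (illustrative only, not part of costsize.c): the
 * non-SEMI bucket-probe charge above.  Each outer tuple is compared against
 * roughly inner_rows * innerbucketsize entries of its bucket, and the charge
 * is halved because the quals are only evaluated when the hash values match
 * exactly.  With 10000 outer rows, 50000 inner rows, a bucketsize fraction
 * of 0.001 and a per-tuple qual cost of 0.005, this contributes
 * 0.005 * 10000 * 50 * 0.5 = 1250 cost units.  The function name is
 * hypothetical.
 */
double
example_hashjoin_probe_cost(double qual_cost_per_tuple, double outer_rows,
                            double inner_rows, double innerbucketsize)
{
    /* ~50 bucket entries per probe in the example above */
    double bucket_entries = inner_rows * innerbucketsize;

    return qual_cost_per_tuple * outer_rows * bucket_entries * 0.5;
}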
4469 
4470 
4471 /*
4472  * cost_subplan
4473  * Figure the costs for a SubPlan (or initplan).
4474  *
4475  * Note: we could dig the subplan's Plan out of the root list, but in practice
4476  * all callers have it handy already, so we make them pass it.
4477  */
4478 void
4479 cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan)
4480 {
4481  QualCost sp_cost;
4482 
4483  /* Figure any cost for evaluating the testexpr */
4484  cost_qual_eval(&sp_cost,
4485  make_ands_implicit((Expr *) subplan->testexpr),
4486  root);
4487 
4488  if (subplan->useHashTable)
4489  {
4490  /*
4491  * If we are using a hash table for the subquery outputs, then the
4492  * cost of evaluating the query is a one-time cost. We charge one
4493  * cpu_operator_cost per tuple for the work of loading the hashtable,
4494  * too.
4495  */
4496  sp_cost.startup += plan->total_cost +
4497  cpu_operator_cost * plan->plan_rows;
4498 
4499  /*
4500  * The per-tuple costs include the cost of evaluating the lefthand
4501  * expressions, plus the cost of probing the hashtable. We already
4502  * accounted for the lefthand expressions as part of the testexpr, and
4503  * will also have counted one cpu_operator_cost for each comparison
4504  * operator. That is probably too low for the probing cost, but it's
4505  * hard to make a better estimate, so live with it for now.
4506  */
4507  }
4508  else
4509  {
4510  /*
4511  * Otherwise we will be rescanning the subplan output on each
4512  * evaluation. We need to estimate how much of the output we will
4513  * actually need to scan. NOTE: this logic should agree with the
4514  * tuple_fraction estimates used by make_subplan() in
4515  * plan/subselect.c.
4516  */
4517  Cost plan_run_cost = plan->total_cost - plan->startup_cost;
4518 
4519  if (subplan->subLinkType == EXISTS_SUBLINK)
4520  {
4521  /* we only need to fetch 1 tuple; clamp to avoid zero divide */
4522  sp_cost.per_tuple += plan_run_cost / clamp_row_est(plan->plan_rows);
4523  }
4524  else if (subplan->subLinkType == ALL_SUBLINK ||
4525  subplan->subLinkType == ANY_SUBLINK)
4526  {
4527  /* assume we need 50% of the tuples */
4528  sp_cost.per_tuple += 0.50 * plan_run_cost;
4529  /* also charge a cpu_operator_cost per row examined */
4530  sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
4531  }
4532  else
4533  {
4534  /* assume we need all tuples */
4535  sp_cost.per_tuple += plan_run_cost;
4536  }
4537 
4538  /*
4539  * Also account for subplan's startup cost. If the subplan is
4540  * uncorrelated or undirect correlated, AND its topmost node is one
4541  * that materializes its output, assume that we'll only need to pay
4542  * its startup cost once; otherwise assume we pay the startup cost
4543  * every time.
4544  */
4545  if (subplan->parParam == NIL &&
4546  ExecMaterializesOutput(nodeTag(plan)))
4547  sp_cost.startup += plan->startup_cost;
4548  else
4549  sp_cost.per_tuple += plan->startup_cost;
4550  }
4551 
4552  subplan->startup_cost = sp_cost.startup;
4553  subplan->per_call_cost = sp_cost.per_tuple;
4554 }
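/*
 * Editor's sketch (illustrative only, not part of costsize.c): the per-call
 * run-cost fractions used above for a non-hashed subplan.  EXISTS needs only
 * the first tuple, so it pays run_cost / rows; ANY/ALL are assumed to scan
 * half the output (plus one comparison per row examined); anything else pays
 * the full run cost.  The enum is a hypothetical stand-in for SubLinkType,
 * and the subplan's startup-cost handling is omitted here.
 */
typedef enum ExampleSubLinkKind
{
    EXAMPLE_EXISTS,
    EXAMPLE_ANY_OR_ALL,
    EXAMPLE_OTHER
} ExampleSubLinkKind;

double
example_subplan_per_call_cost(ExampleSubLinkKind kind, double run_cost,
                              double plan_rows, double cpu_operator_cost)
{
    if (plan_rows < 1.0)
        plan_rows = 1.0;        /* mimic clamp_row_est() to avoid zero divide */

    switch (kind)
    {
        case EXAMPLE_EXISTS:
            return run_cost / plan_rows;
        case EXAMPLE_ANY_OR_ALL:
            return 0.50 * run_cost + 0.50 * plan_rows * cpu_operator_cost;
        default:
            return run_cost;
    }
}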
4555 
4556 
4557 /*
4558  * cost_rescan
4559  * Given a finished Path, estimate the costs of rescanning it after
4560  * having done so the first time. For some Path types a rescan is
4561  * cheaper than an original scan (if no parameters change), and this
4562  * function embodies knowledge about that. The default is to return
4563  * the same costs stored in the Path. (Note that the cost estimates
4564  * actually stored in Paths are always for first scans.)
4565  *
4566  * This function is not currently intended to model effects such as rescans
4567  * being cheaper due to disk block caching; what we are concerned with is
4568  * plan types wherein the executor caches results explicitly, or doesn't
4569  * redo startup calculations, etc.
4570  */
4571 static void
4572 cost_rescan(PlannerInfo *root, Path *path,
4573  Cost *rescan_startup_cost, /* output parameters */
4574  Cost *rescan_total_cost)
4575 {
4576  switch (path->pathtype)
4577  {
4578  case T_FunctionScan:
4579 
4580  /*
4581  * Currently, nodeFunctionscan.c always executes the function to
4582  * completion before returning any rows, and caches the results in
4583  * a tuplestore. So the function eval cost is all startup cost
4584  * and isn't paid over again on rescans. However, all run costs
4585  * will be paid over again.
4586  */
4587  *rescan_startup_cost = 0;
4588  *rescan_total_cost = path->total_cost - path->startup_cost;
4589  break;
4590  case T_HashJoin:
4591 
4592  /*
4593  * If it's a single-batch join, we don't need to rebuild the hash
4594  * table during a rescan.
4595  */
4596  if (((HashPath *) path)->num_batches == 1)
4597  {
4598  /* Startup cost is exactly the cost of hash table building */
4599  *rescan_startup_cost = 0;
4600  *rescan_total_cost = path->total_cost - path->startup_cost;
4601  }
4602  else
4603  {
4604  /* Otherwise, no special treatment */
4605  *rescan_startup_cost = path->startup_cost;
4606  *rescan_total_cost = path->total_cost;
4607  }
4608  break;
4609  case T_CteScan:
4610  case T_WorkTableScan:
4611  {
4612  /*
4613  * These plan types materialize their final result in a
4614  * tuplestore or tuplesort object. So the rescan cost is only
4615  * cpu_tuple_cost per tuple, unless the result is large enough
4616  * to spill to disk.
4617  */
4618  Cost run_cost = cpu_tuple_cost * path->rows;
4619  double nbytes = relation_byte_size(path->rows,
4620  path->pathtarget->width);
4621  long work_mem_bytes = work_mem * 1024L;
4622 
4623  if (nbytes > work_mem_bytes)
4624  {
4625  /* It will spill, so account for re-read cost */
4626  double npages = ceil(nbytes / BLCKSZ);
4627 
4628  run_cost += seq_page_cost * npages;
4629  }
4630  *rescan_startup_cost = 0;
4631  *rescan_total_cost = run_cost;
4632  }
4633  break;
4634  case T_Material:
4635  case T_Sort:
4636  {
4637  /*
4638  * These plan types not only materialize their results, but do
4639  * not implement qual filtering or projection. So they are
4640  * even cheaper to rescan than the ones above. We charge only
4641  * cpu_operator_cost per tuple. (Note: keep that in sync with
4642  * the run_cost charge in cost_sort, and also see comments in
4643  * cost_material before you change it.)
4644  */
4645  Cost run_cost = cpu_operator_cost * path->rows;
4646  double nbytes = relation_byte_size(path->rows,
4647  path->pathtarget->width);
4648  long work_mem_bytes = work_mem * 1024L;
4649 
4650  if (nbytes > work_mem_bytes)
4651  {
4652  /* It will spill, so account for re-read cost */
4653  double npages = ceil(nbytes / BLCKSZ);
4654 
4655  run_cost += seq_page_cost * npages;
4656  }
4657  *rescan_startup_cost = 0;
4658  *rescan_total_cost = run_cost;
4659  }
4660  break;
4661  case T_Memoize:
4662  /* All the hard work is done by cost_memoize_rescan */
4663  cost_memoize_rescan(root, (MemoizePath *) path,
4664  rescan_startup_cost, rescan_total_cost);
4665  break;
4666  default:
4667  *rescan_startup_cost = path->startup_cost;
4668  *rescan_total_cost = path->total_cost;
4669  break;
4670  }
4671 }
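/*
 * Editor's sketch (illustrative only, not part of costsize.c): the rescan
 * estimate above for Material/Sort nodes.  A rescan costs only
 * cpu_operator_cost per stored row, plus sequential re-reads if the
 * materialized result is too big for work_mem.  For 1e6 rows of width 100
 * (roughly 100 MB, ignoring the per-tuple overhead that
 * relation_byte_size() adds), a 64MB work_mem forces a spill and each rescan
 * additionally pays seq_page_cost for every block re-read.
 */
double
example_material_rescan_cost(double rows, double width_bytes,
                             double work_mem_bytes, double cpu_operator_cost,
                             double seq_page_cost, double block_size)
{
    double nbytes = rows * width_bytes;
    double run_cost = cpu_operator_cost * rows;

    if (nbytes > work_mem_bytes)
    {
        double npages = nbytes / block_size;    /* real code takes ceil() */

        run_cost += seq_page_cost * npages;
    }
    return run_cost;
}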
4672 
4673 
4674 /*
4675  * cost_qual_eval
4676  * Estimate the CPU costs of evaluating a WHERE clause.
4677  * The input can be either an implicitly-ANDed list of boolean
4678  * expressions, or a list of RestrictInfo nodes. (The latter is
4679  * preferred since it allows caching of the results.)
4680  * The result includes both a one-time (startup) component,
4681  * and a per-evaluation component.
4682  */
4683 void
4684 cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)
4685 {
4686  cost_qual_eval_context context;
4687  ListCell *l;
4688 
4689  context.root = root;
4690  context.total.startup = 0;
4691  context.total.per_tuple = 0;
4692 
4693  /* We don't charge any cost for the implicit ANDing at top level ... */
4694 
4695  foreach(l, quals)
4696  {
4697  Node *qual = (Node *) lfirst(l);
4698 
4699  cost_qual_eval_walker(qual, &context);
4700  }
4701 
4702  *cost = context.total;
4703 }
4704 
4705 /*
4706  * cost_qual_eval_node
4707  * As above, for a single RestrictInfo or expression.
4708  */
4709 void
4710 cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
4711 {
4712  cost_qual_eval_context context;
4713 
4714  context.root = root;
4715  context.total.startup = 0;
4716  context.total.per_tuple = 0;
4717 
4718  cost_qual_eval_walker(qual, &context);
4719 
4720  *cost = context.total;
4721 }
4722 
4723 static bool
4724 cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
4725 {
4726  if (node == NULL)
4727  return false;
4728 
4729  /*
4730  * RestrictInfo nodes contain an eval_cost field reserved for this
4731  * routine's use, so that it's not necessary to evaluate the qual clause's
4732  * cost more than once. If the clause's cost hasn't been computed yet,
4733  * the field's startup value will contain -1.
4734  */
4735  if (IsA(node, RestrictInfo))
4736  {
4737  RestrictInfo *rinfo = (RestrictInfo *) node;
4738 
4739  if (rinfo->eval_cost.startup < 0)
4740  {
4741  cost_qual_eval_context locContext;
4742 
4743  locContext.root = context->root;
4744  locContext.total.startup = 0;
4745  locContext.total.per_tuple = 0;
4746 
4747  /*
4748  * For an OR clause, recurse into the marked-up tree so that we
4749  * set the eval_cost for contained RestrictInfos too.
4750  */
4751  if (rinfo->orclause)
4752  cost_qual_eval_walker((Node *) rinfo->orclause, &locContext);
4753  else
4754  cost_qual_eval_walker((Node *) rinfo->clause, &locContext);
4755 
4756  /*
4757  * If the RestrictInfo is marked pseudoconstant, it will be tested
4758  * only once, so treat its cost as all startup cost.
4759  */
4760  if (rinfo->pseudoconstant)
4761  {
4762  /* count one execution during startup */
4763  locContext.total.startup += locContext.total.per_tuple;
4764  locContext.total.per_tuple = 0;
4765  }
4766  rinfo->eval_cost = locContext.total;
4767  }
4768  context->total.startup += rinfo->eval_cost.startup;
4769  context->total.per_tuple += rinfo->eval_cost.per_tuple;
4770  /* do NOT recurse into children */
4771  return false;
4772  }
4773 
4774  /*
4775  * For each operator or function node in the given tree, we charge the
4776  * estimated execution cost given by pg_proc.procost (remember to multiply
4777  * this by cpu_operator_cost).
4778  *
4779  * Vars and Consts are charged zero, and so are boolean operators (AND,
4780  * OR, NOT). Simplistic, but a lot better than no model at all.
4781  *
4782  * Should we try to account for the possibility of short-circuit
4783  * evaluation of AND/OR? Probably *not*, because that would make the
4784  * results depend on the clause ordering, and we are not in any position
4785  * to expect that the current ordering of the clauses is the one that's
4786  * going to end up being used. The above per-RestrictInfo caching would
4787  * not mix well with trying to re-order clauses anyway.
4788  *
4789  * Another issue that is entirely ignored here is that if a set-returning
4790  * function is below top level in the tree, the functions/operators above
4791  * it will need to be evaluated multiple times. In practical use, such
4792  * cases arise so seldom as to not be worth the added complexity needed;
4793  * moreover, since our rowcount estimates for functions tend to be pretty
4794  * phony, the results would also be pretty phony.
4795  */
4796  if (IsA(node, FuncExpr))
4797  {
4798  add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
4799  &context->total);
4800  }
4801  else if (IsA(node, OpExpr) ||
4802  IsA(node, DistinctExpr) ||
4803  IsA(node, NullIfExpr))
4804  {
4805  /* rely on struct equivalence to treat these all alike */
4806  set_opfuncid((OpExpr *) node);
4807  add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
4808  &context->total);
4809  }
4810  else if (IsA(node, ScalarArrayOpExpr))
4811  {
4812  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
4813  Node *arraynode = (Node *) lsecond(saop->args);
4814  QualCost sacosts;
4815  QualCost hcosts;
4816  double estarraylen = estimate_array_length(context->root, arraynode);
4817 
4818  set_sa_opfuncid(saop);
4819  sacosts.startup = sacosts.per_tuple = 0;
4820  add_function_cost(context->root, saop->opfuncid, NULL,
4821  &sacosts);
4822 
4823  if (OidIsValid(saop->hashfuncid))
4824  {
4825  /* Handle costs for hashed ScalarArrayOpExpr */
4826  hcosts.startup = hcosts.per_tuple = 0;
4827 
4828  add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts);
4829  context->total.startup += sacosts.startup + hcosts.startup;
4830 
4831  /* Estimate the cost of building the hashtable. */
4832  context->total.startup += estarraylen * hcosts.per_tuple;
4833 
4834  /*
4835  * XXX should we charge a little bit for sacosts.per_tuple when
4836  * building the table, or is it ok to assume there will be zero
4837  * hash collisions?
4838  */
4839 
4840  /*
4841  * Charge for hashtable lookups. Charge a single hash and a
4842  * single comparison.
4843  */
4844  context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple;
4845  }
4846  else
4847  {
4848  /*
4849  * Estimate that the operator will be applied to about half of the
4850  * array elements before the answer is determined.
4851  */
4852  context->total.startup += sacosts.startup;
4853  context->total.per_tuple += sacosts.per_tuple *
4854  estimate_array_length(context->root, arraynode) * 0.5;
4855  }
4856  }
4857  else if (IsA(node, Aggref) ||
4858  IsA(node, WindowFunc))
4859  {
4860  /*
4861  * Aggref and WindowFunc nodes are (and should be) treated like Vars,
4862  * ie, zero execution cost in the current model, because they behave
4863  * essentially like Vars at execution. We disregard the costs of
4864  * their input expressions for the same reason. The actual execution
4865  * costs of the aggregate/window functions and their arguments have to
4866  * be factored into plan-node-specific costing of the Agg or WindowAgg
4867  * plan node.
4868  */
4869  return false; /* don't recurse into children */
4870  }
4871  else if (IsA(node, GroupingFunc))
4872  {
4873  /* Treat this as having cost 1 */
4874  context->total.per_tuple += cpu_operator_cost;
4875  return false; /* don't recurse into children */
4876  }
4877  else if (IsA(node, CoerceViaIO))
4878  {
4879  CoerceViaIO *iocoerce = (CoerceViaIO *) node;
4880  Oid iofunc;
4881  Oid typioparam;
4882  bool typisvarlena;
4883 
4884  /* check the result type's input function */
4885  getTypeInputInfo(iocoerce->resulttype,
4886  &iofunc, &typioparam);
4887  add_function_cost(context->root, iofunc, NULL,
4888  &context->total);
4889  /* check the input type's output function */
4890  getTypeOutputInfo(exprType((Node *) iocoerce->arg),
4891  &iofunc, &typisvarlena);
4892  add_function_cost(context->root, iofunc, NULL,
4893  &context->total);
4894  }
4895  else if (IsA(node, ArrayCoerceExpr))
4896  {
4897  ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
4898  QualCost perelemcost;
4899 
4900  cost_qual_eval_node(&perelemcost, (Node *) acoerce->elemexpr,
4901  context->root);
4902  context->total.startup += perelemcost.startup;
4903  if (perelemcost.per_tuple > 0)
4904  context->total.per_tuple += perelemcost.per_tuple *
4905  estimate_array_length(context->root, (Node *) acoerce->arg);
4906  }
4907  else if (IsA(node, RowCompareExpr))
4908  {
4909  /* Conservatively assume we will check all the columns */
4910  RowCompareExpr *rcexpr = (RowCompareExpr *) node;
4911  ListCell *lc;
4912 
4913  foreach(lc, rcexpr->opnos)
4914  {
4915  Oid opid = lfirst_oid(lc);
4916 
4917  add_function_cost(context->root, get_opcode(opid), NULL,
4918  &context->total);
4919  }
4920  }
4921  else if (IsA(node, MinMaxExpr) ||
4922  IsA(node, SQLValueFunction) ||
4923  IsA(node, XmlExpr) ||
4924  IsA(node, CoerceToDomain) ||
4925  IsA(node, NextValueExpr) ||
4926  IsA(node, JsonExpr))
4927  {
4928  /* Treat all these as having cost 1 */
4929  context->total.per_tuple += cpu_operator_cost;
4930  }
4931  else if (IsA(node, SubLink))
4932  {
4933  /* This routine should not be applied to un-planned expressions */
4934  elog(ERROR, "cannot handle unplanned sub-select");
4935  }
4936  else if (IsA(node, SubPlan))
4937  {
4938  /*
4939  * A subplan node in an expression typically indicates that the
4940  * subplan will be executed on each evaluation, so charge accordingly.
4941  * (Sub-selects that can be executed as InitPlans have already been
4942  * removed from the expression.)
4943  */
4944  SubPlan *subplan = (SubPlan *) node;
4945 
4946  context->total.startup += subplan->startup_cost;
4947  context->total.per_tuple += subplan->per_call_cost;
4948 
4949  /*
4950  * We don't want to recurse into the testexpr, because it was already
4951  * counted in the SubPlan node's costs. So we're done.
4952  */
4953  return false;
4954  }
4955  else if (IsA(node, AlternativeSubPlan))
4956  {
4957  /*
4958  * Arbitrarily use the first alternative plan for costing. (We should
4959  * certainly only include one alternative, and we don't yet have
4960  * enough information to know which one the executor is most likely to
4961  * use.)
4962  */
4963  AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
4964 
4965  return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
4966  context);
4967  }
4968  else if (IsA(node, PlaceHolderVar))
4969  {
4970  /*
4971  * A PlaceHolderVar should be given cost zero when considering general
4972  * expression evaluation costs. The expense of doing the contained
4973  * expression is charged as part of the tlist eval costs of the scan
4974  * or join where the PHV is first computed (see set_rel_width and
4975  * add_placeholders_to_joinrel). If we charged it again here, we'd be
4976  * double-counting the cost for each level of plan that the PHV
4977  * bubbles up through. Hence, return without recursing into the
4978  * phexpr.
4979  */
4980  return false;
4981  }
4982 
4983  /* recurse into children */
4984  return expression_tree_walker(node, cost_qual_eval_walker,
4985  (void *) context);
4986 }
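
/*
 * [Editor's illustration -- not part of costsize.c] The net effect of the
 * walker above for a simple qual list such as "a > 1 AND b = 2 AND c < 3":
 * each operator charges procost * cpu_operator_cost per tuple, while Vars,
 * Consts and the implicit AND add nothing. The default values assumed below
 * (procost = 1, cpu_operator_cost = 0.0025) give 3 * 0.0025 = 0.0075 per
 * tuple for that example.
 */
static double
example_simple_qual_per_tuple_cost(int num_operators)
{
	const double cpu_operator_cost = 0.0025;	/* default GUC value */
	const double default_procost = 1.0;			/* default pg_proc.procost */

	return num_operators * default_procost * cpu_operator_cost;
}
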
4987 
4988 /*
4989  * get_restriction_qual_cost
4990  * Compute evaluation costs of a baserel's restriction quals, plus any
4991  * movable join quals that have been pushed down to the scan.
4992  * Results are returned into *qpqual_cost.
4993  *
4994  * This is a convenience subroutine that works for seqscans and other cases
4995  * where all the given quals will be evaluated the hard way. It's not useful
4996  * for cost_index(), for example, where the index machinery takes care of
4997  * some of the quals. We assume baserestrictcost was previously set by
4998  * set_baserel_size_estimates().
4999  */
5000 static void
5001 get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
5002  ParamPathInfo *param_info,
5003  QualCost *qpqual_cost)
5004 {
5005  if (param_info)
5006  {
5007  /* Include costs of pushed-down clauses */
5008  cost_qual_eval(qpqual_cost, param_info->ppi_clauses, root);
5009 
5010  qpqual_cost->startup += baserel->baserestrictcost.startup;
5011  qpqual_cost->per_tuple += baserel->baserestrictcost.per_tuple;
5012  }
5013  else
5014  *qpqual_cost = baserel->baserestrictcost;
5015 }
5016 
5017 
5018 /*
5019  * compute_semi_anti_join_factors
5020  * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join
5021  * can be expected to scan.
5022  *
5023  * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
5024  * inner rows as soon as it finds a match to the current outer row.
5025  * The same happens if we have detected the inner rel is unique.
5026  * We should therefore adjust some of the cost components for this effect.
5027  * This function computes some estimates needed for these adjustments.
5028  * These estimates will be the same regardless of the particular paths used
5029  * for the outer and inner relation, so we compute these once and then pass
5030  * them to all the join cost estimation functions.
5031  *
5032  * Input parameters:
5033  * joinrel: join relation under consideration
5034  * outerrel: outer relation under consideration
5035  * innerrel: inner relation under consideration
5036  * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique
5037  * sjinfo: SpecialJoinInfo relevant to this join
5038  * restrictlist: join quals
5039  * Output parameters:
5040  * *semifactors is filled in (see pathnodes.h for field definitions)
5041  */
5042 void
5043 compute_semi_anti_join_factors(PlannerInfo *root,
5044  RelOptInfo *joinrel,
5045  RelOptInfo *outerrel,
5046  RelOptInfo *innerrel,
5047  JoinType jointype,
5048  SpecialJoinInfo *sjinfo,
5049  List *restrictlist,
5050  SemiAntiJoinFactors *semifactors)
5051 {
5052  Selectivity jselec;
5053  Selectivity nselec;
5054  Selectivity avgmatch;
5055  SpecialJoinInfo norm_sjinfo;
5056  List *joinquals;
5057  ListCell *l;
5058 
5059  /*
5060  * In an ANTI join, we must ignore clauses that are "pushed down", since
5061  * those won't affect the match logic. In a SEMI join, we do not
5062  * distinguish joinquals from "pushed down" quals, so just use the whole
5063  * restrictinfo list. For other outer join types, we should consider only
5064  * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check.
5065  */
5066  if (IS_OUTER_JOIN(jointype))
5067  {
5068  joinquals = NIL;
5069  foreach(l, restrictlist)
5070  {
5071  RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
5072 
5073  if (!RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
5074  joinquals = lappend(joinquals, rinfo);
5075  }
5076  }
5077  else
5078  joinquals = restrictlist;
5079 
5080  /*
5081  * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
5082  */
5083  jselec = clauselist_selectivity(root,
5084  joinquals,
5085  0,
5086  (jointype == JOIN_ANTI) ? JOIN_ANTI : JOIN_SEMI,
5087  sjinfo);
5088 
5089  /*
5090  * Also get the normal inner-join selectivity of the join clauses.
5091  */
5092  init_dummy_sjinfo(&norm_sjinfo, outerrel->relids, innerrel->relids);
5093 
5094  nselec = clauselist_selectivity(root,
5095  joinquals,
5096  0,
5097  JOIN_INNER,
5098  &norm_sjinfo);
5099 
5100  /* Avoid leaking a lot of ListCells */
5101  if (IS_OUTER_JOIN(jointype))
5102  list_free(joinquals);
5103 
5104  /*
5105  * jselec can be interpreted as the fraction of outer-rel rows that have
5106  * any matches (this is true for both SEMI and ANTI cases). And nselec is
5107  * the fraction of the Cartesian product that matches. So, the average
5108  * number of matches for each outer-rel row that has at least one match is
5109  * nselec * inner_rows / jselec.
5110  *
5111  * Note: it is correct to use the inner rel's "rows" count here, even
5112  * though we might later be considering a parameterized inner path with
5113  * fewer rows. This is because we have included all the join clauses in
5114  * the selectivity estimate.
5115  */
5116  if (jselec > 0) /* protect against zero divide */
5117  {
5118  avgmatch = nselec * innerrel->rows / jselec;
5119  /* Clamp to sane range */
5120  avgmatch = Max(1.0, avgmatch);
5121  }
5122  else
5123  avgmatch = 1.0;
5124 
5125  semifactors->outer_match_frac = jselec;
5126  semifactors->match_count = avgmatch;
5127 }
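
/*
 * [Editor's illustration -- not part of costsize.c] The avgmatch arithmetic
 * above, with made-up numbers: if jselec = 0.1 (10% of outer rows have at
 * least one match), nselec = 0.001 and inner_rows = 1000, then the average
 * matched outer row has 0.001 * 1000 / 0.1 = 10 matching inner rows.
 */
static double
example_avg_match_count(double jselec, double nselec, double inner_rows)
{
	double		avgmatch = 1.0;

	if (jselec > 0)				/* protect against division by zero */
	{
		avgmatch = nselec * inner_rows / jselec;
		if (avgmatch < 1.0)		/* same clamp as Max(1.0, avgmatch) */
			avgmatch = 1.0;
	}
	return avgmatch;
}
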
5128 
5129 /*
5130  * has_indexed_join_quals
5131  * Check whether all the joinquals of a nestloop join are used as
5132  * inner index quals.
5133  *
5134  * If the inner path of a SEMI/ANTI join is an indexscan (including bitmap
5135  * indexscan) that uses all the joinquals as indexquals, we can assume that an
5136  * unmatched outer tuple is cheap to process, whereas otherwise it's probably
5137  * expensive.
5138  */
5139 static bool
5140 has_indexed_join_quals(NestPath *path)
5141 {
5142  JoinPath *joinpath = &path->jpath;
5143  Relids joinrelids = joinpath->path.parent->relids;
5144  Path *innerpath = joinpath->innerjoinpath;
5145  List *indexclauses;
5146  bool found_one;
5147  ListCell *lc;
5148 
5149  /* If join still has quals to evaluate, it's not fast */
5150  if (joinpath->joinrestrictinfo != NIL)
5151  return false;
5152  /* Nor if the inner path isn't parameterized at all */
5153  if (innerpath->param_info == NULL)
5154  return false;
5155 
5156  /* Find the indexclauses list for the inner scan */
5157  switch (innerpath->pathtype)
5158  {
5159  case T_IndexScan:
5160  case T_IndexOnlyScan:
5161  indexclauses = ((IndexPath *) innerpath)->indexclauses;
5162  break;
5163  case T_BitmapHeapScan:
5164  {
5165  /* Accept only a simple bitmap scan, not AND/OR cases */
5166  Path *bmqual = ((BitmapHeapPath *) innerpath)->bitmapqual;
5167 
5168  if (IsA(bmqual, IndexPath))
5169  indexclauses = ((IndexPath *) bmqual)->indexclauses;
5170  else
5171  return false;
5172  break;
5173  }
5174  default:
5175 
5176  /*
5177  * If it's not a simple indexscan, it probably doesn't run quickly
5178  * for zero rows out, even if it's a parameterized path using all
5179  * the joinquals.
5180  */
5181  return false;
5182  }
5183 
5184  /*
5185  * Examine the inner path's param clauses. Any that are from the outer
5186  * path must be found in the indexclauses list, either exactly or in an
5187  * equivalent form generated by equivclass.c. Also, we must find at least
5188  * one such clause, else it's a clauseless join which isn't fast.
5189  */
5190  found_one = false;
5191  foreach(lc, innerpath->param_info->ppi_clauses)
5192  {
5193  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
5194 
5195  if (join_clause_is_movable_into(rinfo,
5196  innerpath->parent->relids,
5197  joinrelids))
5198  {
5199  if (!is_redundant_with_indexclauses(rinfo, indexclauses))
5200  return false;
5201  found_one = true;
5202  }
5203  }
5204  return found_one;
5205 }
5206 
5207 
5208 /*
5209  * approx_tuple_count
5210  * Quick-and-dirty estimation of the number of join rows passing
5211  * a set of qual conditions.
5212  *
5213  * The quals can be either an implicitly-ANDed list of boolean expressions,
5214  * or a list of RestrictInfo nodes (typically the latter).
5215  *
5216  * We intentionally compute the selectivity under JOIN_INNER rules, even
5217  * if it's some type of outer join. This is appropriate because we are
5218  * trying to figure out how many tuples pass the initial merge or hash
5219  * join step.
5220  *
5221  * This is quick-and-dirty because we bypass clauselist_selectivity, and
5222  * simply multiply the independent clause selectivities together. Now
5223  * clauselist_selectivity often can't do any better than that anyhow, but
5224  * for some situations (such as range constraints) it is smarter. However,
5225  * we can't effectively cache the results of clauselist_selectivity, whereas
5226  * the individual clause selectivities can be and are cached.
5227  *
5228  * Since we are only using the results to estimate how many potential
5229  * output tuples are generated and passed through qpqual checking, it
5230  * seems OK to live with the approximation.
5231  */
5232 static double
5233 approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
5234 {
5235  double tuples;
5236  double outer_tuples = path->outerjoinpath->rows;
5237  double inner_tuples = path->innerjoinpath->rows;
5238  SpecialJoinInfo sjinfo;
5239  Selectivity selec = 1.0;
5240  ListCell *l;
5241 
5242  /*
5243  * Make up a SpecialJoinInfo for JOIN_INNER semantics.
5244  */
5245  init_dummy_sjinfo(&sjinfo, path->outerjoinpath->parent->relids,
5246  path->innerjoinpath->parent->relids);
5247 
5248  /* Get the approximate selectivity */
5249  foreach(l, quals)
5250  {
5251  Node *qual = (Node *) lfirst(l);
5252 
5253  /* Note that clause_selectivity will be able to cache its result */
5254  selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
5255  }
5256 
5257  /* Apply it to the input relation sizes */
5258  tuples = selec * outer_tuples * inner_tuples;
5259 
5260  return clamp_row_est(tuples);
5261 }
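
/*
 * [Editor's illustration -- not part of costsize.c] The quick-and-dirty
 * estimate above simply multiplies the independent clause selectivities and
 * applies the product to the Cartesian product size. With two clauses of
 * selectivity 0.1 and 0.5 over inputs of 10,000 and 2,000 rows, the estimate
 * is 0.1 * 0.5 * 10000 * 2000 = 1,000,000 rows (before clamping).
 */
static double
example_approx_tuple_count(const double *clause_selec, int nclauses,
						   double outer_tuples, double inner_tuples)
{
	double		selec = 1.0;
	int			i;

	for (i = 0; i < nclauses; i++)
		selec *= clause_selec[i];	/* assume clauses are independent */

	return selec * outer_tuples * inner_tuples; /* caller would clamp */
}
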
5262 
5263 
5264 /*
5265  * set_baserel_size_estimates
5266  * Set the size estimates for the given base relation.
5267  *
5268  * The rel's targetlist and restrictinfo list must have been constructed
5269  * already, and rel->tuples must be set.
5270  *
5271  * We set the following fields of the rel node:
5272  * rows: the estimated number of output tuples (after applying
5273  * restriction clauses).
5274  * width: the estimated average output tuple width in bytes.
5275  * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
5276  */
5277 void
5278 set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5279 {
5280  double nrows;
5281 
5282  /* Should only be applied to base relations */
5283  Assert(rel->relid > 0);
5284 
5285  nrows = rel->tuples *
5286  clauselist_selectivity(root,
5287  rel->baserestrictinfo,
5288  0,
5289  JOIN_INNER,
5290  NULL);
5291 
5292  rel->rows = clamp_row_est(nrows);
5293 
5294  cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
5295 
5296  set_rel_width(root, rel);
5297 }
5298 
5299 /*
5300  * get_parameterized_baserel_size
5301  * Make a size estimate for a parameterized scan of a base relation.
5302  *
5303  * 'param_clauses' lists the additional join clauses to be used.
5304  *
5305  * set_baserel_size_estimates must have been applied already.
5306  */
5307 double
5308 get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
5309  List *param_clauses)
5310 {
5311  List *allclauses;
5312  double nrows;
5313 
5314  /*
5315  * Estimate the number of rows returned by the parameterized scan, knowing
5316  * that it will apply all the extra join clauses as well as the rel's own
5317  * restriction clauses. Note that we force the clauses to be treated as
5318  * non-join clauses during selectivity estimation.
5319  */
5320  allclauses = list_concat_copy(param_clauses, rel->baserestrictinfo);
5321  nrows = rel->tuples *
5322  clauselist_selectivity(root,
5323  allclauses,
5324  rel->relid, /* do not use 0! */
5325  JOIN_INNER,
5326  NULL);
5327  nrows = clamp_row_est(nrows);
5328  /* For safety, make sure result is not more than the base estimate */
5329  if (nrows > rel->rows)
5330  nrows = rel->rows;
5331  return nrows;
5332 }
5333 
5334 /*
5335  * set_joinrel_size_estimates
5336  * Set the size estimates for the given join relation.
5337  *
5338  * The rel's targetlist must have been constructed already, and a
5339  * restriction clause list that matches the given component rels must
5340  * be provided.
5341  *
5342  * Since there is more than one way to make a joinrel for more than two
5343  * base relations, the results we get here could depend on which component
5344  * rel pair is provided. In theory we should get the same answers no matter
5345  * which pair is provided; in practice, since the selectivity estimation
5346  * routines don't handle all cases equally well, we might not. But there's
5347  * not much to be done about it. (Would it make sense to repeat the
5348  * calculations for each pair of input rels that's encountered, and somehow
5349  * average the results? Probably way more trouble than it's worth, and
5350  * anyway we must keep the rowcount estimate the same for all paths for the
5351  * joinrel.)
5352  *
5353  * We set only the rows field here. The reltarget field was already set by
5354  * build_joinrel_tlist, and baserestrictcost is not used for join rels.
5355  */
5356 void
5357 set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
5358  RelOptInfo *outer_rel,
5359  RelOptInfo *inner_rel,
5360  SpecialJoinInfo *sjinfo,
5361  List *restrictlist)
5362 {
5363  rel->rows = calc_joinrel_size_estimate(root,
5364  rel,
5365  outer_rel,
5366  inner_rel,
5367  outer_rel->rows,
5368  inner_rel->rows,
5369  sjinfo,
5370  restrictlist);
5371 }
5372 
5373 /*
5374  * get_parameterized_joinrel_size
5375  * Make a size estimate for a parameterized scan of a join relation.
5376  *
5377  * 'rel' is the joinrel under consideration.
5378  * 'outer_path', 'inner_path' are (probably also parameterized) Paths that
5379  * produce the relations being joined.
5380  * 'sjinfo' is any SpecialJoinInfo relevant to this join.
5381  * 'restrict_clauses' lists the join clauses that need to be applied at the
5382  * join node (including any movable clauses that were moved down to this join,
5383  * and not including any movable clauses that were pushed down into the
5384  * child paths).
5385  *
5386  * set_joinrel_size_estimates must have been applied already.
5387  */
5388 double
5389 get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
5390  Path *outer_path,
5391  Path *inner_path,
5392  SpecialJoinInfo *sjinfo,
5393  List *restrict_clauses)
5394 {
5395  double nrows;
5396 
5397  /*
5398  * Estimate the number of rows returned by the parameterized join as the
5399  * sizes of the input paths times the selectivity of the clauses that have
5400  * ended up at this join node.
5401  *
5402  * As with set_joinrel_size_estimates, the rowcount estimate could depend
5403  * on the pair of input paths provided, though ideally we'd get the same
5404  * estimate for any pair with the same parameterization.
5405  */
5406  nrows = calc_joinrel_size_estimate(root,
5407  rel,
5408  outer_path->parent,
5409  inner_path->parent,
5410  outer_path->rows,
5411  inner_path->rows,
5412  sjinfo,
5413  restrict_clauses);
5414  /* For safety, make sure result is not more than the base estimate */
5415  if (nrows > rel->rows)
5416  nrows = rel->rows;
5417  return nrows;
5418 }
5419 
5420 /*
5421  * calc_joinrel_size_estimate
5422  * Workhorse for set_joinrel_size_estimates and
5423  * get_parameterized_joinrel_size.
5424  *
5425  * outer_rel/inner_rel are the relations being joined, but they should be
5426  * assumed to have sizes outer_rows/inner_rows; those numbers might be less
5427  * than what rel->rows says, when we are considering parameterized paths.
5428  */
5429 static double
5430 calc_joinrel_size_estimate(PlannerInfo *root,
5431  RelOptInfo *joinrel,
5432  RelOptInfo *outer_rel,
5433  RelOptInfo *inner_rel,
5434  double outer_rows,
5435  double inner_rows,
5436  SpecialJoinInfo *sjinfo,
5437  List *restrictlist)
5438 {
5439  JoinType jointype = sjinfo->jointype;
5440  Selectivity fkselec;
5441  Selectivity jselec;
5442  Selectivity pselec;
5443  double nrows;
5444 
5445  /*
5446  * Compute joinclause selectivity. Note that we are only considering
5447  * clauses that become restriction clauses at this join level; we are not
5448  * double-counting them because they were not considered in estimating the
5449  * sizes of the component rels.
5450  *
5451  * First, see whether any of the joinclauses can be matched to known FK
5452  * constraints. If so, drop those clauses from the restrictlist, and
5453  * instead estimate their selectivity using FK semantics. (We do this
5454  * without regard to whether said clauses are local or "pushed down".
5455  * Probably, an FK-matching clause could never be seen as pushed down at
5456  * an outer join, since it would be strict and hence would be grounds for
5457  * join strength reduction.) fkselec gets the net selectivity for
5458  * FK-matching clauses, or 1.0 if there are none.
5459  */
5460  fkselec = get_foreign_key_join_selectivity(root,
5461  outer_rel->relids,
5462  inner_rel->relids,
5463  sjinfo,
5464  &restrictlist);
5465 
5466  /*
5467  * For an outer join, we have to distinguish the selectivity of the join's
5468  * own clauses (JOIN/ON conditions) from any clauses that were "pushed
5469  * down". For inner joins we just count them all as joinclauses.
5470  */
5471  if (IS_OUTER_JOIN(jointype))
5472  {
5473  List *joinquals = NIL;
5474  List *pushedquals = NIL;
5475  ListCell *l;
5476 
5477  /* Grovel through the clauses to separate into two lists */
5478  foreach(l, restrictlist)
5479  {
5480  RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
5481 
5482  if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
5483  pushedquals = lappend(pushedquals, rinfo);
5484  else
5485  joinquals = lappend(joinquals, rinfo);
5486  }
5487 
5488  /* Get the separate selectivities */
5489  jselec = clauselist_selectivity(root,
5490  joinquals,
5491  0,
5492  jointype,
5493  sjinfo);
5494  pselec = clauselist_selectivity(root,
5495  pushedquals,
5496  0,
5497  jointype,
5498  sjinfo);
5499 
5500  /* Avoid leaking a lot of ListCells */
5501  list_free(joinquals);
5502  list_free(pushedquals);
5503  }
5504  else
5505  {
5506  jselec = clauselist_selectivity(root,
5507  restrictlist,
5508  0,
5509  jointype,
5510  sjinfo);
5511  pselec = 0.0; /* not used, keep compiler quiet */
5512  }
5513 
5514  /*
5515  * Basically, we multiply size of Cartesian product by selectivity.
5516  *
5517  * If we are doing an outer join, take that into account: the joinqual
5518  * selectivity has to be clamped using the knowledge that the output must
5519  * be at least as large as the non-nullable input. However, any
5520  * pushed-down quals are applied after the outer join, so their
5521  * selectivity applies fully.
5522  *
5523  * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
5524  * of LHS rows that have matches, and we apply that straightforwardly.
5525  */
5526  switch (jointype)
5527  {
5528  case JOIN_INNER:
5529  nrows = outer_rows * inner_rows * fkselec * jselec;
5530  /* pselec not used */
5531  break;
5532  case JOIN_LEFT:
5533  nrows = outer_rows * inner_rows * fkselec * jselec;
5534  if (nrows < outer_rows)
5535  nrows = outer_rows;
5536  nrows *= pselec;
5537  break;
5538  case JOIN_FULL:
5539  nrows = outer_rows * inner_rows * fkselec * jselec;
5540  if (nrows < outer_rows)
5541  nrows = outer_rows;
5542  if (nrows < inner_rows)
5543  nrows = inner_rows;
5544  nrows *= pselec;
5545  break;
5546  case JOIN_SEMI:
5547  nrows = outer_rows * fkselec * jselec;
5548  /* pselec not used */
5549  break;
5550  case JOIN_ANTI:
5551  nrows = outer_rows * (1.0 - fkselec * jselec);
5552  nrows *= pselec;
5553  break;
5554  default:
5555  /* other values not expected here */
5556  elog(ERROR, "unrecognized join type: %d", (int) jointype);
5557  nrows = 0; /* keep compiler quiet */
5558  break;
5559  }
5560 
5561  return clamp_row_est(nrows);
5562 }
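
/*
 * [Editor's illustration -- not part of costsize.c] The outer-join clamping
 * in the switch above, shown for the JOIN_LEFT case only with made-up
 * numbers: 1,000 outer rows, 10,000 inner rows, fkselec = 1.0, joinqual
 * selectivity 0.00005 and pushed-down selectivity 0.5. The raw product is
 * 1000 * 10000 * 0.00005 = 500, which is clamped up to the 1,000 outer rows
 * (a left join cannot return fewer rows than its outer side), and then the
 * pushed-down quals filter that down to 500 rows.
 */
static double
example_left_join_rows(double outer_rows, double inner_rows,
					   double fkselec, double jselec, double pselec)
{
	double		nrows = outer_rows * inner_rows * fkselec * jselec;

	if (nrows < outer_rows)		/* joinquals cannot eliminate outer rows */
		nrows = outer_rows;
	return nrows * pselec;		/* pushed-down quals apply after the join */
}
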
5563 
5564 /*
5565  * get_foreign_key_join_selectivity
5566  * Estimate join selectivity for foreign-key-related clauses.
5567  *
5568  * Remove any clauses that can be matched to FK constraints from *restrictlist,
5569  * and return a substitute estimate of their selectivity. 1.0 is returned
5570  * when there are no such clauses.
5571  *
5572  * The reason for treating such clauses specially is that we can get better
5573  * estimates this way than by relying on clauselist_selectivity(), especially
5574  * for multi-column FKs where that function's assumption that the clauses are
5575  * independent falls down badly. But even with single-column FKs, we may be
5576  * able to get a better answer when the pg_statistic stats are missing or out
5577  * of date.
5578  */
5579 static Selectivity
5580 get_foreign_key_join_selectivity(PlannerInfo *root,
5581  Relids outer_relids,
5582  Relids inner_relids,
5583  SpecialJoinInfo *sjinfo,
5584  List **restrictlist)
5585 {
5586  Selectivity fkselec = 1.0;
5587  JoinType jointype = sjinfo->jointype;
5588  List *worklist = *restrictlist;
5589  ListCell *lc;
5590 
5591  /* Consider each FK constraint that is known to match the query */
5592  foreach(lc, root->fkey_list)
5593  {
5594  ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
5595  bool ref_is_outer;
5596  List *removedlist;
5597  ListCell *cell;
5598 
5599  /*
5600  * This FK is not relevant unless it connects a baserel on one side of
5601  * this join to a baserel on the other side.
5602  */
5603  if (bms_is_member(fkinfo->con_relid, outer_relids) &&
5604  bms_is_member(fkinfo->ref_relid, inner_relids))
5605  ref_is_outer = false;
5606  else if (bms_is_member(fkinfo->ref_relid, outer_relids) &&
5607  bms_is_member(fkinfo->con_relid, inner_relids))
5608  ref_is_outer = true;
5609  else
5610  continue;
5611 
5612  /*
5613  * If we're dealing with a semi/anti join, and the FK's referenced
5614  * relation is on the outside, then knowledge of the FK doesn't help
5615  * us figure out what we need to know (which is the fraction of outer
5616  * rows that have matches). On the other hand, if the referenced rel
5617  * is on the inside, then all outer rows must have matches in the
5618  * referenced table (ignoring nulls). But any restriction or join
5619  * clauses that filter that table will reduce the fraction of matches.
5620  * We can account for restriction clauses, but it's too hard to guess
5621  * how many table rows would get through a join that's inside the RHS.
5622  * Hence, if either case applies, punt and ignore the FK.
5623  */
5624  if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
5625  (ref_is_outer || bms_membership(inner_relids) != BMS_SINGLETON))
5626  continue;
5627 
5628  /*
5629  * Modify the restrictlist by removing clauses that match the FK (and
5630  * putting them into removedlist instead). It seems unsafe to modify
5631  * the originally-passed List structure, so we make a shallow copy the
5632  * first time through.
5633  */
5634  if (worklist == *restrictlist)
5635  worklist = list_copy(worklist);
5636 
5637  removedlist = NIL;
5638  foreach(cell, worklist)
5639  {
5640  RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
5641  bool remove_it = false;
5642  int i;
5643 
5644  /* Drop this clause if it matches any column of the FK */
5645  for (i = 0; i < fkinfo->nkeys; i++)
5646  {
5647  if (rinfo->parent_ec)
5648  {
5649  /*
5650  * EC-derived clauses can only match by EC. It is okay to
5651  * consider any clause derived from the same EC as
5652  * matching the FK: even if equivclass.c chose to generate
5653  * a clause equating some other pair of Vars, it could
5654  * have generated one equating the FK's Vars. So for
5655  * purposes of estimation, we can act as though it did so.
5656  *
5657  * Note: checking parent_ec is a bit of a cheat because
5658  * there are EC-derived clauses that don't have parent_ec
5659  * set; but such clauses must compare expressions that
5660  * aren't just Vars, so they cannot match the FK anyway.
5661  */
5662  if (fkinfo->eclass[i] == rinfo->parent_ec)
5663  {
5664  remove_it = true;
5665  break;
5666  }
5667  }
5668  else
5669  {
5670  /*
5671  * Otherwise, see if rinfo was previously matched to FK as
5672  * a "loose" clause.
5673  */
5674  if (list_member_ptr(fkinfo->rinfos[i], rinfo))
5675  {
5676  remove_it = true;
5677  break;
5678  }
5679  }
5680  }
5681  if (remove_it)
5682  {
5683  worklist = foreach_delete_current(worklist, cell);
5684  removedlist = lappend(removedlist, rinfo);
5685  }
5686  }
5687 
5688  /*
5689  * If we failed to remove all the matching clauses we expected to
5690  * find, chicken out and ignore this FK; applying its selectivity
5691  * might result in double-counting. Put any clauses we did manage to
5692  * remove back into the worklist.
5693  *
5694  * Since the matching clauses are known not outerjoin-delayed, they
5695  * would normally have appeared in the initial joinclause list. If we
5696  * didn't find them, there are two possibilities:
5697  *
5698  * 1. If the FK match is based on an EC that is ec_has_const, it won't
5699  * have generated any join clauses at all. We discount such ECs while
5700  * checking to see if we have "all" the clauses. (Below, we'll adjust
5701  * the selectivity estimate for this case.)
5702  *
5703  * 2. The clauses were matched to some other FK in a previous
5704  * iteration of this loop, and thus removed from worklist. (A likely
5705  * case is that two FKs are matched to the same EC; there will be only
5706  * one EC-derived clause in the initial list, so the first FK will
5707  * consume it.) Applying both FKs' selectivity independently risks
5708  * underestimating the join size; in particular, this would undo one
5709  * of the main things that ECs were invented for, namely to avoid
5710  * double-counting the selectivity of redundant equality conditions.
5711  * Later we might think of a reasonable way to combine the estimates,
5712  * but for now, just punt, since this is a fairly uncommon situation.
5713  */
5714  if (removedlist == NIL ||
5715  list_length(removedlist) !=
5716  (fkinfo->nmatched_ec - fkinfo->nconst_ec + fkinfo->nmatched_ri))
5717  {
5718  worklist = list_concat(worklist, removedlist);
5719  continue;
5720  }
5721 
5722  /*
5723  * Finally we get to the payoff: estimate selectivity using the
5724  * knowledge that each referencing row will match exactly one row in
5725  * the referenced table.
5726  *
5727  * XXX that's not true in the presence of nulls in the referencing
5728  * column(s), so in principle we should derate the estimate for those.
5729  * However (1) if there are any strict restriction clauses for the
5730  * referencing column(s) elsewhere in the query, derating here would
5731  * be double-counting the null fraction, and (2) it's not very clear
5732  * how to combine null fractions for multiple referencing columns. So
5733  * we do nothing for now about correcting for nulls.
5734  *
5735  * XXX another point here is that if either side of an FK constraint
5736  * is an inheritance parent, we estimate as though the constraint
5737  * covers all its children as well. This is not an unreasonable
5738  * assumption for a referencing table, ie the user probably applied
5739  * identical constraints to all child tables (though perhaps we ought
5740  * to check that). But it's not possible to have done that for a
5741  * referenced table. Fortunately, precisely because that doesn't
5742  * work, it is uncommon in practice to have an FK referencing a parent
5743  * table. So, at least for now, disregard inheritance here.
5744  */
5745  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
5746  {
5747  /*
5748  * For JOIN_SEMI and JOIN_ANTI, we only get here when the FK's
5749  * referenced table is exactly the inside of the join. The join
5750  * selectivity is defined as the fraction of LHS rows that have
5751  * matches. The FK implies that every LHS row has a match *in the
5752  * referenced table*; but any restriction clauses on it will
5753  * reduce the number of matches. Hence we take the join
5754  * selectivity as equal to the selectivity of the table's
5755  * restriction clauses, which is rows / tuples; but we must guard
5756  * against tuples == 0.
5757  */
5758  RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
5759  double ref_tuples = Max(ref_rel->tuples, 1.0);
5760 
5761  fkselec *= ref_rel->rows / ref_tuples;
5762  }
5763  else
5764  {
5765  /*
5766  * Otherwise, selectivity is exactly 1/referenced-table-size; but
5767  * guard against tuples == 0. Note we should use the raw table
5768  * tuple count, not any estimate of its filtered or joined size.
5769  */
5770  RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
5771  double ref_tuples = Max(ref_rel->tuples, 1.0);
5772 
5773  fkselec *= 1.0 / ref_tuples;
5774  }
5775 
5776  /*
5777  * If any of the FK columns participated in ec_has_const ECs, then
5778  * equivclass.c will have generated "var = const" restrictions for
5779  * each side of the join, thus reducing the sizes of both input
5780  * relations. Taking the fkselec at face value would amount to
5781  * double-counting the selectivity of the constant restriction for the
5782  * referencing Var. Hence, look for the restriction clause(s) that
5783  * were applied to the referencing Var(s), and divide out their
5784  * selectivity to correct for this.
5785  */
5786  if (fkinfo->nconst_ec > 0)
5787  {
5788  for (int i = 0; i < fkinfo->nkeys; i++)
5789  {
5790  EquivalenceClass *ec = fkinfo->eclass[i];
5791 
5792  if (ec && ec->ec_has_const)
5793  {
5794  EquivalenceMember *em = fkinfo->fk_eclass_member[i];
5795  RestrictInfo *rinfo = find_derived_clause_for_ec_member(ec,
5796  em);
5797 
5798  if (rinfo)
5799  {
5800  Selectivity s0;
5801 
5802  s0 = clause_selectivity(root,
5803  (Node *) rinfo,
5804  0,
5805  jointype,
5806  sjinfo);
5807  if (s0 > 0)
5808  fkselec /= s0;
5809  }
5810  }
5811  }
5812  }
5813  }
5814 
5815  *restrictlist = worklist;
5816  CLAMP_PROBABILITY(fkselec);
5817  return fkselec;
5818 }
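
/*
 * [Editor's illustration -- not part of costsize.c] The payoff computation
 * above, with made-up numbers: for an inner join whose join clause matches an
 * FK referencing a 50,000-tuple table, the clause contributes selectivity
 * 1/50000 = 0.00002; for a SEMI/ANTI join it instead contributes
 * ref_rel_rows / ref_rel_tuples, i.e. the referenced table's restriction
 * selectivity (0.2 if 10,000 of its 50,000 tuples survive its quals).
 */
static double
example_fk_clause_selectivity(int semi_or_anti,
							  double ref_rel_rows, double ref_rel_tuples)
{
	double		ref_tuples = (ref_rel_tuples > 1.0) ? ref_rel_tuples : 1.0;

	return semi_or_anti ? (ref_rel_rows / ref_tuples) : (1.0 / ref_tuples);
}
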
5819 
5820 /*
5821  * set_subquery_size_estimates
5822  * Set the size estimates for a base relation that is a subquery.
5823  *
5824  * The rel's targetlist and restrictinfo list must have been constructed
5825  * already, and the Paths for the subquery must have been completed.
5826  * We look at the subquery's PlannerInfo to extract data.
5827  *
5828  * We set the same fields as set_baserel_size_estimates.
5829  */
5830 void
5831 set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5832 {
5833  PlannerInfo *subroot = rel->subroot;
5834  RelOptInfo *sub_final_rel;
5835  ListCell *lc;
5836 
5837  /* Should only be applied to base relations that are subqueries */
5838  Assert(rel->relid > 0);
5839  Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_SUBQUERY);
5840 
5841  /*
5842  * Copy raw number of output rows from subquery. All of its paths should
5843  * have the same output rowcount, so just look at cheapest-total.
5844  */
5845  sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
5846  rel->tuples = sub_final_rel->cheapest_total_path->rows;
5847 
5848  /*
5849  * Compute per-output-column width estimates by examining the subquery's
5850  * targetlist. For any output that is a plain Var, get the width estimate
5851  * that was made while planning the subquery. Otherwise, we leave it to
5852  * set_rel_width to fill in a datatype-based default estimate.
5853  */
5854  foreach(lc, subroot->parse->targetList)
5855  {
5856  TargetEntry *te = lfirst_node(TargetEntry, lc);
5857  Node *texpr = (Node *) te->expr;
5858  int32 item_width = 0;
5859 
5860  /* junk columns aren't visible to upper query */
5861  if (te->resjunk)
5862  continue;
5863 
5864  /*
5865  * The subquery could be an expansion of a view that's had columns
5866  * added to it since the current query was parsed, so that there are
5867  * non-junk tlist columns in it that don't correspond to any column
5868  * visible at our query level. Ignore such columns.
5869  */
5870  if (te->resno < rel->min_attr || te->resno > rel->max_attr)
5871  continue;
5872 
5873  /*
5874  * XXX This currently doesn't work for subqueries containing set
5875  * operations, because the Vars in their tlists are bogus references
5876  * to the first leaf subquery, which wouldn't give the right answer
5877  * even if we could still get to its PlannerInfo.
5878  *
5879  * Also, the subquery could be an appendrel for which all branches are
5880  * known empty due to constraint exclusion, in which case
5881  * set_append_rel_pathlist will have left the attr_widths set to zero.
5882  *
5883  * In either case, we just leave the width estimate zero until
5884  * set_rel_width fixes it.
5885  */
5886  if (IsA(texpr, Var) &&
5887  subroot->parse->setOperations == NULL)
5888  {
5889  Var *var = (Var *) texpr;
5890  RelOptInfo *subrel = find_base_rel(subroot, var->varno);
5891 
5892  item_width = subrel->attr_widths[var->varattno - subrel->min_attr];
5893  }
5894  rel->attr_widths[te->resno - rel->min_attr] = item_width;
5895  }
5896 
5897  /* Now estimate number of output rows, etc */
5898  set_baserel_size_estimates(root, rel);
5899 }
5900 
5901 /*
5902  * set_function_size_estimates
5903  * Set the size estimates for a base relation that is a function call.
5904  *
5905  * The rel's targetlist and restrictinfo list must have been constructed
5906  * already.
5907  *
5908  * We set the same fields as set_baserel_size_estimates.
5909  */
5910 void
5911 set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5912 {
5913  RangeTblEntry *rte;
5914  ListCell *lc;
5915 
5916  /* Should only be applied to base relations that are functions */
5917  Assert(rel->relid > 0);
5918  rte = planner_rt_fetch(rel->relid, root);
5919  Assert(rte->rtekind == RTE_FUNCTION);
5920 
5921  /*
5922  * Estimate number of rows the functions will return. The rowcount of the
5923  * node is that of the largest function result.
5924  */
5925  rel->tuples = 0;
5926  foreach(lc, rte->functions)
5927  {
5928  RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
5929  double ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
5930 
5931  if (ntup > rel->tuples)
5932  rel->tuples = ntup;
5933  }
5934 
5935  /* Now estimate number of output rows, etc */
5936  set_baserel_size_estimates(root, rel);
5937 }
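
/*
 * [Editor's illustration -- not part of costsize.c] The rowcount rule above:
 * for a scan such as ROWS FROM (f1(), f2()) where f1 is declared ROWS 100 and
 * f2 uses the default ROWS 1000, the scan's tuple estimate is the largest
 * per-function estimate, i.e. 1000.
 */
static double
example_functionscan_tuples(const double *per_function_rows, int nfuncs)
{
	double		tuples = 0;
	int			i;

	for (i = 0; i < nfuncs; i++)
	{
		if (per_function_rows[i] > tuples)
			tuples = per_function_rows[i];
	}
	return tuples;
}
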
5938 
5939 /*
5940  * set_tablefunc_size_estimates
5941  * Set the size estimates for a base relation that is a table function.
5942  *
5943  * The rel's targetlist and restrictinfo list must have been constructed
5944  * already.
5945  *
5946  * We set the same fields as set_baserel_size_estimates.
5947  */
5948 void
5949 set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5950 {
5951  /* Should only be applied to base relations that are functions */
5952  Assert(rel->relid > 0);
5953  Assert(planner_rt_fetch(rel->relid, root)->rtekind == RTE_TABLEFUNC);
5954 
5955  rel->tuples = 100;
5956 
5957  /* Now estimate number of output rows, etc */
5958  set_baserel_size_estimates(root, rel);
5959 }
5960 
5961 /*
5962  * set_values_size_estimates
5963  * Set the size estimates for a base relation that is a values list.
5964  *
5965  * The rel's targetlist and restrictinfo list must have been constructed
5966  * already.
5967  *
5968  * We set the same fields as set_baserel_size_estimates.
5969  */
5970 void
5971 set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel)
5972 {
5973  RangeTblEntry *rte;
5974 
5975  /* Should only be applied to base relations that are values lists */
5976  Assert(rel->relid > 0);
5977  rte = planner_rt_fetch(rel->relid, root);
5978  Assert(rte->rtekind == RTE_VALUES);