PostgreSQL Source Code  git master
selfuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.c
4  * Selectivity functions and index cost estimation functions for
5  * standard operators and index access methods.
6  *
7  * Selectivity routines are registered in the pg_operator catalog
8  * in the "oprrest" and "oprjoin" attributes.
9  *
10  * Index cost functions are located via the index AM's API struct,
11  * which is obtained from the handler function registered in pg_am.
12  *
13  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
14  * Portions Copyright (c) 1994, Regents of the University of California
15  *
16  *
17  * IDENTIFICATION
18  * src/backend/utils/adt/selfuncs.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 /*----------
24  * Operator selectivity estimation functions are called to estimate the
25  * selectivity of WHERE clauses whose top-level operator is their operator.
26  * We divide the problem into two cases:
27  * Restriction clause estimation: the clause involves vars of just
28  * one relation.
29  * Join clause estimation: the clause involves vars of multiple rels.
30  * Join selectivity estimation is far more difficult and usually less accurate
31  * than restriction estimation.
32  *
33  * When dealing with the inner scan of a nestloop join, we consider the
34  * join's joinclauses as restriction clauses for the inner relation, and
35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
36  * values). So, restriction estimators need to be able to accept an argument
37  * telling which relation is to be treated as the variable.
38  *
39  * The call convention for a restriction estimator (oprrest function) is
40  *
41  * Selectivity oprrest (PlannerInfo *root,
42  * Oid operator,
43  * List *args,
44  * int varRelid);
45  *
46  * root: general information about the query (rtable and RelOptInfo lists
47  * are particularly important for the estimator).
48  * operator: OID of the specific operator in question.
49  * args: argument list from the operator clause.
50  * varRelid: if not zero, the relid (rtable index) of the relation to
51  * be treated as the variable relation. May be zero if the args list
52  * is known to contain vars of only one relation.
53  *
54  * This is represented at the SQL level (in pg_proc) as
55  *
56  * float8 oprrest (internal, oid, internal, int4);
57  *
58  * The result is a selectivity, that is, a fraction (0 to 1) of the rows
59  * of the relation that are expected to produce a TRUE result for the
60  * given operator.
61  *
62  * The call convention for a join estimator (oprjoin function) is similar
63  * except that varRelid is not needed, and instead join information is
64  * supplied:
65  *
66  * Selectivity oprjoin (PlannerInfo *root,
67  * Oid operator,
68  * List *args,
69  * JoinType jointype,
70  * SpecialJoinInfo *sjinfo);
71  *
72  * float8 oprjoin (internal, oid, internal, int2, internal);
73  *
74  * (Before Postgres 8.4, join estimators had only the first four of these
75  * parameters. That signature is still allowed, but deprecated.) The
76  * relationship between jointype and sjinfo is explained in the comments for
77  * clause_selectivity() --- the short version is that jointype is usually
78  * best ignored in favor of examining sjinfo.
79  *
80  * Join selectivity for regular inner and outer joins is defined as the
81  * fraction (0 to 1) of the cross product of the relations that is expected
82  * to produce a TRUE result for the given operator. For both semi and anti
83  * joins, however, the selectivity is defined as the fraction of the left-hand
84  * side relation's rows that are expected to have a match (ie, at least one
85  * row with a TRUE result) in the right-hand side.
86  *
87  * For both oprrest and oprjoin functions, the operator's input collation OID
88  * (if any) is passed using the standard fmgr mechanism, so that the estimator
89  * function can fetch it with PG_GET_COLLATION(). Note, however, that all
90  * statistics in pg_statistic are currently built using the relevant column's
91  * collation. Thus, in most cases where we are looking at statistics, we
92  * should ignore the operator collation and use the stats entry's collation.
93  * We expect that the error induced by doing this is usually not large enough
94  * to justify complicating matters. In any case, doing otherwise would yield
95  * entirely garbage results for ordered stats data such as histograms.
96  *----------
97  */
98 
99 #include "postgres.h"
100 
101 #include <ctype.h>
102 #include <math.h>
103 
104 #include "access/brin.h"
105 #include "access/brin_page.h"
106 #include "access/gin.h"
107 #include "access/table.h"
108 #include "access/tableam.h"
109 #include "access/visibilitymap.h"
110 #include "catalog/pg_am.h"
111 #include "catalog/pg_collation.h"
112 #include "catalog/pg_operator.h"
113 #include "catalog/pg_statistic.h"
115 #include "executor/nodeAgg.h"
116 #include "miscadmin.h"
117 #include "nodes/makefuncs.h"
118 #include "nodes/nodeFuncs.h"
119 #include "optimizer/clauses.h"
120 #include "optimizer/cost.h"
121 #include "optimizer/optimizer.h"
122 #include "optimizer/pathnode.h"
123 #include "optimizer/paths.h"
124 #include "optimizer/plancat.h"
125 #include "parser/parse_clause.h"
126 #include "parser/parsetree.h"
127 #include "statistics/statistics.h"
128 #include "storage/bufmgr.h"
129 #include "utils/builtins.h"
130 #include "utils/date.h"
131 #include "utils/datum.h"
132 #include "utils/fmgroids.h"
133 #include "utils/index_selfuncs.h"
134 #include "utils/lsyscache.h"
135 #include "utils/memutils.h"
136 #include "utils/pg_locale.h"
137 #include "utils/rel.h"
138 #include "utils/selfuncs.h"
139 #include "utils/snapmgr.h"
140 #include "utils/spccache.h"
141 #include "utils/syscache.h"
142 #include "utils/timestamp.h"
143 #include "utils/typcache.h"
144 
145 
146 /* Hooks for plugins to get control when we ask for stats */
149 
150 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
151 static double eqjoinsel_inner(Oid opfuncoid,
152  VariableStatData *vardata1, VariableStatData *vardata2,
153  double nd1, double nd2,
154  bool isdefault1, bool isdefault2,
155  AttStatsSlot *sslot1, AttStatsSlot *sslot2,
156  Form_pg_statistic stats1, Form_pg_statistic stats2,
157  bool have_mcvs1, bool have_mcvs2);
158 static double eqjoinsel_semi(Oid opfuncoid,
159  VariableStatData *vardata1, VariableStatData *vardata2,
160  double nd1, double nd2,
161  bool isdefault1, bool isdefault2,
162  AttStatsSlot *sslot1, AttStatsSlot *sslot2,
163  Form_pg_statistic stats1, Form_pg_statistic stats2,
164  bool have_mcvs1, bool have_mcvs2,
165  RelOptInfo *inner_rel);
167  RelOptInfo *rel, List **varinfos, double *ndistinct);
168 static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
169  double *scaledvalue,
170  Datum lobound, Datum hibound, Oid boundstypid,
171  double *scaledlobound, double *scaledhibound);
172 static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
173 static void convert_string_to_scalar(char *value,
174  double *scaledvalue,
175  char *lobound,
176  double *scaledlobound,
177  char *hibound,
178  double *scaledhibound);
180  double *scaledvalue,
181  Datum lobound,
182  double *scaledlobound,
183  Datum hibound,
184  double *scaledhibound);
185 static double convert_one_string_to_scalar(char *value,
186  int rangelo, int rangehi);
187 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
188  int rangelo, int rangehi);
189 static char *convert_string_datum(Datum value, Oid typid, Oid collid,
190  bool *failure);
191 static double convert_timevalue_to_scalar(Datum value, Oid typid,
192  bool *failure);
193 static void examine_simple_variable(PlannerInfo *root, Var *var,
194  VariableStatData *vardata);
195 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
196  Oid sortop, Datum *min, Datum *max);
197 static bool get_actual_variable_range(PlannerInfo *root,
198  VariableStatData *vardata,
199  Oid sortop,
200  Datum *min, Datum *max);
201 static bool get_actual_variable_endpoint(Relation heapRel,
202  Relation indexRel,
203  ScanDirection indexscandir,
204  ScanKey scankeys,
205  int16 typLen,
206  bool typByVal,
207  TupleTableSlot *tableslot,
208  MemoryContext outercontext,
209  Datum *endpointDatum);
210 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
211 
212 
213 /*
214  * eqsel - Selectivity of "=" for any data types.
215  *
216  * Note: this routine is also used to estimate selectivity for some
217  * operators that are not "=" but have comparable selectivity behavior,
218  * such as "~=" (geometric approximate-match). Even for "=", we must
219  * keep in mind that the left and right datatypes may differ.
220  */
221 Datum
223 {
224  PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
225 }
226 
227 /*
228  * Common code for eqsel() and neqsel()
229  */
230 static double
232 {
234  Oid operator = PG_GETARG_OID(1);
235  List *args = (List *) PG_GETARG_POINTER(2);
236  int varRelid = PG_GETARG_INT32(3);
237  VariableStatData vardata;
238  Node *other;
239  bool varonleft;
240  double selec;
241 
242  /*
243  * When asked about <>, we do the estimation using the corresponding =
244  * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
245  */
246  if (negate)
247  {
248  operator = get_negator(operator);
249  if (!OidIsValid(operator))
250  {
251  /* Use default selectivity (should we raise an error instead?) */
252  return 1.0 - DEFAULT_EQ_SEL;
253  }
254  }
255 
256  /*
257  * If expression is not variable = something or something = variable, then
258  * punt and return a default estimate.
259  */
260  if (!get_restriction_variable(root, args, varRelid,
261  &vardata, &other, &varonleft))
262  return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
263 
264  /*
265  * We can do a lot better if the something is a constant. (Note: the
266  * Const might result from estimation rather than being a simple constant
267  * in the query.)
268  */
269  if (IsA(other, Const))
270  selec = var_eq_const(&vardata, operator,
271  ((Const *) other)->constvalue,
272  ((Const *) other)->constisnull,
273  varonleft, negate);
274  else
275  selec = var_eq_non_const(&vardata, operator, other,
276  varonleft, negate);
277 
278  ReleaseVariableStats(vardata);
279 
280  return selec;
281 }
282 
283 /*
284  * var_eq_const --- eqsel for var = const case
285  *
286  * This is exported so that some other estimation functions can use it.
287  */
288 double
289 var_eq_const(VariableStatData *vardata, Oid operator,
290  Datum constval, bool constisnull,
291  bool varonleft, bool negate)
292 {
293  double selec;
294  double nullfrac = 0.0;
295  bool isdefault;
296  Oid opfuncoid;
297 
298  /*
299  * If the constant is NULL, assume operator is strict and return zero, ie,
300  * operator will never return TRUE. (It's zero even for a negator op.)
301  */
302  if (constisnull)
303  return 0.0;
304 
305  /*
306  * Grab the nullfrac for use below. Note we allow use of nullfrac
307  * regardless of security check.
308  */
309  if (HeapTupleIsValid(vardata->statsTuple))
310  {
311  Form_pg_statistic stats;
312 
313  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
314  nullfrac = stats->stanullfrac;
315  }
316 
317  /*
318  * If we matched the var to a unique index or DISTINCT clause, assume
319  * there is exactly one match regardless of anything else. (This is
320  * slightly bogus, since the index or clause's equality operator might be
321  * different from ours, but it's much more likely to be right than
322  * ignoring the information.)
323  */
324  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
325  {
326  selec = 1.0 / vardata->rel->tuples;
327  }
328  else if (HeapTupleIsValid(vardata->statsTuple) &&
330  (opfuncoid = get_opcode(operator))))
331  {
332  AttStatsSlot sslot;
333  bool match = false;
334  int i;
335 
336  /*
337  * Is the constant "=" to any of the column's most common values?
338  * (Although the given operator may not really be "=", we will assume
339  * that seeing whether it returns TRUE is an appropriate test. If you
340  * don't like this, maybe you shouldn't be using eqsel for your
341  * operator...)
342  */
343  if (get_attstatsslot(&sslot, vardata->statsTuple,
344  STATISTIC_KIND_MCV, InvalidOid,
346  {
347  FmgrInfo eqproc;
348 
349  fmgr_info(opfuncoid, &eqproc);
350 
351  for (i = 0; i < sslot.nvalues; i++)
352  {
353  /* be careful to apply operator right way 'round */
354  if (varonleft)
355  match = DatumGetBool(FunctionCall2Coll(&eqproc,
356  sslot.stacoll,
357  sslot.values[i],
358  constval));
359  else
360  match = DatumGetBool(FunctionCall2Coll(&eqproc,
361  sslot.stacoll,
362  constval,
363  sslot.values[i]));
364  if (match)
365  break;
366  }
367  }
368  else
369  {
370  /* no most-common-value info available */
371  i = 0; /* keep compiler quiet */
372  }
373 
374  if (match)
375  {
376  /*
377  * Constant is "=" to this common value. We know selectivity
378  * exactly (or as exactly as ANALYZE could calculate it, anyway).
379  */
380  selec = sslot.numbers[i];
381  }
382  else
383  {
384  /*
385  * Comparison is against a constant that is neither NULL nor any
386  * of the common values. Its selectivity cannot be more than
387  * this:
388  */
389  double sumcommon = 0.0;
390  double otherdistinct;
391 
392  for (i = 0; i < sslot.nnumbers; i++)
393  sumcommon += sslot.numbers[i];
394  selec = 1.0 - sumcommon - nullfrac;
395  CLAMP_PROBABILITY(selec);
396 
397  /*
398  * and in fact it's probably a good deal less. We approximate that
399  * all the not-common values share this remaining fraction
400  * equally, so we divide by the number of other distinct values.
401  */
402  otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
403  sslot.nnumbers;
404  if (otherdistinct > 1)
405  selec /= otherdistinct;
406 
407  /*
408  * Another cross-check: selectivity shouldn't be estimated as more
409  * than the least common "most common value".
410  */
411  if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
412  selec = sslot.numbers[sslot.nnumbers - 1];
413  }
414 
415  free_attstatsslot(&sslot);
416  }
417  else
418  {
419  /*
420  * No ANALYZE stats available, so make a guess using estimated number
421  * of distinct values and assuming they are equally common. (The guess
422  * is unlikely to be very good, but we do know a few special cases.)
423  */
424  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
425  }
426 
427  /* now adjust if we wanted <> rather than = */
428  if (negate)
429  selec = 1.0 - selec - nullfrac;
430 
431  /* result should be in range, but make sure... */
432  CLAMP_PROBABILITY(selec);
433 
434  return selec;
435 }
436 
437 /*
438  * var_eq_non_const --- eqsel for var = something-other-than-const case
439  *
440  * This is exported so that some other estimation functions can use it.
441  */
442 double
444  Node *other,
445  bool varonleft, bool negate)
446 {
447  double selec;
448  double nullfrac = 0.0;
449  bool isdefault;
450 
451  /*
452  * Grab the nullfrac for use below.
453  */
454  if (HeapTupleIsValid(vardata->statsTuple))
455  {
456  Form_pg_statistic stats;
457 
458  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
459  nullfrac = stats->stanullfrac;
460  }
461 
462  /*
463  * If we matched the var to a unique index or DISTINCT clause, assume
464  * there is exactly one match regardless of anything else. (This is
465  * slightly bogus, since the index or clause's equality operator might be
466  * different from ours, but it's much more likely to be right than
467  * ignoring the information.)
468  */
469  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
470  {
471  selec = 1.0 / vardata->rel->tuples;
472  }
473  else if (HeapTupleIsValid(vardata->statsTuple))
474  {
475  double ndistinct;
476  AttStatsSlot sslot;
477 
478  /*
479  * Search is for a value that we do not know a priori, but we will
480  * assume it is not NULL. Estimate the selectivity as non-null
481  * fraction divided by number of distinct values, so that we get a
482  * result averaged over all possible values whether common or
483  * uncommon. (Essentially, we are assuming that the not-yet-known
484  * comparison value is equally likely to be any of the possible
485  * values, regardless of their frequency in the table. Is that a good
486  * idea?)
487  */
488  selec = 1.0 - nullfrac;
489  ndistinct = get_variable_numdistinct(vardata, &isdefault);
490  if (ndistinct > 1)
491  selec /= ndistinct;
492 
493  /*
494  * Cross-check: selectivity should never be estimated as more than the
495  * most common value's.
496  */
497  if (get_attstatsslot(&sslot, vardata->statsTuple,
498  STATISTIC_KIND_MCV, InvalidOid,
500  {
501  if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
502  selec = sslot.numbers[0];
503  free_attstatsslot(&sslot);
504  }
505  }
506  else
507  {
508  /*
509  * No ANALYZE stats available, so make a guess using estimated number
510  * of distinct values and assuming they are equally common. (The guess
511  * is unlikely to be very good, but we do know a few special cases.)
512  */
513  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
514  }
515 
516  /* now adjust if we wanted <> rather than = */
517  if (negate)
518  selec = 1.0 - selec - nullfrac;
519 
520  /* result should be in range, but make sure... */
521  CLAMP_PROBABILITY(selec);
522 
523  return selec;
524 }
525 
526 /*
527  * neqsel - Selectivity of "!=" for any data types.
528  *
529  * This routine is also used for some operators that are not "!="
530  * but have comparable selectivity behavior. See above comments
531  * for eqsel().
532  */
533 Datum
535 {
536  PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
537 }
538 
539 /*
540  * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars.
541  *
542  * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
543  * The isgt and iseq flags distinguish which of the four cases apply.
544  *
545  * The caller has commuted the clause, if necessary, so that we can treat
546  * the variable as being on the left. The caller must also make sure that
547  * the other side of the clause is a non-null Const, and dissect that into
548  * a value and datatype. (This definition simplifies some callers that
549  * want to estimate against a computed value instead of a Const node.)
550  *
551  * This routine works for any datatype (or pair of datatypes) known to
552  * convert_to_scalar(). If it is applied to some other datatype,
553  * it will return an approximate estimate based on assuming that the constant
554  * value falls in the middle of the bin identified by binary search.
555  */
556 static double
557 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
558  VariableStatData *vardata, Datum constval, Oid consttype)
559 {
560  Form_pg_statistic stats;
561  FmgrInfo opproc;
562  double mcv_selec,
563  hist_selec,
564  sumcommon;
565  double selec;
566 
567  if (!HeapTupleIsValid(vardata->statsTuple))
568  {
569  /*
570  * No stats are available. Typically this means we have to fall back
571  * on the default estimate; but if the variable is CTID then we can
572  * make an estimate based on comparing the constant to the table size.
573  */
574  if (vardata->var && IsA(vardata->var, Var) &&
575  ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
576  {
577  ItemPointer itemptr;
578  double block;
579  double density;
580 
581  /*
582  * If the relation's empty, we're going to include all of it.
583  * (This is mostly to avoid divide-by-zero below.)
584  */
585  if (vardata->rel->pages == 0)
586  return 1.0;
587 
588  itemptr = (ItemPointer) DatumGetPointer(constval);
589  block = ItemPointerGetBlockNumberNoCheck(itemptr);
590 
591  /*
592  * Determine the average number of tuples per page (density).
593  *
594  * Since the last page will, on average, be only half full, we can
595  * estimate it to have half as many tuples as earlier pages. So
596  * give it half the weight of a regular page.
597  */
598  density = vardata->rel->tuples / (vardata->rel->pages - 0.5);
599 
600  /* If target is the last page, use half the density. */
601  if (block >= vardata->rel->pages - 1)
602  density *= 0.5;
603 
604  /*
605  * Using the average tuples per page, calculate how far into the
606  * page the itemptr is likely to be and adjust block accordingly,
607  * by adding that fraction of a whole block (but never more than a
608  * whole block, no matter how high the itemptr's offset is). Here
609  * we are ignoring the possibility of dead-tuple line pointers,
610  * which is fairly bogus, but we lack the info to do better.
611  */
612  if (density > 0.0)
613  {
615 
616  block += Min(offset / density, 1.0);
617  }
618 
619  /*
620  * Convert relative block number to selectivity. Again, the last
621  * page has only half weight.
622  */
623  selec = block / (vardata->rel->pages - 0.5);
624 
625  /*
626  * The calculation so far gave us a selectivity for the "<=" case.
627  * We'll have one less tuple for "<" and one additional tuple for
628  * ">=", the latter of which we'll reverse the selectivity for
629  * below, so we can simply subtract one tuple for both cases. The
630  * cases that need this adjustment can be identified by iseq being
631  * equal to isgt.
632  */
633  if (iseq == isgt && vardata->rel->tuples >= 1.0)
634  selec -= (1.0 / vardata->rel->tuples);
635 
636  /* Finally, reverse the selectivity for the ">", ">=" cases. */
637  if (isgt)
638  selec = 1.0 - selec;
639 
640  CLAMP_PROBABILITY(selec);
641  return selec;
642  }
643 
644  /* no stats available, so default result */
645  return DEFAULT_INEQ_SEL;
646  }
647  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
648 
649  fmgr_info(get_opcode(operator), &opproc);
650 
651  /*
652  * If we have most-common-values info, add up the fractions of the MCV
653  * entries that satisfy MCV OP CONST. These fractions contribute directly
654  * to the result selectivity. Also add up the total fraction represented
655  * by MCV entries.
656  */
657  mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
658  &sumcommon);
659 
660  /*
661  * If there is a histogram, determine which bin the constant falls in, and
662  * compute the resulting contribution to selectivity.
663  */
664  hist_selec = ineq_histogram_selectivity(root, vardata,
665  &opproc, isgt, iseq,
666  constval, consttype);
667 
668  /*
669  * Now merge the results from the MCV and histogram calculations,
670  * realizing that the histogram covers only the non-null values that are
671  * not listed in MCV.
672  */
673  selec = 1.0 - stats->stanullfrac - sumcommon;
674 
675  if (hist_selec >= 0.0)
676  selec *= hist_selec;
677  else
678  {
679  /*
680  * If no histogram but there are values not accounted for by MCV,
681  * arbitrarily assume half of them will match.
682  */
683  selec *= 0.5;
684  }
685 
686  selec += mcv_selec;
687 
688  /* result should be in range, but make sure... */
689  CLAMP_PROBABILITY(selec);
690 
691  return selec;
692 }
693 
694 /*
695  * mcv_selectivity - Examine the MCV list for selectivity estimates
696  *
697  * Determine the fraction of the variable's MCV population that satisfies
698  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. Also
699  * compute the fraction of the total column population represented by the MCV
700  * list. This code will work for any boolean-returning predicate operator.
701  *
702  * The function result is the MCV selectivity, and the fraction of the
703  * total population is returned into *sumcommonp. Zeroes are returned
704  * if there is no MCV list.
705  */
706 double
708  Datum constval, bool varonleft,
709  double *sumcommonp)
710 {
711  double mcv_selec,
712  sumcommon;
713  AttStatsSlot sslot;
714  int i;
715 
716  mcv_selec = 0.0;
717  sumcommon = 0.0;
718 
719  if (HeapTupleIsValid(vardata->statsTuple) &&
720  statistic_proc_security_check(vardata, opproc->fn_oid) &&
721  get_attstatsslot(&sslot, vardata->statsTuple,
722  STATISTIC_KIND_MCV, InvalidOid,
724  {
725  for (i = 0; i < sslot.nvalues; i++)
726  {
727  if (varonleft ?
729  sslot.stacoll,
730  sslot.values[i],
731  constval)) :
733  sslot.stacoll,
734  constval,
735  sslot.values[i])))
736  mcv_selec += sslot.numbers[i];
737  sumcommon += sslot.numbers[i];
738  }
739  free_attstatsslot(&sslot);
740  }
741 
742  *sumcommonp = sumcommon;
743  return mcv_selec;
744 }
745 
746 /*
747  * histogram_selectivity - Examine the histogram for selectivity estimates
748  *
749  * Determine the fraction of the variable's histogram entries that satisfy
750  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
751  *
752  * This code will work for any boolean-returning predicate operator, whether
753  * or not it has anything to do with the histogram sort operator. We are
754  * essentially using the histogram just as a representative sample. However,
755  * small histograms are unlikely to be all that representative, so the caller
756  * should be prepared to fall back on some other estimation approach when the
757  * histogram is missing or very small. It may also be prudent to combine this
758  * approach with another one when the histogram is small.
759  *
760  * If the actual histogram size is not at least min_hist_size, we won't bother
761  * to do the calculation at all. Also, if the n_skip parameter is > 0, we
762  * ignore the first and last n_skip histogram elements, on the grounds that
763  * they are outliers and hence not very representative. Typical values for
764  * these parameters are 10 and 1.
765  *
766  * The function result is the selectivity, or -1 if there is no histogram
767  * or it's smaller than min_hist_size.
768  *
769  * The output parameter *hist_size receives the actual histogram size,
770  * or zero if no histogram. Callers may use this number to decide how
771  * much faith to put in the function result.
772  *
773  * Note that the result disregards both the most-common-values (if any) and
774  * null entries. The caller is expected to combine this result with
775  * statistics for those portions of the column population. It may also be
776  * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
777  */
778 double
780  Datum constval, bool varonleft,
781  int min_hist_size, int n_skip,
782  int *hist_size)
783 {
784  double result;
785  AttStatsSlot sslot;
786 
787  /* check sanity of parameters */
788  Assert(n_skip >= 0);
789  Assert(min_hist_size > 2 * n_skip);
790 
791  if (HeapTupleIsValid(vardata->statsTuple) &&
792  statistic_proc_security_check(vardata, opproc->fn_oid) &&
793  get_attstatsslot(&sslot, vardata->statsTuple,
794  STATISTIC_KIND_HISTOGRAM, InvalidOid,
796  {
797  *hist_size = sslot.nvalues;
798  if (sslot.nvalues >= min_hist_size)
799  {
800  int nmatch = 0;
801  int i;
802 
803  for (i = n_skip; i < sslot.nvalues - n_skip; i++)
804  {
805  if (varonleft ?
807  sslot.stacoll,
808  sslot.values[i],
809  constval)) :
811  sslot.stacoll,
812  constval,
813  sslot.values[i])))
814  nmatch++;
815  }
816  result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
817  }
818  else
819  result = -1;
820  free_attstatsslot(&sslot);
821  }
822  else
823  {
824  *hist_size = 0;
825  result = -1;
826  }
827 
828  return result;
829 }
830 
831 /*
832  * ineq_histogram_selectivity - Examine the histogram for scalarineqsel
833  *
834  * Determine the fraction of the variable's histogram population that
835  * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
836  * The isgt and iseq flags distinguish which of the four cases apply.
837  *
838  * Returns -1 if there is no histogram (valid results will always be >= 0).
839  *
840  * Note that the result disregards both the most-common-values (if any) and
841  * null entries. The caller is expected to combine this result with
842  * statistics for those portions of the column population.
843  *
844  * This is exported so that some other estimation functions can use it.
845  */
846 double
848  VariableStatData *vardata,
849  FmgrInfo *opproc, bool isgt, bool iseq,
850  Datum constval, Oid consttype)
851 {
852  double hist_selec;
853  AttStatsSlot sslot;
854 
855  hist_selec = -1.0;
856 
857  /*
858  * Someday, ANALYZE might store more than one histogram per rel/att,
859  * corresponding to more than one possible sort ordering defined for the
860  * column type. However, to make that work we will need to figure out
861  * which staop to search for --- it's not necessarily the one we have at
862  * hand! (For example, we might have a '<=' operator rather than the '<'
863  * operator that will appear in staop.) For now, assume that whatever
864  * appears in pg_statistic is sorted the same way our operator sorts, or
865  * the reverse way if isgt is true.
866  */
867  if (HeapTupleIsValid(vardata->statsTuple) &&
868  statistic_proc_security_check(vardata, opproc->fn_oid) &&
869  get_attstatsslot(&sslot, vardata->statsTuple,
870  STATISTIC_KIND_HISTOGRAM, InvalidOid,
872  {
873  if (sslot.nvalues > 1)
874  {
875  /*
876  * Use binary search to find the desired location, namely the
877  * right end of the histogram bin containing the comparison value,
878  * which is the leftmost entry for which the comparison operator
879  * succeeds (if isgt) or fails (if !isgt). (If the given operator
880  * isn't actually sort-compatible with the histogram, you'll get
881  * garbage results ... but probably not any more garbage-y than
882  * you would have from the old linear search.)
883  *
884  * In this loop, we pay no attention to whether the operator iseq
885  * or not; that detail will be mopped up below. (We cannot tell,
886  * anyway, whether the operator thinks the values are equal.)
887  *
888  * If the binary search accesses the first or last histogram
889  * entry, we try to replace that endpoint with the true column min
890  * or max as found by get_actual_variable_range(). This
891  * ameliorates misestimates when the min or max is moving as a
892  * result of changes since the last ANALYZE. Note that this could
893  * result in effectively including MCVs into the histogram that
894  * weren't there before, but we don't try to correct for that.
895  */
896  double histfrac;
897  int lobound = 0; /* first possible slot to search */
898  int hibound = sslot.nvalues; /* last+1 slot to search */
899  bool have_end = false;
900 
901  /*
902  * If there are only two histogram entries, we'll want up-to-date
903  * values for both. (If there are more than two, we need at most
904  * one of them to be updated, so we deal with that within the
905  * loop.)
906  */
907  if (sslot.nvalues == 2)
908  have_end = get_actual_variable_range(root,
909  vardata,
910  sslot.staop,
911  &sslot.values[0],
912  &sslot.values[1]);
913 
914  while (lobound < hibound)
915  {
916  int probe = (lobound + hibound) / 2;
917  bool ltcmp;
918 
919  /*
920  * If we find ourselves about to compare to the first or last
921  * histogram entry, first try to replace it with the actual
922  * current min or max (unless we already did so above).
923  */
924  if (probe == 0 && sslot.nvalues > 2)
925  have_end = get_actual_variable_range(root,
926  vardata,
927  sslot.staop,
928  &sslot.values[0],
929  NULL);
930  else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
931  have_end = get_actual_variable_range(root,
932  vardata,
933  sslot.staop,
934  NULL,
935  &sslot.values[probe]);
936 
937  ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
938  sslot.stacoll,
939  sslot.values[probe],
940  constval));
941  if (isgt)
942  ltcmp = !ltcmp;
943  if (ltcmp)
944  lobound = probe + 1;
945  else
946  hibound = probe;
947  }
948 
949  if (lobound <= 0)
950  {
951  /*
952  * Constant is below lower histogram boundary. More
953  * precisely, we have found that no entry in the histogram
954  * satisfies the inequality clause (if !isgt) or they all do
955  * (if isgt). We estimate that that's true of the entire
956  * table, so set histfrac to 0.0 (which we'll flip to 1.0
957  * below, if isgt).
958  */
959  histfrac = 0.0;
960  }
961  else if (lobound >= sslot.nvalues)
962  {
963  /*
964  * Inverse case: constant is above upper histogram boundary.
965  */
966  histfrac = 1.0;
967  }
968  else
969  {
970  /* We have values[i-1] <= constant <= values[i]. */
971  int i = lobound;
972  double eq_selec = 0;
973  double val,
974  high,
975  low;
976  double binfrac;
977 
978  /*
979  * In the cases where we'll need it below, obtain an estimate
980  * of the selectivity of "x = constval". We use a calculation
981  * similar to what var_eq_const() does for a non-MCV constant,
982  * ie, estimate that all distinct non-MCV values occur equally
983  * often. But multiplication by "1.0 - sumcommon - nullfrac"
984  * will be done by our caller, so we shouldn't do that here.
985  * Therefore we can't try to clamp the estimate by reference
986  * to the least common MCV; the result would be too small.
987  *
988  * Note: since this is effectively assuming that constval
989  * isn't an MCV, it's logically dubious if constval in fact is
990  * one. But we have to apply *some* correction for equality,
991  * and anyway we cannot tell if constval is an MCV, since we
992  * don't have a suitable equality operator at hand.
993  */
994  if (i == 1 || isgt == iseq)
995  {
996  double otherdistinct;
997  bool isdefault;
998  AttStatsSlot mcvslot;
999 
1000  /* Get estimated number of distinct values */
1001  otherdistinct = get_variable_numdistinct(vardata,
1002  &isdefault);
1003 
1004  /* Subtract off the number of known MCVs */
1005  if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1006  STATISTIC_KIND_MCV, InvalidOid,
1008  {
1009  otherdistinct -= mcvslot.nnumbers;
1010  free_attstatsslot(&mcvslot);
1011  }
1012 
1013  /* If result doesn't seem sane, leave eq_selec at 0 */
1014  if (otherdistinct > 1)
1015  eq_selec = 1.0 / otherdistinct;
1016  }
1017 
1018  /*
1019  * Convert the constant and the two nearest bin boundary
1020  * values to a uniform comparison scale, and do a linear
1021  * interpolation within this bin.
1022  */
1023  if (convert_to_scalar(constval, consttype, sslot.stacoll,
1024  &val,
1025  sslot.values[i - 1], sslot.values[i],
1026  vardata->vartype,
1027  &low, &high))
1028  {
1029  if (high <= low)
1030  {
1031  /* cope if bin boundaries appear identical */
1032  binfrac = 0.5;
1033  }
1034  else if (val <= low)
1035  binfrac = 0.0;
1036  else if (val >= high)
1037  binfrac = 1.0;
1038  else
1039  {
1040  binfrac = (val - low) / (high - low);
1041 
1042  /*
1043  * Watch out for the possibility that we got a NaN or
1044  * Infinity from the division. This can happen
1045  * despite the previous checks, if for example "low"
1046  * is -Infinity.
1047  */
1048  if (isnan(binfrac) ||
1049  binfrac < 0.0 || binfrac > 1.0)
1050  binfrac = 0.5;
1051  }
1052  }
1053  else
1054  {
1055  /*
1056  * Ideally we'd produce an error here, on the grounds that
1057  * the given operator shouldn't have scalarXXsel
1058  * registered as its selectivity func unless we can deal
1059  * with its operand types. But currently, all manner of
1060  * stuff is invoking scalarXXsel, so give a default
1061  * estimate until that can be fixed.
1062  */
1063  binfrac = 0.5;
1064  }
1065 
1066  /*
1067  * Now, compute the overall selectivity across the values
1068  * represented by the histogram. We have i-1 full bins and
1069  * binfrac partial bin below the constant.
1070  */
1071  histfrac = (double) (i - 1) + binfrac;
1072  histfrac /= (double) (sslot.nvalues - 1);
1073 
1074  /*
1075  * At this point, histfrac is an estimate of the fraction of
1076  * the population represented by the histogram that satisfies
1077  * "x <= constval". Somewhat remarkably, this statement is
1078  * true regardless of which operator we were doing the probes
1079  * with, so long as convert_to_scalar() delivers reasonable
1080  * results. If the probe constant is equal to some histogram
1081  * entry, we would have considered the bin to the left of that
1082  * entry if probing with "<" or ">=", or the bin to the right
1083  * if probing with "<=" or ">"; but binfrac would have come
1084  * out as 1.0 in the first case and 0.0 in the second, leading
1085  * to the same histfrac in either case. For probe constants
1086  * between histogram entries, we find the same bin and get the
1087  * same estimate with any operator.
1088  *
1089  * The fact that the estimate corresponds to "x <= constval"
1090  * and not "x < constval" is because of the way that ANALYZE
1091  * constructs the histogram: each entry is, effectively, the
1092  * rightmost value in its sample bucket. So selectivity
1093  * values that are exact multiples of 1/(histogram_size-1)
1094  * should be understood as estimates including a histogram
1095  * entry plus everything to its left.
1096  *
1097  * However, that breaks down for the first histogram entry,
1098  * which necessarily is the leftmost value in its sample
1099  * bucket. That means the first histogram bin is slightly
1100  * narrower than the rest, by an amount equal to eq_selec.
1101  * Another way to say that is that we want "x <= leftmost" to
1102  * be estimated as eq_selec not zero. So, if we're dealing
1103  * with the first bin (i==1), rescale to make that true while
1104  * adjusting the rest of that bin linearly.
1105  */
1106  if (i == 1)
1107  histfrac += eq_selec * (1.0 - binfrac);
1108 
1109  /*
1110  * "x <= constval" is good if we want an estimate for "<=" or
1111  * ">", but if we are estimating for "<" or ">=", we now need
1112  * to decrease the estimate by eq_selec.
1113  */
1114  if (isgt == iseq)
1115  histfrac -= eq_selec;
1116  }
1117 
1118  /*
1119  * Now the estimate is finished for "<" and "<=" cases. If we are
1120  * estimating for ">" or ">=", flip it.
1121  */
1122  hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1123 
1124  /*
1125  * The histogram boundaries are only approximate to begin with,
1126  * and may well be out of date anyway. Therefore, don't believe
1127  * extremely small or large selectivity estimates --- unless we
1128  * got actual current endpoint values from the table, in which
1129  * case just do the usual sanity clamp. Somewhat arbitrarily, we
1130  * set the cutoff for other cases at a hundredth of the histogram
1131  * resolution.
1132  */
1133  if (have_end)
1134  CLAMP_PROBABILITY(hist_selec);
1135  else
1136  {
1137  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1138 
1139  if (hist_selec < cutoff)
1140  hist_selec = cutoff;
1141  else if (hist_selec > 1.0 - cutoff)
1142  hist_selec = 1.0 - cutoff;
1143  }
1144  }
1145 
1146  free_attstatsslot(&sslot);
1147  }
1148 
1149  return hist_selec;
1150 }
1151 
1152 /*
1153  * Common wrapper function for the selectivity estimators that simply
1154  * invoke scalarineqsel().
 *
 * fcinfo arguments (see the PG_GETARG_* calls below):
 *   0 = PlannerInfo *root, 1 = operator Oid, 2 = argument List,
 *   3 = varRelid.
 * The flags "isgt" and "iseq" (referenced below; per the four callers
 * scalarltsel/scalarlesel/scalargtsel/scalargesel they are the wrapper's
 * extra parameters) select which of "<", "<=", ">", ">=" is estimated.
 * Returns the estimated selectivity as a float8 Datum.
1155  */
1156 static Datum
 /* NOTE(review): the function name/parameter line (scalarineqsel_wrapper)
  * was lost in extraction of this excerpt; see the callers below. */
1158 {
1159  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1160  Oid operator = PG_GETARG_OID(1);
1161  List *args = (List *) PG_GETARG_POINTER(2);
1162  int varRelid = PG_GETARG_INT32(3);
1163  VariableStatData vardata;
1164  Node *other;
1165  bool varonleft;
1166  Datum constval;
1167  Oid consttype;
1168  double selec;
1169 
1170  /*
1171  * If expression is not variable op something or something op variable,
1172  * then punt and return a default estimate.
1173  */
1174  if (!get_restriction_variable(root, args, varRelid,
1175  &vardata, &other, &varonleft))
 /* NOTE(review): the return statement for this punt branch is missing
  * from this excerpt (presumably a default selectivity). */
1177 
1178  /*
1179  * Can't do anything useful if the something is not a constant, either.
1180  */
1181  if (!IsA(other, Const))
1182  {
1183  ReleaseVariableStats(vardata);
 /* NOTE(review): the default-estimate return statement is missing
  * from this excerpt. */
1185  }
1186 
1187  /*
1188  * If the constant is NULL, assume operator is strict and return zero, ie,
1189  * operator will never return TRUE.
1190  */
1191  if (((Const *) other)->constisnull)
1192  {
1193  ReleaseVariableStats(vardata);
1194  PG_RETURN_FLOAT8(0.0);
1195  }
1196  constval = ((Const *) other)->constvalue;
1197  consttype = ((Const *) other)->consttype;
1198 
1199  /*
1200  * Force the var to be on the left to simplify logic in scalarineqsel.
1201  */
1202  if (!varonleft)
1203  {
1204  operator = get_commutator(operator);
1205  if (!operator)
1206  {
1207  /* Use default selectivity (should we raise an error instead?) */
1208  ReleaseVariableStats(vardata);
 /* NOTE(review): the default-estimate return statement is missing
  * from this excerpt. */
1210  }
 /* Commuting the operator flips the comparison direction, so the
  * isgt flag must flip with it (iseq is unaffected). */
1211  isgt = !isgt;
1212  }
1213 
1214  /* The rest of the work is done by scalarineqsel(). */
1215  selec = scalarineqsel(root, operator, isgt, iseq,
1216  &vardata, constval, consttype);
1217 
1218  ReleaseVariableStats(vardata);
1219 
1220  PG_RETURN_FLOAT8((float8) selec);
1221 }
1222 
1223 /*
1224  * scalarltsel - Selectivity of "<" for scalars.
 *
 * Restriction estimator (oprrest) for "<" operators; delegates to the
 * common wrapper with isgt = false, iseq = false.
1225  */
1226 Datum
 /* NOTE(review): the "scalarltsel(PG_FUNCTION_ARGS)" line was lost in
  * extraction of this excerpt. */
1228 {
1229  return scalarineqsel_wrapper(fcinfo, false, false);
1230 }
1231 
1232 /*
1233  * scalarlesel - Selectivity of "<=" for scalars.
 *
 * Restriction estimator (oprrest) for "<=" operators; delegates to the
 * common wrapper with isgt = false, iseq = true.
1234  */
1235 Datum
 /* NOTE(review): the "scalarlesel(PG_FUNCTION_ARGS)" line was lost in
  * extraction of this excerpt. */
1237 {
1238  return scalarineqsel_wrapper(fcinfo, false, true);
1239 }
1240 
1241 /*
1242  * scalargtsel - Selectivity of ">" for scalars.
 *
 * Restriction estimator (oprrest) for ">" operators; delegates to the
 * common wrapper with isgt = true, iseq = false.
1243  */
1244 Datum
 /* NOTE(review): the "scalargtsel(PG_FUNCTION_ARGS)" line was lost in
  * extraction of this excerpt. */
1246 {
1247  return scalarineqsel_wrapper(fcinfo, true, false);
1248 }
1249 
1250 /*
1251  * scalargesel - Selectivity of ">=" for scalars.
 *
 * Restriction estimator (oprrest) for ">=" operators; delegates to the
 * common wrapper with isgt = true, iseq = true.
1252  */
1253 Datum
 /* NOTE(review): the "scalargesel(PG_FUNCTION_ARGS)" line was lost in
  * extraction of this excerpt. */
1255 {
1256  return scalarineqsel_wrapper(fcinfo, true, true);
1257 }
1258 
1259 /*
1260  * boolvarsel - Selectivity of Boolean variable.
1261  *
1262  * This can actually be called on any boolean-valued expression. If it
1263  * involves only Vars of the specified relation, and if there are statistics
1264  * about the Var or expression (the latter is possible if it's indexed) then
1265  * we'll produce a real estimate; otherwise it's just a default.
 *
 * root: planner context used by examine_variable().
 * arg: the boolean-valued expression being estimated.
 * varRelid: relation whose Vars are treated as the variable (0 = any).
 *
 * NOTE(review): the return-type line (presumably "Selectivity") was lost
 * in extraction of this excerpt.
1266  */
1268 boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1269 {
1270  VariableStatData vardata;
1271  double selec;
1272 
1273  examine_variable(root, arg, varRelid, &vardata);
1274  if (HeapTupleIsValid(vardata.statsTuple))
1275  {
1276  /*
1277  * A boolean variable V is equivalent to the clause V = 't', so we
1278  * compute the selectivity as if that is what we have.
1279  */
1280  selec = var_eq_const(&vardata, BooleanEqualOperator,
1281  BoolGetDatum(true), false, true, false);
1282  }
1283  else
1284  {
1285  /* Otherwise, the default estimate is 0.5 */
1286  selec = 0.5;
1287  }
 /* Release the statistics tuple acquired by examine_variable(). */
1288  ReleaseVariableStats(vardata);
1289  return selec;
1290 }
1291 
1292 /*
1293  * booltestsel - Selectivity of BooleanTest Node.
 *
 * Estimates IS [NOT] TRUE / FALSE / UNKNOWN tests using the column's MCV
 * list and null fraction when statistics exist, falling back to simple
 * defaults or clause_selectivity() otherwise.
 *
 * NOTE(review): the return-type and first signature line were lost in
 * extraction of this excerpt; the parameters referenced below are root,
 * booltesttype, arg, varRelid, jointype, sjinfo.
1294  */
1297  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1298 {
1299  VariableStatData vardata;
1300  double selec;
1301 
1302  examine_variable(root, arg, varRelid, &vardata);
1303 
1304  if (HeapTupleIsValid(vardata.statsTuple))
1305  {
1306  Form_pg_statistic stats;
1307  double freq_null;
1308  AttStatsSlot sslot;
1309 
1310  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1311  freq_null = stats->stanullfrac;
1312 
1313  if (get_attstatsslot(&sslot, vardata.statsTuple,
1314  STATISTIC_KIND_MCV, InvalidOid,
 /* NOTE(review): the flags argument line of this call was lost in
  * extraction of this excerpt. */
1316  && sslot.nnumbers > 0)
1317  {
1318  double freq_true;
1319  double freq_false;
1320 
1321  /*
1322  * Get first MCV frequency and derive frequency for true.
 * Since the true, false, and NULL fractions must sum to 1, one
 * MCV entry (plus the null fraction) determines both booleans.
1323  */
1324  if (DatumGetBool(sslot.values[0]))
1325  freq_true = sslot.numbers[0];
1326  else
1327  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1328 
1329  /*
1330  * Next derive frequency for false. Then use these as appropriate
1331  * to derive frequency for each case.
1332  */
1333  freq_false = 1.0 - freq_true - freq_null;
1334 
1335  switch (booltesttype)
1336  {
1337  case IS_UNKNOWN:
1338  /* select only NULL values */
1339  selec = freq_null;
1340  break;
1341  case IS_NOT_UNKNOWN:
1342  /* select non-NULL values */
1343  selec = 1.0 - freq_null;
1344  break;
1345  case IS_TRUE:
1346  /* select only TRUE values */
1347  selec = freq_true;
1348  break;
1349  case IS_NOT_TRUE:
1350  /* select non-TRUE values */
1351  selec = 1.0 - freq_true;
1352  break;
1353  case IS_FALSE:
1354  /* select only FALSE values */
1355  selec = freq_false;
1356  break;
1357  case IS_NOT_FALSE:
1358  /* select non-FALSE values */
1359  selec = 1.0 - freq_false;
1360  break;
1361  default:
1362  elog(ERROR, "unrecognized booltesttype: %d",
1363  (int) booltesttype);
1364  selec = 0.0; /* Keep compiler quiet */
1365  break;
1366  }
1367 
1368  free_attstatsslot(&sslot);
1369  }
1370  else
1371  {
1372  /*
1373  * No most-common-value info available. Still have null fraction
1374  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1375  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1376  */
1377  switch (booltesttype)
1378  {
1379  case IS_UNKNOWN:
1380  /* select only NULL values */
1381  selec = freq_null;
1382  break;
1383  case IS_NOT_UNKNOWN:
1384  /* select non-NULL values */
1385  selec = 1.0 - freq_null;
1386  break;
1387  case IS_TRUE:
1388  case IS_FALSE:
1389  /* Assume we select half of the non-NULL values */
1390  selec = (1.0 - freq_null) / 2.0;
1391  break;
1392  case IS_NOT_TRUE:
1393  case IS_NOT_FALSE:
1394  /* Assume we select NULLs plus half of the non-NULLs */
1395  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1396  selec = (freq_null + 1.0) / 2.0;
1397  break;
1398  default:
1399  elog(ERROR, "unrecognized booltesttype: %d",
1400  (int) booltesttype);
1401  selec = 0.0; /* Keep compiler quiet */
1402  break;
1403  }
1404  }
1405  }
1406  else
1407  {
1408  /*
1409  * If we can't get variable statistics for the argument, perhaps
1410  * clause_selectivity can do something with it. We ignore the
1411  * possibility of a NULL value when using clause_selectivity, and just
1412  * assume the value is either TRUE or FALSE.
1413  */
1414  switch (booltesttype)
1415  {
1416  case IS_UNKNOWN:
1417  selec = DEFAULT_UNK_SEL;
1418  break;
1419  case IS_NOT_UNKNOWN:
1420  selec = DEFAULT_NOT_UNK_SEL;
1421  break;
1422  case IS_TRUE:
1423  case IS_NOT_FALSE:
1424  selec = (double) clause_selectivity(root, arg,
1425  varRelid,
1426  jointype, sjinfo);
1427  break;
1428  case IS_FALSE:
1429  case IS_NOT_TRUE:
 /* Complement of the TRUE-side estimate, ignoring NULLs. */
1430  selec = 1.0 - (double) clause_selectivity(root, arg,
1431  varRelid,
1432  jointype, sjinfo);
1433  break;
1434  default:
1435  elog(ERROR, "unrecognized booltesttype: %d",
1436  (int) booltesttype);
1437  selec = 0.0; /* Keep compiler quiet */
1438  break;
1439  }
1440  }
1441 
1442  ReleaseVariableStats(vardata);
1443 
1444  /* result should be in range, but make sure... */
1445  CLAMP_PROBABILITY(selec);
1446 
1447  return (Selectivity) selec;
1448 }
1449 
1450 /*
1451  * nulltestsel - Selectivity of NullTest Node.
 *
 * Estimates IS NULL / IS NOT NULL directly from the column's stanullfrac
 * statistic when available; system columns are known never-NULL; otherwise
 * fall back to the DEFAULT_UNK_SEL / DEFAULT_NOT_UNK_SEL guesses.
 *
 * NOTE(review): the return-type and first signature line were lost in
 * extraction of this excerpt; the parameters referenced below are root,
 * nulltesttype, arg, varRelid, jointype, sjinfo (the last two are unused
 * in the visible body).
1452  */
1455  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1456 {
1457  VariableStatData vardata;
1458  double selec;
1459 
1460  examine_variable(root, arg, varRelid, &vardata);
1461 
1462  if (HeapTupleIsValid(vardata.statsTuple))
1463  {
1464  Form_pg_statistic stats;
1465  double freq_null;
1466 
1467  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1468  freq_null = stats->stanullfrac;
1469 
1470  switch (nulltesttype)
1471  {
1472  case IS_NULL:
1473 
1474  /*
1475  * Use freq_null directly.
1476  */
1477  selec = freq_null;
1478  break;
1479  case IS_NOT_NULL:
1480 
1481  /*
1482  * Select not unknown (not null) values. Calculate from
1483  * freq_null.
1484  */
1485  selec = 1.0 - freq_null;
1486  break;
1487  default:
1488  elog(ERROR, "unrecognized nulltesttype: %d",
1489  (int) nulltesttype);
1490  return (Selectivity) 0; /* keep compiler quiet */
1491  }
1492  }
1493  else if (vardata.var && IsA(vardata.var, Var) &&
1494  ((Var *) vardata.var)->varattno < 0)
1495  {
1496  /*
1497  * There are no stats for system columns, but we know they are never
1498  * NULL.
 * (Negative varattno identifies a system column such as ctid.)
1499  */
1500  selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1501  }
1502  else
1503  {
1504  /*
1505  * No ANALYZE stats available, so make a guess
1506  */
1507  switch (nulltesttype)
1508  {
1509  case IS_NULL:
1510  selec = DEFAULT_UNK_SEL;
1511  break;
1512  case IS_NOT_NULL:
1513  selec = DEFAULT_NOT_UNK_SEL;
1514  break;
1515  default:
1516  elog(ERROR, "unrecognized nulltesttype: %d",
1517  (int) nulltesttype);
1518  return (Selectivity) 0; /* keep compiler quiet */
1519  }
1520  }
1521 
1522  ReleaseVariableStats(vardata);
1523 
1524  /* result should be in range, but make sure... */
1525  CLAMP_PROBABILITY(selec);
1526 
1527  return (Selectivity) selec;
1528 }
1529 
1530 /*
1531  * strip_array_coercion - strip binary-compatible relabeling from an array expr
1532  *
1533  * For array values, the parser normally generates ArrayCoerceExpr conversions,
1534  * but it seems possible that RelabelType might show up. Also, the planner
1535  * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1536  * so we need to be ready to deal with more than one level.
 *
 * Takes an expression node (possibly NULL) and returns the underlying
 * node with any stack of no-op coercions peeled off; stops at the first
 * node that is neither a binary-compatible ArrayCoerceExpr nor a
 * RelabelType.
1537  */
1538 static Node *
 /* NOTE(review): the name/parameter line (strip_array_coercion(Node *node))
  * was lost in extraction of this excerpt. */
1540 {
1541  for (;;)
1542  {
1543  if (node && IsA(node, ArrayCoerceExpr))
1544  {
1545  ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
1546 
1547  /*
1548  * If the per-element expression is just a RelabelType on top of
1549  * CaseTestExpr, then we know it's a binary-compatible relabeling.
1550  */
1551  if (IsA(acoerce->elemexpr, RelabelType) &&
1552  IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
1553  node = (Node *) acoerce->arg;
1554  else
1555  break;
1556  }
1557  else if (node && IsA(node, RelabelType))
1558  {
1559  /* We don't really expect this case, but may as well cope */
1560  node = (Node *) ((RelabelType *) node)->arg;
1561  }
1562  else
1563  break;
1564  }
1565  return node;
1566 }
1567 
1568 /*
1569  * scalararraysel - Selectivity of ScalarArrayOpExpr Node.
 *
 * Estimates "scalar op ANY/ALL (array)".  Three strategies, chosen below:
 * deconstruct a Const array and combine per-element estimates; walk an
 * ArrayExpr's elements the same way; or assume 10 elements with a dummy
 * operand.  For "= ANY" / "<> ALL" a disjoint-probabilities sum is tried
 * first and kept if it lands in [0,1].
 *
 * NOTE(review): the return-type and first parameter line (PlannerInfo
 * *root) were lost in extraction of this excerpt.
1570  */
1573  ScalarArrayOpExpr *clause,
1574  bool is_join_clause,
1575  int varRelid,
1576  JoinType jointype,
1577  SpecialJoinInfo *sjinfo)
1578 {
1579  Oid operator = clause->opno;
1580  bool useOr = clause->useOr;
1581  bool isEquality = false;
1582  bool isInequality = false;
1583  Node *leftop;
1584  Node *rightop;
1585  Oid nominal_element_type;
1586  Oid nominal_element_collation;
1587  TypeCacheEntry *typentry;
1588  RegProcedure oprsel;
1589  FmgrInfo oprselproc;
1590  Selectivity s1;
1591  Selectivity s1disjoint;
1592 
1593  /* First, deconstruct the expression */
1594  Assert(list_length(clause->args) == 2);
1595  leftop = (Node *) linitial(clause->args);
1596  rightop = (Node *) lsecond(clause->args);
1597 
1598  /* aggressively reduce both sides to constants */
1599  leftop = estimate_expression_value(root, leftop);
1600  rightop = estimate_expression_value(root, rightop);
1601 
1602  /* get nominal (after relabeling) element type of rightop */
1603  nominal_element_type = get_base_element_type(exprType(rightop));
1604  if (!OidIsValid(nominal_element_type))
1605  return (Selectivity) 0.5; /* probably shouldn't happen */
1606  /* get nominal collation, too, for generating constants */
1607  nominal_element_collation = exprCollation(rightop);
1608 
1609  /* look through any binary-compatible relabeling of rightop */
1610  rightop = strip_array_coercion(rightop);
1611 
1612  /*
1613  * Detect whether the operator is the default equality or inequality
1614  * operator of the array element type.
1615  */
1616  typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1617  if (OidIsValid(typentry->eq_opr))
1618  {
1619  if (operator == typentry->eq_opr)
1620  isEquality = true;
1621  else if (get_negator(operator) == typentry->eq_opr)
1622  isInequality = true;
1623  }
1624 
1625  /*
1626  * If it is equality or inequality, we might be able to estimate this as a
1627  * form of array containment; for instance "const = ANY(column)" can be
1628  * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1629  * that, and returns the selectivity estimate if successful, or -1 if not.
1630  */
1631  if ((isEquality || isInequality) && !is_join_clause)
1632  {
1633  s1 = scalararraysel_containment(root, leftop, rightop,
1634  nominal_element_type,
1635  isEquality, useOr, varRelid);
1636  if (s1 >= 0.0)
1637  return s1;
1638  }
1639 
1640  /*
1641  * Look up the underlying operator's selectivity estimator. Punt if it
1642  * hasn't got one.
1643  */
1644  if (is_join_clause)
1645  oprsel = get_oprjoin(operator);
1646  else
1647  oprsel = get_oprrest(operator);
1648  if (!oprsel)
1649  return (Selectivity) 0.5;
1650  fmgr_info(oprsel, &oprselproc);
1651 
1652  /*
1653  * In the array-containment check above, we must only believe that an
1654  * operator is equality or inequality if it is the default btree equality
1655  * operator (or its negator) for the element type, since those are the
1656  * operators that array containment will use. But in what follows, we can
1657  * be a little laxer, and also believe that any operators using eqsel() or
1658  * neqsel() as selectivity estimator act like equality or inequality.
1659  */
1660  if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1661  isEquality = true;
1662  else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1663  isInequality = true;
1664 
1665  /*
1666  * We consider three cases:
1667  *
1668  * 1. rightop is an Array constant: deconstruct the array, apply the
1669  * operator's selectivity function for each array element, and merge the
1670  * results in the same way that clausesel.c does for AND/OR combinations.
1671  *
1672  * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1673  * function for each element of the ARRAY[] construct, and merge.
1674  *
1675  * 3. otherwise, make a guess ...
1676  */
1677  if (rightop && IsA(rightop, Const))
1678  {
1679  Datum arraydatum = ((Const *) rightop)->constvalue;
1680  bool arrayisnull = ((Const *) rightop)->constisnull;
1681  ArrayType *arrayval;
1682  int16 elmlen;
1683  bool elmbyval;
1684  char elmalign;
1685  int num_elems;
1686  Datum *elem_values;
1687  bool *elem_nulls;
1688  int i;
1689 
1690  if (arrayisnull) /* qual can't succeed if null array */
1691  return (Selectivity) 0.0;
1692  arrayval = DatumGetArrayTypeP(arraydatum);
 /* NOTE(review): the first line of this call (presumably
  * get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),) was lost in
  * extraction of this excerpt. */
1694  &elmlen, &elmbyval, &elmalign);
1695  deconstruct_array(arrayval,
1696  ARR_ELEMTYPE(arrayval),
1697  elmlen, elmbyval, elmalign,
1698  &elem_values, &elem_nulls, &num_elems);
1699 
1700  /*
1701  * For generic operators, we assume the probability of success is
1702  * independent for each array element. But for "= ANY" or "<> ALL",
1703  * if the array elements are distinct (which'd typically be the case)
1704  * then the probabilities are disjoint, and we should just sum them.
1705  *
1706  * If we were being really tense we would try to confirm that the
1707  * elements are all distinct, but that would be expensive and it
1708  * doesn't seem to be worth the cycles; it would amount to penalizing
1709  * well-written queries in favor of poorly-written ones. However, we
1710  * do protect ourselves a little bit by checking whether the
1711  * disjointness assumption leads to an impossible (out of range)
1712  * probability; if so, we fall back to the normal calculation.
1713  */
1714  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1715 
1716  for (i = 0; i < num_elems; i++)
1717  {
1718  List *args;
1719  Selectivity s2;
1720 
 /* Build a synthetic (leftop op element-Const) argument list so the
  * scalar operator's own estimator can be applied per element. */
1721  args = list_make2(leftop,
1722  makeConst(nominal_element_type,
1723  -1,
1724  nominal_element_collation,
1725  elmlen,
1726  elem_values[i],
1727  elem_nulls[i],
1728  elmbyval));
1729  if (is_join_clause)
1730  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1731  clause->inputcollid,
1732  PointerGetDatum(root),
1733  ObjectIdGetDatum(operator),
1734  PointerGetDatum(args),
1735  Int16GetDatum(jointype),
1736  PointerGetDatum(sjinfo)));
1737  else
1738  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1739  clause->inputcollid,
1740  PointerGetDatum(root),
1741  ObjectIdGetDatum(operator),
1742  PointerGetDatum(args),
1743  Int32GetDatum(varRelid)));
1744 
1745  if (useOr)
1746  {
 /* Independent-OR combination, as in clausesel.c. */
1747  s1 = s1 + s2 - s1 * s2;
1748  if (isEquality)
1749  s1disjoint += s2;
1750  }
1751  else
1752  {
 /* Independent-AND combination, as in clausesel.c. */
1753  s1 = s1 * s2;
1754  if (isInequality)
1755  s1disjoint += s2 - 1.0;
1756  }
1757  }
1758 
1759  /* accept disjoint-probability estimate if in range */
1760  if ((useOr ? isEquality : isInequality) &&
1761  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1762  s1 = s1disjoint;
1763  }
1764  else if (rightop && IsA(rightop, ArrayExpr) &&
1765  !((ArrayExpr *) rightop)->multidims)
1766  {
1767  ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
1768  int16 elmlen;
1769  bool elmbyval;
1770  ListCell *l;
1771 
1772  get_typlenbyval(arrayexpr->element_typeid,
1773  &elmlen, &elmbyval);
1774 
1775  /*
1776  * We use the assumption of disjoint probabilities here too, although
1777  * the odds of equal array elements are rather higher if the elements
1778  * are not all constants (which they won't be, else constant folding
1779  * would have reduced the ArrayExpr to a Const). In this path it's
1780  * critical to have the sanity check on the s1disjoint estimate.
1781  */
1782  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1783 
1784  foreach(l, arrayexpr->elements)
1785  {
1786  Node *elem = (Node *) lfirst(l);
1787  List *args;
1788  Selectivity s2;
1789 
1790  /*
1791  * Theoretically, if elem isn't of nominal_element_type we should
1792  * insert a RelabelType, but it seems unlikely that any operator
1793  * estimation function would really care ...
1794  */
1795  args = list_make2(leftop, elem);
1796  if (is_join_clause)
1797  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1798  clause->inputcollid,
1799  PointerGetDatum(root),
1800  ObjectIdGetDatum(operator),
1801  PointerGetDatum(args),
1802  Int16GetDatum(jointype),
1803  PointerGetDatum(sjinfo)));
1804  else
1805  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1806  clause->inputcollid,
1807  PointerGetDatum(root),
1808  ObjectIdGetDatum(operator),
1809  PointerGetDatum(args),
1810  Int32GetDatum(varRelid)));
1811 
1812  if (useOr)
1813  {
1814  s1 = s1 + s2 - s1 * s2;
1815  if (isEquality)
1816  s1disjoint += s2;
1817  }
1818  else
1819  {
1820  s1 = s1 * s2;
1821  if (isInequality)
1822  s1disjoint += s2 - 1.0;
1823  }
1824  }
1825 
1826  /* accept disjoint-probability estimate if in range */
1827  if ((useOr ? isEquality : isInequality) &&
1828  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1829  s1 = s1disjoint;
1830  }
1831  else
1832  {
1833  CaseTestExpr *dummyexpr;
1834  List *args;
1835  Selectivity s2;
1836  int i;
1837 
1838  /*
1839  * We need a dummy rightop to pass to the operator selectivity
1840  * routine. It can be pretty much anything that doesn't look like a
1841  * constant; CaseTestExpr is a convenient choice.
1842  */
1843  dummyexpr = makeNode(CaseTestExpr);
1844  dummyexpr->typeId = nominal_element_type;
1845  dummyexpr->typeMod = -1;
1846  dummyexpr->collation = clause->inputcollid;
1847  args = list_make2(leftop, dummyexpr);
1848  if (is_join_clause)
1849  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1850  clause->inputcollid,
1851  PointerGetDatum(root),
1852  ObjectIdGetDatum(operator),
1853  PointerGetDatum(args),
1854  Int16GetDatum(jointype),
1855  PointerGetDatum(sjinfo)));
1856  else
1857  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1858  clause->inputcollid,
1859  PointerGetDatum(root),
1860  ObjectIdGetDatum(operator),
1861  PointerGetDatum(args),
1862  Int32GetDatum(varRelid)));
1863  s1 = useOr ? 0.0 : 1.0;
1864 
1865  /*
1866  * Arbitrarily assume 10 elements in the eventual array value (see
1867  * also estimate_array_length). We don't risk an assumption of
1868  * disjoint probabilities here.
1869  */
1870  for (i = 0; i < 10; i++)
1871  {
1872  if (useOr)
1873  s1 = s1 + s2 - s1 * s2;
1874  else
1875  s1 = s1 * s2;
1876  }
1877  }
1878 
1879  /* result should be in range, but make sure... */
1880  CLAMP_PROBABILITY(s1);
1881 
1882  return s1;
1883 }
1884 
1885 /*
1886  * Estimate number of elements in the array yielded by an expression.
1887  *
1888  * It's important that this agree with scalararraysel.
 *
 * Returns the exact element count for a non-null Const array or a
 * one-dimensional ARRAY[] construct; 0 for a NULL Const; otherwise the
 * same default guess of 10 that scalararraysel uses.
1889  */
1890 int
 /* NOTE(review): the name/parameter line (estimate_array_length(Node
  * *arrayexpr)) was lost in extraction of this excerpt. */
1892 {
1893  /* look through any binary-compatible relabeling of arrayexpr */
1894  arrayexpr = strip_array_coercion(arrayexpr);
1895 
1896  if (arrayexpr && IsA(arrayexpr, Const))
1897  {
1898  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
1899  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
1900  ArrayType *arrayval;
1901 
1902  if (arrayisnull)
1903  return 0;
1904  arrayval = DatumGetArrayTypeP(arraydatum);
1905  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
1906  }
1907  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
1908  !((ArrayExpr *) arrayexpr)->multidims)
1909  {
1910  return list_length(((ArrayExpr *) arrayexpr)->elements);
1911  }
1912  else
1913  {
1914  /* default guess --- see also scalararraysel */
1915  return 10;
1916  }
1917 }
1918 
1919 /*
1920  * rowcomparesel - Selectivity of RowCompareExpr Node.
1921  *
1922  * We estimate RowCompare selectivity by considering just the first (high
1923  * order) columns, which makes it equivalent to an ordinary OpExpr. While
1924  * this estimate could be refined by considering additional columns, it
1925  * seems unlikely that we could do a lot better without multi-column
1926  * statistics.
 *
 * NOTE(review): the return-type and first signature line were lost in
 * extraction of this excerpt; the parameters referenced below are root,
 * clause, varRelid, jointype, sjinfo.
1927  */
1930  RowCompareExpr *clause,
1931  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1932 {
1933  Selectivity s1;
 /* Use only the leading column's operator and collation. */
1934  Oid opno = linitial_oid(clause->opnos);
1935  Oid inputcollid = linitial_oid(clause->inputcollids);
1936  List *opargs;
1937  bool is_join_clause;
1938 
1939  /* Build equivalent arg list for single operator */
1940  opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
1941 
1942  /*
1943  * Decide if it's a join clause. This should match clausesel.c's
1944  * treat_as_join_clause(), except that we intentionally consider only the
1945  * leading columns and not the rest of the clause.
1946  */
1947  if (varRelid != 0)
1948  {
1949  /*
1950  * Caller is forcing restriction mode (eg, because we are examining an
1951  * inner indexscan qual).
1952  */
1953  is_join_clause = false;
1954  }
1955  else if (sjinfo == NULL)
1956  {
1957  /*
1958  * It must be a restriction clause, since it's being evaluated at a
1959  * scan node.
1960  */
1961  is_join_clause = false;
1962  }
1963  else
1964  {
1965  /*
1966  * Otherwise, it's a join if there's more than one relation used.
1967  */
1968  is_join_clause = (NumRelids((Node *) opargs) > 1);
1969  }
1970 
1971  if (is_join_clause)
1972  {
1973  /* Estimate selectivity for a join clause. */
1974  s1 = join_selectivity(root, opno,
1975  opargs,
1976  inputcollid,
1977  jointype,
1978  sjinfo);
1979  }
1980  else
1981  {
1982  /* Estimate selectivity for a restriction clause. */
1983  s1 = restriction_selectivity(root, opno,
1984  opargs,
1985  inputcollid,
1986  varRelid);
1987  }
1988 
1989  return s1;
1990 }
1991 
1992 /*
1993  * eqjoinsel - Join selectivity of "="
1994  */
1995 Datum
1997 {
1998  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1999  Oid operator = PG_GETARG_OID(1);
2000  List *args = (List *) PG_GETARG_POINTER(2);
2001 
2002 #ifdef NOT_USED
2003  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2004 #endif
2006  double selec;
2007  double selec_inner;
2008  VariableStatData vardata1;
2009  VariableStatData vardata2;
2010  double nd1;
2011  double nd2;
2012  bool isdefault1;
2013  bool isdefault2;
2014  Oid opfuncoid;
2015  AttStatsSlot sslot1;
2016  AttStatsSlot sslot2;
2017  Form_pg_statistic stats1 = NULL;
2018  Form_pg_statistic stats2 = NULL;
2019  bool have_mcvs1 = false;
2020  bool have_mcvs2 = false;
2021  bool join_is_reversed;
2022  RelOptInfo *inner_rel;
2023 
2024  get_join_variables(root, args, sjinfo,
2025  &vardata1, &vardata2, &join_is_reversed);
2026 
2027  nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2028  nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2029 
2030  opfuncoid = get_opcode(operator);
2031 
2032  memset(&sslot1, 0, sizeof(sslot1));
2033  memset(&sslot2, 0, sizeof(sslot2));
2034 
2035  if (HeapTupleIsValid(vardata1.statsTuple))
2036  {
2037  /* note we allow use of nullfrac regardless of security check */
2038  stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2039  if (statistic_proc_security_check(&vardata1, opfuncoid))
2040  have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2041  STATISTIC_KIND_MCV, InvalidOid,
2043  }
2044 
2045  if (HeapTupleIsValid(vardata2.statsTuple))
2046  {
2047  /* note we allow use of nullfrac regardless of security check */
2048  stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2049  if (statistic_proc_security_check(&vardata2, opfuncoid))
2050  have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2051  STATISTIC_KIND_MCV, InvalidOid,
2053  }
2054 
2055  /* We need to compute the inner-join selectivity in all cases */
2056  selec_inner = eqjoinsel_inner(opfuncoid,
2057  &vardata1, &vardata2,
2058  nd1, nd2,
2059  isdefault1, isdefault2,
2060  &sslot1, &sslot2,
2061  stats1, stats2,
2062  have_mcvs1, have_mcvs2);
2063 
2064  switch (sjinfo->jointype)
2065  {
2066  case JOIN_INNER:
2067  case JOIN_LEFT:
2068  case JOIN_FULL:
2069  selec = selec_inner;
2070  break;
2071  case JOIN_SEMI:
2072  case JOIN_ANTI:
2073 
2074  /*
2075  * Look up the join's inner relation. min_righthand is sufficient
2076  * information because neither SEMI nor ANTI joins permit any
2077  * reassociation into or out of their RHS, so the righthand will
2078  * always be exactly that set of rels.
2079  */
2080  inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2081 
2082  if (!join_is_reversed)
2083  selec = eqjoinsel_semi(opfuncoid,
2084  &vardata1, &vardata2,
2085  nd1, nd2,
2086  isdefault1, isdefault2,
2087  &sslot1, &sslot2,
2088  stats1, stats2,
2089  have_mcvs1, have_mcvs2,
2090  inner_rel);
2091  else
2092  {
2093  Oid commop = get_commutator(operator);
2094  Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2095 
2096  selec = eqjoinsel_semi(commopfuncoid,
2097  &vardata2, &vardata1,
2098  nd2, nd1,
2099  isdefault2, isdefault1,
2100  &sslot2, &sslot1,
2101  stats2, stats1,
2102  have_mcvs2, have_mcvs1,
2103  inner_rel);
2104  }
2105 
2106  /*
2107  * We should never estimate the output of a semijoin to be more
2108  * rows than we estimate for an inner join with the same input
2109  * rels and join condition; it's obviously impossible for that to
2110  * happen. The former estimate is N1 * Ssemi while the latter is
2111  * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
2112  * this is worthwhile because of the shakier estimation rules we
2113  * use in eqjoinsel_semi, particularly in cases where it has to
2114  * punt entirely.
2115  */
2116  selec = Min(selec, inner_rel->rows * selec_inner);
2117  break;
2118  default:
2119  /* other values not expected here */
2120  elog(ERROR, "unrecognized join type: %d",
2121  (int) sjinfo->jointype);
2122  selec = 0; /* keep compiler quiet */
2123  break;
2124  }
2125 
2126  free_attstatsslot(&sslot1);
2127  free_attstatsslot(&sslot2);
2128 
2129  ReleaseVariableStats(vardata1);
2130  ReleaseVariableStats(vardata2);
2131 
2132  CLAMP_PROBABILITY(selec);
2133 
2134  PG_RETURN_FLOAT8((float8) selec);
2135 }
2136 
2137 /*
2138  * eqjoinsel_inner --- eqjoinsel for normal inner join
2139  *
2140  * We also use this for LEFT/FULL outer joins; it's not presently clear
2141  * that it's worth trying to distinguish them here.
2142  */
2143 static double
2145  VariableStatData *vardata1, VariableStatData *vardata2,
2146  double nd1, double nd2,
2147  bool isdefault1, bool isdefault2,
2148  AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2149  Form_pg_statistic stats1, Form_pg_statistic stats2,
2150  bool have_mcvs1, bool have_mcvs2)
2151 {
2152  double selec;
2153 
2154  if (have_mcvs1 && have_mcvs2)
2155  {
2156  /*
2157  * We have most-common-value lists for both relations. Run through
2158  * the lists to see which MCVs actually join to each other with the
2159  * given operator. This allows us to determine the exact join
2160  * selectivity for the portion of the relations represented by the MCV
2161  * lists. We still have to estimate for the remaining population, but
2162  * in a skewed distribution this gives us a big leg up in accuracy.
2163  * For motivation see the analysis in Y. Ioannidis and S.
2164  * Christodoulakis, "On the propagation of errors in the size of join
2165  * results", Technical Report 1018, Computer Science Dept., University
2166  * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2167  */
2168  FmgrInfo eqproc;
2169  bool *hasmatch1;
2170  bool *hasmatch2;
2171  double nullfrac1 = stats1->stanullfrac;
2172  double nullfrac2 = stats2->stanullfrac;
2173  double matchprodfreq,
2174  matchfreq1,
2175  matchfreq2,
2176  unmatchfreq1,
2177  unmatchfreq2,
2178  otherfreq1,
2179  otherfreq2,
2180  totalsel1,
2181  totalsel2;
2182  int i,
2183  nmatches;
2184 
2185  fmgr_info(opfuncoid, &eqproc);
2186  hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2187  hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
2188 
2189  /*
2190  * Note we assume that each MCV will match at most one member of the
2191  * other MCV list. If the operator isn't really equality, there could
2192  * be multiple matches --- but we don't look for them, both for speed
2193  * and because the math wouldn't add up...
2194  */
2195  matchprodfreq = 0.0;
2196  nmatches = 0;
2197  for (i = 0; i < sslot1->nvalues; i++)
2198  {
2199  int j;
2200 
2201  for (j = 0; j < sslot2->nvalues; j++)
2202  {
2203  if (hasmatch2[j])
2204  continue;
2205  if (DatumGetBool(FunctionCall2Coll(&eqproc,
2206  sslot1->stacoll,
2207  sslot1->values[i],
2208  sslot2->values[j])))
2209  {
2210  hasmatch1[i] = hasmatch2[j] = true;
2211  matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2212  nmatches++;
2213  break;
2214  }
2215  }
2216  }
2217  CLAMP_PROBABILITY(matchprodfreq);
2218  /* Sum up frequencies of matched and unmatched MCVs */
2219  matchfreq1 = unmatchfreq1 = 0.0;
2220  for (i = 0; i < sslot1->nvalues; i++)
2221  {
2222  if (hasmatch1[i])
2223  matchfreq1 += sslot1->numbers[i];
2224  else
2225  unmatchfreq1 += sslot1->numbers[i];
2226  }
2227  CLAMP_PROBABILITY(matchfreq1);
2228  CLAMP_PROBABILITY(unmatchfreq1);
2229  matchfreq2 = unmatchfreq2 = 0.0;
2230  for (i = 0; i < sslot2->nvalues; i++)
2231  {
2232  if (hasmatch2[i])
2233  matchfreq2 += sslot2->numbers[i];
2234  else
2235  unmatchfreq2 += sslot2->numbers[i];
2236  }
2237  CLAMP_PROBABILITY(matchfreq2);
2238  CLAMP_PROBABILITY(unmatchfreq2);
2239  pfree(hasmatch1);
2240  pfree(hasmatch2);
2241 
2242  /*
2243  * Compute total frequency of non-null values that are not in the MCV
2244  * lists.
2245  */
2246  otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2247  otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2248  CLAMP_PROBABILITY(otherfreq1);
2249  CLAMP_PROBABILITY(otherfreq2);
2250 
2251  /*
2252  * We can estimate the total selectivity from the point of view of
2253  * relation 1 as: the known selectivity for matched MCVs, plus
2254  * unmatched MCVs that are assumed to match against random members of
2255  * relation 2's non-MCV population, plus non-MCV values that are
2256  * assumed to match against random members of relation 2's unmatched
2257  * MCVs plus non-MCV values.
2258  */
2259  totalsel1 = matchprodfreq;
2260  if (nd2 > sslot2->nvalues)
2261  totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2262  if (nd2 > nmatches)
2263  totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2264  (nd2 - nmatches);
2265  /* Same estimate from the point of view of relation 2. */
2266  totalsel2 = matchprodfreq;
2267  if (nd1 > sslot1->nvalues)
2268  totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2269  if (nd1 > nmatches)
2270  totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2271  (nd1 - nmatches);
2272 
2273  /*
2274  * Use the smaller of the two estimates. This can be justified in
2275  * essentially the same terms as given below for the no-stats case: to
2276  * a first approximation, we are estimating from the point of view of
2277  * the relation with smaller nd.
2278  */
2279  selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2280  }
2281  else
2282  {
2283  /*
2284  * We do not have MCV lists for both sides. Estimate the join
2285  * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2286  * is plausible if we assume that the join operator is strict and the
2287  * non-null values are about equally distributed: a given non-null
2288  * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2289  * of rel2, so total join rows are at most
2290  * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2291  * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2292  * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2293  * with MIN() is an upper bound. Using the MIN() means we estimate
2294  * from the point of view of the relation with smaller nd (since the
2295  * larger nd is determining the MIN). It is reasonable to assume that
2296  * most tuples in this rel will have join partners, so the bound is
2297  * probably reasonably tight and should be taken as-is.
2298  *
2299  * XXX Can we be smarter if we have an MCV list for just one side? It
2300  * seems that if we assume equal distribution for the other side, we
2301  * end up with the same answer anyway.
2302  */
2303  double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2304  double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2305 
2306  selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2307  if (nd1 > nd2)
2308  selec /= nd1;
2309  else
2310  selec /= nd2;
2311  }
2312 
2313  return selec;
2314 }
2315 
2316 /*
2317  * eqjoinsel_semi --- eqjoinsel for semi join
2318  *
2319  * (Also used for anti join, which we are supposed to estimate the same way.)
2320  * Caller has ensured that vardata1 is the LHS variable.
2321  * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
2322  */
2323 static double
2325  VariableStatData *vardata1, VariableStatData *vardata2,
2326  double nd1, double nd2,
2327  bool isdefault1, bool isdefault2,
2328  AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2329  Form_pg_statistic stats1, Form_pg_statistic stats2,
2330  bool have_mcvs1, bool have_mcvs2,
2331  RelOptInfo *inner_rel)
2332 {
2333  double selec;
2334 
2335  /*
2336  * We clamp nd2 to be not more than what we estimate the inner relation's
2337  * size to be. This is intuitively somewhat reasonable since obviously
2338  * there can't be more than that many distinct values coming from the
2339  * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2340  * likewise) is that this is the only pathway by which restriction clauses
2341  * applied to the inner rel will affect the join result size estimate,
2342  * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2343  * only the outer rel's size. If we clamped nd1 we'd be double-counting
2344  * the selectivity of outer-rel restrictions.
2345  *
2346  * We can apply this clamping both with respect to the base relation from
2347  * which the join variable comes (if there is just one), and to the
2348  * immediate inner input relation of the current join.
2349  *
2350  * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2351  * great, maybe, but it didn't come out of nowhere either. This is most
2352  * helpful when the inner relation is empty and consequently has no stats.
2353  */
2354  if (vardata2->rel)
2355  {
2356  if (nd2 >= vardata2->rel->rows)
2357  {
2358  nd2 = vardata2->rel->rows;
2359  isdefault2 = false;
2360  }
2361  }
2362  if (nd2 >= inner_rel->rows)
2363  {
2364  nd2 = inner_rel->rows;
2365  isdefault2 = false;
2366  }
2367 
2368  if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2369  {
2370  /*
2371  * We have most-common-value lists for both relations. Run through
2372  * the lists to see which MCVs actually join to each other with the
2373  * given operator. This allows us to determine the exact join
2374  * selectivity for the portion of the relations represented by the MCV
2375  * lists. We still have to estimate for the remaining population, but
2376  * in a skewed distribution this gives us a big leg up in accuracy.
2377  */
2378  FmgrInfo eqproc;
2379  bool *hasmatch1;
2380  bool *hasmatch2;
2381  double nullfrac1 = stats1->stanullfrac;
2382  double matchfreq1,
2383  uncertainfrac,
2384  uncertain;
2385  int i,
2386  nmatches,
2387  clamped_nvalues2;
2388 
2389  /*
2390  * The clamping above could have resulted in nd2 being less than
2391  * sslot2->nvalues; in which case, we assume that precisely the nd2
2392  * most common values in the relation will appear in the join input,
2393  * and so compare to only the first nd2 members of the MCV list. Of
2394  * course this is frequently wrong, but it's the best bet we can make.
2395  */
2396  clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2397 
2398  fmgr_info(opfuncoid, &eqproc);
2399  hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2400  hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2401 
2402  /*
2403  * Note we assume that each MCV will match at most one member of the
2404  * other MCV list. If the operator isn't really equality, there could
2405  * be multiple matches --- but we don't look for them, both for speed
2406  * and because the math wouldn't add up...
2407  */
2408  nmatches = 0;
2409  for (i = 0; i < sslot1->nvalues; i++)
2410  {
2411  int j;
2412 
2413  for (j = 0; j < clamped_nvalues2; j++)
2414  {
2415  if (hasmatch2[j])
2416  continue;
2417  if (DatumGetBool(FunctionCall2Coll(&eqproc,
2418  sslot1->stacoll,
2419  sslot1->values[i],
2420  sslot2->values[j])))
2421  {
2422  hasmatch1[i] = hasmatch2[j] = true;
2423  nmatches++;
2424  break;
2425  }
2426  }
2427  }
2428  /* Sum up frequencies of matched MCVs */
2429  matchfreq1 = 0.0;
2430  for (i = 0; i < sslot1->nvalues; i++)
2431  {
2432  if (hasmatch1[i])
2433  matchfreq1 += sslot1->numbers[i];
2434  }
2435  CLAMP_PROBABILITY(matchfreq1);
2436  pfree(hasmatch1);
2437  pfree(hasmatch2);
2438 
2439  /*
2440  * Now we need to estimate the fraction of relation 1 that has at
2441  * least one join partner. We know for certain that the matched MCVs
2442  * do, so that gives us a lower bound, but we're really in the dark
2443  * about everything else. Our crude approach is: if nd1 <= nd2 then
2444  * assume all non-null rel1 rows have join partners, else assume for
2445  * the uncertain rows that a fraction nd2/nd1 have join partners. We
2446  * can discount the known-matched MCVs from the distinct-values counts
2447  * before doing the division.
2448  *
2449  * Crude as the above is, it's completely useless if we don't have
2450  * reliable ndistinct values for both sides. Hence, if either nd1 or
2451  * nd2 is default, punt and assume half of the uncertain rows have
2452  * join partners.
2453  */
2454  if (!isdefault1 && !isdefault2)
2455  {
2456  nd1 -= nmatches;
2457  nd2 -= nmatches;
2458  if (nd1 <= nd2 || nd2 < 0)
2459  uncertainfrac = 1.0;
2460  else
2461  uncertainfrac = nd2 / nd1;
2462  }
2463  else
2464  uncertainfrac = 0.5;
2465  uncertain = 1.0 - matchfreq1 - nullfrac1;
2466  CLAMP_PROBABILITY(uncertain);
2467  selec = matchfreq1 + uncertainfrac * uncertain;
2468  }
2469  else
2470  {
2471  /*
2472  * Without MCV lists for both sides, we can only use the heuristic
2473  * about nd1 vs nd2.
2474  */
2475  double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2476 
2477  if (!isdefault1 && !isdefault2)
2478  {
2479  if (nd1 <= nd2 || nd2 < 0)
2480  selec = 1.0 - nullfrac1;
2481  else
2482  selec = (nd2 / nd1) * (1.0 - nullfrac1);
2483  }
2484  else
2485  selec = 0.5 * (1.0 - nullfrac1);
2486  }
2487 
2488  return selec;
2489 }
2490 
2491 /*
2492  * neqjoinsel - Join selectivity of "!="
2493  */
2494 Datum
2496 {
2497  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2498  Oid operator = PG_GETARG_OID(1);
2499  List *args = (List *) PG_GETARG_POINTER(2);
2500  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2502  float8 result;
2503 
2504  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
2505  {
2506  /*
2507  * For semi-joins, if there is more than one distinct value in the RHS
2508  * relation then every non-null LHS row must find a row to join since
2509  * it can only be equal to one of them. We'll assume that there is
2510  * always more than one distinct RHS value for the sake of stability,
2511  * though in theory we could have special cases for empty RHS
2512  * (selectivity = 0) and single-distinct-value RHS (selectivity =
2513  * fraction of LHS that has the same value as the single RHS value).
2514  *
2515  * For anti-joins, if we use the same assumption that there is more
2516  * than one distinct key in the RHS relation, then every non-null LHS
2517  * row must be suppressed by the anti-join.
2518  *
2519  * So either way, the selectivity estimate should be 1 - nullfrac.
2520  */
2521  VariableStatData leftvar;
2522  VariableStatData rightvar;
2523  bool reversed;
2524  HeapTuple statsTuple;
2525  double nullfrac;
2526 
2527  get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
2528  statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
2529  if (HeapTupleIsValid(statsTuple))
2530  nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
2531  else
2532  nullfrac = 0.0;
2533  ReleaseVariableStats(leftvar);
2534  ReleaseVariableStats(rightvar);
2535 
2536  result = 1.0 - nullfrac;
2537  }
2538  else
2539  {
2540  /*
2541  * We want 1 - eqjoinsel() where the equality operator is the one
2542  * associated with this != operator, that is, its negator.
2543  */
2544  Oid eqop = get_negator(operator);
2545 
2546  if (eqop)
2547  {
2549  PointerGetDatum(root),
2550  ObjectIdGetDatum(eqop),
2551  PointerGetDatum(args),
2552  Int16GetDatum(jointype),
2553  PointerGetDatum(sjinfo)));
2554  }
2555  else
2556  {
2557  /* Use default selectivity (should we raise an error instead?) */
2558  result = DEFAULT_EQ_SEL;
2559  }
2560  result = 1.0 - result;
2561  }
2562 
2563  PG_RETURN_FLOAT8(result);
2564 }
2565 
2566 /*
2567  * scalarltjoinsel - Join selectivity of "<" for scalars
2568  */
2569 Datum
2571 {
2573 }
2574 
2575 /*
2576  * scalarlejoinsel - Join selectivity of "<=" for scalars
2577  */
2578 Datum
2580 {
2582 }
2583 
2584 /*
2585  * scalargtjoinsel - Join selectivity of ">" for scalars
2586  */
2587 Datum
2589 {
2591 }
2592 
2593 /*
2594  * scalargejoinsel - Join selectivity of ">=" for scalars
2595  */
2596 Datum
2598 {
2600 }
2601 
2602 
2603 /*
2604  * mergejoinscansel - Scan selectivity of merge join.
2605  *
2606  * A merge join will stop as soon as it exhausts either input stream.
2607  * Therefore, if we can estimate the ranges of both input variables,
2608  * we can estimate how much of the input will actually be read. This
2609  * can have a considerable impact on the cost when using indexscans.
2610  *
2611  * Also, we can estimate how much of each input has to be read before the
2612  * first join pair is found, which will affect the join's startup time.
2613  *
2614  * clause should be a clause already known to be mergejoinable. opfamily,
2615  * strategy, and nulls_first specify the sort ordering being used.
2616  *
2617  * The outputs are:
2618  * *leftstart is set to the fraction of the left-hand variable expected
2619  * to be scanned before the first join pair is found (0 to 1).
2620  * *leftend is set to the fraction of the left-hand variable expected
2621  * to be scanned before the join terminates (0 to 1).
2622  * *rightstart, *rightend similarly for the right-hand variable.
2623  */
2624 void
2626  Oid opfamily, int strategy, bool nulls_first,
2627  Selectivity *leftstart, Selectivity *leftend,
2628  Selectivity *rightstart, Selectivity *rightend)
2629 {
2630  Node *left,
2631  *right;
2632  VariableStatData leftvar,
2633  rightvar;
2634  int op_strategy;
2635  Oid op_lefttype;
2636  Oid op_righttype;
2637  Oid opno,
2638  lsortop,
2639  rsortop,
2640  lstatop,
2641  rstatop,
2642  ltop,
2643  leop,
2644  revltop,
2645  revleop;
2646  bool isgt;
2647  Datum leftmin,
2648  leftmax,
2649  rightmin,
2650  rightmax;
2651  double selec;
2652 
2653  /* Set default results if we can't figure anything out. */
2654  /* XXX should default "start" fraction be a bit more than 0? */
2655  *leftstart = *rightstart = 0.0;
2656  *leftend = *rightend = 1.0;
2657 
2658  /* Deconstruct the merge clause */
2659  if (!is_opclause(clause))
2660  return; /* shouldn't happen */
2661  opno = ((OpExpr *) clause)->opno;
2662  left = get_leftop((Expr *) clause);
2663  right = get_rightop((Expr *) clause);
2664  if (!right)
2665  return; /* shouldn't happen */
2666 
2667  /* Look for stats for the inputs */
2668  examine_variable(root, left, 0, &leftvar);
2669  examine_variable(root, right, 0, &rightvar);
2670 
2671  /* Extract the operator's declared left/right datatypes */
2672  get_op_opfamily_properties(opno, opfamily, false,
2673  &op_strategy,
2674  &op_lefttype,
2675  &op_righttype);
2676  Assert(op_strategy == BTEqualStrategyNumber);
2677 
2678  /*
2679  * Look up the various operators we need. If we don't find them all, it
2680  * probably means the opfamily is broken, but we just fail silently.
2681  *
2682  * Note: we expect that pg_statistic histograms will be sorted by the '<'
2683  * operator, regardless of which sort direction we are considering.
2684  */
2685  switch (strategy)
2686  {
2687  case BTLessStrategyNumber:
2688  isgt = false;
2689  if (op_lefttype == op_righttype)
2690  {
2691  /* easy case */
2692  ltop = get_opfamily_member(opfamily,
2693  op_lefttype, op_righttype,
2695  leop = get_opfamily_member(opfamily,
2696  op_lefttype, op_righttype,
2698  lsortop = ltop;
2699  rsortop = ltop;
2700  lstatop = lsortop;
2701  rstatop = rsortop;
2702  revltop = ltop;
2703  revleop = leop;
2704  }
2705  else
2706  {
2707  ltop = get_opfamily_member(opfamily,
2708  op_lefttype, op_righttype,
2710  leop = get_opfamily_member(opfamily,
2711  op_lefttype, op_righttype,
2713  lsortop = get_opfamily_member(opfamily,
2714  op_lefttype, op_lefttype,
2716  rsortop = get_opfamily_member(opfamily,
2717  op_righttype, op_righttype,
2719  lstatop = lsortop;
2720  rstatop = rsortop;
2721  revltop = get_opfamily_member(opfamily,
2722  op_righttype, op_lefttype,
2724  revleop = get_opfamily_member(opfamily,
2725  op_righttype, op_lefttype,
2727  }
2728  break;
2730  /* descending-order case */
2731  isgt = true;
2732  if (op_lefttype == op_righttype)
2733  {
2734  /* easy case */
2735  ltop = get_opfamily_member(opfamily,
2736  op_lefttype, op_righttype,
2738  leop = get_opfamily_member(opfamily,
2739  op_lefttype, op_righttype,
2741  lsortop = ltop;
2742  rsortop = ltop;
2743  lstatop = get_opfamily_member(opfamily,
2744  op_lefttype, op_lefttype,
2746  rstatop = lstatop;
2747  revltop = ltop;
2748  revleop = leop;
2749  }
2750  else
2751  {
2752  ltop = get_opfamily_member(opfamily,
2753  op_lefttype, op_righttype,
2755  leop = get_opfamily_member(opfamily,
2756  op_lefttype, op_righttype,
2758  lsortop = get_opfamily_member(opfamily,
2759  op_lefttype, op_lefttype,
2761  rsortop = get_opfamily_member(opfamily,
2762  op_righttype, op_righttype,
2764  lstatop = get_opfamily_member(opfamily,
2765  op_lefttype, op_lefttype,
2767  rstatop = get_opfamily_member(opfamily,
2768  op_righttype, op_righttype,
2770  revltop = get_opfamily_member(opfamily,
2771  op_righttype, op_lefttype,
2773  revleop = get_opfamily_member(opfamily,
2774  op_righttype, op_lefttype,
2776  }
2777  break;
2778  default:
2779  goto fail; /* shouldn't get here */
2780  }
2781 
2782  if (!OidIsValid(lsortop) ||
2783  !OidIsValid(rsortop) ||
2784  !OidIsValid(lstatop) ||
2785  !OidIsValid(rstatop) ||
2786  !OidIsValid(ltop) ||
2787  !OidIsValid(leop) ||
2788  !OidIsValid(revltop) ||
2789  !OidIsValid(revleop))
2790  goto fail; /* insufficient info in catalogs */
2791 
2792  /* Try to get ranges of both inputs */
2793  if (!isgt)
2794  {
2795  if (!get_variable_range(root, &leftvar, lstatop,
2796  &leftmin, &leftmax))
2797  goto fail; /* no range available from stats */
2798  if (!get_variable_range(root, &rightvar, rstatop,
2799  &rightmin, &rightmax))
2800  goto fail; /* no range available from stats */
2801  }
2802  else
2803  {
2804  /* need to swap the max and min */
2805  if (!get_variable_range(root, &leftvar, lstatop,
2806  &leftmax, &leftmin))
2807  goto fail; /* no range available from stats */
2808  if (!get_variable_range(root, &rightvar, rstatop,
2809  &rightmax, &rightmin))
2810  goto fail; /* no range available from stats */
2811  }
2812 
2813  /*
2814  * Now, the fraction of the left variable that will be scanned is the
2815  * fraction that's <= the right-side maximum value. But only believe
2816  * non-default estimates, else stick with our 1.0.
2817  */
2818  selec = scalarineqsel(root, leop, isgt, true, &leftvar,
2819  rightmax, op_righttype);
2820  if (selec != DEFAULT_INEQ_SEL)
2821  *leftend = selec;
2822 
2823  /* And similarly for the right variable. */
2824  selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
2825  leftmax, op_lefttype);
2826  if (selec != DEFAULT_INEQ_SEL)
2827  *rightend = selec;
2828 
2829  /*
2830  * Only one of the two "end" fractions can really be less than 1.0;
2831  * believe the smaller estimate and reset the other one to exactly 1.0. If
2832  * we get exactly equal estimates (as can easily happen with self-joins),
2833  * believe neither.
2834  */
2835  if (*leftend > *rightend)
2836  *leftend = 1.0;
2837  else if (*leftend < *rightend)
2838  *rightend = 1.0;
2839  else
2840  *leftend = *rightend = 1.0;
2841 
2842  /*
2843  * Also, the fraction of the left variable that will be scanned before the
2844  * first join pair is found is the fraction that's < the right-side
2845  * minimum value. But only believe non-default estimates, else stick with
2846  * our own default.
2847  */
2848  selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
2849  rightmin, op_righttype);
2850  if (selec != DEFAULT_INEQ_SEL)
2851  *leftstart = selec;
2852 
2853  /* And similarly for the right variable. */
2854  selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
2855  leftmin, op_lefttype);
2856  if (selec != DEFAULT_INEQ_SEL)
2857  *rightstart = selec;
2858 
2859  /*
2860  * Only one of the two "start" fractions can really be more than zero;
2861  * believe the larger estimate and reset the other one to exactly 0.0. If
2862  * we get exactly equal estimates (as can easily happen with self-joins),
2863  * believe neither.
2864  */
2865  if (*leftstart < *rightstart)
2866  *leftstart = 0.0;
2867  else if (*leftstart > *rightstart)
2868  *rightstart = 0.0;
2869  else
2870  *leftstart = *rightstart = 0.0;
2871 
2872  /*
2873  * If the sort order is nulls-first, we're going to have to skip over any
2874  * nulls too. These would not have been counted by scalarineqsel, and we
2875  * can safely add in this fraction regardless of whether we believe
2876  * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
2877  */
2878  if (nulls_first)
2879  {
2880  Form_pg_statistic stats;
2881 
2882  if (HeapTupleIsValid(leftvar.statsTuple))
2883  {
2884  stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
2885  *leftstart += stats->stanullfrac;
2886  CLAMP_PROBABILITY(*leftstart);
2887  *leftend += stats->stanullfrac;
2888  CLAMP_PROBABILITY(*leftend);
2889  }
2890  if (HeapTupleIsValid(rightvar.statsTuple))
2891  {
2892  stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
2893  *rightstart += stats->stanullfrac;
2894  CLAMP_PROBABILITY(*rightstart);
2895  *rightend += stats->stanullfrac;
2896  CLAMP_PROBABILITY(*rightend);
2897  }
2898  }
2899 
2900  /* Disbelieve start >= end, just in case that can happen */
2901  if (*leftstart >= *leftend)
2902  {
2903  *leftstart = 0.0;
2904  *leftend = 1.0;
2905  }
2906  if (*rightstart >= *rightend)
2907  {
2908  *rightstart = 0.0;
2909  *rightend = 1.0;
2910  }
2911 
2912 fail:
2913  ReleaseVariableStats(leftvar);
2914  ReleaseVariableStats(rightvar);
2915 }
2916 
2917 
2918 /*
2919  * Helper routine for estimate_num_groups: add an item to a list of
2920  * GroupVarInfos, but only if it's not known equal to any of the existing
2921  * entries.
2922  */
2923 typedef struct
2924 {
2925  Node *var; /* might be an expression, not just a Var */
2926  RelOptInfo *rel; /* relation it belongs to */
2927  double ndistinct; /* # distinct values */
2928 } GroupVarInfo;
2929 
2930 static List *
2932  Node *var, VariableStatData *vardata)
2933 {
2934  GroupVarInfo *varinfo;
2935  double ndistinct;
2936  bool isdefault;
2937  ListCell *lc;
2938 
2939  ndistinct = get_variable_numdistinct(vardata, &isdefault);
2940 
2941  foreach(lc, varinfos)
2942  {
2943  varinfo = (GroupVarInfo *) lfirst(lc);
2944 
2945  /* Drop exact duplicates */
2946  if (equal(var, varinfo->var))
2947  return varinfos;
2948 
2949  /*
2950  * Drop known-equal vars, but only if they belong to different
2951  * relations (see comments for estimate_num_groups)
2952  */
2953  if (vardata->rel != varinfo->rel &&
2954  exprs_known_equal(root, var, varinfo->var))
2955  {
2956  if (varinfo->ndistinct <= ndistinct)
2957  {
2958  /* Keep older item, forget new one */
2959  return varinfos;
2960  }
2961  else
2962  {
2963  /* Delete the older item */
2964  varinfos = foreach_delete_current(varinfos, lc);
2965  }
2966  }
2967  }
2968 
2969  varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
2970 
2971  varinfo->var = var;
2972  varinfo->rel = vardata->rel;
2973  varinfo->ndistinct = ndistinct;
2974  varinfos = lappend(varinfos, varinfo);
2975  return varinfos;
2976 }
2977 
2978 /*
2979  * estimate_num_groups - Estimate number of groups in a grouped query
2980  *
2981  * Given a query having a GROUP BY clause, estimate how many groups there
2982  * will be --- ie, the number of distinct combinations of the GROUP BY
2983  * expressions.
2984  *
2985  * This routine is also used to estimate the number of rows emitted by
2986  * a DISTINCT filtering step; that is an isomorphic problem. (Note:
2987  * actually, we only use it for DISTINCT when there's no grouping or
2988  * aggregation ahead of the DISTINCT.)
2989  *
2990  * Inputs:
2991  * root - the query
2992  * groupExprs - list of expressions being grouped by
2993  * input_rows - number of rows estimated to arrive at the group/unique
2994  * filter step
2995  * pgset - NULL, or a List** pointing to a grouping set to filter the
2996  * groupExprs against
2997  *
2998  * Given the lack of any cross-correlation statistics in the system, it's
2999  * impossible to do anything really trustworthy with GROUP BY conditions
3000  * involving multiple Vars. We should however avoid assuming the worst
3001  * case (all possible cross-product terms actually appear as groups) since
3002  * very often the grouped-by Vars are highly correlated. Our current approach
3003  * is as follows:
3004  * 1. Expressions yielding boolean are assumed to contribute two groups,
3005  * independently of their content, and are ignored in the subsequent
3006  * steps. This is mainly because tests like "col IS NULL" break the
3007  * heuristic used in step 2 especially badly.
3008  * 2. Reduce the given expressions to a list of unique Vars used. For
3009  * example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3010  * It is clearly correct not to count the same Var more than once.
3011  * It is also reasonable to treat f(x) the same as x: f() cannot
3012  * increase the number of distinct values (unless it is volatile,
3013  * which we consider unlikely for grouping), but it probably won't
3014  * reduce the number of distinct values much either.
3015  * As a special case, if a GROUP BY expression can be matched to an
3016  * expressional index for which we have statistics, then we treat the
3017  * whole expression as though it were just a Var.
3018  * 3. If the list contains Vars of different relations that are known equal
3019  * due to equivalence classes, then drop all but one of the Vars from each
3020  * known-equal set, keeping the one with smallest estimated # of values
3021  * (since the extra values of the others can't appear in joined rows).
3022  * Note the reason we only consider Vars of different relations is that
3023  * if we considered ones of the same rel, we'd be double-counting the
3024  * restriction selectivity of the equality in the next step.
3025  * 4. For Vars within a single source rel, we multiply together the numbers
3026  * of values, clamp to the number of rows in the rel (divided by 10 if
3027  * more than one Var), and then multiply by a factor based on the
3028  * selectivity of the restriction clauses for that rel. When there's
3029  * more than one Var, the initial product is probably too high (it's the
3030  * worst case) but clamping to a fraction of the rel's rows seems to be a
3031  * helpful heuristic for not letting the estimate get out of hand. (The
3032  * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
3033  * we multiply by to adjust for the restriction selectivity assumes that
3034  * the restriction clauses are independent of the grouping, which may not
3035  * be a valid assumption, but it's hard to do better.
3036  * 5. If there are Vars from multiple rels, we repeat step 4 for each such
3037  * rel, and multiply the results together.
3038  * Note that rels not containing grouped Vars are ignored completely, as are
3039  * join clauses. Such rels cannot increase the number of groups, and we
3040  * assume such clauses do not reduce the number either (somewhat bogus,
3041  * but we don't have the info to do better).
3042  */
3043 double
3044 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3045  List **pgset)
3046 {
3047  List *varinfos = NIL;
3048  double srf_multiplier = 1.0;
3049  double numdistinct;
3050  ListCell *l;
3051  int i;
3052 
3053  /*
3054  * We don't ever want to return an estimate of zero groups, as that tends
3055  * to lead to division-by-zero and other unpleasantness. The input_rows
3056  * estimate is usually already at least 1, but clamp it just in case it
3057  * isn't.
3058  */
3059  input_rows = clamp_row_est(input_rows);
3060 
3061  /*
3062  * If no grouping columns, there's exactly one group. (This can't happen
3063  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3064  * corner cases with set operations.)
3065  */
3066  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3067  return 1.0;
3068 
3069  /*
3070  * Count groups derived from boolean grouping expressions. For other
3071  * expressions, find the unique Vars used, treating an expression as a Var
3072  * if we can find stats for it. For each one, record the statistical
3073  * estimate of number of distinct values (total in its table, without
3074  * regard for filtering).
3075  */
3076  numdistinct = 1.0;
3077 
3078  i = 0;
3079  foreach(l, groupExprs)
3080  {
3081  Node *groupexpr = (Node *) lfirst(l);
3082  double this_srf_multiplier;
3083  VariableStatData vardata;
3084  List *varshere;
3085  ListCell *l2;
3086 
3087  /* is expression in this grouping set? */
3088  if (pgset && !list_member_int(*pgset, i++))
3089  continue;
3090 
3091  /*
3092  * Set-returning functions in grouping columns are a bit problematic.
3093  * The code below will effectively ignore their SRF nature and come up
3094  * with a numdistinct estimate as though they were scalar functions.
3095  * We compensate by scaling up the end result by the largest SRF
3096  * rowcount estimate. (This will be an overestimate if the SRF
3097  * produces multiple copies of any output value, but it seems best to
3098  * assume the SRF's outputs are distinct. In any case, it's probably
3099  * pointless to worry too much about this without much better
3100  * estimates for SRF output rowcounts than we have today.)
3101  */
3102  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3103  if (srf_multiplier < this_srf_multiplier)
3104  srf_multiplier = this_srf_multiplier;
3105 
3106  /* Short-circuit for expressions returning boolean */
3107  if (exprType(groupexpr) == BOOLOID)
3108  {
3109  numdistinct *= 2.0;
3110  continue;
3111  }
3112 
3113  /*
3114  * If examine_variable is able to deduce anything about the GROUP BY
3115  * expression, treat it as a single variable even if it's really more
3116  * complicated.
3117  */
3118  examine_variable(root, groupexpr, 0, &vardata);
3119  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3120  {
3121  varinfos = add_unique_group_var(root, varinfos,
3122  groupexpr, &vardata);
3123  ReleaseVariableStats(vardata);
3124  continue;
3125  }
3126  ReleaseVariableStats(vardata);
3127 
3128  /*
3129  * Else pull out the component Vars. Handle PlaceHolderVars by
3130  * recursing into their arguments (effectively assuming that the
3131  * PlaceHolderVar doesn't change the number of groups, which boils
3132  * down to ignoring the possible addition of nulls to the result set).
3133  */
3134  varshere = pull_var_clause(groupexpr,
3138 
3139  /*
3140  * If we find any variable-free GROUP BY item, then either it is a
3141  * constant (and we can ignore it) or it contains a volatile function;
3142  * in the latter case we punt and assume that each input row will
3143  * yield a distinct group.
3144  */
3145  if (varshere == NIL)
3146  {
3147  if (contain_volatile_functions(groupexpr))
3148  return input_rows;
3149  continue;
3150  }
3151 
3152  /*
3153  * Else add variables to varinfos list
3154  */
3155  foreach(l2, varshere)
3156  {
3157  Node *var = (Node *) lfirst(l2);
3158 
3159  examine_variable(root, var, 0, &vardata);
3160  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3161  ReleaseVariableStats(vardata);
3162  }
3163  }
3164 
3165  /*
3166  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3167  * list.
3168  */
3169  if (varinfos == NIL)
3170  {
3171  /* Apply SRF multiplier as we would do in the long path */
3172  numdistinct *= srf_multiplier;
3173  /* Round off */
3174  numdistinct = ceil(numdistinct);
3175  /* Guard against out-of-range answers */
3176  if (numdistinct > input_rows)
3177  numdistinct = input_rows;
3178  if (numdistinct < 1.0)
3179  numdistinct = 1.0;
3180  return numdistinct;
3181  }
3182 
3183  /*
3184  * Group Vars by relation and estimate total numdistinct.
3185  *
3186  * For each iteration of the outer loop, we process the frontmost Var in
3187  * varinfos, plus all other Vars in the same relation. We remove these
3188  * Vars from the newvarinfos list for the next iteration. This is the
3189  * easiest way to group Vars of same rel together.
3190  */
3191  do
3192  {
3193  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3194  RelOptInfo *rel = varinfo1->rel;
3195  double reldistinct = 1;
3196  double relmaxndistinct = reldistinct;
3197  int relvarcount = 0;
3198  List *newvarinfos = NIL;
3199  List *relvarinfos = NIL;
3200 
3201  /*
3202  * Split the list of varinfos in two - one for the current rel, one
3203  * for remaining Vars on other rels.
3204  */
3205  relvarinfos = lappend(relvarinfos, varinfo1);
3206  for_each_cell(l, varinfos, list_second_cell(varinfos))
3207  {
3208  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3209 
3210  if (varinfo2->rel == varinfo1->rel)
3211  {
3212  /* varinfos on current rel */
3213  relvarinfos = lappend(relvarinfos, varinfo2);
3214  }
3215  else
3216  {
3217  /* not time to process varinfo2 yet */
3218  newvarinfos = lappend(newvarinfos, varinfo2);
3219  }
3220  }
3221 
3222  /*
3223  * Get the numdistinct estimate for the Vars of this rel. We
3224  * iteratively search for multivariate n-distinct with maximum number
3225  * of vars; assuming that each var group is independent of the others,
3226  * we multiply them together. Any remaining relvarinfos after no more
3227  * multivariate matches are found are assumed independent too, so
3228  * their individual ndistinct estimates are multiplied also.
3229  *
3230  * While iterating, count how many separate numdistinct values we
3231  * apply. We apply a fudge factor below, but only if we multiplied
3232  * more than one such values.
3233  */
3234  while (relvarinfos)
3235  {
3236  double mvndistinct;
3237 
3238  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3239  &mvndistinct))
3240  {
3241  reldistinct *= mvndistinct;
3242  if (relmaxndistinct < mvndistinct)
3243  relmaxndistinct = mvndistinct;
3244  relvarcount++;
3245  }
3246  else
3247  {
3248  foreach(l, relvarinfos)
3249  {
3250  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3251 
3252  reldistinct *= varinfo2->ndistinct;
3253  if (relmaxndistinct < varinfo2->ndistinct)
3254  relmaxndistinct = varinfo2->ndistinct;
3255  relvarcount++;
3256  }
3257 
3258  /* we're done with this relation */
3259  relvarinfos = NIL;
3260  }
3261  }
3262 
3263  /*
3264  * Sanity check --- don't divide by zero if empty relation.
3265  */
3266  Assert(IS_SIMPLE_REL(rel));
3267  if (rel->tuples > 0)
3268  {
3269  /*
3270  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3271  * fudge factor is because the Vars are probably correlated but we
3272  * don't know by how much. We should never clamp to less than the
3273  * largest ndistinct value for any of the Vars, though, since
3274  * there will surely be at least that many groups.
3275  */
3276  double clamp = rel->tuples;
3277 
3278  if (relvarcount > 1)
3279  {
3280  clamp *= 0.1;
3281  if (clamp < relmaxndistinct)
3282  {
3283  clamp = relmaxndistinct;
3284  /* for sanity in case some ndistinct is too large: */
3285  if (clamp > rel->tuples)
3286  clamp = rel->tuples;
3287  }
3288  }
3289  if (reldistinct > clamp)
3290  reldistinct = clamp;
3291 
3292  /*
3293  * Update the estimate based on the restriction selectivity,
3294  * guarding against division by zero when reldistinct is zero.
3295  * Also skip this if we know that we are returning all rows.
3296  */
3297  if (reldistinct > 0 && rel->rows < rel->tuples)
3298  {
3299  /*
3300  * Given a table containing N rows with n distinct values in a
3301  * uniform distribution, if we select p rows at random then
3302  * the expected number of distinct values selected is
3303  *
3304  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3305  *
3306  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3307  *
3308  * See "Approximating block accesses in database
3309  * organizations", S. B. Yao, Communications of the ACM,
3310  * Volume 20 Issue 4, April 1977 Pages 260-261.
3311  *
3312  * Alternatively, re-arranging the terms from the factorials,
3313  * this may be written as
3314  *
3315  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3316  *
3317  * This form of the formula is more efficient to compute in
3318  * the common case where p is larger than N/n. Additionally,
3319  * as pointed out by Dell'Era, if i << N for all terms in the
3320  * product, it can be approximated by
3321  *
3322  * n * (1 - ((N-p)/N)^(N/n))
3323  *
3324  * See "Expected distinct values when selecting from a bag
3325  * without replacement", Alberto Dell'Era,
3326  * http://www.adellera.it/investigations/distinct_balls/.
3327  *
3328  * The condition i << N is equivalent to n >> 1, so this is a
3329  * good approximation when the number of distinct values in
3330  * the table is large. It turns out that this formula also
3331  * works well even when n is small.
3332  */
3333  reldistinct *=
3334  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3335  rel->tuples / reldistinct));
3336  }
3337  reldistinct = clamp_row_est(reldistinct);
3338 
3339  /*
3340  * Update estimate of total distinct groups.
3341  */
3342  numdistinct *= reldistinct;
3343  }
3344 
3345  varinfos = newvarinfos;
3346  } while (varinfos != NIL);
3347 
3348  /* Now we can account for the effects of any SRFs */
3349  numdistinct *= srf_multiplier;
3350 
3351  /* Round off */
3352  numdistinct = ceil(numdistinct);
3353 
3354  /* Guard against out-of-range answers */
3355  if (numdistinct > input_rows)
3356  numdistinct = input_rows;
3357  if (numdistinct < 1.0)
3358  numdistinct = 1.0;
3359 
3360  return numdistinct;
3361 }
3362 
3363 /*
3364  * Estimate hash bucket statistics when the specified expression is used
3365  * as a hash key for the given number of buckets.
3366  *
3367  * This attempts to determine two values:
3368  *
3369  * 1. The frequency of the most common value of the expression (returns
3370  * zero into *mcv_freq if we can't get that).
3371  *
3372  * 2. The "bucketsize fraction", ie, average number of entries in a bucket
3373  * divided by total tuples in relation.
3374  *
3375  * XXX This is really pretty bogus since we're effectively assuming that the
3376  * distribution of hash keys will be the same after applying restriction
3377  * clauses as it was in the underlying relation. However, we are not nearly
3378  * smart enough to figure out how the restrict clauses might change the
3379  * distribution, so this will have to do for now.
3380  *
3381  * We are passed the number of buckets the executor will use for the given
3382  * input relation. If the data were perfectly distributed, with the same
3383  * number of tuples going into each available bucket, then the bucketsize
3384  * fraction would be 1/nbuckets. But this happy state of affairs will occur
3385  * only if (a) there are at least nbuckets distinct data values, and (b)
3386  * we have a not-too-skewed data distribution. Otherwise the buckets will
3387  * be nonuniformly occupied. If the other relation in the join has a key
3388  * distribution similar to this one's, then the most-loaded buckets are
3389  * exactly those that will be probed most often. Therefore, the "average"
3390  * bucket size for costing purposes should really be taken as something close
3391  * to the "worst case" bucket size. We try to estimate this by adjusting the
3392  * fraction if there are too few distinct data values, and then scaling up
3393  * by the ratio of the most common value's frequency to the average frequency.
3394  *
3395  * If no statistics are available, use a default estimate of 0.1. This will
3396  * discourage use of a hash rather strongly if the inner relation is large,
3397  * which is what we want. We do not want to hash unless we know that the
3398  * inner rel is well-dispersed (or the alternatives seem much worse).
3399  *
3400  * The caller should also check that the mcv_freq is not so large that the
3401  * most common value would by itself require an impractically large bucket.
3402  * In a hash join, the executor can split buckets if they get too big, but
3403  * obviously that doesn't help for a bucket that contains many duplicates of
3404  * the same value.
3405  */
3406 void
3407 estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
3408  Selectivity *mcv_freq,
3409  Selectivity *bucketsize_frac)
3410 {
3411  VariableStatData vardata;
3412  double estfract,
3413  ndistinct,
3414  stanullfrac,
3415  avgfreq;
3416  bool isdefault;
3417  AttStatsSlot sslot;
3418 
3419  examine_variable(root, hashkey, 0, &vardata);
3420 
3421  /* Look up the frequency of the most common value, if available */
3422  *mcv_freq = 0.0;
3423 
3424  if (HeapTupleIsValid(vardata.statsTuple))
3425  {
3426  if (get_attstatsslot(&sslot, vardata.statsTuple,
3427  STATISTIC_KIND_MCV, InvalidOid,
3429  {
3430  /*
3431  * The first MCV stat is for the most common value.
3432  */
3433  if (sslot.nnumbers > 0)
3434  *mcv_freq = sslot.numbers[0];
3435  free_attstatsslot(&sslot);
3436  }
3437  }
3438 
3439  /* Get number of distinct values */
3440  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3441 
3442  /*
3443  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3444  * mcv_freq is known to be even higher than that, use it instead.
3445  */
3446  if (isdefault)
3447  {
3448  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3449  ReleaseVariableStats(vardata);
3450  return;
3451  }
3452 
3453  /* Get fraction that are null */
3454  if (HeapTupleIsValid(vardata.statsTuple))
3455  {
3456  Form_pg_statistic stats;
3457 
3458  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3459  stanullfrac = stats->stanullfrac;
3460  }
3461  else
3462  stanullfrac = 0.0;
3463 
3464  /* Compute avg freq of all distinct data values in raw relation */
3465  avgfreq = (1.0 - stanullfrac) / ndistinct;
3466 
3467  /*
3468  * Adjust ndistinct to account for restriction clauses. Observe we are
3469  * assuming that the data distribution is affected uniformly by the
3470  * restriction clauses!
3471  *
3472  * XXX Possibly better way, but much more expensive: multiply by
3473  * selectivity of rel's restriction clauses that mention the target Var.
3474  */
3475  if (vardata.rel && vardata.rel->tuples > 0)
3476  {
3477  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3478  ndistinct = clamp_row_est(ndistinct);
3479  }
3480 
3481  /*
3482  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3483  * number of buckets is less than the expected number of distinct values;
3484  * otherwise it is 1/ndistinct.
3485  */
3486  if (ndistinct > nbuckets)
3487  estfract = 1.0 / nbuckets;
3488  else
3489  estfract = 1.0 / ndistinct;
3490 
3491  /*
3492  * Adjust estimated bucketsize upward to account for skewed distribution.
3493  */
3494  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3495  estfract *= *mcv_freq / avgfreq;
3496 
3497  /*
3498  * Clamp bucketsize to sane range (the above adjustment could easily
3499  * produce an out-of-range result). We set the lower bound a little above
3500  * zero, since zero isn't a very sane result.
3501  */
3502  if (estfract < 1.0e-6)
3503  estfract = 1.0e-6;
3504  else if (estfract > 1.0)
3505  estfract = 1.0;
3506 
3507  *bucketsize_frac = (Selectivity) estfract;
3508 
3509  ReleaseVariableStats(vardata);
3510 }
3511 
3512 /*
3513  * estimate_hashagg_tablesize
3514  * estimate the number of bytes that a hash aggregate hashtable will
3515  * require based on the agg_costs, path width and number of groups.
3516  *
3517  * We return the result as "double" to forestall any possible overflow
3518  * problem in the multiplication by dNumGroups.
3519  *
3520  * XXX this may be over-estimating the size now that hashagg knows to omit
3521  * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3522  * grouping columns not in the hashed set are counted here even though hashagg
3523  * won't store them. Is this a problem?
3524  */
3525 double
3527  double dNumGroups)
3528 {
3529  Size hashentrysize;
3530 
3531  /* Estimate per-hash-entry space at tuple width... */
3532  hashentrysize = MAXALIGN(path->pathtarget->width) +
3534 
3535  /* plus space for pass-by-ref transition values... */
3536  hashentrysize += agg_costs->transitionSpace;
3537  /* plus the per-hash-entry overhead */
3538  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3539 
3540  /*
3541  * Note that this disregards the effect of fill-factor and growth policy
3542  * of the hash table. That's probably ok, given that the default
3543  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3544  * "double-in-size" growth policies here.
3545  */
3546  return hashentrysize * dNumGroups;
3547 }
3548 
3549 
3550 /*-------------------------------------------------------------------------
3551  *
3552  * Support routines
3553  *
3554  *-------------------------------------------------------------------------
3555  */
3556 
3557 /*
3558  * Find applicable ndistinct statistics for the given list of VarInfos (which
3559  * must all belong to the given rel), and update *ndistinct to the estimate of
3560  * the MVNDistinctItem that best matches. If a match it found, *varinfos is
3561  * updated to remove the list of matched varinfos.
3562  *
3563  * Varinfos that aren't for simple Vars are ignored.
3564  *
3565  * Return true if we're able to find a match, false otherwise.
3566  */
3567 static bool
3569  List **varinfos, double *ndistinct)
3570 {
3571  ListCell *lc;
3572  Bitmapset *attnums = NULL;
3573  int nmatches;
3574  Oid statOid = InvalidOid;
3575  MVNDistinct *stats;
3576  Bitmapset *matched = NULL;
3577 
3578  /* bail out immediately if the table has no extended statistics */
3579  if (!rel->statlist)
3580  return false;
3581 
3582  /* Determine the attnums we're looking for */
3583  foreach(lc, *varinfos)
3584  {
3585  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3587 
3588  Assert(varinfo->rel == rel);
3589 
3590  if (!IsA(varinfo->var, Var))
3591  continue;
3592 
3593  attnum = ((Var *) varinfo->var)->varattno;
3594 
3595  if (!AttrNumberIsForUserDefinedAttr(attnum))
3596  continue;
3597 
3598  attnums = bms_add_member(attnums, attnum);
3599  }
3600 
3601  /* look for the ndistinct statistics matching the most vars */
3602  nmatches = 1; /* we require at least two matches */
3603  foreach(lc, rel->statlist)
3604  {
3605  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3606  Bitmapset *shared;
3607  int nshared;
3608 
3609  /* skip statistics of other kinds */
3610  if (info->kind != STATS_EXT_NDISTINCT)
3611  continue;
3612 
3613  /* compute attnums shared by the vars and the statistics object */
3614  shared = bms_intersect(info->keys, attnums);
3615  nshared = bms_num_members(shared);
3616 
3617  /*
3618  * Does this statistics object match more columns than the currently
3619  * best object? If so, use this one instead.
3620  *
3621  * XXX This should break ties using name of the object, or something
3622  * like that, to make the outcome stable.
3623  */
3624  if (nshared > nmatches)
3625  {
3626  statOid = info->statOid;
3627  nmatches = nshared;
3628  matched = shared;
3629  }
3630  }
3631 
3632  /* No match? */
3633  if (statOid == InvalidOid)
3634  return false;
3635  Assert(nmatches > 1 && matched != NULL);
3636 
3637  stats = statext_ndistinct_load(statOid);
3638 
3639  /*
3640  * If we have a match, search it for the specific item that matches (there
3641  * must be one), and construct the output values.
3642  */
3643  if (stats)
3644  {
3645  int i;
3646  List *newlist = NIL;
3647  MVNDistinctItem *item = NULL;
3648 
3649  /* Find the specific item that exactly matches the combination */
3650  for (i = 0; i < stats->nitems; i++)
3651  {
3652  MVNDistinctItem *tmpitem = &stats->items[i];
3653 
3654  if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
3655  {
3656  item = tmpitem;
3657  break;
3658  }
3659  }
3660 
3661  /* make sure we found an item */
3662  if (!item)
3663  elog(ERROR, "corrupt MVNDistinct entry");
3664 
3665  /* Form the output varinfo list, keeping only unmatched ones */
3666  foreach(lc, *varinfos)
3667  {
3668  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3670 
3671  if (!IsA(varinfo->var, Var))
3672  {
3673  newlist = lappend(newlist, varinfo);
3674  continue;
3675  }
3676 
3677  attnum = ((Var *) varinfo->var)->varattno;
3678 
3679  if (!AttrNumberIsForUserDefinedAttr(attnum))
3680  continue;
3681 
3682  if (!bms_is_member(attnum, matched))
3683  newlist = lappend(newlist, varinfo);
3684  }
3685 
3686  *varinfos = newlist;
3687  *ndistinct = item->ndistinct;
3688  return true;
3689  }
3690 
3691  return false;
3692 }
3693 
/*
 * convert_to_scalar
 *	  Convert non-NULL values of the indicated types to the comparison
 *	  scale needed by scalarineqsel().
 *	  Returns "true" if successful.
 *
 * XXX this routine is a hack: ideally we should look up the conversion
 * subroutines in pg_type.
 *
 * All numeric datatypes are simply converted to their equivalent
 * "double" values.  (NUMERIC values that are outside the range of "double"
 * are clamped to +/- HUGE_VAL.)
 *
 * String datatypes are converted by convert_string_to_scalar(),
 * which is explained below.  The reason why this routine deals with
 * three values at a time, not just one, is that we need it for strings.
 *
 * The bytea datatype is just enough different from strings that it has
 * to be treated separately.
 *
 * The several datatypes representing absolute times are all converted
 * to Timestamp, which is actually a double, and then we just use that
 * double value.  Note this will give correct results even for the "special"
 * values of Timestamp, since those are chosen to compare correctly;
 * see timestamp_cmp.
 *
 * The several datatypes representing relative times (intervals) are all
 * converted to measurements expressed in seconds.
 */
static bool
convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound)
{
	/*
	 * The per-type helper routines set this flag on failure but never clear
	 * it, so one cumulative check after all three conversions suffices.
	 */
	bool		failure = false;

	/*
	 * Both the valuetypid and the boundstypid should exactly match the
	 * declared input type(s) of the operator we are invoked for.  However,
	 * extensions might try to use scalarineqsel as estimator for operators
	 * with input type(s) we don't handle here; in such cases, we want to
	 * return false, not fail.  In any case, we mustn't assume that valuetypid
	 * and boundstypid are identical.
	 *
	 * XXX The histogram we are interpolating between points of could belong
	 * to a column that's only binary-compatible with the declared type.  In
	 * essence we are assuming that the semantics of binary-compatible types
	 * are enough alike that we can use a histogram generated with one type's
	 * operators to estimate selectivity for the other's.  This is outright
	 * wrong in some cases --- in particular signed versus unsigned
	 * interpretation could trip us up.  But it's useful enough in the
	 * majority of cases that we do it anyway.  Should think about more
	 * rigorous ways to do it.
	 */
	switch (valuetypid)
	{
			/*
			 * Built-in numeric types
			 */
		case BOOLOID:
		case INT2OID:
		case INT4OID:
		case INT8OID:
		case FLOAT4OID:
		case FLOAT8OID:
		case NUMERICOID:
		case OIDOID:
		case REGPROCOID:
		case REGPROCEDUREOID:
		case REGOPEROID:
		case REGOPERATOROID:
		case REGCLASSOID:
		case REGTYPEOID:
		case REGCONFIGOID:
		case REGDICTIONARYOID:
		case REGROLEOID:
		case REGNAMESPACEOID:
			*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;

			/*
			 * Built-in string types
			 */
		case CHAROID:
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
		case NAMEOID:
			{
				char	   *valstr = convert_string_datum(value, valuetypid,
														  collid, &failure);
				char	   *lostr = convert_string_datum(lobound, boundstypid,
														 collid, &failure);
				char	   *histr = convert_string_datum(hibound, boundstypid,
														 collid, &failure);

				/*
				 * Bail out if any of the values is not of string type.  We
				 * might leak converted strings for the other value(s), but
				 * that's not worth troubling over.
				 */
				if (failure)
					return false;

				convert_string_to_scalar(valstr, scaledvalue,
										 lostr, scaledlobound,
										 histr, scaledhibound);
				pfree(valstr);
				pfree(lostr);
				pfree(histr);
				return true;
			}

			/*
			 * Built-in bytea type
			 */
		case BYTEAOID:
			{
				/* We only support bytea vs bytea comparison */
				if (boundstypid != BYTEAOID)
					return false;
				convert_bytea_to_scalar(value, scaledvalue,
										lobound, scaledlobound,
										hibound, scaledhibound);
				return true;
			}

			/*
			 * Built-in time types
			 */
		case TIMESTAMPOID:
		case TIMESTAMPTZOID:
		case DATEOID:
		case INTERVALOID:
		case TIMEOID:
		case TIMETZOID:
			*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
													   &failure);
			*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
														 &failure);
			*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
														 &failure);
			return !failure;

			/*
			 * Built-in network types
			 */
		case INETOID:
		case CIDROID:
		case MACADDROID:
		case MACADDR8OID:
			*scaledvalue = convert_network_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;
	}
	/* Don't know how to convert */
	*scaledvalue = *scaledlobound = *scaledhibound = 0;
	return false;
}
3862 
3863 /*
3864  * Do convert_to_scalar()'s work for any numeric data type.
3865  *
3866  * On failure (e.g., unsupported typid), set *failure to true;
3867  * otherwise, that variable is not changed.
3868  */
3869 static double
3871 {
3872  switch (typid)
3873  {
3874  case BOOLOID:
3875  return (double) DatumGetBool(value);
3876  case INT2OID:
3877  return (double) DatumGetInt16(value);
3878  case INT4OID:
3879  return (double) DatumGetInt32(value);
3880  case INT8OID:
3881  return (double) DatumGetInt64(value);
3882  case FLOAT4OID:
3883  return (double) DatumGetFloat4(value);
3884  case FLOAT8OID:
3885  return (double) DatumGetFloat8(value);
3886  case NUMERICOID:
3887  /* Note: out-of-range values will be clamped to +-HUGE_VAL */
3888  return (double)
3890  value));
3891  case OIDOID:
3892  case REGPROCOID:
3893  case REGPROCEDUREOID:
3894  case REGOPEROID:
3895  case REGOPERATOROID:
3896  case REGCLASSOID:
3897  case REGTYPEOID:
3898  case REGCONFIGOID:
3899  case REGDICTIONARYOID:
3900  case REGROLEOID:
3901  case REGNAMESPACEOID:
3902  /* we can treat OIDs as integers... */
3903  return (double) DatumGetObjectId(value);
3904  }
3905 
3906  *failure = true;
3907  return 0;
3908 }
3909 
3910 /*
3911  * Do convert_to_scalar()'s work for any character-string data type.
3912  *
3913  * String datatypes are converted to a scale that ranges from 0 to 1,
3914  * where we visualize the bytes of the string as fractional digits.
3915  *
3916  * We do not want the base to be 256, however, since that tends to
3917  * generate inflated selectivity estimates; few databases will have
3918  * occurrences of all 256 possible byte values at each position.
3919  * Instead, use the smallest and largest byte values seen in the bounds
3920  * as the estimated range for each byte, after some fudging to deal with
3921  * the fact that we probably aren't going to see the full range that way.
3922  *
3923  * An additional refinement is that we discard any common prefix of the
3924  * three strings before computing the scaled values. This allows us to
3925  * "zoom in" when we encounter a narrow data range. An example is a phone
3926  * number database where all the values begin with the same area code.
3927  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
3928  * so this is more likely to happen than you might think.)
3929  */
static void
convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound)
{
	int			rangelo,
				rangehi;
	char	   *sptr;

	/*
	 * Scan the two bound strings to find the smallest and largest byte
	 * values actually present; these define the per-byte "digit" range.
	 */
	rangelo = rangehi = (unsigned char) hibound[0];
	for (sptr = lobound; *sptr; sptr++)
	{
		if (rangelo > (unsigned char) *sptr)
			rangelo = (unsigned char) *sptr;
		if (rangehi < (unsigned char) *sptr)
			rangehi = (unsigned char) *sptr;
	}
	for (sptr = hibound; *sptr; sptr++)
	{
		if (rangelo > (unsigned char) *sptr)
			rangelo = (unsigned char) *sptr;
		if (rangehi < (unsigned char) *sptr)
			rangehi = (unsigned char) *sptr;
	}
	/* If range includes any upper-case ASCII chars, make it include all */
	if (rangelo <= 'Z' && rangehi >= 'A')
	{
		if (rangelo > 'A')
			rangelo = 'A';
		if (rangehi < 'Z')
			rangehi = 'Z';
	}
	/* Ditto lower-case */
	if (rangelo <= 'z' && rangehi >= 'a')
	{
		if (rangelo > 'a')
			rangelo = 'a';
		if (rangehi < 'z')
			rangehi = 'z';
	}
	/* Ditto digits */
	if (rangelo <= '9' && rangehi >= '0')
	{
		if (rangelo > '0')
			rangelo = '0';
		if (rangehi < '9')
			rangehi = '9';
	}

	/*
	 * If range includes less than 10 chars, assume we have not got enough
	 * data, and make it include regular ASCII set.
	 */
	if (rangehi - rangelo < 9)
	{
		rangelo = ' ';
		rangehi = 127;
	}

	/*
	 * Now strip any common prefix of the three strings.
	 */
	while (*lobound)
	{
		if (*lobound != *hibound || *lobound != *value)
			break;
		lobound++, hibound++, value++;
	}

	/*
	 * Now we can do the conversions.
	 */
	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
}
4009 
static double
convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
{
	int			len = strlen(value);
	double		frac,
				divisor,
				width;
	int			i;

	/* An empty string scales to exactly 0 */
	if (len <= 0)
		return 0.0;

	/*
	 * Looking at more than a dozen bytes buys nothing: with a base of at
	 * least 10 we already get ~12 decimal digits of nominal resolution, far
	 * beyond the accuracy of this estimation technique (especially in non-C
	 * locales).  It also bounds divisor at 256^13 = 2.03e31, which cannot
	 * overflow a double on any known machine.
	 */
	if (len > 12)
		len = 12;

	/* Treat each byte as a fractional digit in base "width" */
	width = rangehi - rangelo + 1;
	frac = 0.0;
	divisor = width;
	for (i = 0; i < len; i++)
	{
		int			c = (unsigned char) value[i];

		/* Clamp out-of-range bytes to just outside the range */
		if (c < rangelo)
			c = rangelo - 1;
		else if (c > rangehi)
			c = rangehi + 1;
		frac += ((double) (c - rangelo)) / divisor;
		divisor *= width;
	}

	return frac;
}
4051 
4052 /*
4053  * Convert a string-type Datum into a palloc'd, null-terminated string.
4054  *
4055  * On failure (e.g., unsupported typid), set *failure to true;
4056  * otherwise, that variable is not changed. (We'll return NULL on failure.)
4057  *
4058  * When using a non-C locale, we must pass the string through strxfrm()
4059  * before continuing, so as to generate correct locale-specific results.
4060  */
4061 static char *
4062 convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
4063 {
4064  char *val;
4065 
4066  switch (typid)
4067  {
4068  case CHAROID:
4069  val = (char *) palloc(2);
4070  val[0] = DatumGetChar(value);
4071  val[1] = '\0';
4072  break;
4073  case BPCHAROID:
4074  case VARCHAROID:
4075  case TEXTOID:
4076  val = TextDatumGetCString(value);
4077  break;
4078  case NAMEOID:
4079  {
4080  NameData *nm = (NameData *) DatumGetPointer(value);
4081 
4082  val = pstrdup(NameStr(*nm));
4083  break;
4084  }
4085  default:
4086  *failure = true;
4087  return NULL;
4088  }
4089 
4090  if (!lc_collate_is_c(collid))
4091  {
4092  char *xfrmstr;
4093  size_t xfrmlen;
4094  size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4095 
4096  /*
4097  * XXX: We could guess at a suitable output buffer size and only call
4098  * strxfrm twice if our guess is too small.
4099  *
4100  * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4101  * bogus data or set an error. This is not really a problem unless it
4102  * crashes since it will only give an estimation error and nothing
4103  * fatal.
4104  */
4105  xfrmlen = strxfrm(NULL, val, 0);
4106 #ifdef WIN32
4107 
4108  /*
4109  * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4110  * of trying to allocate this much memory (and fail), just return the
4111  * original string unmodified as if we were in the C locale.
4112  */
4113  if (xfrmlen == INT_MAX)
4114  return val;
4115 #endif
4116  xfrmstr = (char *) palloc(xfrmlen + 1);
4117  xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4118 
4119  /*
4120  * Some systems (e.g., glibc) can return a smaller value from the
4121  * second call than the first; thus the Assert must be <= not ==.
4122  */
4123  Assert(xfrmlen2 <= xfrmlen);
4124  pfree(val);
4125  val = xfrmstr;
4126  }
4127 
4128  return val;
4129 }
4130 
4131 /*
4132  * Do convert_to_scalar()'s work for any bytea data type.
4133  *
4134  * Very similar to convert_string_to_scalar except we can't assume
4135  * null-termination and therefore pass explicit lengths around.
4136  *
4137  * Also, assumptions about likely "normal" ranges of characters have been
4138  * removed - a data range of 0..255 is always used, for now. (Perhaps
4139  * someday we will add information about actual byte data range to
4140  * pg_statistic.)
4141  */
4142 static void
4144  double *scaledvalue,
4145  Datum lobound,
4146  double *scaledlobound,
4147  Datum hibound,
4148  double *scaledhibound)
4149 {
4150  bytea *valuep = DatumGetByteaPP(value);
4151  bytea *loboundp = DatumGetByteaPP(lobound);
4152  bytea *hiboundp = DatumGetByteaPP(hibound);
4153  int rangelo,
4154  rangehi,
4155  valuelen = VARSIZE_ANY_EXHDR(valuep),
4156  loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4157  hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
4158  i,
4159  minlen;
4160  unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
4161  unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
4162  unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
4163 
4164  /*
4165  * Assume bytea data is uniformly distributed across all byte values.
4166  */
4167  rangelo = 0;
4168  rangehi = 255;
4169 
4170  /*
4171  * Now strip any common prefix of the three strings.
4172  */
4173  minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4174  for (i = 0; i < minlen; i++)
4175  {
4176  if (*lostr != *histr || *lostr != *valstr)
4177  break;
4178  lostr++, histr++, valstr++;
4179  loboundlen--, hiboundlen--, valuelen--;
4180  }
4181 
4182  /*
4183  * Now we can do the conversions.
4184  */
4185  *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4186  *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4187  *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4188 }
4189 
static double
convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi)
{
	double		frac,
				divisor,
				width;
	int			i;

	/* An empty bytea scales to exactly 0 */
	if (valuelen <= 0)
		return 0.0;

	/*
	 * With a base of 256 there is no point in looking at more than about 10
	 * bytes (even this many seems like overkill).
	 */
	if (valuelen > 10)
		valuelen = 10;

	/* Treat each byte as a fractional digit in base "width" */
	width = rangehi - rangelo + 1;
	frac = 0.0;
	divisor = width;
	for (i = 0; i < valuelen; i++)
	{
		int			c = value[i];

		/* Clamp out-of-range bytes to just outside the range */
		if (c < rangelo)
			c = rangelo - 1;
		else if (c > rangehi)
			c = rangehi + 1;
		frac += ((double) (c - rangelo)) / divisor;
		divisor *= width;
	}

	return frac;
}
4226 
4227 /*
4228  * Do convert_to_scalar()'s work for any timevalue data type.
4229  *
4230  * On failure (e.g., unsupported typid), set *failure to true;
4231  * otherwise, that variable is not changed.
4232  */
4233 static double
4235 {
4236  switch (typid)
4237  {
4238  case TIMESTAMPOID:
4239  return DatumGetTimestamp(value);
4240  case TIMESTAMPTZOID:
4241  return DatumGetTimestampTz(value);
4242  case DATEOID:
4244  case INTERVALOID:
4245  {
4247 
4248  /*
4249  * Convert the month part of Interval to days using assumed
4250  * average month length of 365.25/12.0 days. Not too
4251  * accurate, but plenty good enough for our purposes.
4252  */
4253  return interval->time + interval->day * (double) USECS_PER_DAY +
4254  interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4255  }
4256  case TIMEOID:
4257  return DatumGetTimeADT(value);
4258  case TIMETZOID:
4259  {
4260  TimeTzADT *timetz = DatumGetTimeTzADTP(value);
4261 
4262  /* use GMT-equivalent time */
4263  return (double) (timetz->time + (timetz->zone * 1000000.0));
4264  }
4265  }
4266 
4267  *failure = true;
4268  return 0;
4269 }
4270 
4271 
4272 /*
4273  * get_restriction_variable
4274  * Examine the args of a restriction clause to see if it's of the
4275  * form (variable op pseudoconstant) or (pseudoconstant op variable),
4276  * where "variable" could be either a Var or an expression in vars of a
4277  * single relation. If so, extract information about the variable,
4278  * and also indicate which side it was on and the other argument.
4279  *
4280  * Inputs:
4281  * root: the planner info
4282  * args: clause argument list
4283  * varRelid: see specs for restriction selectivity functions
4284  *
4285  * Outputs: (these are valid only if true is returned)
4286  * *vardata: gets information about variable (see examine_variable)
4287  * *other: gets other clause argument, aggressively reduced to a constant
4288  * *varonleft: set true if variable is on the left, false if on the right
4289  *
4290  * Returns true if a variable is identified, otherwise false.
4291  *
4292  * Note: if there are Vars on both sides of the clause, we must fail, because
4293  * callers are expecting that the other side will act like a pseudoconstant.
4294  */
4295 bool
4297  VariableStatData *vardata, Node **other,
4298  bool *varonleft)
4299 {
4300  Node *left,
4301  *right;
4302  VariableStatData rdata;
4303 
4304  /* Fail if not a binary opclause (probably shouldn't happen) */
4305  if (list_length(args) != 2)
4306  return false;
4307 
4308  left = (Node *) linitial(args);
4309  right = (Node *) lsecond(args);
4310 
4311  /*
4312  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4313  * relations will be treated as pseudoconstants.
4314  */
4315  examine_variable(root, left, varRelid, vardata);
4316  examine_variable(root, right, varRelid, &rdata);
4317 
4318  /*
4319  * If one side is a variable and the other not, we win.
4320  */
4321  if (vardata->rel && rdata.rel == NULL)
4322  {
4323  *varonleft = true;
4324  *other = estimate_expression_value(root, rdata.var);
4325  /* Assume we need no ReleaseVariableStats(rdata) here */
4326  return true;
4327  }
4328 
4329  if (vardata->rel == NULL && rdata.rel)
4330  {
4331  *varonleft = false;
4332  *other = estimate_expression_value(root, vardata->var);
4333  /* Assume we need no ReleaseVariableStats(*vardata) here */
4334  *vardata = rdata;
4335  return true;
4336  }
4337 
4338  /* Oops, clause has wrong structure (probably var op var) */
4339  ReleaseVariableStats(*vardata);
4340  ReleaseVariableStats(rdata);
4341 
4342  return false;
4343 }
4344 
4345 /*
4346  * get_join_variables
4347  * Apply examine_variable() to each side of a join clause.
4348  * Also, attempt to identify whether the join clause has the same
4349  * or reversed sense compared to the SpecialJoinInfo.
4350  *
4351  * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4352  * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases
4353  * where we can't tell for sure, we default to assuming it's normal.
4354  */
4355 void
4357  VariableStatData *vardata1, VariableStatData *vardata2,
4358  bool *join_is_reversed)
4359 {
4360  Node *left,
4361  *right;
4362 
4363  if (list_length(args) != 2)
4364  elog(ERROR, "join operator should take two arguments");
4365 
4366  left = (Node *) linitial(args);
4367  right = (Node *) lsecond(args);
4368 
4369  examine_variable(root, left, 0, vardata1);
4370  examine_variable(root, right, 0, vardata2);
4371 
4372  if (vardata1->rel &&
4373  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4374  *join_is_reversed = true; /* var1 is on RHS */
4375  else if (vardata2->rel &&
4376  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4377  *join_is_reversed = true; /* var2 is on LHS */
4378  else
4379  *join_is_reversed = false;
4380 }
4381 
4382 /*
4383  * examine_variable
4384  * Try to look up statistical data about an expression.
4385  * Fill in a VariableStatData struct to describe the expression.
4386  *
4387  * Inputs:
4388  * root: the planner info
4389  * node: the expression tree to examine
4390  * varRelid: see specs for restriction selectivity functions
4391  *
4392  * Outputs: *vardata is filled as follows:
4393  * var: the input expression (with any binary relabeling stripped, if
4394  * it is or contains a variable; but otherwise the type is preserved)
4395  * rel: RelOptInfo for relation containing variable; NULL if expression
4396  * contains no Vars (NOTE this could point to a RelOptInfo of a
4397  * subquery, not one in the current query).
4398  * statsTuple: the pg_statistic entry for the variable, if one exists;
4399  * otherwise NULL.
4400  * freefunc: pointer to a function to release statsTuple with.
4401  * vartype: exposed type of the expression; this should always match
4402  * the declared input type of the operator we are estimating for.
4403  * atttype, atttypmod: actual type/typmod of the "var" expression. This is
4404  * commonly the same as the exposed type of the variable argument,
4405  * but can be different in binary-compatible-type cases.
4406  * isunique: true if we were able to match the var to a unique index or a
4407  * single-column DISTINCT clause, implying its values are unique for
4408  * this query. (Caution: this should be trusted for statistical
4409  * purposes only, since we do not check indimmediate nor verify that
4410  * the exact same definition of equality applies.)
4411  * acl_ok: true if current user has permission to read the column(s)
4412  * underlying the pg_statistic entry. This is consulted by
4413  * statistic_proc_security_check().
4414  *
4415  * Caller is responsible for doing ReleaseVariableStats() before exiting.
4416  */
4417 void
4418 examine_variable(PlannerInfo *root, Node *node, int varRelid,
4419  VariableStatData *vardata)
4420 {
4421  Node *basenode;
4422  Relids varnos;
4423  RelOptInfo *onerel;
4424 
4425  /* Make sure we don't return dangling pointers in vardata */
4426  MemSet(vardata, 0, sizeof(VariableStatData));
4427 
4428  /* Save the exposed type of the expression */
4429  vardata->vartype = exprType(node);
4430 
4431  /* Look inside any binary-compatible relabeling */
4432 
4433  if (IsA(node, RelabelType))
4434  basenode = (Node *) ((RelabelType *) node)->arg;
4435  else
4436  basenode = node;
4437 
4438  /* Fast path for a simple Var */
4439 
4440  if (IsA(basenode, Var) &&
4441  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4442  {
4443  Var *var = (Var *) basenode;
4444 
4445  /* Set up result fields other than the stats tuple */
4446  vardata->var = basenode; /* return Var without relabeling */
4447  vardata->rel = find_base_rel(root, var->varno);
4448  vardata->atttype = var->vartype;
4449  vardata->atttypmod = var->vartypmod;
4450  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4451 
4452  /* Try to locate some stats */
4453  examine_simple_variable(root, var, vardata);
4454 
4455  return;
4456  }
4457 
4458  /*
4459  * Okay, it's a more complicated expression. Determine variable
4460  * membership. Note that when varRelid isn't zero, only vars of that
4461  * relation are considered "real" vars.
4462  */
4463  varnos = pull_varnos(basenode);
4464 
4465  onerel = NULL;
4466 
4467  switch (bms_membership(varnos))
4468  {
4469  case BMS_EMPTY_SET:
4470  /* No Vars at all ... must be pseudo-constant clause */
4471  break;
4472  case BMS_SINGLETON:
4473  if (varRelid == 0 || bms_is_member(varRelid, varnos))
4474  {
4475  onerel = find_base_rel(root,
4476  (varRelid ? varRelid : bms_singleton_member(varnos)));
4477  vardata->rel = onerel;
4478  node = basenode; /* strip any relabeling */
4479  }
4480  /* else treat it as a constant */
4481  break;
4482  case BMS_MULTIPLE:
4483  if (varRelid == 0)
4484  {
4485  /* treat it as a variable of a join relation */
4486  vardata->rel = find_join_rel(root, varnos);
4487  node = basenode; /* strip any relabeling */
4488  }
4489  else if (bms_is_member(varRelid, varnos))
4490  {
4491  /* ignore the vars belonging to other relations */
4492  vardata->rel = find_base_rel(root, varRelid);
4493  node = basenode; /* strip any relabeling */
4494  /* note: no point in expressional-index search here */
4495  }
4496  /* else treat it as a constant */
4497  break;
4498  }
4499 
4500  bms_free(varnos);
4501 
4502  vardata->var = node;
4503  vardata->atttype = exprType(node);
4504  vardata->atttypmod = exprTypmod(node);
4505 
4506  if (onerel)
4507  {
4508  /*
4509  * We have an expression in vars of a single relation. Try to match
4510  * it to expressional index columns, in hopes of finding some
4511  * statistics.
4512  *
4513  * Note that we consider all index columns including INCLUDE columns,
4514  * since there could be stats for such columns. But the test for
4515  * uniqueness needs to be warier.
4516  *
4517  * XXX it's conceivable that there are multiple matches with different
4518  * index opfamilies; if so, we need to pick one that matches the
4519  * operator we are estimating for. FIXME later.
4520  */
4521  ListCell *ilist;
4522 
4523  foreach(ilist, onerel->indexlist)
4524  {
4525  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4526  ListCell *indexpr_item;
4527  int pos;
4528 
4529  indexpr_item = list_head(index->indexprs);
4530  if (indexpr_item == NULL)
4531  continue; /* no expressions here... */
4532 
4533  for (pos = 0; pos < index->ncolumns; pos++)
4534  {
4535  if (index->indexkeys[pos] == 0)
4536  {
4537  Node *indexkey;
4538 
4539  if (indexpr_item == NULL)
4540  elog(ERROR, "too few entries in indexprs list");
4541  indexkey = (Node *) lfirst(indexpr_item);
4542  if (indexkey && IsA(indexkey, RelabelType))
4543  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4544  if (equal(node, indexkey))
4545  {
4546  /*
4547  * Found a match ... is it a unique index? Tests here
4548  * should match has_unique_index().
4549  */
4550  if (index->unique &&
4551  index->nkeycolumns == 1 &&
4552  pos == 0 &&
4553  (index->indpred == NIL || index->predOK))
4554  vardata->isunique = true;
4555 
4556  /*
4557  * Has it got stats? We only consider stats for
4558  * non-partial indexes, since partial indexes probably
4559  * don't reflect whole-relation statistics; the above
4560  * check for uniqueness is the only info we take from
4561  * a partial index.
4562  *
4563  * An index stats hook, however, must make its own
4564  * decisions about what to do with partial indexes.
4565  */
4566  if (get_index_stats_hook &&
4567  (*get_index_stats_hook) (root, index->indexoid,
4568  pos + 1, vardata))
4569  {
4570  /*
4571  * The hook took control of acquiring a stats
4572  * tuple. If it did supply a tuple, it'd better
4573  * have supplied a freefunc.
4574  */
4575  if (HeapTupleIsValid(vardata->statsTuple) &&
4576  !vardata->freefunc)
4577  elog(ERROR, "no function provided to release variable stats with");
4578  }
4579  else if (index->indpred == NIL)
4580  {
4581  vardata->statsTuple =
4583  ObjectIdGetDatum(index->indexoid),
4584  Int16GetDatum(pos + 1),
4585  BoolGetDatum(false));
4586  vardata->freefunc = ReleaseSysCache;
4587 
4588  if (HeapTupleIsValid(vardata->statsTuple))
4589  {
4590  /* Get index's table for permission check */
4591  RangeTblEntry *rte;
4592  Oid userid;
4593 
4594  rte = planner_rt_fetch(index->rel->relid, root);
4595  Assert(rte->rtekind == RTE_RELATION);
4596 
4597  /*
4598  * Use checkAsUser if it's set, in case we're
4599  * accessing the table via a view.
4600  */
4601  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4602 
4603  /*
4604  * For simplicity, we insist on the whole
4605  * table being selectable, rather than trying
4606  * to identify which column(s) the index
4607  * depends on. Also require all rows to be
4608  * selectable --- there must be no
4609  * securityQuals from security barrier views
4610  * or RLS policies.
4611  */
4612  vardata->acl_ok =
4613  rte->securityQuals == NIL &&
4614  (pg_class_aclcheck(rte->relid, userid,
4615  ACL_SELECT) == ACLCHECK_OK);
4616 
4617  /*
4618  * If the user doesn't have permissions to
4619  * access an inheritance child relation, check
4620  * the permissions of the table actually
4621  * mentioned in the query, since most likely
4622  * the user does have that permission. Note
4623  * that whole-table select privilege on the
4624  * parent doesn't quite guarantee that the
4625  * user could read all columns of the child.
4626  * But in practice it's unlikely that any
4627  * interesting security violation could result
4628  * from allowing access to the expression
4629  * index's stats, so we allow it anyway. See
4630  * similar code in examine_simple_variable()
4631  * for additional comments.
4632  */
4633  if (!vardata->acl_ok &&
4634  root->append_rel_array != NULL)
4635  {
4636  AppendRelInfo *appinfo;
4637  Index varno = index->rel->relid;
4638 
4639  appinfo = root->append_rel_array[varno];
4640  while (appinfo &&
4641  planner_rt_fetch(appinfo->parent_relid,
4642  root)->rtekind == RTE_RELATION)
4643  {
4644  varno = appinfo->parent_relid;
4645  appinfo = root->append_rel_array[varno];
4646  }
4647  if (varno != index->rel->relid)
4648  {
4649  /* Repeat access check on this rel */
4650  rte = planner_rt_fetch(varno, root);
4651  Assert(rte->rtekind == RTE_RELATION);
4652 
4653  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4654 
4655  vardata->acl_ok =
4656  rte->securityQuals == NIL &&
4657  (pg_class_aclcheck(rte->relid,
4658  userid,
4659  ACL_SELECT) == ACLCHECK_OK);
4660  }
4661  }
4662  }
4663  else
4664  {
4665  /* suppress leakproofness checks later */
4666  vardata->acl_ok = true;
4667  }
4668  }
4669  if (vardata->statsTuple)
4670  break;
4671  }
4672  indexpr_item = lnext(index->indexprs, indexpr_item);
4673  }
4674  }
4675  if (vardata->statsTuple)
4676  break;
4677  }
4678  }
4679 }
4680 
4681 /*
4682  * examine_simple_variable
4683  * Handle a simple Var for examine_variable
4684  *
4685  * This is split out as a subroutine so that we can recurse to deal with
4686  * Vars referencing subqueries.
4687  *
4688  * We already filled in all the fields of *vardata except for the stats tuple.
4689  */
4690 static void
4692  VariableStatData *vardata)
4693 {
4694  RangeTblEntry *rte = root->simple_rte_array[var->varno];
4695 
4696  Assert(IsA(rte, RangeTblEntry));
4697 
4699  (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
4700  {
4701  /*
4702  * The hook took control of acquiring a stats tuple. If it did supply
4703  * a tuple, it'd better have supplied a freefunc.
4704  */
4705  if (HeapTupleIsValid(vardata->statsTuple) &&
4706  !vardata->freefunc)
4707  elog(ERROR, "no function provided to release variable stats with");
4708  }
4709  else if (rte->rtekind == RTE_RELATION)
4710  {
4711  /*
4712  * Plain table or parent of an inheritance appendrel, so look up the
4713  * column in pg_statistic
4714  */
4716  ObjectIdGetDatum(rte->relid),
4717  Int16GetDatum(var->varattno),
4718  BoolGetDatum(rte->inh));
4719  vardata->freefunc = ReleaseSysCache;
4720 
4721  if (HeapTupleIsValid(vardata->statsTuple))
4722  {
4723  Oid userid;
4724 
4725  /*
4726  * Check if user has permission to read this column. We require
4727  * all rows to be accessible, so there must be no securityQuals
4728  * from security barrier views or RLS policies. Use checkAsUser
4729  * if it's set, in case we're accessing the table via a view.
4730  */
4731  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4732 
4733  vardata->acl_ok =
4734  rte->securityQuals == NIL &&
4735  ((pg_class_aclcheck(rte->relid, userid,
4736  ACL_SELECT) == ACLCHECK_OK) ||
4737  (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
4738  ACL_SELECT) == ACLCHECK_OK));
4739 
4740  /*
4741  * If the user doesn't have permissions to access an inheritance
4742  * child relation or specifically this attribute, check the
4743  * permissions of the table/column actually mentioned in the
4744  * query, since most likely the user does have that permission
4745  * (else the query will fail at runtime), and if the user can read
4746  * the column there then he can get the values of the child table
4747  * too. To do that, we must find out which of the root parent's
4748  * attributes the child relation's attribute corresponds to.
4749  */
4750  if (!vardata->acl_ok && var->varattno > 0 &&
4751  root->append_rel_array != NULL)
4752  {
4753  AppendRelInfo *appinfo;
4754  Index varno = var->varno;
4755  int varattno = var->varattno;
4756  bool found = false;
4757 
4758  appinfo = root->append_rel_array[varno];
4759 
4760  /*
4761  * Partitions are mapped to their immediate parent, not the
4762  * root parent, so must be ready to walk up multiple
4763  * AppendRelInfos. But stop if we hit a parent that is not
4764  * RTE_RELATION --- that's a flattened UNION ALL subquery, not
4765  * an inheritance parent.
4766  */
4767  while (appinfo &&
4768  planner_rt_fetch(appinfo->parent_relid,
4769  root)->rtekind == RTE_RELATION)
4770  {
4771  int parent_varattno;
4772 
4773  found = false;
4774  if (varattno <= 0 || varattno > appinfo->num_child_cols)
4775  break; /* safety check */
4776  parent_varattno = appinfo->parent_colnos[varattno - 1];
4777  if (parent_varattno == 0)
4778  break; /* Var is local to child */
4779 
4780  varno = appinfo->parent_relid;
4781  varattno = parent_varattno;
4782  found = true;
4783 
4784  /* If the parent is itself a child, continue up. */
4785  appinfo = root->append_rel_array[varno];
4786  }
4787 
4788  /*
4789  * In rare cases, the Var may be local to the child table, in
4790  * which case, we've got to live with having no access to this
4791  * column's stats.
4792  */
4793  if (!found)
4794  return;
4795 
4796  /* Repeat the access check on this parent rel & column */
4797  rte = planner_rt_fetch(varno, root);
4798  Assert(rte->rtekind == RTE_RELATION);
4799 
4800  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4801 
4802  vardata->acl_ok =
4803  rte->securityQuals == NIL &&
4804  ((pg_class_aclcheck(rte->relid, userid,
4805  ACL_SELECT) == ACLCHECK_OK) ||
4806  (pg_attribute_aclcheck(rte->relid, varattno, userid,
4807  ACL_SELECT) == ACLCHECK_OK));
4808  }
4809  }
4810  else
4811  {
4812  /* suppress any possible leakproofness checks later */
4813  vardata->acl_ok = true;
4814  }
4815  }
4816  else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
4817  {
4818  /*
4819  * Plain subquery (not one that was converted to an appendrel).
4820  */
4821  Query *subquery = rte->subquery;
4822  RelOptInfo *rel;
4823  TargetEntry *ste;
4824 
4825  /*
4826  * Punt if it's a whole-row var rather than a plain column reference.
4827  */
4828  if (var->varattno == InvalidAttrNumber)
4829  return;
4830 
4831  /*
4832  * Punt if subquery uses set operations or GROUP BY, as these will
4833  * mash underlying columns' stats beyond recognition. (Set ops are
4834  * particularly nasty; if we forged ahead, we would return stats
4835  * relevant to only the leftmost subselect...) DISTINCT is also
4836  * problematic, but we check that later because there is a possibility
4837  * of learning something even with it.
4838  */
4839  if (subquery->setOperations ||
4840  subquery->groupClause)
4841  return;
4842 
4843  /*
4844  * OK, fetch RelOptInfo for subquery. Note that we don't change the
4845  * rel returned in vardata, since caller expects it to be a rel of the
4846  * caller's query level. Because we might already be recursing, we
4847  * can't use that rel pointer either, but have to look up the Var's
4848  * rel afresh.
4849  */
4850  rel = find_base_rel(root, var->varno);
4851 
4852  /* If the subquery hasn't been planned yet, we have to punt */
4853  if (rel->subroot == NULL)
4854  return;
4855  Assert(IsA(rel->subroot, PlannerInfo));
4856 
4857  /*
4858  * Switch our attention to the subquery as mangled by the planner. It
4859  * was okay to look at the pre-planning version for the tests above,
4860  * but now we need a Var that will refer to the subroot's live
4861  * RelOptInfos. For instance, if any subquery pullup happened during
4862  * planning, Vars in the targetlist might have gotten replaced, and we
4863  * need to see the replacement expressions.
4864  */
4865  subquery = rel->subroot->parse;
4866  Assert(IsA(subquery, Query));
4867 
4868  /* Get the subquery output expression referenced by the upper Var */
4869  ste = get_tle_by_resno(subquery->targetList, var->varattno);
4870  if (ste == NULL || ste->resjunk)
4871  elog(ERROR, "subquery %s does not have attribute %d",
4872  rte->eref->aliasname, var->varattno);
4873  var = (Var *) ste->expr;
4874 
4875  /*
4876  * If subquery uses DISTINCT, we can't make use of any stats for the
4877  * variable ... but, if it's the only DISTINCT column, we are entitled
4878  * to consider it unique. We do the test this way so that it works
4879  * for cases involving DISTINCT ON.
4880  */
4881  if (subquery->distinctClause)
4882  {
4883  if (list_length(subquery->distinctClause) == 1 &&
4884  targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
4885  vardata->isunique = true;
4886  /* cannot go further */
4887  return;
4888  }
4889 
4890  /*
4891  * If the sub-query originated from a view with the security_barrier
4892  * attribute, we must not look at the variable's statistics, though it
4893  * seems all right to notice the existence of a DISTINCT clause. So
4894  * stop here.
4895  *
4896  * This is probably a harsher restriction than necessary; it's
4897  * certainly OK for the selectivity estimator (which is a C function,
4898  * and therefore omnipotent anyway) to look at the statistics. But
4899  * many selectivity estimators will happily *invoke the operator
4900  * function* to try to work out a good estimate - and that's not OK.
4901  * So for now, don't dig down for stats.
4902  */
4903  if (rte->security_barrier)
4904  return;
4905 
4906  /* Can only handle a simple Var of subquery's query level */
4907  if (var && IsA(var, Var) &&
4908  var->varlevelsup == 0)
4909  {
4910  /*
4911  * OK, recurse into the subquery. Note that the original setting
4912  * of vardata->isunique (which will surely be false) is left
4913  * unchanged in this situation. That's what we want, since even
4914  * if the underlying column is unique, the subquery may have
4915  * joined to other tables in a way that creates duplicates.
4916  */
4917  examine_simple_variable(rel->subroot, var, vardata);
4918  }
4919  }
4920  else
4921  {
4922  /*
4923  * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
4924  * won't see RTE_JOIN here because join alias Vars have already been
4925  * flattened.) There's not much we can do with function outputs, but
4926  * maybe someday try to be smarter about VALUES and/or CTEs.
4927  */
4928  }
4929 }
4930 
4931 /*
4932  * Check whether it is permitted to call func_oid passing some of the
4933  * pg_statistic data in vardata. We allow this either if the user has SELECT
4934  * privileges on the table or column underlying the pg_statistic data or if
4935  * the function is marked leak-proof.
4936  */
4937 bool
4939 {
4940  if (vardata->acl_ok)
4941  return true;
4942 
4943  if (!OidIsValid(func_oid))
4944  return false;
4945 
4946  if (get_func_leakproof(func_oid))
4947  return true;
4948 
4949  ereport(DEBUG2,
4950  (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
4951  get_func_name(func_oid))));
4952  return false;
4953 }
4954 
4955 /*
4956  * get_variable_numdistinct
4957  * Estimate the number of distinct values of a variable.
4958  *
4959  * vardata: results of examine_variable
4960  * *isdefault: set to true if the result is a default rather than based on
4961  * anything meaningful.
4962  *
4963  * NB: be careful to produce a positive integral result, since callers may
4964  * compare the result to exact integer counts, or might divide by it.
4965  */
4966 double
4967 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
4968 {
4969  double stadistinct;
4970  double stanullfrac = 0.0;
4971  double ntuples;
4972 
4973  *isdefault = false;
4974 
4975  /*
4976  * Determine the stadistinct value to use. There are cases where we can
4977  * get an estimate even without a pg_statistic entry, or can get a better
4978  * value than is in pg_statistic. Grab stanullfrac too if we can find it
4979  * (otherwise, assume no nulls, for lack of any better idea).
4980  */
4981  if (HeapTupleIsValid(vardata->statsTuple))
4982  {
4983  /* Use the pg_statistic entry */
4984  Form_pg_statistic stats;
4985 
4986  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4987  stadistinct = stats->stadistinct;
4988  stanullfrac = stats->stanullfrac;
4989  }
4990  else if (vardata->vartype == BOOLOID)
4991  {
4992  /*
4993  * Special-case boolean columns: presumably, two distinct values.
4994  *
4995  * Are there any other datatypes we should wire in special estimates
4996  * for?
4997  */
4998  stadistinct = 2.0;
4999  }
5000  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5001  {
5002  /*
5003  * If the Var represents a column of a VALUES RTE, assume it's unique.
5004  * This could of course be very wrong, but it should tend to be true
5005  * in well-written queries. We could consider examining the VALUES'
5006  * contents to get some real statistics; but that only works if the
5007  * entries are all constants, and it would be pretty expensive anyway.
5008  */
5009  stadistinct = -1.0; /* unique (and all non null) */
5010  }
5011  else
5012  {
5013  /*
5014  * We don't keep statistics for system columns, but in some cases we
5015  * can infer distinctness anyway.
5016  */
5017  if (vardata->var && IsA(vardata->var, Var))
5018  {
5019  switch (((Var *) vardata->var)->varattno)
5020  {
5022  stadistinct = -1.0; /* unique (and all non null) */
5023  break;
5025  stadistinct = 1.0; /* only 1 value */
5026  break;
5027  default:
5028  stadistinct = 0.0; /* means "unknown" */
5029  break;
5030  }
5031  }
5032  else
5033  stadistinct = 0.0; /* means "unknown" */
5034 
5035  /*
5036  * XXX consider using estimate_num_groups on expressions?
5037  */
5038  }
5039 
5040  /*
5041  * If there is a unique index or DISTINCT clause for the variable, assume
5042  * it is unique no matter what pg_statistic says; the statistics could be
5043  * out of date, or we might have found a partial unique index that proves
5044  * the var is unique for this query. However, we'd better still believe
5045  * the null-fraction statistic.
5046  */
5047  if (vardata->isunique)
5048  stadistinct = -1.0 * (1.0 - stanullfrac);
5049 
5050  /*
5051  * If we had an absolute estimate, use that.
5052  */
5053  if (stadistinct > 0.0)
5054  return clamp_row_est(stadistinct);
5055 
5056  /*
5057  * Otherwise we need to get the relation size; punt if not available.
5058  */
5059  if (vardata->rel == NULL)
5060  {
5061  *isdefault = true;
5062  return DEFAULT_NUM_DISTINCT;
5063  }
5064  ntuples = vardata->rel->tuples;
5065  if (ntuples <= 0.0)
5066  {
5067  *isdefault = true;
5068  return DEFAULT_NUM_DISTINCT;
5069  }
5070 
5071  /*
5072  * If we had a relative estimate, use that.
5073  */
5074  if (stadistinct < 0.0)
5075  return clamp_row_est(-stadistinct * ntuples);
5076 
5077  /*
5078  * With no data, estimate ndistinct = ntuples if the table is small, else
5079  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5080  * that the behavior isn't discontinuous.
5081  */
5082  if (ntuples < DEFAULT_NUM_DISTINCT)
5083  return clamp_row_est(ntuples);
5084 
5085  *isdefault = true;
5086  return DEFAULT_NUM_DISTINCT;
5087 }
5088 
5089 /*
5090  * get_variable_range
5091  * Estimate the minimum and maximum value of the specified variable.
5092  * If successful, store values in *min and *max, and return true.
5093  * If no data available, return false.
5094  *
5095  * sortop is the "<" comparison operator to use. This should generally
5096  * be "<" not ">", as only the former is likely to be found in pg_statistic.
5097  */
5098 static bool
5100  Datum *min, Datum *max)
5101 {
5102  Datum tmin = 0;
5103  Datum tmax = 0;
5104  bool have_data = false;
5105  int16 typLen;
5106  bool typByVal;
5107  Oid opfuncoid;
5108  AttStatsSlot sslot;
5109  int i;
5110 
5111  /*
5112  * XXX It's very tempting to try to use the actual column min and max, if
5113  * we can get them relatively-cheaply with an index probe. However, since
5114  * this function is called many times during join planning, that could
5115  * have unpleasant effects on planning speed. Need more investigation
5116  * before enabling this.
5117  */
5118 #ifdef NOT_USED
5119  if (get_actual_variable_range(root, vardata, sortop, min, max))
5120  return true;
5121 #endif
5122 
5123  if (!HeapTupleIsValid(vardata->statsTuple))
5124  {
5125  /* no stats available, so default result */
5126  return false;
5127  }
5128 
5129  /*
5130  * If we can't apply the sortop to the stats data, just fail. In
5131  * principle, if there's a histogram and no MCVs, we could return the
5132  * histogram endpoints without ever applying the sortop ... but it's
5133  * probably not worth trying, because whatever the caller wants to do with
5134  * the endpoints would likely fail the security check too.
5135  */
5136  if (!statistic_proc_security_check(vardata,
5137  (opfuncoid = get_opcode(sortop))))
5138  return false;
5139 
5140  get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5141 
5142  /*
5143  * If there is a histogram, grab the first and last values.
5144  *
5145  * If there is a histogram that is sorted with some other operator than
5146  * the one we want, fail --- this suggests that there is data we can't
5147  * use.
5148  */
5149  if (get_attstatsslot(&sslot, vardata->statsTuple,
5150  STATISTIC_KIND_HISTOGRAM, sortop,
5152  {
5153  if (sslot.nvalues > 0)
5154  {
5155  tmin = datumCopy(sslot.values[0], typByVal, typLen);
5156  tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
5157  have_data = true;
5158  }
5159  free_attstatsslot(&sslot);
5160  }
5161  else if (get_attstatsslot(&sslot, vardata->statsTuple,
5162  STATISTIC_KIND_HISTOGRAM, InvalidOid,
5163  0))
5164  {
5165  free_attstatsslot(&sslot);
5166  return false;
5167  }
5168 
5169  /*
5170  * If we have most-common-values info, look for extreme MCVs. This is
5171  * needed even if we also have a histogram, since the histogram excludes
5172  * the MCVs. However, usually the MCVs will not be the extreme values, so
5173  * avoid unnecessary data copying.
5174  */
5175  if (get_attstatsslot(&sslot, vardata->statsTuple,
5176  STATISTIC_KIND_MCV, InvalidOid,
5178  {
5179  bool tmin_is_mcv = false;
5180  bool tmax_is_mcv = false;
5181  FmgrInfo opproc;
5182 
5183  fmgr_info(opfuncoid, &opproc);
5184 
5185  for (i = 0; i < sslot.nvalues; i++)
5186  {
5187  if (!have_data)
5188  {
5189  tmin = tmax = sslot.values[i];
5190  tmin_is_mcv = tmax_is_mcv = have_data = true;
5191  continue;
5192  }
5193  if (DatumGetBool(FunctionCall2Coll(&opproc,
5194  sslot.stacoll,
5195  sslot.values[i], tmin)))
5196  {
5197  tmin = sslot.values[i];
5198  tmin_is_mcv = true;
5199  }
5200  if (DatumGetBool(FunctionCall2Coll(&opproc,
5201  sslot.stacoll,
5202  tmax, sslot.values[i])))
5203  {
5204  tmax = sslot.values[i];
5205  tmax_is_mcv = true;
5206  }
5207  }
5208  if (tmin_is_mcv)
5209  tmin = datumCopy(tmin, typByVal, typLen);
5210  if (tmax_is_mcv)
5211  tmax = datumCopy(tmax, typByVal, typLen);
5212  free_attstatsslot(&sslot);
5213  }
5214 
5215  *min = tmin;
5216  *max = tmax;
5217  return have_data;
5218 }
5219 
5220 
5221 /*
5222  * get_actual_variable_range
5223  * Attempt to identify the current *actual* minimum and/or maximum
5224  * of the specified variable, by looking for a suitable btree index
5225  * and fetching its low and/or high values.
5226  * If successful, store values in *min and *max, and return true.
5227  * (Either pointer can be NULL if that endpoint isn't needed.)
5228  * If no data available, return false.
5229  *
5230  * sortop is the "<" comparison operator to use.
5231  */
5232 static bool
5234  Oid sortop,
5235  Datum *min, Datum *max)
5236 {
5237  bool have_data = false;
5238  RelOptInfo *rel = vardata->rel;
5239  RangeTblEntry *rte;
5240  ListCell *lc;
5241 
5242  /* No hope if no relation or it doesn't have indexes */
5243  if (rel == NULL || rel->indexlist == NIL)
5244  return false;
5245  /* If it has indexes it must be a plain relation */
5246  rte = root->simple_rte_array[rel->relid];
5247  Assert(rte->rtekind == RTE_RELATION);
5248 
5249  /* Search through the indexes to see if any match our problem */
5250  foreach(lc, rel->indexlist)
5251  {
5253  ScanDirection indexscandir;
5254 
5255  /* Ignore non-btree indexes */
5256  if (index->relam != BTREE_AM_OID)
5257  continue;
5258 
5259  /*
5260  * Ignore partial indexes --- we only want stats that cover the entire
5261  * relation.
5262  */
5263  if (index->indpred != NIL)
5264  continue;
5265 
5266  /*
5267  * The index list might include hypothetical indexes inserted by a
5268  * get_relation_info hook --- don't try to access them.
5269  */
5270  if (index->hypothetical)
5271  continue;
5272 
5273  /*
5274  * The first index column must match the desired variable and sort
5275  * operator --- but we can use a descending-order index.
5276  */
5277  if (!match_index_to_operand(vardata->var, 0, index))
5278  continue;
5279  switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
5280  {
5281  case BTLessStrategyNumber:
5282  if (index->reverse_sort[0])
5283  indexscandir = BackwardScanDirection;
5284  else
5285  indexscandir = ForwardScanDirection;
5286  break;
5288  if (index->reverse_sort[0])
5289  indexscandir = ForwardScanDirection;
5290  else
5291  indexscandir = BackwardScanDirection;
5292  break;
5293  default:
5294  /* index doesn't match the sortop */
5295  continue;
5296  }
5297 
5298  /*
5299  * Found a suitable index to extract data from. Set up some data that
5300  * can be used by both invocations of get_actual_variable_endpoint.
5301  */
5302  {
5303  MemoryContext tmpcontext;
5304  MemoryContext oldcontext;
5305  Relation heapRel;
5306  Relation indexRel;
5307  TupleTableSlot *slot;
5308  int16 typLen;
5309  bool typByVal;
5310  ScanKeyData scankeys[1];
5311 
5312  /* Make sure any cruft gets recycled when we're done */
5314  "get_actual_variable_range workspace",
5316  oldcontext = MemoryContextSwitchTo(tmpcontext);
5317 
5318  /*
5319  * Open the table and index so we can read from them. We should
5320  * already have some type of lock on each.
5321  */
5322  heapRel = table_open(rte->relid, NoLock);
5323  indexRel = index_open(index->indexoid, NoLock);
5324 
5325  /* build some stuff needed for indexscan execution */
5326  slot = table_slot_create(heapRel, NULL);
5327  get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5328 
5329  /* set up an IS NOT NULL scan key so that we ignore nulls */
5330  ScanKeyEntryInitialize(&scankeys[0],
5332  1, /* index col to scan */
5333  InvalidStrategy, /* no strategy */
5334  InvalidOid, /* no strategy subtype */
5335  InvalidOid, /* no collation */
5336  InvalidOid, /* no reg proc for this */
5337  (Datum) 0); /* constant */
5338 
5339  /* If min is requested ... */
5340  if (min)
5341  {
5342  have_data = get_actual_variable_endpoint(heapRel,
5343  indexRel,
5344  indexscandir,
5345  scankeys,
5346  typLen,
5347  typByVal,
5348  slot,
5349  oldcontext,
5350  min);
5351  }
5352  else
5353  {
5354  /* If min not requested, assume index is nonempty */
5355  have_data = true;
5356  }
5357 
5358  /* If max is requested, and we didn't find the index is empty */
5359  if (max && have_data)
5360  {
5361  /* scan in the opposite direction; all else is the same */
5362  have_data = get_actual_variable_endpoint(heapRel,
5363  indexRel,
5364  -indexscandir,
5365  scankeys,
5366  typLen,
5367  typByVal,
5368  slot,
5369  oldcontext,
5370  max);
5371  }
5372 
5373  /* Clean everything up */
5375 
5376  index_close(indexRel, NoLock);
5377  table_close(heapRel, NoLock);
5378 
5379  MemoryContextSwitchTo(oldcontext);
5380  MemoryContextDelete(tmpcontext);
5381 
5382  /* And we're done */
5383  break;
5384  }
5385  }
5386 
5387  return have_data;
5388 }
5389 
5390 /*
5391  * Get one endpoint datum (min or max depending on indexscandir) from the
5392  * specified index. Return true if successful, false if index is empty.
5393  * On success, endpoint value is stored to *endpointDatum (and copied into
5394  * outercontext).
5395  *
5396  * scankeys is a 1-element scankey array set up to reject nulls.
5397  * typLen/typByVal describe the datatype of the index's first column.
5398  * tableslot is a slot suitable to hold table tuples, in case we need
5399  * to probe the heap.
5400  * (We could compute these values locally, but that would mean computing them
5401  * twice when get_actual_variable_range needs both the min and the max.)
5402  */
5403 static bool
5405  Relation indexRel,
5406  ScanDirection indexscandir,
5407  ScanKey scankeys,
5408  int16 typLen,
5409  bool typByVal,
5410  TupleTableSlot *tableslot,
5411  MemoryContext outercontext,
5412  Datum *endpointDatum)
5413 {
5414  bool have_data = false;
5415  SnapshotData SnapshotNonVacuumable;
5416  IndexScanDesc index_scan;
5417  Buffer vmbuffer = InvalidBuffer;
5418  ItemPointer tid;
5420  bool isnull[INDEX_MAX_KEYS];
5421  MemoryContext oldcontext;
5422 
5423  /*
5424  * We use the index-only-scan machinery for this. With mostly-static
5425  * tables that's a win because it avoids a heap visit. It's also a win
5426  * for dynamic data, but the reason is less obvious; read on for details.
5427  *
5428  * In principle, we should scan the index with our current active
5429  * snapshot, which is the best approximation we've got to what the query
5430  * will see when executed. But that won't be exact if a new snap is taken
5431  * before running the query, and it can be very expensive if a lot of
5432  * recently-dead or uncommitted rows exist at the beginning or end of the
5433  * index (because we'll laboriously fetch each one and reject it).
5434  * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
5435  * and uncommitted rows as well as normal visible rows. On the other
5436  * hand, it will reject known-dead rows, and thus not give a bogus answer
5437  * when the extreme value has been deleted (unless the deletion was quite
5438  * recent); that case motivates not using SnapshotAny here.
5439  *
5440  * A crucial point here is that SnapshotNonVacuumable, with
5441  * RecentGlobalXmin as horizon, yields the inverse of the condition that
5442  * the indexscan will use to decide that index entries are killable (see
5443  * heap_hot_search_buffer()). Therefore, if the snapshot rejects a tuple
5444  * (or more precisely, all tuples of a HOT chain) and we have to continue
5445  * scanning past it, we know that the indexscan will mark that index entry
5446  * killed. That means that the next get_actual_variable_endpoint() call
5447  * will not have to re-consider that index entry. In this way we avoid
5448  * repetitive work when this function is used a lot during planning.
5449  *
5450  * But using SnapshotNonVacuumable creates a hazard of its own. In a
5451  * recently-created index, some index entries may point at "broken" HOT
5452  * chains in which not all the tuple versions contain data matching the
5453  * index entry. The live tuple version(s) certainly do match the index,
5454  * but SnapshotNonVacuumable can accept recently-dead tuple versions that
5455  * don't match. Hence, if we took data from the selected heap tuple, we
5456  * might get a bogus answer that's not close to the index extremal value,
5457  * or could even be NULL. We avoid this hazard because we take the data
5458  * from the index entry not the heap.
5459  */
5460  InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
5461 
5462  index_scan = index_beginscan(heapRel, indexRel,
5463  &SnapshotNonVacuumable,
5464  1, 0);
5465  /* Set it up for index-only scan */
5466  index_scan->xs_want_itup = true;
5467  index_rescan(index_scan, scankeys, 1, NULL, 0);
5468 
5469  /* Fetch first/next tuple in specified direction */
5470  while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
5471  {
5472  if (!VM_ALL_VISIBLE(heapRel,
5474  &vmbuffer))
5475  {
5476  /* Rats, we have to visit the heap to check visibility */
5477  if (!index_fetch_heap(index_scan, tableslot))
5478  continue; /* no visible tuple, try next index entry */
5479 
5480  /* We don't actually need the heap tuple for anything */
5481  ExecClearTuple(tableslot);
5482 
5483  /*
5484  * We don't care whether there's more than one visible tuple in
5485  * the HOT chain; if any are visible, that's good enough.
5486  */
5487  }
5488 
5489  /*
5490  * We expect that btree will return data in IndexTuple not HeapTuple
5491  * format. It's not lossy either.
5492  */
5493  if (!index_scan->xs_itup)
5494  elog(ERROR, "no data returned for index-only scan");
5495  if (index_scan->xs_recheck)
5496  elog(ERROR, "unexpected recheck indication from btree");
5497 
5498  /* OK to deconstruct the index tuple */
5499  index_deform_tuple(index_scan->xs_itup,
5500  index_scan->xs_itupdesc,
5501  values, isnull);
5502 
5503  /* Shouldn't have got a null, but be careful */
5504  if (isnull[0])
5505  elog(ERROR, "found unexpected null value in index \"%s\"",
5506  RelationGetRelationName(indexRel));
5507 
5508  /* Copy the index column value out to caller's context */
5509  oldcontext = MemoryContextSwitchTo(outercontext);
5510  *endpointDatum = datumCopy(values[0], typByVal, typLen);
5511  MemoryContextSwitchTo(oldcontext);
5512  have_data = true;
5513  break;
5514  }
5515 
5516  if (vmbuffer != InvalidBuffer)
5517  ReleaseBuffer(vmbuffer);
5518  index_endscan(index_scan);
5519 
5520  return have_data;
5521 }
5522 
5523 /*
5524  * find_join_input_rel
5525  * Look up the input relation for a join.
5526  *
5527  * We assume that the input relation's RelOptInfo must have been constructed
5528  * already.
5529  */
5530 static RelOptInfo *
5532 {
5533  RelOptInfo *rel = NULL;
5534 
5535  switch (bms_membership(relids))
5536  {
5537  case BMS_EMPTY_SET:
5538  /* should not happen */
5539  break;
5540  case BMS_SINGLETON:
5541  rel = find_base_rel(root, bms_singleton_member(relids));
5542  break;
5543  case BMS_MULTIPLE:
5544  rel = find_join_rel(root, relids);
5545  break;
5546  }
5547 
5548  if (rel == NULL)
5549  elog(ERROR, "could not find RelOptInfo for given relids");
5550 
5551  return rel;
5552 }
5553 
5554 
5555 /*-------------------------------------------------------------------------
5556  *
5557  * Index cost estimation functions
5558  *
5559  *-------------------------------------------------------------------------
5560  */
5561 
5562 /*
5563  * Extract the actual indexquals (as RestrictInfos) from an IndexClause list
5564  */
5565 List *
5567 {
5568  List *result = NIL;
5569  ListCell *lc;
5570 
5571  foreach(lc, indexclauses)
5572  {
5573  IndexClause *iclause = lfirst_node(IndexClause, lc);
5574  ListCell *lc2;
5575 
5576  foreach(lc2, iclause->indexquals)
5577  {
5578  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5579 
5580  result = lappend(result, rinfo);
5581  }
5582  }
5583  return result;
5584 }
5585 
5586 /*
5587  * Compute the total evaluation cost of the comparison operands in a list
5588  * of index qual expressions. Since we know these will be evaluated just
5589  * once per scan, there's no need to distinguish startup from per-row cost.
5590  *
5591  * This can be used either on the result of get_quals_from_indexclauses(),
5592  * or directly on an indexorderbys list. In both cases, we expect that the
5593  * index key expression is on the left side of binary clauses.
5594  */
5595 Cost
5597 {
5598  Cost qual_arg_cost = 0;
5599  ListCell *lc;
5600 
5601  foreach(lc, indexquals)
5602  {
5603  Expr *clause = (Expr *) lfirst(lc);
5604  Node *other_operand;
5605  QualCost index_qual_cost;
5606 
5607  /*
5608  * Index quals will have RestrictInfos, indexorderbys won't. Look
5609  * through RestrictInfo if present.
5610  */
5611  if (IsA(clause, RestrictInfo))
5612  clause = ((RestrictInfo *) clause)->clause;
5613 
5614  if (IsA(clause, OpExpr))
5615  {
5616  OpExpr *op = (OpExpr *) clause;
5617 
5618  other_operand = (Node *) lsecond(op->args);
5619  }
5620  else if (IsA(clause, RowCompareExpr))
5621  {
5622  RowCompareExpr *rc = (RowCompareExpr *) clause;
5623 
5624  other_operand = (Node *) rc->rargs;
5625  }
5626  else if (IsA(clause, ScalarArrayOpExpr))
5627  {
5628  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5629 
5630  other_operand = (Node *) lsecond(saop->args);
5631  }
5632  else if (IsA(clause, NullTest))
5633  {
5634  other_operand = NULL;
5635  }
5636  else
5637  {
5638  elog(ERROR, "unsupported indexqual type: %d",
5639  (int) nodeTag(clause));
5640  other_operand = NULL; /* keep compiler quiet */
5641  }
5642 
5643  cost_qual_eval_node(&index_qual_cost, other_operand, root);
5644  qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5645  }
5646  return qual_arg_cost;
5647 }
5648 
5649 void
5651  IndexPath *path,
5652  double loop_count,
5653  GenericCosts *costs)
5654 {
5655  IndexOptInfo *index = path->indexinfo;
5656  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
5657  List *indexOrderBys = path->indexorderbys;
5658  Cost indexStartupCost;
5659  Cost indexTotalCost;
5660  Selectivity indexSelectivity;
5661  double indexCorrelation;
5662  double numIndexPages;
5663  double numIndexTuples;
5664  double spc_random_page_cost;
5665  double num_sa_scans;
5666  double num_outer_scans;
5667  double num_scans;
5668  double qual_op_cost;
5669  double qual_arg_cost;
5670  List *selectivityQuals;
5671  ListCell *l;
5672 
5673  /*
5674  * If the index is partial, AND the index predicate with the explicitly
5675  * given indexquals to produce a more accurate idea of the index
5676  * selectivity.
5677  */
5678  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
5679 
5680  /*
5681  * Check for ScalarArrayOpExpr index quals, and estimate the number of
5682  * index scans that will be performed.
5683  */
5684  num_sa_scans = 1;
5685  foreach(l, indexQuals)
5686  {
5687  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
5688 
5689  if (IsA(rinfo->clause, ScalarArrayOpExpr))
5690  {
5691  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
5692  int alength = estimate_array_length(lsecond(saop->args));
5693 
5694  if (alength > 1)
5695  num_sa_scans *= alength;
5696  }
5697  }
5698 
5699  /* Estimate the fraction of main-table tuples that will be visited */
5700  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
5701  index->rel->relid,
5702  JOIN_INNER,
5703  NULL);
5704 
5705  /*
5706  * If caller didn't give us an estimate, estimate the number of index
5707  * tuples that will be visited. We do it in this rather peculiar-looking
5708  * way in order to get the right answer for partial indexes.
5709  */
5710  numIndexTuples = costs->numIndexTuples;
5711  if (numIndexTuples <= 0.0)
5712  {
5713  numIndexTuples = indexSelectivity * index->rel->tuples;
5714 
5715  /*
5716  * The above calculation counts all the tuples visited across all
5717  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
5718  * average per-indexscan number, so adjust. This is a handy place to
5719  * round to integer, too. (If caller supplied tuple estimate, it's
5720  * responsible for handling these considerations.)
5721  */
5722  numIndexTuples = rint(numIndexTuples / num_sa_scans);
5723  }
5724 
5725  /*
5726  * We can bound the number of tuples by the index size in any case. Also,
5727  * always estimate at least one tuple is touched, even when
5728  * indexSelectivity estimate is tiny.
5729  */
5730  if (numIndexTuples > index->tuples)
5731  numIndexTuples = index->tuples;
5732  if (numIndexTuples < 1.0)
5733  numIndexTuples = 1.0;
5734 
5735  /*
5736  * Estimate the number of index pages that will be retrieved.
5737  *
5738  * We use the simplistic method of taking a pro-rata fraction of the total
5739  * number of index pages. In effect, this counts only leaf pages and not
5740  * any overhead such as index metapage or upper tree levels.
5741  *
5742  * In practice access to upper index levels is often nearly free because
5743  * those tend to stay in cache under load; moreover, the cost involved is
5744  * highly dependent on index type. We therefore ignore such costs here
5745  * and leave it to the caller to add a suitable charge if needed.
5746  */
5747  if (index->pages > 1 && index->tuples > 1)
5748  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
5749  else
5750  numIndexPages = 1.0;
5751 
5752  /* fetch estimated page cost for tablespace containing index */
5754  &spc_random_page_cost,
5755  NULL);
5756 
5757  /*
5758  * Now compute the disk access costs.
5759  *
5760  * The above calculations are all per-index-scan. However, if we are in a
5761  * nestloop inner scan, we can expect the scan to be repeated (with
5762  * different search keys) for each row of the outer relation. Likewise,
5763  * ScalarArrayOpExpr quals result in multiple index scans. This creates
5764  * the potential for cache effects to reduce the number of disk page
5765  * fetches needed. We want to estimate the average per-scan I/O cost in
5766  * the presence of caching.
5767  *
5768  * We use the Mackert-Lohman formula (see costsize.c for details) to
5769  * estimate the total number of page fetches that occur. While this
5770  * wasn't what it was designed for, it seems a reasonable model anyway.
5771  * Note that we are counting pages not tuples anymore, so we take N = T =
5772  * index size, as if there were one "tuple" per page.
5773  */
5774  num_outer_scans = loop_count;
5775  num_scans = num_sa_scans * num_outer_scans;
5776 
5777  if (num_scans > 1)
5778  {
5779  double pages_fetched;
5780 
5781  /* total page fetches ignoring cache effects */
5782  pages_fetched = numIndexPages * num_scans;
5783 
5784  /* use Mackert and Lohman formula to adjust for cache effects */
5785  pages_fetched = index_pages_fetched(pages_fetched,
5786  index->pages,
5787  (double) index->pages,
5788  root);
5789 
5790  /*
5791  * Now compute the total disk access cost, and then report a pro-rated
5792  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
5793  * since that's internal to the indexscan.)
5794  */
5795  indexTotalCost = (pages_fetched * spc_random_page_cost)
5796  / num_outer_scans;
5797  }
5798  else
5799  {
5800  /*
5801  * For a single index scan, we just charge spc_random_page_cost per
5802  * page touched.
5803  */
5804  indexTotalCost = numIndexPages * spc_random_page_cost;
5805  }
5806 
5807  /*
5808  * CPU cost: any complex expressions in the indexquals will need to be
5809  * evaluated once at the start of the scan to reduce them to runtime keys
5810  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
5811  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
5812  * indexqual operator. Because we have numIndexTuples as a per-scan
5813  * number, we have to multiply by num_sa_scans to get the correct result
5814  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
5815  * ORDER BY expressions.
5816  *
5817  * Note: this neglects the possible costs of rechecking lossy operators.
5818  * Detecting that that might be needed seems more expensive than it's
5819  * worth, though, considering all the other inaccuracies here ...
5820  */
5821  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
5822  index_other_operands_eval_cost(root, indexOrderBys);
5823  qual_op_cost = cpu_operator_cost *
5824  (list_length(indexQuals) + list_length(indexOrderBys));
5825 
5826  indexStartupCost = qual_arg_cost;
5827  indexTotalCost += qual_arg_cost;
5828  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
5829 
5830  /*
5831  * Generic assumption about index correlation: there isn't any.
5832  */
5833  indexCorrelation = 0.0;
5834 
5835  /*
5836  * Return everything to caller.
5837  */
5838  costs->indexStartupCost = indexStartupCost;
5839  costs->indexTotalCost = indexTotalCost;
5840  costs->indexSelectivity = indexSelectivity;
5841  costs->indexCorrelation = indexCorrelation;
5842  costs->numIndexPages = numIndexPages;
5843  costs->numIndexTuples = numIndexTuples;
5844  costs->spc_random_page_cost = spc_random_page_cost;
5845  costs->num_sa_scans = num_sa_scans;
5846 }
5847 
5848 /*
5849  * If the index is partial, add its predicate to the given qual list.
5850  *
5851  * ANDing the index predicate with the explicitly given indexquals produces
5852  * a more accurate idea of the index's selectivity. However, we need to be
5853  * careful not to insert redundant clauses, because clauselist_selectivity()
5854  * is easily fooled into computing a too-low selectivity estimate. Our
5855  * approach is to add only the predicate clause(s) that cannot be proven to
5856  * be implied by the given indexquals. This successfully handles cases such
5857  * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
5858  * There are many other cases where we won't detect redundancy, leading to a
5859  * too-low selectivity estimate, which will bias the system in favor of using
5860  * partial indexes where possible. That is not necessarily bad though.
5861  *
5862  * Note that indexQuals contains RestrictInfo nodes while the indpred
5863  * does not, so the output list will be mixed. This is OK for both
5864  * predicate_implied_by() and clauselist_selectivity(), but might be
5865  * problematic if the result were passed to other things.
5866  */
5867 List *
5869 {
5870  List *predExtraQuals = NIL;
5871  ListCell *lc;
5872 
5873  if (index->indpred == NIL)
5874  return indexQuals;
5875 
5876  foreach(lc, index->indpred)
5877  {
5878  Node *predQual = (Node *) lfirst(lc);
5879  List *oneQual = list_make1(predQual);
5880 
5881  if (!predicate_implied_by(oneQual, indexQuals, false))
5882  predExtraQuals = list_concat(predExtraQuals, oneQual);
5883  }
5884  return list_concat(predExtraQuals, indexQuals);
5885 }
5886 
5887 
5888 void
5889 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
5890  Cost *indexStartupCost, Cost *indexTotalCost,
5891  Selectivity *indexSelectivity, double *indexCorrelation,
5892  double *indexPages)
5893 {
5894  IndexOptInfo *index = path->indexinfo;
5895  GenericCosts costs;
5896  Oid relid;
5897  AttrNumber colnum;
5898  VariableStatData vardata;
5899  double numIndexTuples;
5900  Cost descentCost;
5901  List *indexBoundQuals;
5902  int indexcol;
5903  bool eqQualHere;
5904  bool found_saop;
5905  bool found_is_null_op;
5906  double num_sa_scans;
5907  ListCell *lc;
5908 
5909  /*
5910  * For a btree scan, only leading '=' quals plus inequality quals for the
5911  * immediately next attribute contribute to index selectivity (these are
5912  * the "boundary quals" that determine the starting and stopping points of
5913  * the index scan). Additional quals can suppress visits to the heap, so
5914  * it's OK to count them in indexSelectivity, but they should not count
5915  * for estimating numIndexTuples. So we must examine the given indexquals
5916  * to find out which ones count as boundary quals. We rely on the
5917  * knowledge that they are given in index column order.
5918  *
5919  * For a RowCompareExpr, we consider only the first column, just as
5920  * rowcomparesel() does.
5921  *
5922  * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
5923  * index scans not one, but the ScalarArrayOpExpr's operator can be
5924  * considered to act the same as it normally does.
5925  */
5926  indexBoundQuals = NIL;
5927  indexcol = 0;
5928  eqQualHere = false;
5929  found_saop = false;
5930  found_is_null_op = false;
5931  num_sa_scans = 1;
5932  foreach(lc, path->indexclauses)
5933  {
5934  IndexClause *iclause = lfirst_node(IndexClause, lc);
5935  ListCell *lc2;
5936 
5937  if (indexcol != iclause->indexcol)
5938  {
5939  /* Beginning of a new column's quals */
5940  if (!eqQualHere)
5941  break; /* done if no '=' qual for indexcol */
5942  eqQualHere = false;
5943  indexcol++;
5944  if (indexcol != iclause->indexcol)
5945  break; /* no quals at all for indexcol */
5946  }
5947 
5948  /* Examine each indexqual associated with this index clause */
5949  foreach(lc2, iclause->indexquals)
5950  {
5951  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5952  Expr *clause = rinfo->clause;
5953  Oid clause_op = InvalidOid;
5954  int op_strategy;
5955 
5956  if (IsA(clause, OpExpr))
5957  {
5958  OpExpr *op = (OpExpr *) clause;
5959 
5960  clause_op = op->opno;
5961  }
5962  else if (IsA(clause, RowCompareExpr))
5963  {
5964  RowCompareExpr *rc = (RowCompareExpr *) clause;
5965 
5966  clause_op = linitial_oid(rc->opnos);
5967  }
5968  else if (IsA(clause, ScalarArrayOpExpr))
5969  {
5970  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5971  Node *other_operand = (Node *) lsecond(saop->args);
5972  int alength = estimate_array_length(other_operand);
5973 
5974  clause_op = saop->opno;
5975  found_saop = true;
5976  /* count number of SA scans induced by indexBoundQuals only */
5977  if (alength > 1)
5978  num_sa_scans *= alength;
5979  }
5980  else if (IsA(clause, NullTest))
5981  {
5982  NullTest *nt = (NullTest *) clause;
5983 
5984  if (nt->nulltesttype == IS_NULL)
5985  {
5986  found_is_null_op = true;
5987  /* IS NULL is like = for selectivity purposes */
5988  eqQualHere = true;
5989  }
5990  }
5991  else
5992  elog(ERROR, "unsupported indexqual type: %d",
5993  (int) nodeTag(clause));
5994 
5995  /* check for equality operator */
5996  if (OidIsValid(clause_op))
5997  {
5998  op_strategy = get_op_opfamily_strategy(clause_op,
5999  index->opfamily[indexcol]);
6000  Assert(op_strategy != 0); /* not a member of opfamily?? */
6001  if (op_strategy == BTEqualStrategyNumber)
6002  eqQualHere = true;
6003  }
6004 
6005  indexBoundQuals = lappend(indexBoundQuals, rinfo);
6006  }
6007  }
6008 
6009  /*
6010  * If index is unique and we found an '=' clause for each column, we can
6011  * just assume numIndexTuples = 1 and skip the expensive
6012  * clauselist_selectivity calculations. However, a ScalarArrayOp or
6013  * NullTest invalidates that theory, even though it sets eqQualHere.
6014  */
6015  if (index->unique &&
6016  indexcol == index->nkeycolumns - 1 &&
6017  eqQualHere &&
6018  !found_saop &&
6019  !found_is_null_op)
6020  numIndexTuples = 1.0;
6021  else
6022  {
6023  List *selectivityQuals;
6024  Selectivity btreeSelectivity;
6025 
6026  /*
6027  * If the index is partial, AND the index predicate with the
6028  * index-bound quals to produce a more accurate idea of the number of
6029  * rows covered by the bound conditions.
6030  */
6031  selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
6032 
6033  btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6034  index->rel->relid,
6035  JOIN_INNER,
6036  NULL);
6037  numIndexTuples = btreeSelectivity * index->rel->tuples;
6038 
6039  /*
6040  * As in genericcostestimate(), we have to adjust for any
6041  * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6042  * to integer.
6043  */
6044  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6045  }
6046 
6047  /*
6048  * Now do generic index cost estimation.
6049  */
6050  MemSet(&costs, 0, sizeof(costs));
6051  costs.numIndexTuples = numIndexTuples;
6052 
6053  genericcostestimate(root, path, loop_count, &costs);
6054 
6055  /*
6056  * Add a CPU-cost component to represent the costs of initial btree
6057  * descent. We don't charge any I/O cost for touching upper btree levels,
6058  * since they tend to stay in cache, but we still have to do about log2(N)
6059  * comparisons to descend a btree of N leaf tuples. We charge one
6060  * cpu_operator_cost per comparison.
6061  *
6062  * If there are ScalarArrayOpExprs, charge this once per SA scan. The
6063  * ones after the first one are not startup cost so far as the overall
6064  * plan is concerned, so add them only to "total" cost.
6065  */
6066  if (index->tuples > 1) /* avoid computing log(0) */
6067  {
6068  descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
6069  costs.indexStartupCost += descentCost;
6070  costs.indexTotalCost += costs.num_sa_scans * descentCost;
6071  }
6072 
6073  /*
6074  * Even though we're not charging I/O cost for touching upper btree pages,
6075  * it's still reasonable to charge some CPU cost per page descended
6076  * through. Moreover, if we had no such charge at all, bloated indexes
6077  * would appear to have the same search cost as unbloated ones, at least
6078  * in cases where only a single leaf page is expected to be visited. This
6079  * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
6080  * touched. The number of such pages is btree tree height plus one (ie,
6081  * we charge for the leaf page too). As above, charge once per SA scan.
6082  */
6083  descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6084  costs.indexStartupCost += descentCost;
6085  costs.indexTotalCost += costs.num_sa_scans * descentCost;
6086 
6087  /*
6088  * If we can get an estimate of the first column's ordering correlation C
6089  * from pg_statistic, estimate the index correlation as C for a
6090  * single-column index, or C * 0.75 for multiple columns. (The idea here
6091  * is that multiple columns dilute the importance of the first column's
6092  * ordering, but don't negate it entirely. Before 8.0 we divided the
6093  * correlation by the number of columns, but that seems too strong.)
6094  */
6095  MemSet(&vardata, 0, sizeof(vardata));
6096 
6097  if (index->indexkeys[0] != 0)
6098  {
6099  /* Simple variable --- look to stats for the underlying table */
6100  RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6101 
6102  Assert(rte->rtekind == RTE_RELATION);
6103  relid = rte->relid;
6104  Assert(relid != InvalidOid);
6105  colnum = index->indexkeys[0];
6106 
6108  (*get_relation_stats_hook) (root, rte, colnum, &vardata))
6109  {
6110  /*
6111  * The hook took control of acquiring a stats tuple. If it did
6112  * supply a tuple, it'd better have supplied a freefunc.
6113  */
6114  if (HeapTupleIsValid(vardata.statsTuple) &&
6115  !vardata.freefunc)
6116  elog(ERROR, "no function provided to release variable stats with");
6117  }
6118  else
6119  {
6121  ObjectIdGetDatum(relid),
6122  Int16GetDatum(colnum),
6123  BoolGetDatum(rte->inh));
6124  vardata.freefunc = ReleaseSysCache;
6125  }
6126  }
6127  else
6128  {
6129  /* Expression --- maybe there are stats for the index itself */
6130  relid = index->indexoid;
6131  colnum = 1;
6132 
6133  if (get_index_stats_hook &&
6134  (*get_index_stats_hook) (root, relid, colnum, &vardata))
6135  {
6136  /*
6137  * The hook took control of acquiring a stats tuple. If it did
6138  * supply a tuple, it'd better have supplied a freefunc.
6139  */
6140  if (HeapTupleIsValid(vardata.statsTuple) &&
6141  !vardata.freefunc)
6142  elog(ERROR, "no function provided to release variable stats with");
6143  }
6144  else
6145  {
6147  ObjectIdGetDatum(relid),
6148  Int16GetDatum(colnum),
6149  BoolGetDatum(false));
6150  vardata.freefunc = ReleaseSysCache;
6151  }
6152  }
6153 
6154  if (HeapTupleIsValid(vardata.statsTuple))
6155  {
6156  Oid sortop;
6157  AttStatsSlot sslot;
6158 
6159  sortop = get_opfamily_member(index->opfamily[0],
6160  index->opcintype[0],
6161  index->opcintype[0],
6163  if (OidIsValid(sortop) &&