PostgreSQL Source Code  git master
clausesel.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * clausesel.c
4  * Routines to compute clause selectivities
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/path/clausesel.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "nodes/makefuncs.h"
18 #include "nodes/nodeFuncs.h"
19 #include "optimizer/clauses.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/optimizer.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/plancat.h"
24 #include "statistics/statistics.h"
25 #include "utils/fmgroids.h"
26 #include "utils/lsyscache.h"
27 #include "utils/selfuncs.h"
28 
29 /*
30  * Data structure for accumulating info about possible range-query
31  * clause pairs in clauselist_selectivity.
32  */
33 typedef struct RangeQueryClause
34 {
35  struct RangeQueryClause *next; /* next in linked list */
36  Node *var; /* The common variable of the clauses */
37  bool have_lobound; /* found a low-bound clause yet? */
38  bool have_hibound; /* found a high-bound clause yet? */
39  Selectivity lobound; /* Selectivity of a var > something clause */
40  Selectivity hibound; /* Selectivity of a var < something clause */
42 
43 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44  bool varonleft, bool isLTsel, Selectivity s2);
46  List *clauses);
48  List *clauses,
49  int varRelid,
50  JoinType jointype,
51  SpecialJoinInfo *sjinfo,
52  bool use_extended_stats);
53 
54 /****************************************************************************
55  * ROUTINES TO COMPUTE SELECTIVITIES
56  ****************************************************************************/
57 
58 /*
59  * clauselist_selectivity -
60  * Compute the selectivity of an implicitly-ANDed list of boolean
61  * expression clauses. The list can be empty, in which case 1.0
62  * must be returned. List elements may be either RestrictInfos
63  * or bare expression clauses --- the former is preferred since
64  * it allows caching of results.
65  *
66  * See clause_selectivity() for the meaning of the additional parameters.
67  *
68  * The basic approach is to apply extended statistics first, on as many
69  * clauses as possible, in order to capture cross-column dependencies etc.
70  * The remaining clauses are then estimated by taking the product of their
71  * selectivities, but that's only right if they have independent
72  * probabilities, and in reality they are often NOT independent even if they
73  * only refer to a single column. So, we want to be smarter where we can.
74  *
75  * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
76  * are recognized as possible range query components if they are restriction
77  * opclauses whose operators have scalarltsel or a related function as their
78  * restriction selectivity estimator. We pair up clauses of this form that
79  * refer to the same variable. An unpairable clause of this kind is simply
80  * multiplied into the selectivity product in the normal way. But when we
81  * find a pair, we know that the selectivities represent the relative
82  * positions of the low and high bounds within the column's range, so instead
83  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84  * losel - 1. (To visualize this, see that hisel is the fraction of the range
85  * below the high bound, while losel is the fraction above the low bound; so
86  * hisel can be interpreted directly as a 0..1 value but we need to convert
87  * losel to 1-losel before interpreting it as a value. Then the available
88  * range is 1-losel to hisel. However, this calculation double-excludes
89  * nulls, so really we need hisel + losel + null_frac - 1.)
90  *
91  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92  * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
93  * yields an impossible (negative) result.
94  *
95  * A free side-effect is that we can recognize redundant inequalities such
96  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97  *
98  * Of course this is all very dependent on the behavior of the inequality
99  * selectivity functions; perhaps some day we can generalize the approach.
100  */
103  List *clauses,
104  int varRelid,
105  JoinType jointype,
106  SpecialJoinInfo *sjinfo)
107 {
108  return clauselist_selectivity_ext(root, clauses, varRelid,
109  jointype, sjinfo, true);
110 }
111 
112 /*
113  * clauselist_selectivity_ext -
114  * Extended version of clauselist_selectivity(). If "use_extended_stats"
115  * is false, all extended statistics will be ignored, and only per-column
116  * statistics will be used.
117  */
120  List *clauses,
121  int varRelid,
122  JoinType jointype,
123  SpecialJoinInfo *sjinfo,
124  bool use_extended_stats)
125 {
126  Selectivity s1 = 1.0;
127  RelOptInfo *rel;
128  Bitmapset *estimatedclauses = NULL;
129  RangeQueryClause *rqlist = NULL;
130  ListCell *l;
131  int listidx;
132 
133  /*
134  * If there's exactly one clause, just go directly to
135  * clause_selectivity_ext(). None of what we might do below is relevant.
136  */
137  if (list_length(clauses) == 1)
138  return clause_selectivity_ext(root, (Node *) linitial(clauses),
139  varRelid, jointype, sjinfo,
140  use_extended_stats);
141 
142  /*
143  * Determine if these clauses reference a single relation. If so, and if
144  * it has extended statistics, try to apply those.
145  */
146  rel = find_single_rel_for_clauses(root, clauses);
147  if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148  {
149  /*
150  * Estimate as many clauses as possible using extended statistics.
151  *
152  * 'estimatedclauses' is populated with the 0-based list position
153  * index of clauses estimated here, and that should be ignored below.
154  */
155  s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156  jointype, sjinfo, rel,
157  &estimatedclauses, false);
158  }
159 
160  /*
161  * Apply normal selectivity estimates for remaining clauses. We'll be
162  * careful to skip any clauses which were already estimated above.
163  *
164  * Anything that doesn't look like a potential rangequery clause gets
165  * multiplied into s1 and forgotten. Anything that does gets inserted into
166  * an rqlist entry.
167  */
168  listidx = -1;
169  foreach(l, clauses)
170  {
171  Node *clause = (Node *) lfirst(l);
172  RestrictInfo *rinfo;
173  Selectivity s2;
174 
175  listidx++;
176 
177  /*
178  * Skip this clause if it's already been estimated by some other
179  * statistics above.
180  */
181  if (bms_is_member(listidx, estimatedclauses))
182  continue;
183 
184  /* Compute the selectivity of this clause in isolation */
185  s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186  use_extended_stats);
187 
188  /*
189  * Check for being passed a RestrictInfo.
190  *
191  * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192  * 0.0; just use that rather than looking for range pairs.
193  */
194  if (IsA(clause, RestrictInfo))
195  {
196  rinfo = (RestrictInfo *) clause;
197  if (rinfo->pseudoconstant)
198  {
199  s1 = s1 * s2;
200  continue;
201  }
202  clause = (Node *) rinfo->clause;
203  }
204  else
205  rinfo = NULL;
206 
207  /*
208  * See if it looks like a restriction clause with a pseudoconstant on
209  * one side. (Anything more complicated than that might not behave in
210  * the simple way we are expecting.) Most of the tests here can be
211  * done more efficiently with rinfo than without.
212  */
213  if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214  {
215  OpExpr *expr = (OpExpr *) clause;
216  bool varonleft = true;
217  bool ok;
218 
219  if (rinfo)
220  {
221  ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
223  rinfo->right_relids) ||
224  (varonleft = false,
226  rinfo->left_relids)));
227  }
228  else
229  {
230  ok = (NumRelids(root, clause) == 1) &&
232  (varonleft = false,
234  }
235 
236  if (ok)
237  {
238  /*
239  * If it's not a "<"/"<="/">"/">=" operator, just merge the
240  * selectivity in generically. But if it's the right oprrest,
241  * add the clause to rqlist for later processing.
242  */
243  switch (get_oprrest(expr->opno))
244  {
245  case F_SCALARLTSEL:
246  case F_SCALARLESEL:
247  addRangeClause(&rqlist, clause,
248  varonleft, true, s2);
249  break;
250  case F_SCALARGTSEL:
251  case F_SCALARGESEL:
252  addRangeClause(&rqlist, clause,
253  varonleft, false, s2);
254  break;
255  default:
256  /* Just merge the selectivity in generically */
257  s1 = s1 * s2;
258  break;
259  }
260  continue; /* drop to loop bottom */
261  }
262  }
263 
264  /* Not the right form, so treat it generically. */
265  s1 = s1 * s2;
266  }
267 
268  /*
269  * Now scan the rangequery pair list.
270  */
271  while (rqlist != NULL)
272  {
273  RangeQueryClause *rqnext;
274 
275  if (rqlist->have_lobound && rqlist->have_hibound)
276  {
277  /* Successfully matched a pair of range clauses */
278  Selectivity s2;
279 
280  /*
281  * Exact equality to the default value probably means the
282  * selectivity function punted. This is not airtight but should
283  * be good enough.
284  */
285  if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286  rqlist->lobound == DEFAULT_INEQ_SEL)
287  {
289  }
290  else
291  {
292  s2 = rqlist->hibound + rqlist->lobound - 1.0;
293 
294  /* Adjust for double-exclusion of NULLs */
295  s2 += nulltestsel(root, IS_NULL, rqlist->var,
296  varRelid, jointype, sjinfo);
297 
298  /*
299  * A zero or slightly negative s2 should be converted into a
300  * small positive value; we probably are dealing with a very
301  * tight range and got a bogus result due to roundoff errors.
302  * However, if s2 is very negative, then we probably have
303  * default selectivity estimates on one or both sides of the
304  * range that we failed to recognize above for some reason.
305  */
306  if (s2 <= 0.0)
307  {
308  if (s2 < -0.01)
309  {
310  /*
311  * No data available --- use a default estimate that
312  * is small, but not real small.
313  */
315  }
316  else
317  {
318  /*
319  * It's just roundoff error; use a small positive
320  * value
321  */
322  s2 = 1.0e-10;
323  }
324  }
325  }
326  /* Merge in the selectivity of the pair of clauses */
327  s1 *= s2;
328  }
329  else
330  {
331  /* Only found one of a pair, merge it in generically */
332  if (rqlist->have_lobound)
333  s1 *= rqlist->lobound;
334  else
335  s1 *= rqlist->hibound;
336  }
337  /* release storage and advance */
338  rqnext = rqlist->next;
339  pfree(rqlist);
340  rqlist = rqnext;
341  }
342 
343  return s1;
344 }
345 
346 /*
347  * clauselist_selectivity_or -
348  * Compute the selectivity of an implicitly-ORed list of boolean
349  * expression clauses. The list can be empty, in which case 0.0
350  * must be returned. List elements may be either RestrictInfos
351  * or bare expression clauses --- the former is preferred since
352  * it allows caching of results.
353  *
354  * See clause_selectivity() for the meaning of the additional parameters.
355  *
356  * The basic approach is to apply extended statistics first, on as many
357  * clauses as possible, in order to capture cross-column dependencies etc.
358  * The remaining clauses are then estimated as if they were independent.
359  */
360 static Selectivity
362  List *clauses,
363  int varRelid,
364  JoinType jointype,
365  SpecialJoinInfo *sjinfo,
366  bool use_extended_stats)
367 {
368  Selectivity s1 = 0.0;
369  RelOptInfo *rel;
370  Bitmapset *estimatedclauses = NULL;
371  ListCell *lc;
372  int listidx;
373 
374  /*
375  * Determine if these clauses reference a single relation. If so, and if
376  * it has extended statistics, try to apply those.
377  */
378  rel = find_single_rel_for_clauses(root, clauses);
379  if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380  {
381  /*
382  * Estimate as many clauses as possible using extended statistics.
383  *
384  * 'estimatedclauses' is populated with the 0-based list position
385  * index of clauses estimated here, and that should be ignored below.
386  */
387  s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388  jointype, sjinfo, rel,
389  &estimatedclauses, true);
390  }
391 
392  /*
393  * Estimate the remaining clauses as if they were independent.
394  *
395  * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396  * for the probable overlap of selected tuple sets.
397  *
398  * XXX is this too conservative?
399  */
400  listidx = -1;
401  foreach(lc, clauses)
402  {
403  Selectivity s2;
404 
405  listidx++;
406 
407  /*
408  * Skip this clause if it's already been estimated by some other
409  * statistics above.
410  */
411  if (bms_is_member(listidx, estimatedclauses))
412  continue;
413 
414  s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415  jointype, sjinfo, use_extended_stats);
416 
417  s1 = s1 + s2 - s1 * s2;
418  }
419 
420  return s1;
421 }
422 
423 /*
424  * addRangeClause --- add a new range clause for clauselist_selectivity
425  *
426  * Here is where we try to match up pairs of range-query clauses
427  */
428 static void
430  bool varonleft, bool isLTsel, Selectivity s2)
431 {
432  RangeQueryClause *rqelem;
433  Node *var;
434  bool is_lobound;
435 
436  if (varonleft)
437  {
438  var = get_leftop((Expr *) clause);
439  is_lobound = !isLTsel; /* x < something is high bound */
440  }
441  else
442  {
443  var = get_rightop((Expr *) clause);
444  is_lobound = isLTsel; /* something < x is low bound */
445  }
446 
447  for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448  {
449  /*
450  * We use full equal() here because the "var" might be a function of
451  * one or more attributes of the same relation...
452  */
453  if (!equal(var, rqelem->var))
454  continue;
455  /* Found the right group to put this clause in */
456  if (is_lobound)
457  {
458  if (!rqelem->have_lobound)
459  {
460  rqelem->have_lobound = true;
461  rqelem->lobound = s2;
462  }
463  else
464  {
465 
466  /*------
467  * We have found two similar clauses, such as
468  * x < y AND x <= z.
469  * Keep only the more restrictive one.
470  *------
471  */
472  if (rqelem->lobound > s2)
473  rqelem->lobound = s2;
474  }
475  }
476  else
477  {
478  if (!rqelem->have_hibound)
479  {
480  rqelem->have_hibound = true;
481  rqelem->hibound = s2;
482  }
483  else
484  {
485 
486  /*------
487  * We have found two similar clauses, such as
488  * x > y AND x >= z.
489  * Keep only the more restrictive one.
490  *------
491  */
492  if (rqelem->hibound > s2)
493  rqelem->hibound = s2;
494  }
495  }
496  return;
497  }
498 
499  /* No matching var found, so make a new clause-pair data structure */
500  rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501  rqelem->var = var;
502  if (is_lobound)
503  {
504  rqelem->have_lobound = true;
505  rqelem->have_hibound = false;
506  rqelem->lobound = s2;
507  }
508  else
509  {
510  rqelem->have_lobound = false;
511  rqelem->have_hibound = true;
512  rqelem->hibound = s2;
513  }
514  rqelem->next = *rqlist;
515  *rqlist = rqelem;
516 }
517 
518 /*
519  * find_single_rel_for_clauses
520  * Examine each clause in 'clauses' and determine if all clauses
521  * reference only a single relation. If so return that relation,
522  * otherwise return NULL.
523  */
524 static RelOptInfo *
526 {
527  int lastrelid = 0;
528  ListCell *l;
529 
530  foreach(l, clauses)
531  {
532  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533  int relid;
534 
535  /*
536  * If we have a list of bare clauses rather than RestrictInfos, we
537  * could pull out their relids the hard way with pull_varnos().
538  * However, currently the extended-stats machinery won't do anything
539  * with non-RestrictInfo clauses anyway, so there's no point in
540  * spending extra cycles; just fail if that's what we have.
541  *
542  * An exception to that rule is if we have a bare BoolExpr AND clause.
543  * We treat this as a special case because the restrictinfo machinery
544  * doesn't build RestrictInfos on top of AND clauses.
545  */
546  if (is_andclause(rinfo))
547  {
548  RelOptInfo *rel;
549 
550  rel = find_single_rel_for_clauses(root,
551  ((BoolExpr *) rinfo)->args);
552 
553  if (rel == NULL)
554  return NULL;
555  if (lastrelid == 0)
556  lastrelid = rel->relid;
557  else if (rel->relid != lastrelid)
558  return NULL;
559 
560  continue;
561  }
562 
563  if (!IsA(rinfo, RestrictInfo))
564  return NULL;
565 
566  if (bms_is_empty(rinfo->clause_relids))
567  continue; /* we can ignore variable-free clauses */
568  if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569  return NULL; /* multiple relations in this clause */
570  if (lastrelid == 0)
571  lastrelid = relid; /* first clause referencing a relation */
572  else if (relid != lastrelid)
573  return NULL; /* relation not same as last one */
574  }
575 
576  if (lastrelid != 0)
577  return find_base_rel(root, lastrelid);
578 
579  return NULL; /* no clauses */
580 }
581 
582 /*
583  * bms_is_subset_singleton
584  *
585  * Same result as bms_is_subset(s, bms_make_singleton(x)),
586  * but a little faster and doesn't leak memory.
587  *
588  * Is this of use anywhere else? If so move to bitmapset.c ...
589  */
590 static bool
592 {
593  switch (bms_membership(s))
594  {
595  case BMS_EMPTY_SET:
596  return true;
597  case BMS_SINGLETON:
598  return bms_is_member(x, s);
599  case BMS_MULTIPLE:
600  return false;
601  }
602  /* can't get here... */
603  return false;
604 }
605 
606 /*
607  * treat_as_join_clause -
608  * Decide whether an operator clause is to be handled by the
609  * restriction or join estimator. Subroutine for clause_selectivity().
610  */
611 static inline bool
613  int varRelid, SpecialJoinInfo *sjinfo)
614 {
615  if (varRelid != 0)
616  {
617  /*
618  * Caller is forcing restriction mode (eg, because we are examining an
619  * inner indexscan qual).
620  */
621  return false;
622  }
623  else if (sjinfo == NULL)
624  {
625  /*
626  * It must be a restriction clause, since it's being evaluated at a
627  * scan node.
628  */
629  return false;
630  }
631  else
632  {
633  /*
634  * Otherwise, it's a join if there's more than one relation used. We
635  * can optimize this calculation if an rinfo was passed.
636  *
637  * XXX Since we know the clause is being evaluated at a join, the
638  * only way it could be single-relation is if it was delayed by outer
639  * joins. Although we can make use of the restriction qual estimators
640  * anyway, it seems likely that we ought to account for the
641  * probability of injected nulls somehow.
642  */
643  if (rinfo)
644  return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
645  else
646  return (NumRelids(root, clause) > 1);
647  }
648 }
649 
650 
651 /*
652  * clause_selectivity -
653  * Compute the selectivity of a general boolean expression clause.
654  *
655  * The clause can be either a RestrictInfo or a plain expression. If it's
656  * a RestrictInfo, we try to cache the selectivity for possible re-use,
657  * so passing RestrictInfos is preferred.
658  *
659  * varRelid is either 0 or a rangetable index.
660  *
661  * When varRelid is not 0, only variables belonging to that relation are
662  * considered in computing selectivity; other vars are treated as constants
663  * of unknown values. This is appropriate for estimating the selectivity of
664  * a join clause that is being used as a restriction clause in a scan of a
665  * nestloop join's inner relation --- varRelid should then be the ID of the
666  * inner relation.
667  *
668  * When varRelid is 0, all variables are treated as variables. This
669  * is appropriate for ordinary join clauses and restriction clauses.
670  *
671  * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
672  * if the clause isn't a join clause.
673  *
674  * sjinfo is NULL for a non-join clause, otherwise it provides additional
675  * context information about the join being performed. There are some
676  * special cases:
677  * 1. For a special (not INNER) join, sjinfo is always a member of
678  * root->join_info_list.
679  * 2. For an INNER join, sjinfo is just a transient struct, and only the
680  * relids and jointype fields in it can be trusted.
681  * It is possible for jointype to be different from sjinfo->jointype.
682  * This indicates we are considering a variant join: either with
683  * the LHS and RHS switched, or with one input unique-ified.
684  *
685  * Note: when passing nonzero varRelid, it's normally appropriate to set
686  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
687  * join clause; because we aren't treating it as a join clause.
688  */
691  Node *clause,
692  int varRelid,
693  JoinType jointype,
694  SpecialJoinInfo *sjinfo)
695 {
696  return clause_selectivity_ext(root, clause, varRelid,
697  jointype, sjinfo, true);
698 }
699 
700 /*
701  * clause_selectivity_ext -
702  * Extended version of clause_selectivity(). If "use_extended_stats" is
703  * false, all extended statistics will be ignored, and only per-column
704  * statistics will be used.
705  */
708  Node *clause,
709  int varRelid,
710  JoinType jointype,
711  SpecialJoinInfo *sjinfo,
712  bool use_extended_stats)
713 {
714  Selectivity s1 = 0.5; /* default for any unhandled clause type */
715  RestrictInfo *rinfo = NULL;
716  bool cacheable = false;
717 
718  if (clause == NULL) /* can this still happen? */
719  return s1;
720 
721  if (IsA(clause, RestrictInfo))
722  {
723  rinfo = (RestrictInfo *) clause;
724 
725  /*
726  * If the clause is marked pseudoconstant, then it will be used as a
727  * gating qual and should not affect selectivity estimates; hence
728  * return 1.0. The only exception is that a constant FALSE may be
729  * taken as having selectivity 0.0, since it will surely mean no rows
730  * out of the plan. This case is simple enough that we need not
731  * bother caching the result.
732  */
733  if (rinfo->pseudoconstant)
734  {
735  if (!IsA(rinfo->clause, Const))
736  return (Selectivity) 1.0;
737  }
738 
739  /*
740  * If the clause is marked redundant, always return 1.0.
741  */
742  if (rinfo->norm_selec > 1)
743  return (Selectivity) 1.0;
744 
745  /*
746  * If possible, cache the result of the selectivity calculation for
747  * the clause. We can cache if varRelid is zero or the clause
748  * contains only vars of that relid --- otherwise varRelid will affect
749  * the result, so mustn't cache. Outer join quals might be examined
750  * with either their join's actual jointype or JOIN_INNER, so we need
751  * two cache variables to remember both cases. Note: we assume the
752  * result won't change if we are switching the input relations or
753  * considering a unique-ified case, so we only need one cache variable
754  * for all non-JOIN_INNER cases.
755  */
756  if (varRelid == 0 ||
757  bms_is_subset_singleton(rinfo->clause_relids, varRelid))
758  {
759  /* Cacheable --- do we already have the result? */
760  if (jointype == JOIN_INNER)
761  {
762  if (rinfo->norm_selec >= 0)
763  return rinfo->norm_selec;
764  }
765  else
766  {
767  if (rinfo->outer_selec >= 0)
768  return rinfo->outer_selec;
769  }
770  cacheable = true;
771  }
772 
773  /*
774  * Proceed with examination of contained clause. If the clause is an
775  * OR-clause, we want to look at the variant with sub-RestrictInfos,
776  * so that per-subclause selectivities can be cached.
777  */
778  if (rinfo->orclause)
779  clause = (Node *) rinfo->orclause;
780  else
781  clause = (Node *) rinfo->clause;
782  }
783 
784  if (IsA(clause, Var))
785  {
786  Var *var = (Var *) clause;
787 
788  /*
789  * We probably shouldn't ever see an uplevel Var here, but if we do,
790  * return the default selectivity...
791  */
792  if (var->varlevelsup == 0 &&
793  (varRelid == 0 || varRelid == (int) var->varno))
794  {
795  /* Use the restriction selectivity function for a bool Var */
796  s1 = boolvarsel(root, (Node *) var, varRelid);
797  }
798  }
799  else if (IsA(clause, Const))
800  {
801  /* bool constant is pretty easy... */
802  Const *con = (Const *) clause;
803 
804  s1 = con->constisnull ? 0.0 :
805  DatumGetBool(con->constvalue) ? 1.0 : 0.0;
806  }
807  else if (IsA(clause, Param))
808  {
809  /* see if we can replace the Param */
810  Node *subst = estimate_expression_value(root, clause);
811 
812  if (IsA(subst, Const))
813  {
814  /* bool constant is pretty easy... */
815  Const *con = (Const *) subst;
816 
817  s1 = con->constisnull ? 0.0 :
818  DatumGetBool(con->constvalue) ? 1.0 : 0.0;
819  }
820  else
821  {
822  /* XXX any way to do better than default? */
823  }
824  }
825  else if (is_notclause(clause))
826  {
827  /* inverse of the selectivity of the underlying clause */
828  s1 = 1.0 - clause_selectivity_ext(root,
829  (Node *) get_notclausearg((Expr *) clause),
830  varRelid,
831  jointype,
832  sjinfo,
833  use_extended_stats);
834  }
835  else if (is_andclause(clause))
836  {
837  /* share code with clauselist_selectivity() */
838  s1 = clauselist_selectivity_ext(root,
839  ((BoolExpr *) clause)->args,
840  varRelid,
841  jointype,
842  sjinfo,
843  use_extended_stats);
844  }
845  else if (is_orclause(clause))
846  {
847  /*
848  * Almost the same thing as clauselist_selectivity, but with the
849  * clauses connected by OR.
850  */
851  s1 = clauselist_selectivity_or(root,
852  ((BoolExpr *) clause)->args,
853  varRelid,
854  jointype,
855  sjinfo,
856  use_extended_stats);
857  }
858  else if (is_opclause(clause) || IsA(clause, DistinctExpr))
859  {
860  OpExpr *opclause = (OpExpr *) clause;
861  Oid opno = opclause->opno;
862 
863  if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
864  {
865  /* Estimate selectivity for a join clause. */
866  s1 = join_selectivity(root, opno,
867  opclause->args,
868  opclause->inputcollid,
869  jointype,
870  sjinfo);
871  }
872  else
873  {
874  /* Estimate selectivity for a restriction clause. */
875  s1 = restriction_selectivity(root, opno,
876  opclause->args,
877  opclause->inputcollid,
878  varRelid);
879  }
880 
881  /*
882  * DistinctExpr has the same representation as OpExpr, but the
883  * contained operator is "=" not "<>", so we must negate the result.
884  * This estimation method doesn't give the right behavior for nulls,
885  * but it's better than doing nothing.
886  */
887  if (IsA(clause, DistinctExpr))
888  s1 = 1.0 - s1;
889  }
890  else if (is_funcclause(clause))
891  {
892  FuncExpr *funcclause = (FuncExpr *) clause;
893 
894  /* Try to get an estimate from the support function, if any */
895  s1 = function_selectivity(root,
896  funcclause->funcid,
897  funcclause->args,
898  funcclause->inputcollid,
899  treat_as_join_clause(root, clause, rinfo,
900  varRelid, sjinfo),
901  varRelid,
902  jointype,
903  sjinfo);
904  }
905  else if (IsA(clause, ScalarArrayOpExpr))
906  {
907  /* Use node specific selectivity calculation function */
908  s1 = scalararraysel(root,
909  (ScalarArrayOpExpr *) clause,
910  treat_as_join_clause(root, clause, rinfo,
911  varRelid, sjinfo),
912  varRelid,
913  jointype,
914  sjinfo);
915  }
916  else if (IsA(clause, RowCompareExpr))
917  {
918  /* Use node specific selectivity calculation function */
919  s1 = rowcomparesel(root,
920  (RowCompareExpr *) clause,
921  varRelid,
922  jointype,
923  sjinfo);
924  }
925  else if (IsA(clause, NullTest))
926  {
927  /* Use node specific selectivity calculation function */
928  s1 = nulltestsel(root,
929  ((NullTest *) clause)->nulltesttype,
930  (Node *) ((NullTest *) clause)->arg,
931  varRelid,
932  jointype,
933  sjinfo);
934  }
935  else if (IsA(clause, BooleanTest))
936  {
937  /* Use node specific selectivity calculation function */
938  s1 = booltestsel(root,
939  ((BooleanTest *) clause)->booltesttype,
940  (Node *) ((BooleanTest *) clause)->arg,
941  varRelid,
942  jointype,
943  sjinfo);
944  }
945  else if (IsA(clause, CurrentOfExpr))
946  {
947  /* CURRENT OF selects at most one row of its table */
948  CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
949  RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
950 
951  if (crel->tuples > 0)
952  s1 = 1.0 / crel->tuples;
953  }
954  else if (IsA(clause, RelabelType))
955  {
956  /* Not sure this case is needed, but it can't hurt */
957  s1 = clause_selectivity_ext(root,
958  (Node *) ((RelabelType *) clause)->arg,
959  varRelid,
960  jointype,
961  sjinfo,
962  use_extended_stats);
963  }
964  else if (IsA(clause, CoerceToDomain))
965  {
966  /* Not sure this case is needed, but it can't hurt */
967  s1 = clause_selectivity_ext(root,
968  (Node *) ((CoerceToDomain *) clause)->arg,
969  varRelid,
970  jointype,
971  sjinfo,
972  use_extended_stats);
973  }
974  else
975  {
976  /*
977  * For anything else, see if we can consider it as a boolean variable.
978  * This only works if it's an immutable expression in Vars of a single
979  * relation; but there's no point in us checking that here because
980  * boolvarsel() will do it internally, and return a suitable default
981  * selectivity if not.
982  */
983  s1 = boolvarsel(root, clause, varRelid);
984  }
985 
986  /* Cache the result if possible */
987  if (cacheable)
988  {
989  if (jointype == JOIN_INNER)
990  rinfo->norm_selec = s1;
991  else
992  rinfo->outer_selec = s1;
993  }
994 
995 #ifdef SELECTIVITY_DEBUG
996  elog(DEBUG4, "clause_selectivity: s1 %f", s1);
997 #endif /* SELECTIVITY_DEBUG */
998 
999  return s1;
1000 }
Datum constvalue
Definition: primnodes.h:214
#define NIL
Definition: pg_list.h:65
#define IsA(nodeptr, _type_)
Definition: nodes.h:584
List * statlist
Definition: pathnodes.h:708
bool is_pseudo_constant_clause_relids(Node *clause, Relids relids)
Definition: clauses.c:1878
Index varlevelsup
Definition: primnodes.h:191
List * args
Definition: primnodes.h:498
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3073
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2055
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:106
Expr * orclause
Definition: pathnodes.h:2037
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1782
double tuples
Definition: pathnodes.h:710
Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:2170
Relids clause_relids
Definition: pathnodes.h:2021
bool pseudoconstant
Definition: pathnodes.h:2014
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:97
Definition: nodes.h:533
Relids left_relids
Definition: pathnodes.h:2033
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:615
double Selectivity
Definition: nodes.h:666
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:181
#define DEBUG4
Definition: elog.h:22
Selectivity statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses, bool is_or)
#define lsecond(l)
Definition: pg_list.h:179
Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1813
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:59
Selectivity norm_selec
Definition: pathnodes.h:2044
JoinType
Definition: nodes.h:700
static bool bms_is_subset_singleton(const Bitmapset *s, int x)
Definition: clausesel.c:591
void pfree(void *pointer)
Definition: mcxt.c:1057
#define linitial(l)
Definition: pg_list.h:174
Oid funcid
Definition: primnodes.h:490
char * s1
Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1695
Selectivity function_selectivity(PlannerInfo *root, Oid funcid, List *args, Oid inputcollid, bool is_join, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1862
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:690
#define DatumGetBool(X)
Definition: postgres.h:393
Selectivity outer_selec
Definition: pathnodes.h:2047
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:1858
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:73
struct RangeQueryClause * next
Definition: clausesel.c:35
static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2)
Definition: clausesel.c:429
Selectivity hibound
Definition: clausesel.c:40
Index relid
Definition: pathnodes.h:698
Expr * clause
Definition: pathnodes.h:2006
int NumRelids(PlannerInfo *root, Node *clause)
Definition: clauses.c:1900
Selectivity lobound
Definition: clausesel.c:39
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:701
Index varno
Definition: primnodes.h:184
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:115
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1528
static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:612
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
char * s2
RTEKind rtekind
Definition: pathnodes.h:700
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:85
static RelOptInfo * find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
Definition: clausesel.c:525
static Selectivity clauselist_selectivity_or(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:361
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:124
Selectivity clause_selectivity_ext(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:707
Relids right_relids
Definition: pathnodes.h:2034
Oid inputcollid
Definition: primnodes.h:497
struct RangeQueryClause RangeQueryClause
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
Oid inputcollid
Definition: primnodes.h:542
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:119
void * palloc(Size size)
Definition: mcxt.c:950
#define elog(elevel,...)
Definition: elog.h:227
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
void * arg
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1821
Oid opno
Definition: primnodes.h:537
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:66
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:102
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:374
List * args
Definition: primnodes.h:543
Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1537
Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
Definition: selfuncs.c:1509
Definition: pg_list.h:50
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
bool constisnull
Definition: primnodes.h:215