PostgreSQL Source Code  git master
clausesel.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * clausesel.c
4  * Routines to compute clause selectivities
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/path/clausesel.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "nodes/nodeFuncs.h"
18 #include "optimizer/clauses.h"
19 #include "optimizer/optimizer.h"
20 #include "optimizer/pathnode.h"
21 #include "optimizer/plancat.h"
22 #include "statistics/statistics.h"
23 #include "utils/fmgroids.h"
24 #include "utils/lsyscache.h"
25 #include "utils/selfuncs.h"
26 
27 /*
28  * Data structure for accumulating info about possible range-query
29  * clause pairs in clauselist_selectivity.
30  */
31 typedef struct RangeQueryClause
32 {
33  struct RangeQueryClause *next; /* next in linked list */
34  Node *var; /* The common variable of the clauses */
35  bool have_lobound; /* found a low-bound clause yet? */
36  bool have_hibound; /* found a high-bound clause yet? */
37  Selectivity lobound; /* Selectivity of a var > something clause */
38  Selectivity hibound; /* Selectivity of a var < something clause */
40 
41 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
42  bool varonleft, bool isLTsel, Selectivity s2);
44  List *clauses);
46  List *clauses,
47  int varRelid,
48  JoinType jointype,
49  SpecialJoinInfo *sjinfo,
50  bool use_extended_stats);
51 
52 /****************************************************************************
53  * ROUTINES TO COMPUTE SELECTIVITIES
54  ****************************************************************************/
55 
56 /*
57  * clauselist_selectivity -
58  * Compute the selectivity of an implicitly-ANDed list of boolean
59  * expression clauses. The list can be empty, in which case 1.0
60  * must be returned. List elements may be either RestrictInfos
61  * or bare expression clauses --- the former is preferred since
62  * it allows caching of results.
63  *
64  * See clause_selectivity() for the meaning of the additional parameters.
65  *
66  * The basic approach is to apply extended statistics first, on as many
67  * clauses as possible, in order to capture cross-column dependencies etc.
68  * The remaining clauses are then estimated by taking the product of their
69  * selectivities, but that's only right if they have independent
70  * probabilities, and in reality they are often NOT independent even if they
71  * only refer to a single column. So, we want to be smarter where we can.
72  *
73  * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
74  * are recognized as possible range query components if they are restriction
75  * opclauses whose operators have scalarltsel or a related function as their
76  * restriction selectivity estimator. We pair up clauses of this form that
77  * refer to the same variable. An unpairable clause of this kind is simply
78  * multiplied into the selectivity product in the normal way. But when we
79  * find a pair, we know that the selectivities represent the relative
80  * positions of the low and high bounds within the column's range, so instead
81  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
82  * losel - 1. (To visualize this, see that hisel is the fraction of the range
83  * below the high bound, while losel is the fraction above the low bound; so
84  * hisel can be interpreted directly as a 0..1 value but we need to convert
85  * losel to 1-losel before interpreting it as a value. Then the available
86  * range is 1-losel to hisel. However, this calculation double-excludes
87  * nulls, so really we need hisel + losel + null_frac - 1.)
88  *
89  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
90  * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
91  * yields an impossible (negative) result.
92  *
93  * A free side-effect is that we can recognize redundant inequalities such
94  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
95  *
96  * Of course this is all very dependent on the behavior of the inequality
97  * selectivity functions; perhaps some day we can generalize the approach.
98  */
101  List *clauses,
102  int varRelid,
103  JoinType jointype,
104  SpecialJoinInfo *sjinfo)
105 {
106  return clauselist_selectivity_ext(root, clauses, varRelid,
107  jointype, sjinfo, true);
108 }
109 
110 /*
111  * clauselist_selectivity_ext -
112  * Extended version of clauselist_selectivity(). If "use_extended_stats"
113  * is false, all extended statistics will be ignored, and only per-column
114  * statistics will be used.
115  */
118  List *clauses,
119  int varRelid,
120  JoinType jointype,
121  SpecialJoinInfo *sjinfo,
122  bool use_extended_stats)
123 {
124  Selectivity s1 = 1.0;
125  RelOptInfo *rel;
126  Bitmapset *estimatedclauses = NULL;
127  RangeQueryClause *rqlist = NULL;
128  ListCell *l;
129  int listidx;
130 
131  /*
132  * If there's exactly one clause, just go directly to
133  * clause_selectivity_ext(). None of what we might do below is relevant.
134  */
135  if (list_length(clauses) == 1)
136  return clause_selectivity_ext(root, (Node *) linitial(clauses),
137  varRelid, jointype, sjinfo,
138  use_extended_stats);
139 
140  /*
141  * Determine if these clauses reference a single relation. If so, and if
142  * it has extended statistics, try to apply those.
143  */
144  rel = find_single_rel_for_clauses(root, clauses);
145  if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
146  {
147  /*
148  * Estimate as many clauses as possible using extended statistics.
149  *
150  * 'estimatedclauses' is populated with the 0-based list position
151  * index of clauses estimated here, and that should be ignored below.
152  */
153  s1 = statext_clauselist_selectivity(root, clauses, varRelid,
154  jointype, sjinfo, rel,
155  &estimatedclauses, false);
156  }
157 
158  /*
159  * Apply normal selectivity estimates for remaining clauses. We'll be
160  * careful to skip any clauses which were already estimated above.
161  *
162  * Anything that doesn't look like a potential rangequery clause gets
163  * multiplied into s1 and forgotten. Anything that does gets inserted into
164  * an rqlist entry.
165  */
166  listidx = -1;
167  foreach(l, clauses)
168  {
169  Node *clause = (Node *) lfirst(l);
170  RestrictInfo *rinfo;
171  Selectivity s2;
172 
173  listidx++;
174 
175  /*
176  * Skip this clause if it's already been estimated by some other
177  * statistics above.
178  */
179  if (bms_is_member(listidx, estimatedclauses))
180  continue;
181 
182  /* Compute the selectivity of this clause in isolation */
183  s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
184  use_extended_stats);
185 
186  /*
187  * Check for being passed a RestrictInfo.
188  *
189  * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
190  * 0.0; just use that rather than looking for range pairs.
191  */
192  if (IsA(clause, RestrictInfo))
193  {
194  rinfo = (RestrictInfo *) clause;
195  if (rinfo->pseudoconstant)
196  {
197  s1 = s1 * s2;
198  continue;
199  }
200  clause = (Node *) rinfo->clause;
201  }
202  else
203  rinfo = NULL;
204 
205  /*
206  * See if it looks like a restriction clause with a pseudoconstant on
207  * one side. (Anything more complicated than that might not behave in
208  * the simple way we are expecting.) Most of the tests here can be
209  * done more efficiently with rinfo than without.
210  */
211  if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
212  {
213  OpExpr *expr = (OpExpr *) clause;
214  bool varonleft = true;
215  bool ok;
216 
217  if (rinfo)
218  {
219  ok = (rinfo->num_base_rels == 1) &&
221  rinfo->right_relids) ||
222  (varonleft = false,
224  rinfo->left_relids)));
225  }
226  else
227  {
228  ok = (NumRelids(root, clause) == 1) &&
230  (varonleft = false,
232  }
233 
234  if (ok)
235  {
236  /*
237  * If it's not a "<"/"<="/">"/">=" operator, just merge the
238  * selectivity in generically. But if it's the right oprrest,
239  * add the clause to rqlist for later processing.
240  */
241  switch (get_oprrest(expr->opno))
242  {
243  case F_SCALARLTSEL:
244  case F_SCALARLESEL:
245  addRangeClause(&rqlist, clause,
246  varonleft, true, s2);
247  break;
248  case F_SCALARGTSEL:
249  case F_SCALARGESEL:
250  addRangeClause(&rqlist, clause,
251  varonleft, false, s2);
252  break;
253  default:
254  /* Just merge the selectivity in generically */
255  s1 = s1 * s2;
256  break;
257  }
258  continue; /* drop to loop bottom */
259  }
260  }
261 
262  /* Not the right form, so treat it generically. */
263  s1 = s1 * s2;
264  }
265 
266  /*
267  * Now scan the rangequery pair list.
268  */
269  while (rqlist != NULL)
270  {
271  RangeQueryClause *rqnext;
272 
273  if (rqlist->have_lobound && rqlist->have_hibound)
274  {
275  /* Successfully matched a pair of range clauses */
276  Selectivity s2;
277 
278  /*
279  * Exact equality to the default value probably means the
280  * selectivity function punted. This is not airtight but should
281  * be good enough.
282  */
283  if (rqlist->hibound == DEFAULT_INEQ_SEL ||
284  rqlist->lobound == DEFAULT_INEQ_SEL)
285  {
287  }
288  else
289  {
290  s2 = rqlist->hibound + rqlist->lobound - 1.0;
291 
292  /* Adjust for double-exclusion of NULLs */
293  s2 += nulltestsel(root, IS_NULL, rqlist->var,
294  varRelid, jointype, sjinfo);
295 
296  /*
297  * A zero or slightly negative s2 should be converted into a
298  * small positive value; we probably are dealing with a very
299  * tight range and got a bogus result due to roundoff errors.
300  * However, if s2 is very negative, then we probably have
301  * default selectivity estimates on one or both sides of the
302  * range that we failed to recognize above for some reason.
303  */
304  if (s2 <= 0.0)
305  {
306  if (s2 < -0.01)
307  {
308  /*
309  * No data available --- use a default estimate that
310  * is small, but not real small.
311  */
313  }
314  else
315  {
316  /*
317  * It's just roundoff error; use a small positive
318  * value
319  */
320  s2 = 1.0e-10;
321  }
322  }
323  }
324  /* Merge in the selectivity of the pair of clauses */
325  s1 *= s2;
326  }
327  else
328  {
329  /* Only found one of a pair, merge it in generically */
330  if (rqlist->have_lobound)
331  s1 *= rqlist->lobound;
332  else
333  s1 *= rqlist->hibound;
334  }
335  /* release storage and advance */
336  rqnext = rqlist->next;
337  pfree(rqlist);
338  rqlist = rqnext;
339  }
340 
341  return s1;
342 }
343 
344 /*
345  * clauselist_selectivity_or -
346  * Compute the selectivity of an implicitly-ORed list of boolean
347  * expression clauses. The list can be empty, in which case 0.0
348  * must be returned. List elements may be either RestrictInfos
349  * or bare expression clauses --- the former is preferred since
350  * it allows caching of results.
351  *
352  * See clause_selectivity() for the meaning of the additional parameters.
353  *
354  * The basic approach is to apply extended statistics first, on as many
355  * clauses as possible, in order to capture cross-column dependencies etc.
356  * The remaining clauses are then estimated as if they were independent.
357  */
358 static Selectivity
360  List *clauses,
361  int varRelid,
362  JoinType jointype,
363  SpecialJoinInfo *sjinfo,
364  bool use_extended_stats)
365 {
366  Selectivity s1 = 0.0;
367  RelOptInfo *rel;
368  Bitmapset *estimatedclauses = NULL;
369  ListCell *lc;
370  int listidx;
371 
372  /*
373  * Determine if these clauses reference a single relation. If so, and if
374  * it has extended statistics, try to apply those.
375  */
376  rel = find_single_rel_for_clauses(root, clauses);
377  if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
378  {
379  /*
380  * Estimate as many clauses as possible using extended statistics.
381  *
382  * 'estimatedclauses' is populated with the 0-based list position
383  * index of clauses estimated here, and that should be ignored below.
384  */
385  s1 = statext_clauselist_selectivity(root, clauses, varRelid,
386  jointype, sjinfo, rel,
387  &estimatedclauses, true);
388  }
389 
390  /*
391  * Estimate the remaining clauses as if they were independent.
392  *
393  * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
394  * for the probable overlap of selected tuple sets.
395  *
396  * XXX is this too conservative?
397  */
398  listidx = -1;
399  foreach(lc, clauses)
400  {
401  Selectivity s2;
402 
403  listidx++;
404 
405  /*
406  * Skip this clause if it's already been estimated by some other
407  * statistics above.
408  */
409  if (bms_is_member(listidx, estimatedclauses))
410  continue;
411 
412  s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
413  jointype, sjinfo, use_extended_stats);
414 
415  s1 = s1 + s2 - s1 * s2;
416  }
417 
418  return s1;
419 }
420 
421 /*
422  * addRangeClause --- add a new range clause for clauselist_selectivity
423  *
424  * Here is where we try to match up pairs of range-query clauses
425  */
426 static void
428  bool varonleft, bool isLTsel, Selectivity s2)
429 {
430  RangeQueryClause *rqelem;
431  Node *var;
432  bool is_lobound;
433 
434  if (varonleft)
435  {
436  var = get_leftop((Expr *) clause);
437  is_lobound = !isLTsel; /* x < something is high bound */
438  }
439  else
440  {
441  var = get_rightop((Expr *) clause);
442  is_lobound = isLTsel; /* something < x is low bound */
443  }
444 
445  for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
446  {
447  /*
448  * We use full equal() here because the "var" might be a function of
449  * one or more attributes of the same relation...
450  */
451  if (!equal(var, rqelem->var))
452  continue;
453  /* Found the right group to put this clause in */
454  if (is_lobound)
455  {
456  if (!rqelem->have_lobound)
457  {
458  rqelem->have_lobound = true;
459  rqelem->lobound = s2;
460  }
461  else
462  {
463 
464  /*------
465  * We have found two similar clauses, such as
466  * x < y AND x <= z.
467  * Keep only the more restrictive one.
468  *------
469  */
470  if (rqelem->lobound > s2)
471  rqelem->lobound = s2;
472  }
473  }
474  else
475  {
476  if (!rqelem->have_hibound)
477  {
478  rqelem->have_hibound = true;
479  rqelem->hibound = s2;
480  }
481  else
482  {
483 
484  /*------
485  * We have found two similar clauses, such as
486  * x > y AND x >= z.
487  * Keep only the more restrictive one.
488  *------
489  */
490  if (rqelem->hibound > s2)
491  rqelem->hibound = s2;
492  }
493  }
494  return;
495  }
496 
497  /* No matching var found, so make a new clause-pair data structure */
498  rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
499  rqelem->var = var;
500  if (is_lobound)
501  {
502  rqelem->have_lobound = true;
503  rqelem->have_hibound = false;
504  rqelem->lobound = s2;
505  }
506  else
507  {
508  rqelem->have_lobound = false;
509  rqelem->have_hibound = true;
510  rqelem->hibound = s2;
511  }
512  rqelem->next = *rqlist;
513  *rqlist = rqelem;
514 }
515 
516 /*
517  * find_single_rel_for_clauses
518  * Examine each clause in 'clauses' and determine if all clauses
519  * reference only a single relation. If so return that relation,
520  * otherwise return NULL.
521  */
522 static RelOptInfo *
524 {
525  int lastrelid = 0;
526  ListCell *l;
527 
528  foreach(l, clauses)
529  {
530  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
531  int relid;
532 
533  /*
534  * If we have a list of bare clauses rather than RestrictInfos, we
535  * could pull out their relids the hard way with pull_varnos().
536  * However, currently the extended-stats machinery won't do anything
537  * with non-RestrictInfo clauses anyway, so there's no point in
538  * spending extra cycles; just fail if that's what we have.
539  *
540  * An exception to that rule is if we have a bare BoolExpr AND clause.
541  * We treat this as a special case because the restrictinfo machinery
542  * doesn't build RestrictInfos on top of AND clauses.
543  */
544  if (is_andclause(rinfo))
545  {
546  RelOptInfo *rel;
547 
549  ((BoolExpr *) rinfo)->args);
550 
551  if (rel == NULL)
552  return NULL;
553  if (lastrelid == 0)
554  lastrelid = rel->relid;
555  else if (rel->relid != lastrelid)
556  return NULL;
557 
558  continue;
559  }
560 
561  if (!IsA(rinfo, RestrictInfo))
562  return NULL;
563 
564  if (bms_is_empty(rinfo->clause_relids))
565  continue; /* we can ignore variable-free clauses */
566  if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
567  return NULL; /* multiple relations in this clause */
568  if (lastrelid == 0)
569  lastrelid = relid; /* first clause referencing a relation */
570  else if (relid != lastrelid)
571  return NULL; /* relation not same as last one */
572  }
573 
574  if (lastrelid != 0)
575  return find_base_rel(root, lastrelid);
576 
577  return NULL; /* no clauses */
578 }
579 
580 /*
581  * treat_as_join_clause -
582  * Decide whether an operator clause is to be handled by the
583  * restriction or join estimator. Subroutine for clause_selectivity().
584  */
585 static inline bool
587  int varRelid, SpecialJoinInfo *sjinfo)
588 {
589  if (varRelid != 0)
590  {
591  /*
592  * Caller is forcing restriction mode (eg, because we are examining an
593  * inner indexscan qual).
594  */
595  return false;
596  }
597  else if (sjinfo == NULL)
598  {
599  /*
600  * It must be a restriction clause, since it's being evaluated at a
601  * scan node.
602  */
603  return false;
604  }
605  else
606  {
607  /*
608  * Otherwise, it's a join if there's more than one base relation used.
609  * We can optimize this calculation if an rinfo was passed.
610  *
611  * XXX Since we know the clause is being evaluated at a join, the
612  * only way it could be single-relation is if it was delayed by outer
613  * joins. We intentionally count only baserels here, not OJs that
614  * might be present in rinfo->clause_relids, so that we direct such
615  * cases to the restriction qual estimators not join estimators.
616  * Eventually some notice should be taken of the possibility of
617  * injected nulls, but we'll likely want to do that in the restriction
618  * estimators rather than starting to treat such cases as join quals.
619  */
620  if (rinfo)
621  return (rinfo->num_base_rels > 1);
622  else
623  return (NumRelids(root, clause) > 1);
624  }
625 }
626 
627 
628 /*
629  * clause_selectivity -
630  * Compute the selectivity of a general boolean expression clause.
631  *
632  * The clause can be either a RestrictInfo or a plain expression. If it's
633  * a RestrictInfo, we try to cache the selectivity for possible re-use,
634  * so passing RestrictInfos is preferred.
635  *
636  * varRelid is either 0 or a rangetable index.
637  *
638  * When varRelid is not 0, only variables belonging to that relation are
639  * considered in computing selectivity; other vars are treated as constants
640  * of unknown values. This is appropriate for estimating the selectivity of
641  * a join clause that is being used as a restriction clause in a scan of a
642  * nestloop join's inner relation --- varRelid should then be the ID of the
643  * inner relation.
644  *
645  * When varRelid is 0, all variables are treated as variables. This
646  * is appropriate for ordinary join clauses and restriction clauses.
647  *
648  * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
649  * if the clause isn't a join clause.
650  *
651  * sjinfo is NULL for a non-join clause, otherwise it provides additional
652  * context information about the join being performed. There are some
653  * special cases:
654  * 1. For a special (not INNER) join, sjinfo is always a member of
655  * root->join_info_list.
656  * 2. For an INNER join, sjinfo is just a transient struct, and only the
657  * relids and jointype fields in it can be trusted.
658  * It is possible for jointype to be different from sjinfo->jointype.
659  * This indicates we are considering a variant join: either with
660  * the LHS and RHS switched, or with one input unique-ified.
661  *
662  * Note: when passing nonzero varRelid, it's normally appropriate to set
663  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
664  * join clause; because we aren't treating it as a join clause.
665  */
668  Node *clause,
669  int varRelid,
670  JoinType jointype,
671  SpecialJoinInfo *sjinfo)
672 {
673  return clause_selectivity_ext(root, clause, varRelid,
674  jointype, sjinfo, true);
675 }
676 
677 /*
678  * clause_selectivity_ext -
679  * Extended version of clause_selectivity(). If "use_extended_stats" is
680  * false, all extended statistics will be ignored, and only per-column
681  * statistics will be used.
682  */
685  Node *clause,
686  int varRelid,
687  JoinType jointype,
688  SpecialJoinInfo *sjinfo,
689  bool use_extended_stats)
690 {
691  Selectivity s1 = 0.5; /* default for any unhandled clause type */
692  RestrictInfo *rinfo = NULL;
693  bool cacheable = false;
694 
695  if (clause == NULL) /* can this still happen? */
696  return s1;
697 
698  if (IsA(clause, RestrictInfo))
699  {
700  rinfo = (RestrictInfo *) clause;
701 
702  /*
703  * If the clause is marked pseudoconstant, then it will be used as a
704  * gating qual and should not affect selectivity estimates; hence
705  * return 1.0. The only exception is that a constant FALSE may be
706  * taken as having selectivity 0.0, since it will surely mean no rows
707  * out of the plan. This case is simple enough that we need not
708  * bother caching the result.
709  */
710  if (rinfo->pseudoconstant)
711  {
712  if (!IsA(rinfo->clause, Const))
713  return (Selectivity) 1.0;
714  }
715 
716  /*
717  * If possible, cache the result of the selectivity calculation for
718  * the clause. We can cache if varRelid is zero or the clause
719  * contains only vars of that relid --- otherwise varRelid will affect
720  * the result, so mustn't cache. Outer join quals might be examined
721  * with either their join's actual jointype or JOIN_INNER, so we need
722  * two cache variables to remember both cases. Note: we assume the
723  * result won't change if we are switching the input relations or
724  * considering a unique-ified case, so we only need one cache variable
725  * for all non-JOIN_INNER cases.
726  */
727  if (varRelid == 0 ||
728  rinfo->num_base_rels == 0 ||
729  (rinfo->num_base_rels == 1 &&
730  bms_is_member(varRelid, rinfo->clause_relids)))
731  {
732  /* Cacheable --- do we already have the result? */
733  if (jointype == JOIN_INNER)
734  {
735  if (rinfo->norm_selec >= 0)
736  return rinfo->norm_selec;
737  }
738  else
739  {
740  if (rinfo->outer_selec >= 0)
741  return rinfo->outer_selec;
742  }
743  cacheable = true;
744  }
745 
746  /*
747  * Proceed with examination of contained clause. If the clause is an
748  * OR-clause, we want to look at the variant with sub-RestrictInfos,
749  * so that per-subclause selectivities can be cached.
750  */
751  if (rinfo->orclause)
752  clause = (Node *) rinfo->orclause;
753  else
754  clause = (Node *) rinfo->clause;
755  }
756 
757  if (IsA(clause, Var))
758  {
759  Var *var = (Var *) clause;
760 
761  /*
762  * We probably shouldn't ever see an uplevel Var here, but if we do,
763  * return the default selectivity...
764  */
765  if (var->varlevelsup == 0 &&
766  (varRelid == 0 || varRelid == (int) var->varno))
767  {
768  /* Use the restriction selectivity function for a bool Var */
769  s1 = boolvarsel(root, (Node *) var, varRelid);
770  }
771  }
772  else if (IsA(clause, Const))
773  {
774  /* bool constant is pretty easy... */
775  Const *con = (Const *) clause;
776 
777  s1 = con->constisnull ? 0.0 :
778  DatumGetBool(con->constvalue) ? 1.0 : 0.0;
779  }
780  else if (IsA(clause, Param))
781  {
782  /* see if we can replace the Param */
783  Node *subst = estimate_expression_value(root, clause);
784 
785  if (IsA(subst, Const))
786  {
787  /* bool constant is pretty easy... */
788  Const *con = (Const *) subst;
789 
790  s1 = con->constisnull ? 0.0 :
791  DatumGetBool(con->constvalue) ? 1.0 : 0.0;
792  }
793  else
794  {
795  /* XXX any way to do better than default? */
796  }
797  }
798  else if (is_notclause(clause))
799  {
800  /* inverse of the selectivity of the underlying clause */
802  (Node *) get_notclausearg((Expr *) clause),
803  varRelid,
804  jointype,
805  sjinfo,
806  use_extended_stats);
807  }
808  else if (is_andclause(clause))
809  {
810  /* share code with clauselist_selectivity() */
812  ((BoolExpr *) clause)->args,
813  varRelid,
814  jointype,
815  sjinfo,
816  use_extended_stats);
817  }
818  else if (is_orclause(clause))
819  {
820  /*
821  * Almost the same thing as clauselist_selectivity, but with the
822  * clauses connected by OR.
823  */
825  ((BoolExpr *) clause)->args,
826  varRelid,
827  jointype,
828  sjinfo,
829  use_extended_stats);
830  }
831  else if (is_opclause(clause) || IsA(clause, DistinctExpr))
832  {
833  OpExpr *opclause = (OpExpr *) clause;
834  Oid opno = opclause->opno;
835 
836  if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
837  {
838  /* Estimate selectivity for a join clause. */
839  s1 = join_selectivity(root, opno,
840  opclause->args,
841  opclause->inputcollid,
842  jointype,
843  sjinfo);
844  }
845  else
846  {
847  /* Estimate selectivity for a restriction clause. */
849  opclause->args,
850  opclause->inputcollid,
851  varRelid);
852  }
853 
854  /*
855  * DistinctExpr has the same representation as OpExpr, but the
856  * contained operator is "=" not "<>", so we must negate the result.
857  * This estimation method doesn't give the right behavior for nulls,
858  * but it's better than doing nothing.
859  */
860  if (IsA(clause, DistinctExpr))
861  s1 = 1.0 - s1;
862  }
863  else if (is_funcclause(clause))
864  {
865  FuncExpr *funcclause = (FuncExpr *) clause;
866 
867  /* Try to get an estimate from the support function, if any */
869  funcclause->funcid,
870  funcclause->args,
871  funcclause->inputcollid,
872  treat_as_join_clause(root, clause, rinfo,
873  varRelid, sjinfo),
874  varRelid,
875  jointype,
876  sjinfo);
877  }
878  else if (IsA(clause, ScalarArrayOpExpr))
879  {
880  /* Use node specific selectivity calculation function */
882  (ScalarArrayOpExpr *) clause,
883  treat_as_join_clause(root, clause, rinfo,
884  varRelid, sjinfo),
885  varRelid,
886  jointype,
887  sjinfo);
888  }
889  else if (IsA(clause, RowCompareExpr))
890  {
891  /* Use node specific selectivity calculation function */
893  (RowCompareExpr *) clause,
894  varRelid,
895  jointype,
896  sjinfo);
897  }
898  else if (IsA(clause, NullTest))
899  {
900  /* Use node specific selectivity calculation function */
901  s1 = nulltestsel(root,
902  ((NullTest *) clause)->nulltesttype,
903  (Node *) ((NullTest *) clause)->arg,
904  varRelid,
905  jointype,
906  sjinfo);
907  }
908  else if (IsA(clause, BooleanTest))
909  {
910  /* Use node specific selectivity calculation function */
911  s1 = booltestsel(root,
912  ((BooleanTest *) clause)->booltesttype,
913  (Node *) ((BooleanTest *) clause)->arg,
914  varRelid,
915  jointype,
916  sjinfo);
917  }
918  else if (IsA(clause, CurrentOfExpr))
919  {
920  /* CURRENT OF selects at most one row of its table */
921  CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
922  RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
923 
924  if (crel->tuples > 0)
925  s1 = 1.0 / crel->tuples;
926  }
927  else if (IsA(clause, RelabelType))
928  {
929  /* Not sure this case is needed, but it can't hurt */
931  (Node *) ((RelabelType *) clause)->arg,
932  varRelid,
933  jointype,
934  sjinfo,
935  use_extended_stats);
936  }
937  else if (IsA(clause, CoerceToDomain))
938  {
939  /* Not sure this case is needed, but it can't hurt */
941  (Node *) ((CoerceToDomain *) clause)->arg,
942  varRelid,
943  jointype,
944  sjinfo,
945  use_extended_stats);
946  }
947  else
948  {
949  /*
950  * For anything else, see if we can consider it as a boolean variable.
951  * This only works if it's an immutable expression in Vars of a single
952  * relation; but there's no point in us checking that here because
953  * boolvarsel() will do it internally, and return a suitable default
954  * selectivity if not.
955  */
956  s1 = boolvarsel(root, clause, varRelid);
957  }
958 
959  /* Cache the result if possible */
960  if (cacheable)
961  {
962  if (jointype == JOIN_INNER)
963  rinfo->norm_selec = s1;
964  else
965  rinfo->outer_selec = s1;
966  }
967 
968 #ifdef SELECTIVITY_DEBUG
969  elog(DEBUG4, "clause_selectivity: s1 %f", s1);
970 #endif /* SELECTIVITY_DEBUG */
971 
972  return s1;
973 }
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:715
#define bms_is_empty(a)
Definition: bitmapset.h:118
int NumRelids(PlannerInfo *root, Node *clause)
Definition: clauses.c:2130
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2088
bool is_pseudo_constant_clause_relids(Node *clause, Relids relids)
Definition: clauses.c:2108
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2395
static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2)
Definition: clausesel.c:427
static RelOptInfo * find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
Definition: clausesel.c:523
Selectivity clause_selectivity_ext(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:684
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:100
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:117
struct RangeQueryClause RangeQueryClause
static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:586
static Selectivity clauselist_selectivity_or(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:359
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:667
#define elog(elevel,...)
Definition: elog.h:225
#define DEBUG4
Definition: elog.h:27
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Selectivity statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses, bool is_or)
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1557
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:107
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:134
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:95
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:83
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
JoinType
Definition: nodes.h:288
@ JOIN_INNER
Definition: nodes.h:293
@ RTE_RELATION
Definition: parsenodes.h:1017
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1955
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1994
Selectivity function_selectivity(PlannerInfo *root, Oid funcid, List *args, Oid inputcollid, bool is_join, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:2035
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
unsigned int Oid
Definition: postgres_ext.h:31
char * s1
char * s2
@ IS_NULL
Definition: primnodes.h:1952
tree ctl root
Definition: radixtree.h:1886
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:414
Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1540
Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1698
Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
Definition: selfuncs.c:1512
Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1816
Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:2205
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Oid funcid
Definition: primnodes.h:750
List * args
Definition: primnodes.h:768
Definition: pg_list.h:54
Definition: nodes.h:129
Oid opno
Definition: primnodes.h:818
List * args
Definition: primnodes.h:836
Selectivity hibound
Definition: clausesel.c:38
Selectivity lobound
Definition: clausesel.c:37
struct RangeQueryClause * next
Definition: clausesel.c:33
Index relid
Definition: pathnodes.h:918
List * statlist
Definition: pathnodes.h:946
Cardinality tuples
Definition: pathnodes.h:949
RTEKind rtekind
Definition: pathnodes.h:922
Expr * clause
Definition: pathnodes.h:2574
Definition: primnodes.h:248