PostgreSQL Source Code  git master
clausesel.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * clausesel.c
4  * Routines to compute clause selectivities
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/path/clausesel.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "nodes/makefuncs.h"
18 #include "nodes/nodeFuncs.h"
19 #include "optimizer/clauses.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/optimizer.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/plancat.h"
24 #include "statistics/statistics.h"
25 #include "utils/fmgroids.h"
26 #include "utils/lsyscache.h"
27 #include "utils/selfuncs.h"
28 
29 /*
30  * Data structure for accumulating info about possible range-query
31  * clause pairs in clauselist_selectivity.
32  */
33 typedef struct RangeQueryClause
34 {
35  struct RangeQueryClause *next; /* next in linked list */
36  Node *var; /* The common variable of the clauses */
37  bool have_lobound; /* found a low-bound clause yet? */
38  bool have_hibound; /* found a high-bound clause yet? */
39  Selectivity lobound; /* Selectivity of a var > something clause */
40  Selectivity hibound; /* Selectivity of a var < something clause */
42 
43 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44  bool varonleft, bool isLTsel, Selectivity s2);
46  List *clauses);
48  List *clauses,
49  int varRelid,
50  JoinType jointype,
51  SpecialJoinInfo *sjinfo,
52  bool use_extended_stats);
53 
54 /****************************************************************************
55  * ROUTINES TO COMPUTE SELECTIVITIES
56  ****************************************************************************/
57 
58 /*
59  * clauselist_selectivity -
60  * Compute the selectivity of an implicitly-ANDed list of boolean
61  * expression clauses. The list can be empty, in which case 1.0
62  * must be returned. List elements may be either RestrictInfos
63  * or bare expression clauses --- the former is preferred since
64  * it allows caching of results.
65  *
66  * See clause_selectivity() for the meaning of the additional parameters.
67  *
68  * The basic approach is to apply extended statistics first, on as many
69  * clauses as possible, in order to capture cross-column dependencies etc.
70  * The remaining clauses are then estimated by taking the product of their
71  * selectivities, but that's only right if they have independent
72  * probabilities, and in reality they are often NOT independent even if they
73  * only refer to a single column. So, we want to be smarter where we can.
74  *
75  * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
76  * are recognized as possible range query components if they are restriction
77  * opclauses whose operators have scalarltsel or a related function as their
78  * restriction selectivity estimator. We pair up clauses of this form that
79  * refer to the same variable. An unpairable clause of this kind is simply
80  * multiplied into the selectivity product in the normal way. But when we
81  * find a pair, we know that the selectivities represent the relative
82  * positions of the low and high bounds within the column's range, so instead
83  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84  * losel - 1. (To visualize this, see that hisel is the fraction of the range
85  * below the high bound, while losel is the fraction above the low bound; so
86  * hisel can be interpreted directly as a 0..1 value but we need to convert
87  * losel to 1-losel before interpreting it as a value. Then the available
88  * range is 1-losel to hisel. However, this calculation double-excludes
89  * nulls, so really we need hisel + losel + null_frac - 1.)
90  *
91  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92  * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
93  * yields an impossible (negative) result.
94  *
95  * A free side-effect is that we can recognize redundant inequalities such
96  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97  *
98  * Of course this is all very dependent on the behavior of the inequality
99  * selectivity functions; perhaps some day we can generalize the approach.
100  */
103  List *clauses,
104  int varRelid,
105  JoinType jointype,
106  SpecialJoinInfo *sjinfo)
107 {
108  return clauselist_selectivity_ext(root, clauses, varRelid,
109  jointype, sjinfo, true);
110 }
111 
112 /*
113  * clauselist_selectivity_ext -
114  * Extended version of clauselist_selectivity(). If "use_extended_stats"
115  * is false, all extended statistics will be ignored, and only per-column
116  * statistics will be used.
117  */
120  List *clauses,
121  int varRelid,
122  JoinType jointype,
123  SpecialJoinInfo *sjinfo,
124  bool use_extended_stats)
125 {
126  Selectivity s1 = 1.0;
127  RelOptInfo *rel;
128  Bitmapset *estimatedclauses = NULL;
129  RangeQueryClause *rqlist = NULL;
130  ListCell *l;
131  int listidx;
132 
133  /*
134  * If there's exactly one clause, just go directly to
135  * clause_selectivity_ext(). None of what we might do below is relevant.
136  */
137  if (list_length(clauses) == 1)
138  return clause_selectivity_ext(root, (Node *) linitial(clauses),
139  varRelid, jointype, sjinfo,
140  use_extended_stats);
141 
142  /*
143  * Determine if these clauses reference a single relation. If so, and if
144  * it has extended statistics, try to apply those.
145  */
146  rel = find_single_rel_for_clauses(root, clauses);
147  if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148  {
149  /*
150  * Estimate as many clauses as possible using extended statistics.
151  *
152  * 'estimatedclauses' is populated with the 0-based list position
153  * index of clauses estimated here, and that should be ignored below.
154  */
155  s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156  jointype, sjinfo, rel,
157  &estimatedclauses, false);
158  }
159 
160  /*
161  * Apply normal selectivity estimates for remaining clauses. We'll be
162  * careful to skip any clauses which were already estimated above.
163  *
164  * Anything that doesn't look like a potential rangequery clause gets
165  * multiplied into s1 and forgotten. Anything that does gets inserted into
166  * an rqlist entry.
167  */
168  listidx = -1;
169  foreach(l, clauses)
170  {
171  Node *clause = (Node *) lfirst(l);
172  RestrictInfo *rinfo;
173  Selectivity s2;
174 
175  listidx++;
176 
177  /*
178  * Skip this clause if it's already been estimated by some other
179  * statistics above.
180  */
181  if (bms_is_member(listidx, estimatedclauses))
182  continue;
183 
184  /* Compute the selectivity of this clause in isolation */
185  s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186  use_extended_stats);
187 
188  /*
189  * Check for being passed a RestrictInfo.
190  *
191  * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192  * 0.0; just use that rather than looking for range pairs.
193  */
194  if (IsA(clause, RestrictInfo))
195  {
196  rinfo = (RestrictInfo *) clause;
197  if (rinfo->pseudoconstant)
198  {
199  s1 = s1 * s2;
200  continue;
201  }
202  clause = (Node *) rinfo->clause;
203  }
204  else
205  rinfo = NULL;
206 
207  /*
208  * See if it looks like a restriction clause with a pseudoconstant on
209  * one side. (Anything more complicated than that might not behave in
210  * the simple way we are expecting.) Most of the tests here can be
211  * done more efficiently with rinfo than without.
212  */
213  if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214  {
215  OpExpr *expr = (OpExpr *) clause;
216  bool varonleft = true;
217  bool ok;
218 
219  if (rinfo)
220  {
221  ok = (rinfo->num_base_rels == 1) &&
223  rinfo->right_relids) ||
224  (varonleft = false,
226  rinfo->left_relids)));
227  }
228  else
229  {
230  ok = (NumRelids(root, clause) == 1) &&
232  (varonleft = false,
234  }
235 
236  if (ok)
237  {
238  /*
239  * If it's not a "<"/"<="/">"/">=" operator, just merge the
240  * selectivity in generically. But if it's the right oprrest,
241  * add the clause to rqlist for later processing.
242  */
243  switch (get_oprrest(expr->opno))
244  {
245  case F_SCALARLTSEL:
246  case F_SCALARLESEL:
247  addRangeClause(&rqlist, clause,
248  varonleft, true, s2);
249  break;
250  case F_SCALARGTSEL:
251  case F_SCALARGESEL:
252  addRangeClause(&rqlist, clause,
253  varonleft, false, s2);
254  break;
255  default:
256  /* Just merge the selectivity in generically */
257  s1 = s1 * s2;
258  break;
259  }
260  continue; /* drop to loop bottom */
261  }
262  }
263 
264  /* Not the right form, so treat it generically. */
265  s1 = s1 * s2;
266  }
267 
268  /*
269  * Now scan the rangequery pair list.
270  */
271  while (rqlist != NULL)
272  {
273  RangeQueryClause *rqnext;
274 
275  if (rqlist->have_lobound && rqlist->have_hibound)
276  {
277  /* Successfully matched a pair of range clauses */
278  Selectivity s2;
279 
280  /*
281  * Exact equality to the default value probably means the
282  * selectivity function punted. This is not airtight but should
283  * be good enough.
284  */
285  if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286  rqlist->lobound == DEFAULT_INEQ_SEL)
287  {
289  }
290  else
291  {
292  s2 = rqlist->hibound + rqlist->lobound - 1.0;
293 
294  /* Adjust for double-exclusion of NULLs */
295  s2 += nulltestsel(root, IS_NULL, rqlist->var,
296  varRelid, jointype, sjinfo);
297 
298  /*
299  * A zero or slightly negative s2 should be converted into a
300  * small positive value; we probably are dealing with a very
301  * tight range and got a bogus result due to roundoff errors.
302  * However, if s2 is very negative, then we probably have
303  * default selectivity estimates on one or both sides of the
304  * range that we failed to recognize above for some reason.
305  */
306  if (s2 <= 0.0)
307  {
308  if (s2 < -0.01)
309  {
310  /*
311  * No data available --- use a default estimate that
312  * is small, but not real small.
313  */
315  }
316  else
317  {
318  /*
319  * It's just roundoff error; use a small positive
320  * value
321  */
322  s2 = 1.0e-10;
323  }
324  }
325  }
326  /* Merge in the selectivity of the pair of clauses */
327  s1 *= s2;
328  }
329  else
330  {
331  /* Only found one of a pair, merge it in generically */
332  if (rqlist->have_lobound)
333  s1 *= rqlist->lobound;
334  else
335  s1 *= rqlist->hibound;
336  }
337  /* release storage and advance */
338  rqnext = rqlist->next;
339  pfree(rqlist);
340  rqlist = rqnext;
341  }
342 
343  return s1;
344 }
345 
346 /*
347  * clauselist_selectivity_or -
348  * Compute the selectivity of an implicitly-ORed list of boolean
349  * expression clauses. The list can be empty, in which case 0.0
350  * must be returned. List elements may be either RestrictInfos
351  * or bare expression clauses --- the former is preferred since
352  * it allows caching of results.
353  *
354  * See clause_selectivity() for the meaning of the additional parameters.
355  *
356  * The basic approach is to apply extended statistics first, on as many
357  * clauses as possible, in order to capture cross-column dependencies etc.
358  * The remaining clauses are then estimated as if they were independent.
359  */
360 static Selectivity
362  List *clauses,
363  int varRelid,
364  JoinType jointype,
365  SpecialJoinInfo *sjinfo,
366  bool use_extended_stats)
367 {
368  Selectivity s1 = 0.0;
369  RelOptInfo *rel;
370  Bitmapset *estimatedclauses = NULL;
371  ListCell *lc;
372  int listidx;
373 
374  /*
375  * Determine if these clauses reference a single relation. If so, and if
376  * it has extended statistics, try to apply those.
377  */
378  rel = find_single_rel_for_clauses(root, clauses);
379  if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380  {
381  /*
382  * Estimate as many clauses as possible using extended statistics.
383  *
384  * 'estimatedclauses' is populated with the 0-based list position
385  * index of clauses estimated here, and that should be ignored below.
386  */
387  s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388  jointype, sjinfo, rel,
389  &estimatedclauses, true);
390  }
391 
392  /*
393  * Estimate the remaining clauses as if they were independent.
394  *
395  * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396  * for the probable overlap of selected tuple sets.
397  *
398  * XXX is this too conservative?
399  */
400  listidx = -1;
401  foreach(lc, clauses)
402  {
403  Selectivity s2;
404 
405  listidx++;
406 
407  /*
408  * Skip this clause if it's already been estimated by some other
409  * statistics above.
410  */
411  if (bms_is_member(listidx, estimatedclauses))
412  continue;
413 
414  s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415  jointype, sjinfo, use_extended_stats);
416 
417  s1 = s1 + s2 - s1 * s2;
418  }
419 
420  return s1;
421 }
422 
423 /*
424  * addRangeClause --- add a new range clause for clauselist_selectivity
425  *
426  * Here is where we try to match up pairs of range-query clauses
427  */
428 static void
430  bool varonleft, bool isLTsel, Selectivity s2)
431 {
432  RangeQueryClause *rqelem;
433  Node *var;
434  bool is_lobound;
435 
436  if (varonleft)
437  {
438  var = get_leftop((Expr *) clause);
439  is_lobound = !isLTsel; /* x < something is high bound */
440  }
441  else
442  {
443  var = get_rightop((Expr *) clause);
444  is_lobound = isLTsel; /* something < x is low bound */
445  }
446 
447  for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448  {
449  /*
450  * We use full equal() here because the "var" might be a function of
451  * one or more attributes of the same relation...
452  */
453  if (!equal(var, rqelem->var))
454  continue;
455  /* Found the right group to put this clause in */
456  if (is_lobound)
457  {
458  if (!rqelem->have_lobound)
459  {
460  rqelem->have_lobound = true;
461  rqelem->lobound = s2;
462  }
463  else
464  {
465 
466  /*------
467  * We have found two similar clauses, such as
468  * x < y AND x <= z.
469  * Keep only the more restrictive one.
470  *------
471  */
472  if (rqelem->lobound > s2)
473  rqelem->lobound = s2;
474  }
475  }
476  else
477  {
478  if (!rqelem->have_hibound)
479  {
480  rqelem->have_hibound = true;
481  rqelem->hibound = s2;
482  }
483  else
484  {
485 
486  /*------
487  * We have found two similar clauses, such as
488  * x > y AND x >= z.
489  * Keep only the more restrictive one.
490  *------
491  */
492  if (rqelem->hibound > s2)
493  rqelem->hibound = s2;
494  }
495  }
496  return;
497  }
498 
499  /* No matching var found, so make a new clause-pair data structure */
500  rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501  rqelem->var = var;
502  if (is_lobound)
503  {
504  rqelem->have_lobound = true;
505  rqelem->have_hibound = false;
506  rqelem->lobound = s2;
507  }
508  else
509  {
510  rqelem->have_lobound = false;
511  rqelem->have_hibound = true;
512  rqelem->hibound = s2;
513  }
514  rqelem->next = *rqlist;
515  *rqlist = rqelem;
516 }
517 
518 /*
519  * find_single_rel_for_clauses
520  * Examine each clause in 'clauses' and determine if all clauses
521  * reference only a single relation. If so return that relation,
522  * otherwise return NULL.
523  */
524 static RelOptInfo *
526 {
527  int lastrelid = 0;
528  ListCell *l;
529 
530  foreach(l, clauses)
531  {
532  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533  int relid;
534 
535  /*
536  * If we have a list of bare clauses rather than RestrictInfos, we
537  * could pull out their relids the hard way with pull_varnos().
538  * However, currently the extended-stats machinery won't do anything
539  * with non-RestrictInfo clauses anyway, so there's no point in
540  * spending extra cycles; just fail if that's what we have.
541  *
542  * An exception to that rule is if we have a bare BoolExpr AND clause.
543  * We treat this as a special case because the restrictinfo machinery
544  * doesn't build RestrictInfos on top of AND clauses.
545  */
546  if (is_andclause(rinfo))
547  {
548  RelOptInfo *rel;
549 
550  rel = find_single_rel_for_clauses(root,
551  ((BoolExpr *) rinfo)->args);
552 
553  if (rel == NULL)
554  return NULL;
555  if (lastrelid == 0)
556  lastrelid = rel->relid;
557  else if (rel->relid != lastrelid)
558  return NULL;
559 
560  continue;
561  }
562 
563  if (!IsA(rinfo, RestrictInfo))
564  return NULL;
565 
566  if (bms_is_empty(rinfo->clause_relids))
567  continue; /* we can ignore variable-free clauses */
568  if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569  return NULL; /* multiple relations in this clause */
570  if (lastrelid == 0)
571  lastrelid = relid; /* first clause referencing a relation */
572  else if (relid != lastrelid)
573  return NULL; /* relation not same as last one */
574  }
575 
576  if (lastrelid != 0)
577  return find_base_rel(root, lastrelid);
578 
579  return NULL; /* no clauses */
580 }
581 
582 /*
583  * treat_as_join_clause -
584  * Decide whether an operator clause is to be handled by the
585  * restriction or join estimator. Subroutine for clause_selectivity().
586  */
587 static inline bool
589  int varRelid, SpecialJoinInfo *sjinfo)
590 {
591  if (varRelid != 0)
592  {
593  /*
594  * Caller is forcing restriction mode (eg, because we are examining an
595  * inner indexscan qual).
596  */
597  return false;
598  }
599  else if (sjinfo == NULL)
600  {
601  /*
602  * It must be a restriction clause, since it's being evaluated at a
603  * scan node.
604  */
605  return false;
606  }
607  else
608  {
609  /*
610  * Otherwise, it's a join if there's more than one base relation used.
611  * We can optimize this calculation if an rinfo was passed.
612  *
613  * XXX Since we know the clause is being evaluated at a join, the
614  * only way it could be single-relation is if it was delayed by outer
615  * joins. We intentionally count only baserels here, not OJs that
616  * might be present in rinfo->clause_relids, so that we direct such
617  * cases to the restriction qual estimators not join estimators.
618  * Eventually some notice should be taken of the possibility of
619  * injected nulls, but we'll likely want to do that in the restriction
620  * estimators rather than starting to treat such cases as join quals.
621  */
622  if (rinfo)
623  return (rinfo->num_base_rels > 1);
624  else
625  return (NumRelids(root, clause) > 1);
626  }
627 }
628 
629 
630 /*
631  * clause_selectivity -
632  * Compute the selectivity of a general boolean expression clause.
633  *
634  * The clause can be either a RestrictInfo or a plain expression. If it's
635  * a RestrictInfo, we try to cache the selectivity for possible re-use,
636  * so passing RestrictInfos is preferred.
637  *
638  * varRelid is either 0 or a rangetable index.
639  *
640  * When varRelid is not 0, only variables belonging to that relation are
641  * considered in computing selectivity; other vars are treated as constants
642  * of unknown values. This is appropriate for estimating the selectivity of
643  * a join clause that is being used as a restriction clause in a scan of a
644  * nestloop join's inner relation --- varRelid should then be the ID of the
645  * inner relation.
646  *
647  * When varRelid is 0, all variables are treated as variables. This
648  * is appropriate for ordinary join clauses and restriction clauses.
649  *
650  * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
651  * if the clause isn't a join clause.
652  *
653  * sjinfo is NULL for a non-join clause, otherwise it provides additional
654  * context information about the join being performed. There are some
655  * special cases:
656  * 1. For a special (not INNER) join, sjinfo is always a member of
657  * root->join_info_list.
658  * 2. For an INNER join, sjinfo is just a transient struct, and only the
659  * relids and jointype fields in it can be trusted.
660  * It is possible for jointype to be different from sjinfo->jointype.
661  * This indicates we are considering a variant join: either with
662  * the LHS and RHS switched, or with one input unique-ified.
663  *
664  * Note: when passing nonzero varRelid, it's normally appropriate to set
665  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
666  * join clause; because we aren't treating it as a join clause.
667  */
670  Node *clause,
671  int varRelid,
672  JoinType jointype,
673  SpecialJoinInfo *sjinfo)
674 {
675  return clause_selectivity_ext(root, clause, varRelid,
676  jointype, sjinfo, true);
677 }
678 
679 /*
680  * clause_selectivity_ext -
681  * Extended version of clause_selectivity(). If "use_extended_stats" is
682  * false, all extended statistics will be ignored, and only per-column
683  * statistics will be used.
684  */
687  Node *clause,
688  int varRelid,
689  JoinType jointype,
690  SpecialJoinInfo *sjinfo,
691  bool use_extended_stats)
692 {
693  Selectivity s1 = 0.5; /* default for any unhandled clause type */
694  RestrictInfo *rinfo = NULL;
695  bool cacheable = false;
696 
697  if (clause == NULL) /* can this still happen? */
698  return s1;
699 
700  if (IsA(clause, RestrictInfo))
701  {
702  rinfo = (RestrictInfo *) clause;
703 
704  /*
705  * If the clause is marked pseudoconstant, then it will be used as a
706  * gating qual and should not affect selectivity estimates; hence
707  * return 1.0. The only exception is that a constant FALSE may be
708  * taken as having selectivity 0.0, since it will surely mean no rows
709  * out of the plan. This case is simple enough that we need not
710  * bother caching the result.
711  */
712  if (rinfo->pseudoconstant)
713  {
714  if (!IsA(rinfo->clause, Const))
715  return (Selectivity) 1.0;
716  }
717 
718  /*
719  * If possible, cache the result of the selectivity calculation for
720  * the clause. We can cache if varRelid is zero or the clause
721  * contains only vars of that relid --- otherwise varRelid will affect
722  * the result, so mustn't cache. Outer join quals might be examined
723  * with either their join's actual jointype or JOIN_INNER, so we need
724  * two cache variables to remember both cases. Note: we assume the
725  * result won't change if we are switching the input relations or
726  * considering a unique-ified case, so we only need one cache variable
727  * for all non-JOIN_INNER cases.
728  */
729  if (varRelid == 0 ||
730  rinfo->num_base_rels == 0 ||
731  (rinfo->num_base_rels == 1 &&
732  bms_is_member(varRelid, rinfo->clause_relids)))
733  {
734  /* Cacheable --- do we already have the result? */
735  if (jointype == JOIN_INNER)
736  {
737  if (rinfo->norm_selec >= 0)
738  return rinfo->norm_selec;
739  }
740  else
741  {
742  if (rinfo->outer_selec >= 0)
743  return rinfo->outer_selec;
744  }
745  cacheable = true;
746  }
747 
748  /*
749  * Proceed with examination of contained clause. If the clause is an
750  * OR-clause, we want to look at the variant with sub-RestrictInfos,
751  * so that per-subclause selectivities can be cached.
752  */
753  if (rinfo->orclause)
754  clause = (Node *) rinfo->orclause;
755  else
756  clause = (Node *) rinfo->clause;
757  }
758 
759  if (IsA(clause, Var))
760  {
761  Var *var = (Var *) clause;
762 
763  /*
764  * We probably shouldn't ever see an uplevel Var here, but if we do,
765  * return the default selectivity...
766  */
767  if (var->varlevelsup == 0 &&
768  (varRelid == 0 || varRelid == (int) var->varno))
769  {
770  /* Use the restriction selectivity function for a bool Var */
771  s1 = boolvarsel(root, (Node *) var, varRelid);
772  }
773  }
774  else if (IsA(clause, Const))
775  {
776  /* bool constant is pretty easy... */
777  Const *con = (Const *) clause;
778 
779  s1 = con->constisnull ? 0.0 :
780  DatumGetBool(con->constvalue) ? 1.0 : 0.0;
781  }
782  else if (IsA(clause, Param))
783  {
784  /* see if we can replace the Param */
785  Node *subst = estimate_expression_value(root, clause);
786 
787  if (IsA(subst, Const))
788  {
789  /* bool constant is pretty easy... */
790  Const *con = (Const *) subst;
791 
792  s1 = con->constisnull ? 0.0 :
793  DatumGetBool(con->constvalue) ? 1.0 : 0.0;
794  }
795  else
796  {
797  /* XXX any way to do better than default? */
798  }
799  }
800  else if (is_notclause(clause))
801  {
802  /* inverse of the selectivity of the underlying clause */
803  s1 = 1.0 - clause_selectivity_ext(root,
804  (Node *) get_notclausearg((Expr *) clause),
805  varRelid,
806  jointype,
807  sjinfo,
808  use_extended_stats);
809  }
810  else if (is_andclause(clause))
811  {
812  /* share code with clauselist_selectivity() */
814  ((BoolExpr *) clause)->args,
815  varRelid,
816  jointype,
817  sjinfo,
818  use_extended_stats);
819  }
820  else if (is_orclause(clause))
821  {
822  /*
823  * Almost the same thing as clauselist_selectivity, but with the
824  * clauses connected by OR.
825  */
827  ((BoolExpr *) clause)->args,
828  varRelid,
829  jointype,
830  sjinfo,
831  use_extended_stats);
832  }
833  else if (is_opclause(clause) || IsA(clause, DistinctExpr))
834  {
835  OpExpr *opclause = (OpExpr *) clause;
836  Oid opno = opclause->opno;
837 
838  if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
839  {
840  /* Estimate selectivity for a join clause. */
841  s1 = join_selectivity(root, opno,
842  opclause->args,
843  opclause->inputcollid,
844  jointype,
845  sjinfo);
846  }
847  else
848  {
849  /* Estimate selectivity for a restriction clause. */
850  s1 = restriction_selectivity(root, opno,
851  opclause->args,
852  opclause->inputcollid,
853  varRelid);
854  }
855 
856  /*
857  * DistinctExpr has the same representation as OpExpr, but the
858  * contained operator is "=" not "<>", so we must negate the result.
859  * This estimation method doesn't give the right behavior for nulls,
860  * but it's better than doing nothing.
861  */
862  if (IsA(clause, DistinctExpr))
863  s1 = 1.0 - s1;
864  }
865  else if (is_funcclause(clause))
866  {
867  FuncExpr *funcclause = (FuncExpr *) clause;
868 
869  /* Try to get an estimate from the support function, if any */
870  s1 = function_selectivity(root,
871  funcclause->funcid,
872  funcclause->args,
873  funcclause->inputcollid,
874  treat_as_join_clause(root, clause, rinfo,
875  varRelid, sjinfo),
876  varRelid,
877  jointype,
878  sjinfo);
879  }
880  else if (IsA(clause, ScalarArrayOpExpr))
881  {
882  /* Use node specific selectivity calculation function */
883  s1 = scalararraysel(root,
884  (ScalarArrayOpExpr *) clause,
885  treat_as_join_clause(root, clause, rinfo,
886  varRelid, sjinfo),
887  varRelid,
888  jointype,
889  sjinfo);
890  }
891  else if (IsA(clause, RowCompareExpr))
892  {
893  /* Use node specific selectivity calculation function */
894  s1 = rowcomparesel(root,
895  (RowCompareExpr *) clause,
896  varRelid,
897  jointype,
898  sjinfo);
899  }
900  else if (IsA(clause, NullTest))
901  {
902  /* Use node specific selectivity calculation function */
903  s1 = nulltestsel(root,
904  ((NullTest *) clause)->nulltesttype,
905  (Node *) ((NullTest *) clause)->arg,
906  varRelid,
907  jointype,
908  sjinfo);
909  }
910  else if (IsA(clause, BooleanTest))
911  {
912  /* Use node specific selectivity calculation function */
913  s1 = booltestsel(root,
914  ((BooleanTest *) clause)->booltesttype,
915  (Node *) ((BooleanTest *) clause)->arg,
916  varRelid,
917  jointype,
918  sjinfo);
919  }
920  else if (IsA(clause, CurrentOfExpr))
921  {
922  /* CURRENT OF selects at most one row of its table */
923  CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
924  RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
925 
926  if (crel->tuples > 0)
927  s1 = 1.0 / crel->tuples;
928  }
929  else if (IsA(clause, RelabelType))
930  {
931  /* Not sure this case is needed, but it can't hurt */
932  s1 = clause_selectivity_ext(root,
933  (Node *) ((RelabelType *) clause)->arg,
934  varRelid,
935  jointype,
936  sjinfo,
937  use_extended_stats);
938  }
939  else if (IsA(clause, CoerceToDomain))
940  {
941  /* Not sure this case is needed, but it can't hurt */
942  s1 = clause_selectivity_ext(root,
943  (Node *) ((CoerceToDomain *) clause)->arg,
944  varRelid,
945  jointype,
946  sjinfo,
947  use_extended_stats);
948  }
949  else
950  {
951  /*
952  * For anything else, see if we can consider it as a boolean variable.
953  * This only works if it's an immutable expression in Vars of a single
954  * relation; but there's no point in us checking that here because
955  * boolvarsel() will do it internally, and return a suitable default
956  * selectivity if not.
957  */
958  s1 = boolvarsel(root, clause, varRelid);
959  }
960 
961  /* Cache the result if possible */
962  if (cacheable)
963  {
964  if (jointype == JOIN_INNER)
965  rinfo->norm_selec = s1;
966  else
967  rinfo->outer_selec = s1;
968  }
969 
970 #ifdef SELECTIVITY_DEBUG
971  elog(DEBUG4, "clause_selectivity: s1 %f", s1);
972 #endif /* SELECTIVITY_DEBUG */
973 
974  return s1;
975 }
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:444
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:634
#define bms_is_empty(a)
Definition: bitmapset.h:105
int NumRelids(PlannerInfo *root, Node *clause)
Definition: clauses.c:2047
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2005
bool is_pseudo_constant_clause_relids(Node *clause, Relids relids)
Definition: clauses.c:2025
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2312
static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2)
Definition: clausesel.c:429
static RelOptInfo * find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
Definition: clausesel.c:525
Selectivity clause_selectivity_ext(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:686
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:102
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:119
struct RangeQueryClause RangeQueryClause
static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:588
static Selectivity clauselist_selectivity_or(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:361
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:669
#define DEBUG4
Definition: elog.h:27
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Selectivity statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses, bool is_or)
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1539
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc(Size size)
Definition: mcxt.c:1226
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:105
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:132
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:114
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:74
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:93
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:67
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:123
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:81
#define IsA(nodeptr, _type_)
Definition: nodes.h:179
double Selectivity
Definition: nodes.h:261
JoinType
Definition: nodes.h:299
@ JOIN_INNER
Definition: nodes.h:304
@ RTE_RELATION
Definition: parsenodes.h:1014
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1899
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1938
Selectivity function_selectivity(PlannerInfo *root, Oid funcid, List *args, Oid inputcollid, bool is_join, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1979
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
unsigned int Oid
Definition: postgres_ext.h:31
char * s1
char * s2
@ IS_NULL
Definition: primnodes.h:1677
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:405
Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1539
Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1697
Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
Definition: selfuncs.c:1511
Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1815
Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:2172
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Oid funcid
Definition: primnodes.h:677
List * args
Definition: primnodes.h:695
Definition: pg_list.h:54
Definition: nodes.h:129
Oid opno
Definition: primnodes.h:745
List * args
Definition: primnodes.h:763
Selectivity hibound
Definition: clausesel.c:40
Selectivity lobound
Definition: clausesel.c:39
struct RangeQueryClause * next
Definition: clausesel.c:35
Index relid
Definition: pathnodes.h:903
List * statlist
Definition: pathnodes.h:925
Cardinality tuples
Definition: pathnodes.h:928
RTEKind rtekind
Definition: pathnodes.h:907
Expr * clause
Definition: pathnodes.h:2516
Definition: primnodes.h:226