PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
clausesel.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * clausesel.c
4 * Routines to compute clause selectivities
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/clausesel.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "nodes/nodeFuncs.h"
18#include "optimizer/clauses.h"
19#include "optimizer/optimizer.h"
20#include "optimizer/pathnode.h"
21#include "optimizer/plancat.h"
23#include "utils/fmgroids.h"
24#include "utils/lsyscache.h"
25#include "utils/selfuncs.h"
26
27/*
28 * Data structure for accumulating info about possible range-query
29 * clause pairs in clauselist_selectivity.
30 */
31typedef struct RangeQueryClause
32{
33 struct RangeQueryClause *next; /* next in linked list */
34 Node *var; /* The common variable of the clauses */
35 bool have_lobound; /* found a low-bound clause yet? */
36 bool have_hibound; /* found a high-bound clause yet? */
37 Selectivity lobound; /* Selectivity of a var > something clause */
38 Selectivity hibound; /* Selectivity of a var < something clause */
40
41static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
42 bool varonleft, bool isLTsel, Selectivity s2);
44 List *clauses);
46 List *clauses,
47 int varRelid,
48 JoinType jointype,
49 SpecialJoinInfo *sjinfo,
50 bool use_extended_stats);
51
52/****************************************************************************
53 * ROUTINES TO COMPUTE SELECTIVITIES
54 ****************************************************************************/
55
56/*
57 * clauselist_selectivity -
58 * Compute the selectivity of an implicitly-ANDed list of boolean
59 * expression clauses. The list can be empty, in which case 1.0
60 * must be returned. List elements may be either RestrictInfos
61 * or bare expression clauses --- the former is preferred since
62 * it allows caching of results.
63 *
64 * See clause_selectivity() for the meaning of the additional parameters.
65 *
66 * The basic approach is to apply extended statistics first, on as many
67 * clauses as possible, in order to capture cross-column dependencies etc.
68 * The remaining clauses are then estimated by taking the product of their
69 * selectivities, but that's only right if they have independent
70 * probabilities, and in reality they are often NOT independent even if they
71 * only refer to a single column. So, we want to be smarter where we can.
72 *
73 * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
74 * are recognized as possible range query components if they are restriction
75 * opclauses whose operators have scalarltsel or a related function as their
76 * restriction selectivity estimator. We pair up clauses of this form that
77 * refer to the same variable. An unpairable clause of this kind is simply
78 * multiplied into the selectivity product in the normal way. But when we
79 * find a pair, we know that the selectivities represent the relative
80 * positions of the low and high bounds within the column's range, so instead
81 * of figuring the selectivity as hisel * losel, we can figure it as hisel +
82 * losel - 1. (To visualize this, see that hisel is the fraction of the range
83 * below the high bound, while losel is the fraction above the low bound; so
84 * hisel can be interpreted directly as a 0..1 value but we need to convert
85 * losel to 1-losel before interpreting it as a value. Then the available
86 * range is 1-losel to hisel. However, this calculation double-excludes
87 * nulls, so really we need hisel + losel + null_frac - 1.)
88 *
89 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
90 * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
91 * yields an impossible (negative) result.
92 *
93 * A free side-effect is that we can recognize redundant inequalities such
94 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
95 *
96 * Of course this is all very dependent on the behavior of the inequality
97 * selectivity functions; perhaps some day we can generalize the approach.
98 */
101 List *clauses,
102 int varRelid,
103 JoinType jointype,
104 SpecialJoinInfo *sjinfo)
105{
106 return clauselist_selectivity_ext(root, clauses, varRelid,
107 jointype, sjinfo, true);
108}
109
110/*
111 * clauselist_selectivity_ext -
112 * Extended version of clauselist_selectivity(). If "use_extended_stats"
113 * is false, all extended statistics will be ignored, and only per-column
114 * statistics will be used.
115 */
118 List *clauses,
119 int varRelid,
120 JoinType jointype,
121 SpecialJoinInfo *sjinfo,
122 bool use_extended_stats)
123{
124 Selectivity s1 = 1.0;
125 RelOptInfo *rel;
126 Bitmapset *estimatedclauses = NULL;
127 RangeQueryClause *rqlist = NULL;
128 ListCell *l;
129 int listidx;
130
131 /*
132 * If there's exactly one clause, just go directly to
133 * clause_selectivity_ext(). None of what we might do below is relevant.
134 */
135 if (list_length(clauses) == 1)
136 return clause_selectivity_ext(root, (Node *) linitial(clauses),
137 varRelid, jointype, sjinfo,
138 use_extended_stats);
139
140 /*
141 * Determine if these clauses reference a single relation. If so, and if
142 * it has extended statistics, try to apply those.
143 */
144 rel = find_single_rel_for_clauses(root, clauses);
145 if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
146 {
147 /*
148 * Estimate as many clauses as possible using extended statistics.
149 *
150 * 'estimatedclauses' is populated with the 0-based list position
151 * index of clauses estimated here, and that should be ignored below.
152 */
153 s1 = statext_clauselist_selectivity(root, clauses, varRelid,
154 jointype, sjinfo, rel,
155 &estimatedclauses, false);
156 }
157
158 /*
159 * Apply normal selectivity estimates for remaining clauses. We'll be
160 * careful to skip any clauses which were already estimated above.
161 *
162 * Anything that doesn't look like a potential rangequery clause gets
163 * multiplied into s1 and forgotten. Anything that does gets inserted into
164 * an rqlist entry.
165 */
166 listidx = -1;
167 foreach(l, clauses)
168 {
169 Node *clause = (Node *) lfirst(l);
170 RestrictInfo *rinfo;
172
173 listidx++;
174
175 /*
176 * Skip this clause if it's already been estimated by some other
177 * statistics above.
178 */
179 if (bms_is_member(listidx, estimatedclauses))
180 continue;
181
182 /* Compute the selectivity of this clause in isolation */
183 s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
184 use_extended_stats);
185
186 /*
187 * Check for being passed a RestrictInfo.
188 *
189 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
190 * 0.0; just use that rather than looking for range pairs.
191 */
192 if (IsA(clause, RestrictInfo))
193 {
194 rinfo = (RestrictInfo *) clause;
195 if (rinfo->pseudoconstant)
196 {
197 s1 = s1 * s2;
198 continue;
199 }
200 clause = (Node *) rinfo->clause;
201 }
202 else
203 rinfo = NULL;
204
205 /*
206 * See if it looks like a restriction clause with a pseudoconstant on
207 * one side. (Anything more complicated than that might not behave in
208 * the simple way we are expecting.) Most of the tests here can be
209 * done more efficiently with rinfo than without.
210 */
211 if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
212 {
213 OpExpr *expr = (OpExpr *) clause;
214 bool varonleft = true;
215 bool ok;
216
217 if (rinfo)
218 {
219 ok = (rinfo->num_base_rels == 1) &&
221 rinfo->right_relids) ||
222 (varonleft = false,
224 rinfo->left_relids)));
225 }
226 else
227 {
228 ok = (NumRelids(root, clause) == 1) &&
230 (varonleft = false,
232 }
233
234 if (ok)
235 {
236 /*
237 * If it's not a "<"/"<="/">"/">=" operator, just merge the
238 * selectivity in generically. But if it's the right oprrest,
239 * add the clause to rqlist for later processing.
240 */
241 switch (get_oprrest(expr->opno))
242 {
243 case F_SCALARLTSEL:
244 case F_SCALARLESEL:
245 addRangeClause(&rqlist, clause,
246 varonleft, true, s2);
247 break;
248 case F_SCALARGTSEL:
249 case F_SCALARGESEL:
250 addRangeClause(&rqlist, clause,
251 varonleft, false, s2);
252 break;
253 default:
254 /* Just merge the selectivity in generically */
255 s1 = s1 * s2;
256 break;
257 }
258 continue; /* drop to loop bottom */
259 }
260 }
261
262 /* Not the right form, so treat it generically. */
263 s1 = s1 * s2;
264 }
265
266 /*
267 * Now scan the rangequery pair list.
268 */
269 while (rqlist != NULL)
270 {
271 RangeQueryClause *rqnext;
272
273 if (rqlist->have_lobound && rqlist->have_hibound)
274 {
275 /* Successfully matched a pair of range clauses */
277
278 /*
279 * Exact equality to the default value probably means the
280 * selectivity function punted. This is not airtight but should
281 * be good enough.
282 */
283 if (rqlist->hibound == DEFAULT_INEQ_SEL ||
284 rqlist->lobound == DEFAULT_INEQ_SEL)
285 {
287 }
288 else
289 {
290 s2 = rqlist->hibound + rqlist->lobound - 1.0;
291
292 /* Adjust for double-exclusion of NULLs */
293 s2 += nulltestsel(root, IS_NULL, rqlist->var,
294 varRelid, jointype, sjinfo);
295
296 /*
297 * A zero or slightly negative s2 should be converted into a
298 * small positive value; we probably are dealing with a very
299 * tight range and got a bogus result due to roundoff errors.
300 * However, if s2 is very negative, then we probably have
301 * default selectivity estimates on one or both sides of the
302 * range that we failed to recognize above for some reason.
303 */
304 if (s2 <= 0.0)
305 {
306 if (s2 < -0.01)
307 {
308 /*
309 * No data available --- use a default estimate that
310 * is small, but not real small.
311 */
313 }
314 else
315 {
316 /*
317 * It's just roundoff error; use a small positive
318 * value
319 */
320 s2 = 1.0e-10;
321 }
322 }
323 }
324 /* Merge in the selectivity of the pair of clauses */
325 s1 *= s2;
326 }
327 else
328 {
329 /* Only found one of a pair, merge it in generically */
330 if (rqlist->have_lobound)
331 s1 *= rqlist->lobound;
332 else
333 s1 *= rqlist->hibound;
334 }
335 /* release storage and advance */
336 rqnext = rqlist->next;
337 pfree(rqlist);
338 rqlist = rqnext;
339 }
340
341 return s1;
342}
343
344/*
345 * clauselist_selectivity_or -
346 * Compute the selectivity of an implicitly-ORed list of boolean
347 * expression clauses. The list can be empty, in which case 0.0
348 * must be returned. List elements may be either RestrictInfos
349 * or bare expression clauses --- the former is preferred since
350 * it allows caching of results.
351 *
352 * See clause_selectivity() for the meaning of the additional parameters.
353 *
354 * The basic approach is to apply extended statistics first, on as many
355 * clauses as possible, in order to capture cross-column dependencies etc.
356 * The remaining clauses are then estimated as if they were independent.
357 */
358static Selectivity
360 List *clauses,
361 int varRelid,
362 JoinType jointype,
363 SpecialJoinInfo *sjinfo,
364 bool use_extended_stats)
365{
366 Selectivity s1 = 0.0;
367 RelOptInfo *rel;
368 Bitmapset *estimatedclauses = NULL;
369 ListCell *lc;
370 int listidx;
371
372 /*
373 * Determine if these clauses reference a single relation. If so, and if
374 * it has extended statistics, try to apply those.
375 */
376 rel = find_single_rel_for_clauses(root, clauses);
377 if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
378 {
379 /*
380 * Estimate as many clauses as possible using extended statistics.
381 *
382 * 'estimatedclauses' is populated with the 0-based list position
383 * index of clauses estimated here, and that should be ignored below.
384 */
385 s1 = statext_clauselist_selectivity(root, clauses, varRelid,
386 jointype, sjinfo, rel,
387 &estimatedclauses, true);
388 }
389
390 /*
391 * Estimate the remaining clauses as if they were independent.
392 *
393 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
394 * for the probable overlap of selected tuple sets.
395 *
396 * XXX is this too conservative?
397 */
398 listidx = -1;
399 foreach(lc, clauses)
400 {
402
403 listidx++;
404
405 /*
406 * Skip this clause if it's already been estimated by some other
407 * statistics above.
408 */
409 if (bms_is_member(listidx, estimatedclauses))
410 continue;
411
412 s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
413 jointype, sjinfo, use_extended_stats);
414
415 s1 = s1 + s2 - s1 * s2;
416 }
417
418 return s1;
419}
420
421/*
422 * addRangeClause --- add a new range clause for clauselist_selectivity
423 *
424 * Here is where we try to match up pairs of range-query clauses
425 */
426static void
428 bool varonleft, bool isLTsel, Selectivity s2)
429{
430 RangeQueryClause *rqelem;
431 Node *var;
432 bool is_lobound;
433
434 if (varonleft)
435 {
436 var = get_leftop((Expr *) clause);
437 is_lobound = !isLTsel; /* x < something is high bound */
438 }
439 else
440 {
441 var = get_rightop((Expr *) clause);
442 is_lobound = isLTsel; /* something < x is low bound */
443 }
444
445 for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
446 {
447 /*
448 * We use full equal() here because the "var" might be a function of
449 * one or more attributes of the same relation...
450 */
451 if (!equal(var, rqelem->var))
452 continue;
453 /* Found the right group to put this clause in */
454 if (is_lobound)
455 {
456 if (!rqelem->have_lobound)
457 {
458 rqelem->have_lobound = true;
459 rqelem->lobound = s2;
460 }
461 else
462 {
463
464 /*------
465 * We have found two similar clauses, such as
466 * x < y AND x <= z.
467 * Keep only the more restrictive one.
468 *------
469 */
470 if (rqelem->lobound > s2)
471 rqelem->lobound = s2;
472 }
473 }
474 else
475 {
476 if (!rqelem->have_hibound)
477 {
478 rqelem->have_hibound = true;
479 rqelem->hibound = s2;
480 }
481 else
482 {
483
484 /*------
485 * We have found two similar clauses, such as
486 * x > y AND x >= z.
487 * Keep only the more restrictive one.
488 *------
489 */
490 if (rqelem->hibound > s2)
491 rqelem->hibound = s2;
492 }
493 }
494 return;
495 }
496
497 /* No matching var found, so make a new clause-pair data structure */
498 rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
499 rqelem->var = var;
500 if (is_lobound)
501 {
502 rqelem->have_lobound = true;
503 rqelem->have_hibound = false;
504 rqelem->lobound = s2;
505 }
506 else
507 {
508 rqelem->have_lobound = false;
509 rqelem->have_hibound = true;
510 rqelem->hibound = s2;
511 }
512 rqelem->next = *rqlist;
513 *rqlist = rqelem;
514}
515
516/*
517 * find_single_rel_for_clauses
518 * Examine each clause in 'clauses' and determine if all clauses
519 * reference only a single relation. If so return that relation,
520 * otherwise return NULL.
521 */
522static RelOptInfo *
524{
525 int lastrelid = 0;
526 ListCell *l;
527
528 foreach(l, clauses)
529 {
530 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
531 int relid;
532
533 /*
534 * If we have a list of bare clauses rather than RestrictInfos, we
535 * could pull out their relids the hard way with pull_varnos().
536 * However, currently the extended-stats machinery won't do anything
537 * with non-RestrictInfo clauses anyway, so there's no point in
538 * spending extra cycles; just fail if that's what we have.
539 *
540 * An exception to that rule is if we have a bare BoolExpr AND clause.
541 * We treat this as a special case because the restrictinfo machinery
542 * doesn't build RestrictInfos on top of AND clauses.
543 */
544 if (is_andclause(rinfo))
545 {
546 RelOptInfo *rel;
547
549 ((BoolExpr *) rinfo)->args);
550
551 if (rel == NULL)
552 return NULL;
553 if (lastrelid == 0)
554 lastrelid = rel->relid;
555 else if (rel->relid != lastrelid)
556 return NULL;
557
558 continue;
559 }
560
561 if (!IsA(rinfo, RestrictInfo))
562 return NULL;
563
564 if (bms_is_empty(rinfo->clause_relids))
565 continue; /* we can ignore variable-free clauses */
566 if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
567 return NULL; /* multiple relations in this clause */
568 if (lastrelid == 0)
569 lastrelid = relid; /* first clause referencing a relation */
570 else if (relid != lastrelid)
571 return NULL; /* relation not same as last one */
572 }
573
574 if (lastrelid != 0)
575 return find_base_rel(root, lastrelid);
576
577 return NULL; /* no clauses */
578}
579
580/*
581 * treat_as_join_clause -
582 * Decide whether an operator clause is to be handled by the
583 * restriction or join estimator. Subroutine for clause_selectivity().
584 */
585static inline bool
587 int varRelid, SpecialJoinInfo *sjinfo)
588{
589 if (varRelid != 0)
590 {
591 /*
592 * Caller is forcing restriction mode (eg, because we are examining an
593 * inner indexscan qual).
594 */
595 return false;
596 }
597 else if (sjinfo == NULL)
598 {
599 /*
600 * It must be a restriction clause, since it's being evaluated at a
601 * scan node.
602 */
603 return false;
604 }
605 else
606 {
607 /*
608 * Otherwise, it's a join if there's more than one base relation used.
609 * We can optimize this calculation if an rinfo was passed.
610 *
611 * XXX Since we know the clause is being evaluated at a join, the
612 * only way it could be single-relation is if it was delayed by outer
613 * joins. We intentionally count only baserels here, not OJs that
614 * might be present in rinfo->clause_relids, so that we direct such
615 * cases to the restriction qual estimators not join estimators.
616 * Eventually some notice should be taken of the possibility of
617 * injected nulls, but we'll likely want to do that in the restriction
618 * estimators rather than starting to treat such cases as join quals.
619 */
620 if (rinfo)
621 return (rinfo->num_base_rels > 1);
622 else
623 return (NumRelids(root, clause) > 1);
624 }
625}
626
627
628/*
629 * clause_selectivity -
630 * Compute the selectivity of a general boolean expression clause.
631 *
632 * The clause can be either a RestrictInfo or a plain expression. If it's
633 * a RestrictInfo, we try to cache the selectivity for possible re-use,
634 * so passing RestrictInfos is preferred.
635 *
636 * varRelid is either 0 or a rangetable index.
637 *
638 * When varRelid is not 0, only variables belonging to that relation are
639 * considered in computing selectivity; other vars are treated as constants
640 * of unknown values. This is appropriate for estimating the selectivity of
641 * a join clause that is being used as a restriction clause in a scan of a
642 * nestloop join's inner relation --- varRelid should then be the ID of the
643 * inner relation.
644 *
645 * When varRelid is 0, all variables are treated as variables. This
646 * is appropriate for ordinary join clauses and restriction clauses.
647 *
648 * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
649 * if the clause isn't a join clause.
650 *
651 * sjinfo is NULL for a non-join clause, otherwise it provides additional
652 * context information about the join being performed. There are some
653 * special cases:
654 * 1. For a special (not INNER) join, sjinfo is always a member of
655 * root->join_info_list.
656 * 2. For an INNER join, sjinfo is just a transient struct, and only the
657 * relids and jointype fields in it can be trusted.
658 * It is possible for jointype to be different from sjinfo->jointype.
659 * This indicates we are considering a variant join: either with
660 * the LHS and RHS switched, or with one input unique-ified.
661 *
662 * Note: when passing nonzero varRelid, it's normally appropriate to set
663 * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
664 * join clause; because we aren't treating it as a join clause.
665 */
668 Node *clause,
669 int varRelid,
670 JoinType jointype,
671 SpecialJoinInfo *sjinfo)
672{
673 return clause_selectivity_ext(root, clause, varRelid,
674 jointype, sjinfo, true);
675}
676
677/*
678 * clause_selectivity_ext -
679 * Extended version of clause_selectivity(). If "use_extended_stats" is
680 * false, all extended statistics will be ignored, and only per-column
681 * statistics will be used.
682 */
685 Node *clause,
686 int varRelid,
687 JoinType jointype,
688 SpecialJoinInfo *sjinfo,
689 bool use_extended_stats)
690{
691 Selectivity s1 = 0.5; /* default for any unhandled clause type */
692 RestrictInfo *rinfo = NULL;
693 bool cacheable = false;
694
695 if (clause == NULL) /* can this still happen? */
696 return s1;
697
698 if (IsA(clause, RestrictInfo))
699 {
700 rinfo = (RestrictInfo *) clause;
701
702 /*
703 * If the clause is marked pseudoconstant, then it will be used as a
704 * gating qual and should not affect selectivity estimates; hence
705 * return 1.0. The only exception is that a constant FALSE may be
706 * taken as having selectivity 0.0, since it will surely mean no rows
707 * out of the plan. This case is simple enough that we need not
708 * bother caching the result.
709 */
710 if (rinfo->pseudoconstant)
711 {
712 if (!IsA(rinfo->clause, Const))
713 return (Selectivity) 1.0;
714 }
715
716 /*
717 * If possible, cache the result of the selectivity calculation for
718 * the clause. We can cache if varRelid is zero or the clause
719 * contains only vars of that relid --- otherwise varRelid will affect
720 * the result, so mustn't cache. Outer join quals might be examined
721 * with either their join's actual jointype or JOIN_INNER, so we need
722 * two cache variables to remember both cases. Note: we assume the
723 * result won't change if we are switching the input relations or
724 * considering a unique-ified case, so we only need one cache variable
725 * for all non-JOIN_INNER cases.
726 */
727 if (varRelid == 0 ||
728 rinfo->num_base_rels == 0 ||
729 (rinfo->num_base_rels == 1 &&
730 bms_is_member(varRelid, rinfo->clause_relids)))
731 {
732 /* Cacheable --- do we already have the result? */
733 if (jointype == JOIN_INNER)
734 {
735 if (rinfo->norm_selec >= 0)
736 return rinfo->norm_selec;
737 }
738 else
739 {
740 if (rinfo->outer_selec >= 0)
741 return rinfo->outer_selec;
742 }
743 cacheable = true;
744 }
745
746 /*
747 * Proceed with examination of contained clause. If the clause is an
748 * OR-clause, we want to look at the variant with sub-RestrictInfos,
749 * so that per-subclause selectivities can be cached.
750 */
751 if (rinfo->orclause)
752 clause = (Node *) rinfo->orclause;
753 else
754 clause = (Node *) rinfo->clause;
755 }
756
757 if (IsA(clause, Var))
758 {
759 Var *var = (Var *) clause;
760
761 /*
762 * We probably shouldn't ever see an uplevel Var here, but if we do,
763 * return the default selectivity...
764 */
765 if (var->varlevelsup == 0 &&
766 (varRelid == 0 || varRelid == (int) var->varno))
767 {
768 /* Use the restriction selectivity function for a bool Var */
769 s1 = boolvarsel(root, (Node *) var, varRelid);
770 }
771 }
772 else if (IsA(clause, Const))
773 {
774 /* bool constant is pretty easy... */
775 Const *con = (Const *) clause;
776
777 s1 = con->constisnull ? 0.0 :
778 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
779 }
780 else if (IsA(clause, Param))
781 {
782 /* see if we can replace the Param */
783 Node *subst = estimate_expression_value(root, clause);
784
785 if (IsA(subst, Const))
786 {
787 /* bool constant is pretty easy... */
788 Const *con = (Const *) subst;
789
790 s1 = con->constisnull ? 0.0 :
791 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
792 }
793 else
794 {
795 /* XXX any way to do better than default? */
796 }
797 }
798 else if (is_notclause(clause))
799 {
800 /* inverse of the selectivity of the underlying clause */
802 (Node *) get_notclausearg((Expr *) clause),
803 varRelid,
804 jointype,
805 sjinfo,
806 use_extended_stats);
807 }
808 else if (is_andclause(clause))
809 {
810 /* share code with clauselist_selectivity() */
812 ((BoolExpr *) clause)->args,
813 varRelid,
814 jointype,
815 sjinfo,
816 use_extended_stats);
817 }
818 else if (is_orclause(clause))
819 {
820 /*
821 * Almost the same thing as clauselist_selectivity, but with the
822 * clauses connected by OR.
823 */
825 ((BoolExpr *) clause)->args,
826 varRelid,
827 jointype,
828 sjinfo,
829 use_extended_stats);
830 }
831 else if (is_opclause(clause) || IsA(clause, DistinctExpr))
832 {
833 OpExpr *opclause = (OpExpr *) clause;
834 Oid opno = opclause->opno;
835
836 if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
837 {
838 /* Estimate selectivity for a join clause. */
839 s1 = join_selectivity(root, opno,
840 opclause->args,
841 opclause->inputcollid,
842 jointype,
843 sjinfo);
844 }
845 else
846 {
847 /* Estimate selectivity for a restriction clause. */
849 opclause->args,
850 opclause->inputcollid,
851 varRelid);
852 }
853
854 /*
855 * DistinctExpr has the same representation as OpExpr, but the
856 * contained operator is "=" not "<>", so we must negate the result.
857 * This estimation method doesn't give the right behavior for nulls,
858 * but it's better than doing nothing.
859 */
860 if (IsA(clause, DistinctExpr))
861 s1 = 1.0 - s1;
862 }
863 else if (is_funcclause(clause))
864 {
865 FuncExpr *funcclause = (FuncExpr *) clause;
866
867 /* Try to get an estimate from the support function, if any */
869 funcclause->funcid,
870 funcclause->args,
871 funcclause->inputcollid,
872 treat_as_join_clause(root, clause, rinfo,
873 varRelid, sjinfo),
874 varRelid,
875 jointype,
876 sjinfo);
877 }
878 else if (IsA(clause, ScalarArrayOpExpr))
879 {
880 /* Use node specific selectivity calculation function */
882 (ScalarArrayOpExpr *) clause,
883 treat_as_join_clause(root, clause, rinfo,
884 varRelid, sjinfo),
885 varRelid,
886 jointype,
887 sjinfo);
888 }
889 else if (IsA(clause, RowCompareExpr))
890 {
891 /* Use node specific selectivity calculation function */
893 (RowCompareExpr *) clause,
894 varRelid,
895 jointype,
896 sjinfo);
897 }
898 else if (IsA(clause, NullTest))
899 {
900 /* Use node specific selectivity calculation function */
902 ((NullTest *) clause)->nulltesttype,
903 (Node *) ((NullTest *) clause)->arg,
904 varRelid,
905 jointype,
906 sjinfo);
907 }
908 else if (IsA(clause, BooleanTest))
909 {
910 /* Use node specific selectivity calculation function */
912 ((BooleanTest *) clause)->booltesttype,
913 (Node *) ((BooleanTest *) clause)->arg,
914 varRelid,
915 jointype,
916 sjinfo);
917 }
918 else if (IsA(clause, CurrentOfExpr))
919 {
920 /* CURRENT OF selects at most one row of its table */
921 CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
922 RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
923
924 if (crel->tuples > 0)
925 s1 = 1.0 / crel->tuples;
926 }
927 else if (IsA(clause, RelabelType))
928 {
929 /* Not sure this case is needed, but it can't hurt */
931 (Node *) ((RelabelType *) clause)->arg,
932 varRelid,
933 jointype,
934 sjinfo,
935 use_extended_stats);
936 }
937 else if (IsA(clause, CoerceToDomain))
938 {
939 /* Not sure this case is needed, but it can't hurt */
941 (Node *) ((CoerceToDomain *) clause)->arg,
942 varRelid,
943 jointype,
944 sjinfo,
945 use_extended_stats);
946 }
947 else
948 {
949 /*
950 * For anything else, see if we can consider it as a boolean variable.
951 * This only works if it's an immutable expression in Vars of a single
952 * relation; but there's no point in us checking that here because
953 * boolvarsel() will do it internally, and return a suitable default
954 * selectivity if not.
955 */
956 s1 = boolvarsel(root, clause, varRelid);
957 }
958
959 /* Cache the result if possible */
960 if (cacheable)
961 {
962 if (jointype == JOIN_INNER)
963 rinfo->norm_selec = s1;
964 else
965 rinfo->outer_selec = s1;
966 }
967
968#ifdef SELECTIVITY_DEBUG
969 elog(DEBUG4, "clause_selectivity: s1 %f", s1);
970#endif /* SELECTIVITY_DEBUG */
971
972 return s1;
973}
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:715
#define bms_is_empty(a)
Definition: bitmapset.h:118
int NumRelids(PlannerInfo *root, Node *clause)
Definition: clauses.c:2132
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2090
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2397
bool is_pseudo_constant_clause_relids(Node *clause, Relids relids)
Definition: clauses.c:2110
static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2)
Definition: clausesel.c:427
static RelOptInfo * find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
Definition: clausesel.c:523
Selectivity clause_selectivity_ext(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:684
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:100
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:117
struct RangeQueryClause RangeQueryClause
static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo, int varRelid, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:586
static Selectivity clauselist_selectivity_or(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:359
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:667
#define elog(elevel,...)
Definition: elog.h:226
#define DEBUG4
Definition: elog.h:27
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Selectivity statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses, bool is_or)
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1697
void pfree(void *pointer)
Definition: mcxt.c:2147
void * palloc(Size size)
Definition: mcxt.c:1940
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:107
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:95
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_funcclause(const void *clause)
Definition: nodeFuncs.h:69
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:134
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:83
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
double Selectivity
Definition: nodes.h:256
JoinType
Definition: nodes.h:294
@ JOIN_INNER
Definition: nodes.h:299
@ RTE_RELATION
Definition: parsenodes.h:1026
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1969
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:2008
Selectivity function_selectivity(PlannerInfo *root, Oid funcid, List *args, Oid inputcollid, bool is_join, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:2049
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
unsigned int Oid
Definition: postgres_ext.h:30
char * s1
char * s2
@ IS_NULL
Definition: primnodes.h:1957
tree ctl root
Definition: radixtree.h:1857
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:414
Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1545
Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1703
Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
Definition: selfuncs.c:1517
Selectivity scalararraysel(PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:1821
Selectivity rowcomparesel(PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: selfuncs.c:2210
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Oid funcid
Definition: primnodes.h:767
List * args
Definition: primnodes.h:785
Definition: pg_list.h:54
Definition: nodes.h:135
Oid opno
Definition: primnodes.h:835
List * args
Definition: primnodes.h:853
Selectivity hibound
Definition: clausesel.c:38
Selectivity lobound
Definition: clausesel.c:37
struct RangeQueryClause * next
Definition: clausesel.c:33
Index relid
Definition: pathnodes.h:945
List * statlist
Definition: pathnodes.h:973
Cardinality tuples
Definition: pathnodes.h:976
RTEKind rtekind
Definition: pathnodes.h:949
Expr * clause
Definition: pathnodes.h:2700
Definition: primnodes.h:262