PostgreSQL Source Code  git master
selfuncs.h File Reference
#include "access/htup.h"
#include "fmgr.h"
#include "nodes/pathnodes.h"
Include dependency graph for selfuncs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  EstimationInfo
 
struct  VariableStatData
 
struct  GenericCosts
 

Macros

#define DEFAULT_EQ_SEL   0.005
 
#define DEFAULT_INEQ_SEL   0.3333333333333333
 
#define DEFAULT_RANGE_INEQ_SEL   0.005
 
#define DEFAULT_MULTIRANGE_INEQ_SEL   0.005
 
#define DEFAULT_MATCH_SEL   0.005
 
#define DEFAULT_MATCHING_SEL   0.010
 
#define DEFAULT_NUM_DISTINCT   200
 
#define DEFAULT_UNK_SEL   0.005
 
#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)
 
#define CLAMP_PROBABILITY(p)
 
#define SELFLAG_USED_DEFAULT
 
#define ReleaseVariableStats(vardata)
 

Typedefs

typedef struct EstimationInfo EstimationInfo
 
typedef struct VariableStatData VariableStatData
 
typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)
 
typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)
 

Functions

void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double generic_restriction_selectivity (PlannerInfo *root, Oid oproid, Oid collation, List *args, int varRelid, double default_selectivity)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (PlannerInfo *root, Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
Listget_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
Listadd_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 

Variables

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook
 
PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook
 

Macro Definition Documentation

◆ CLAMP_PROBABILITY

◆ DEFAULT_EQ_SEL

#define DEFAULT_EQ_SEL   0.005

Definition at line 34 of file selfuncs.h.

Referenced by _int_matchsel(), eqsel_internal(), int_query_opr_selec(), and neqjoinsel().

◆ DEFAULT_INEQ_SEL

◆ DEFAULT_MATCH_SEL

#define DEFAULT_MATCH_SEL   0.005

◆ DEFAULT_MATCHING_SEL

#define DEFAULT_MATCHING_SEL   0.010

Definition at line 49 of file selfuncs.h.

Referenced by matchingjoinsel(), and matchingsel().

◆ DEFAULT_MULTIRANGE_INEQ_SEL

#define DEFAULT_MULTIRANGE_INEQ_SEL   0.005

Definition at line 43 of file selfuncs.h.

Referenced by default_multirange_selectivity().

◆ DEFAULT_NOT_UNK_SEL

#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)

Definition at line 56 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ DEFAULT_NUM_DISTINCT

#define DEFAULT_NUM_DISTINCT   200

Definition at line 52 of file selfuncs.h.

Referenced by cost_incremental_sort(), and get_variable_numdistinct().

◆ DEFAULT_RANGE_INEQ_SEL

#define DEFAULT_RANGE_INEQ_SEL   0.005

Definition at line 40 of file selfuncs.h.

Referenced by clauselist_selectivity_ext(), and default_range_selectivity().

◆ DEFAULT_UNK_SEL

#define DEFAULT_UNK_SEL   0.005

Definition at line 55 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ ReleaseVariableStats

◆ SELFLAG_USED_DEFAULT

#define SELFLAG_USED_DEFAULT
Value:
(1 << 0) /* Estimation fell back on one
* of the DEFAULTs as defined
* above. */

Definition at line 76 of file selfuncs.h.

Referenced by cost_resultcache_rescan(), and estimate_num_groups().

Typedef Documentation

◆ EstimationInfo

◆ get_index_stats_hook_type

typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Definition at line 142 of file selfuncs.h.

◆ get_relation_stats_hook_type

typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

Definition at line 137 of file selfuncs.h.

◆ VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

List* add_predicate_to_index_quals ( IndexOptInfo index,
List indexQuals 
)

Definition at line 6569 of file selfuncs.c.

References IndexOptInfo::indpred, lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().

6570 {
6571  List *predExtraQuals = NIL;
6572  ListCell *lc;
6573 
6574  if (index->indpred == NIL)
6575  return indexQuals;
6576 
6577  foreach(lc, index->indpred)
6578  {
6579  Node *predQual = (Node *) lfirst(lc);
6580  List *oneQual = list_make1(predQual);
6581 
6582  if (!predicate_implied_by(oneQual, indexQuals, false))
6583  predExtraQuals = list_concat(predExtraQuals, oneQual);
6584  }
6585  return list_concat(predExtraQuals, indexQuals);
6586 }
#define NIL
Definition: pg_list.h:65
Definition: nodes.h:539
List * list_concat(List *list1, const List *list2)
Definition: list.c:530
#define list_make1(x1)
Definition: pg_list.h:206
#define lfirst(lc)
Definition: pg_list.h:169
List * indpred
Definition: pathnodes.h:856
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:151
Definition: pg_list.h:50

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo root,
BoolTestType  booltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1537 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity_ext().

1539 {
1540  VariableStatData vardata;
1541  double selec;
1542 
1543  examine_variable(root, arg, varRelid, &vardata);
1544 
1545  if (HeapTupleIsValid(vardata.statsTuple))
1546  {
1547  Form_pg_statistic stats;
1548  double freq_null;
1549  AttStatsSlot sslot;
1550 
1551  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1552  freq_null = stats->stanullfrac;
1553 
1554  if (get_attstatsslot(&sslot, vardata.statsTuple,
1555  STATISTIC_KIND_MCV, InvalidOid,
1557  && sslot.nnumbers > 0)
1558  {
1559  double freq_true;
1560  double freq_false;
1561 
1562  /*
1563  * Get first MCV frequency and derive frequency for true.
1564  */
1565  if (DatumGetBool(sslot.values[0]))
1566  freq_true = sslot.numbers[0];
1567  else
1568  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1569 
1570  /*
1571  * Next derive frequency for false. Then use these as appropriate
1572  * to derive frequency for each case.
1573  */
1574  freq_false = 1.0 - freq_true - freq_null;
1575 
1576  switch (booltesttype)
1577  {
1578  case IS_UNKNOWN:
1579  /* select only NULL values */
1580  selec = freq_null;
1581  break;
1582  case IS_NOT_UNKNOWN:
1583  /* select non-NULL values */
1584  selec = 1.0 - freq_null;
1585  break;
1586  case IS_TRUE:
1587  /* select only TRUE values */
1588  selec = freq_true;
1589  break;
1590  case IS_NOT_TRUE:
1591  /* select non-TRUE values */
1592  selec = 1.0 - freq_true;
1593  break;
1594  case IS_FALSE:
1595  /* select only FALSE values */
1596  selec = freq_false;
1597  break;
1598  case IS_NOT_FALSE:
1599  /* select non-FALSE values */
1600  selec = 1.0 - freq_false;
1601  break;
1602  default:
1603  elog(ERROR, "unrecognized booltesttype: %d",
1604  (int) booltesttype);
1605  selec = 0.0; /* Keep compiler quiet */
1606  break;
1607  }
1608 
1609  free_attstatsslot(&sslot);
1610  }
1611  else
1612  {
1613  /*
1614  * No most-common-value info available. Still have null fraction
1615  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1616  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1617  */
1618  switch (booltesttype)
1619  {
1620  case IS_UNKNOWN:
1621  /* select only NULL values */
1622  selec = freq_null;
1623  break;
1624  case IS_NOT_UNKNOWN:
1625  /* select non-NULL values */
1626  selec = 1.0 - freq_null;
1627  break;
1628  case IS_TRUE:
1629  case IS_FALSE:
1630  /* Assume we select half of the non-NULL values */
1631  selec = (1.0 - freq_null) / 2.0;
1632  break;
1633  case IS_NOT_TRUE:
1634  case IS_NOT_FALSE:
1635  /* Assume we select NULLs plus half of the non-NULLs */
1636  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1637  selec = (freq_null + 1.0) / 2.0;
1638  break;
1639  default:
1640  elog(ERROR, "unrecognized booltesttype: %d",
1641  (int) booltesttype);
1642  selec = 0.0; /* Keep compiler quiet */
1643  break;
1644  }
1645  }
1646  }
1647  else
1648  {
1649  /*
1650  * If we can't get variable statistics for the argument, perhaps
1651  * clause_selectivity can do something with it. We ignore the
1652  * possibility of a NULL value when using clause_selectivity, and just
1653  * assume the value is either TRUE or FALSE.
1654  */
1655  switch (booltesttype)
1656  {
1657  case IS_UNKNOWN:
1658  selec = DEFAULT_UNK_SEL;
1659  break;
1660  case IS_NOT_UNKNOWN:
1661  selec = DEFAULT_NOT_UNK_SEL;
1662  break;
1663  case IS_TRUE:
1664  case IS_NOT_FALSE:
1665  selec = (double) clause_selectivity(root, arg,
1666  varRelid,
1667  jointype, sjinfo);
1668  break;
1669  case IS_FALSE:
1670  case IS_NOT_TRUE:
1671  selec = 1.0 - (double) clause_selectivity(root, arg,
1672  varRelid,
1673  jointype, sjinfo);
1674  break;
1675  default:
1676  elog(ERROR, "unrecognized booltesttype: %d",
1677  (int) booltesttype);
1678  selec = 0.0; /* Keep compiler quiet */
1679  break;
1680  }
1681  }
1682 
1683  ReleaseVariableStats(vardata);
1684 
1685  /* result should be in range, but make sure... */
1686  CLAMP_PROBABILITY(selec);
1687 
1688  return (Selectivity) selec;
1689 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
HeapTuple statsTuple
Definition: selfuncs.h:91
int nnumbers
Definition: lsyscache.h:57
double Selectivity
Definition: nodes.h:672
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:56
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ERROR
Definition: elog.h:46
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:690
float4 * numbers
Definition: lsyscache.h:56
#define DatumGetBool(X)
Definition: postgres.h:437
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:55
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
Datum * values
Definition: lsyscache.h:53
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define elog(elevel,...)
Definition: elog.h:232
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo root,
Node arg,
int  varRelid 
)

Definition at line 1509 of file selfuncs.c.

References BoolGetDatum, examine_variable(), HeapTupleIsValid, InvalidOid, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity_ext().

1510 {
1511  VariableStatData vardata;
1512  double selec;
1513 
1514  examine_variable(root, arg, varRelid, &vardata);
1515  if (HeapTupleIsValid(vardata.statsTuple))
1516  {
1517  /*
1518  * A boolean variable V is equivalent to the clause V = 't', so we
1519  * compute the selectivity as if that is what we have.
1520  */
1521  selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
1522  BoolGetDatum(true), false, true, false);
1523  }
1524  else
1525  {
1526  /* Otherwise, the default estimate is 0.5 */
1527  selec = 0.5;
1528  }
1529  ReleaseVariableStats(vardata);
1530  return selec;
1531 }
HeapTuple statsTuple
Definition: selfuncs.h:91
#define BoolGetDatum(X)
Definition: postgres.h:446
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
double var_eq_const(VariableStatData *vardata, Oid operator, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:292

◆ estimate_array_length()

int estimate_array_length ( Node arrayexpr)

Definition at line 2132 of file selfuncs.c.

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

2133 {
2134  /* look through any binary-compatible relabeling of arrayexpr */
2135  arrayexpr = strip_array_coercion(arrayexpr);
2136 
2137  if (arrayexpr && IsA(arrayexpr, Const))
2138  {
2139  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2140  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2141  ArrayType *arrayval;
2142 
2143  if (arrayisnull)
2144  return 0;
2145  arrayval = DatumGetArrayTypeP(arraydatum);
2146  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2147  }
2148  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2149  !((ArrayExpr *) arrayexpr)->multidims)
2150  {
2151  return list_length(((ArrayExpr *) arrayexpr)->elements);
2152  }
2153  else
2154  {
2155  /* default guess --- see also scalararraysel */
2156  return 10;
2157  }
2158 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ARR_DIMS(a)
Definition: array.h:287
uintptr_t Datum
Definition: postgres.h:411
static int list_length(const List *l)
Definition: pg_list.h:149
#define ARR_NDIM(a)
Definition: array.h:283
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1780
#define DatumGetArrayTypeP(X)
Definition: array.h:254

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3751 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

3754 {
3755  VariableStatData vardata;
3756  double estfract,
3757  ndistinct,
3758  stanullfrac,
3759  avgfreq;
3760  bool isdefault;
3761  AttStatsSlot sslot;
3762 
3763  examine_variable(root, hashkey, 0, &vardata);
3764 
3765  /* Look up the frequency of the most common value, if available */
3766  *mcv_freq = 0.0;
3767 
3768  if (HeapTupleIsValid(vardata.statsTuple))
3769  {
3770  if (get_attstatsslot(&sslot, vardata.statsTuple,
3771  STATISTIC_KIND_MCV, InvalidOid,
3773  {
3774  /*
3775  * The first MCV stat is for the most common value.
3776  */
3777  if (sslot.nnumbers > 0)
3778  *mcv_freq = sslot.numbers[0];
3779  free_attstatsslot(&sslot);
3780  }
3781  }
3782 
3783  /* Get number of distinct values */
3784  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3785 
3786  /*
3787  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3788  * mcv_freq is known to be even higher than that, use it instead.
3789  */
3790  if (isdefault)
3791  {
3792  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3793  ReleaseVariableStats(vardata);
3794  return;
3795  }
3796 
3797  /* Get fraction that are null */
3798  if (HeapTupleIsValid(vardata.statsTuple))
3799  {
3800  Form_pg_statistic stats;
3801 
3802  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3803  stanullfrac = stats->stanullfrac;
3804  }
3805  else
3806  stanullfrac = 0.0;
3807 
3808  /* Compute avg freq of all distinct data values in raw relation */
3809  avgfreq = (1.0 - stanullfrac) / ndistinct;
3810 
3811  /*
3812  * Adjust ndistinct to account for restriction clauses. Observe we are
3813  * assuming that the data distribution is affected uniformly by the
3814  * restriction clauses!
3815  *
3816  * XXX Possibly better way, but much more expensive: multiply by
3817  * selectivity of rel's restriction clauses that mention the target Var.
3818  */
3819  if (vardata.rel && vardata.rel->tuples > 0)
3820  {
3821  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3822  ndistinct = clamp_row_est(ndistinct);
3823  }
3824 
3825  /*
3826  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3827  * number of buckets is less than the expected number of distinct values;
3828  * otherwise it is 1/ndistinct.
3829  */
3830  if (ndistinct > nbuckets)
3831  estfract = 1.0 / nbuckets;
3832  else
3833  estfract = 1.0 / ndistinct;
3834 
3835  /*
3836  * Adjust estimated bucketsize upward to account for skewed distribution.
3837  */
3838  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3839  estfract *= *mcv_freq / avgfreq;
3840 
3841  /*
3842  * Clamp bucketsize to sane range (the above adjustment could easily
3843  * produce an out-of-range result). We set the lower bound a little above
3844  * zero, since zero isn't a very sane result.
3845  */
3846  if (estfract < 1.0e-6)
3847  estfract = 1.0e-6;
3848  else if (estfract > 1.0)
3849  estfract = 1.0;
3850 
3851  *bucketsize_frac = (Selectivity) estfract;
3852 
3853  ReleaseVariableStats(vardata);
3854 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
HeapTuple statsTuple
Definition: selfuncs.h:91
int nnumbers
Definition: lsyscache.h:57
double tuples
Definition: pathnodes.h:716
RelOptInfo * rel
Definition: selfuncs.h:90
double Selectivity
Definition: nodes.h:672
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5627
float4 * numbers
Definition: lsyscache.h:56
double rows
Definition: pathnodes.h:679
#define InvalidOid
Definition: postgres_ext.h:36
#define Max(x, y)
Definition: c.h:980
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
e
Definition: preproc-init.c:82
double clamp_row_est(double nrows)
Definition: costsize.c:199
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( PlannerInfo root,
Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3870 of file selfuncs.c.

References PlannerInfo::aggtransinfos, hash_agg_entry_size(), list_length(), Path::pathtarget, AggClauseCosts::transitionSpace, and PathTarget::width.

Referenced by consider_groupingsets_paths().

3872 {
3873  Size hashentrysize;
3874 
3875  hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
3876  path->pathtarget->width,
3877  agg_costs->transitionSpace);
3878 
3879  /*
3880  * Note that this disregards the effect of fill-factor and growth policy
3881  * of the hash table. That's probably ok, given that the default
3882  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3883  * "double-in-size" growth policies here.
3884  */
3885  return hashentrysize * dNumGroups;
3886 }
PathTarget * pathtarget
Definition: pathnodes.h:1175
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1693
List * aggtransinfos
Definition: pathnodes.h:357
size_t Size
Definition: c.h:540
static int list_length(const List *l)
Definition: pg_list.h:149
Size transitionSpace
Definition: pathnodes.h:60

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset,
EstimationInfo estinfo 
)

Definition at line 3368 of file selfuncs.c.

References add_unique_group_var(), Assert, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), EstimationInfo::flags, for_each_from, HeapTupleIsValid, i, IS_SIMPLE_REL, GroupVarInfo::isdefault, VariableStatData::isunique, lappend(), lfirst, linitial, list_length(), list_member_int(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, SELFLAG_USED_DEFAULT, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), cost_incremental_sort(), cost_resultcache_rescan(), create_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().

3370 {
3371  List *varinfos = NIL;
3372  double srf_multiplier = 1.0;
3373  double numdistinct;
3374  ListCell *l;
3375  int i;
3376 
3377  /* Zero the estinfo output parameter, if non-NULL */
3378  if (estinfo != NULL)
3379  memset(estinfo, 0, sizeof(EstimationInfo));
3380 
3381  /*
3382  * We don't ever want to return an estimate of zero groups, as that tends
3383  * to lead to division-by-zero and other unpleasantness. The input_rows
3384  * estimate is usually already at least 1, but clamp it just in case it
3385  * isn't.
3386  */
3387  input_rows = clamp_row_est(input_rows);
3388 
3389  /*
3390  * If no grouping columns, there's exactly one group. (This can't happen
3391  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3392  * corner cases with set operations.)
3393  */
3394  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3395  return 1.0;
3396 
3397  /*
3398  * Count groups derived from boolean grouping expressions. For other
3399  * expressions, find the unique Vars used, treating an expression as a Var
3400  * if we can find stats for it. For each one, record the statistical
3401  * estimate of number of distinct values (total in its table, without
3402  * regard for filtering).
3403  */
3404  numdistinct = 1.0;
3405 
3406  i = 0;
3407  foreach(l, groupExprs)
3408  {
3409  Node *groupexpr = (Node *) lfirst(l);
3410  double this_srf_multiplier;
3411  VariableStatData vardata;
3412  List *varshere;
3413  ListCell *l2;
3414 
3415  /* is expression in this grouping set? */
3416  if (pgset && !list_member_int(*pgset, i++))
3417  continue;
3418 
3419  /*
3420  * Set-returning functions in grouping columns are a bit problematic.
3421  * The code below will effectively ignore their SRF nature and come up
3422  * with a numdistinct estimate as though they were scalar functions.
3423  * We compensate by scaling up the end result by the largest SRF
3424  * rowcount estimate. (This will be an overestimate if the SRF
3425  * produces multiple copies of any output value, but it seems best to
3426  * assume the SRF's outputs are distinct. In any case, it's probably
3427  * pointless to worry too much about this without much better
3428  * estimates for SRF output rowcounts than we have today.)
3429  */
3430  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3431  if (srf_multiplier < this_srf_multiplier)
3432  srf_multiplier = this_srf_multiplier;
3433 
3434  /* Short-circuit for expressions returning boolean */
3435  if (exprType(groupexpr) == BOOLOID)
3436  {
3437  numdistinct *= 2.0;
3438  continue;
3439  }
3440 
3441  /*
3442  * If examine_variable is able to deduce anything about the GROUP BY
3443  * expression, treat it as a single variable even if it's really more
3444  * complicated.
3445  *
3446  * XXX This has the consequence that if there's a statistics on the
3447  * expression, we don't split it into individual Vars. This affects
3448  * our selection of statistics in estimate_multivariate_ndistinct,
3449  * because it's probably better to use more accurate estimate for
3450  * each expression and treat them as independent, than to combine
3451  * estimates for the extracted variables when we don't know how that
3452  * relates to the expressions.
3453  */
3454  examine_variable(root, groupexpr, 0, &vardata);
3455  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3456  {
3457  varinfos = add_unique_group_var(root, varinfos,
3458  groupexpr, &vardata);
3459  ReleaseVariableStats(vardata);
3460  continue;
3461  }
3462  ReleaseVariableStats(vardata);
3463 
3464  /*
3465  * Else pull out the component Vars. Handle PlaceHolderVars by
3466  * recursing into their arguments (effectively assuming that the
3467  * PlaceHolderVar doesn't change the number of groups, which boils
3468  * down to ignoring the possible addition of nulls to the result set).
3469  */
3470  varshere = pull_var_clause(groupexpr,
3474 
3475  /*
3476  * If we find any variable-free GROUP BY item, then either it is a
3477  * constant (and we can ignore it) or it contains a volatile function;
3478  * in the latter case we punt and assume that each input row will
3479  * yield a distinct group.
3480  */
3481  if (varshere == NIL)
3482  {
3483  if (contain_volatile_functions(groupexpr))
3484  return input_rows;
3485  continue;
3486  }
3487 
3488  /*
3489  * Else add variables to varinfos list
3490  */
3491  foreach(l2, varshere)
3492  {
3493  Node *var = (Node *) lfirst(l2);
3494 
3495  examine_variable(root, var, 0, &vardata);
3496  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3497  ReleaseVariableStats(vardata);
3498  }
3499  }
3500 
3501  /*
3502  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3503  * list.
3504  */
3505  if (varinfos == NIL)
3506  {
3507  /* Apply SRF multiplier as we would do in the long path */
3508  numdistinct *= srf_multiplier;
3509  /* Round off */
3510  numdistinct = ceil(numdistinct);
3511  /* Guard against out-of-range answers */
3512  if (numdistinct > input_rows)
3513  numdistinct = input_rows;
3514  if (numdistinct < 1.0)
3515  numdistinct = 1.0;
3516  return numdistinct;
3517  }
3518 
3519  /*
3520  * Group Vars by relation and estimate total numdistinct.
3521  *
3522  * For each iteration of the outer loop, we process the frontmost Var in
3523  * varinfos, plus all other Vars in the same relation. We remove these
3524  * Vars from the newvarinfos list for the next iteration. This is the
3525  * easiest way to group Vars of same rel together.
3526  */
3527  do
3528  {
3529  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3530  RelOptInfo *rel = varinfo1->rel;
3531  double reldistinct = 1;
3532  double relmaxndistinct = reldistinct;
3533  int relvarcount = 0;
3534  List *newvarinfos = NIL;
3535  List *relvarinfos = NIL;
3536 
3537  /*
3538  * Split the list of varinfos in two - one for the current rel, one
3539  * for remaining Vars on other rels.
3540  */
3541  relvarinfos = lappend(relvarinfos, varinfo1);
3542  for_each_from(l, varinfos, 1)
3543  {
3544  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3545 
3546  if (varinfo2->rel == varinfo1->rel)
3547  {
3548  /* varinfos on current rel */
3549  relvarinfos = lappend(relvarinfos, varinfo2);
3550  }
3551  else
3552  {
3553  /* not time to process varinfo2 yet */
3554  newvarinfos = lappend(newvarinfos, varinfo2);
3555  }
3556  }
3557 
3558  /*
3559  * Get the numdistinct estimate for the Vars of this rel. We
3560  * iteratively search for multivariate n-distinct with maximum number
3561  * of vars; assuming that each var group is independent of the others,
3562  * we multiply them together. Any remaining relvarinfos after no more
3563  * multivariate matches are found are assumed independent too, so
3564  * their individual ndistinct estimates are multiplied also.
3565  *
3566  * While iterating, count how many separate numdistinct values we
3567  * apply. We apply a fudge factor below, but only if we multiplied
3568  * more than one such values.
3569  */
3570  while (relvarinfos)
3571  {
3572  double mvndistinct;
3573 
3574  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3575  &mvndistinct))
3576  {
3577  reldistinct *= mvndistinct;
3578  if (relmaxndistinct < mvndistinct)
3579  relmaxndistinct = mvndistinct;
3580  relvarcount++;
3581  }
3582  else
3583  {
3584  foreach(l, relvarinfos)
3585  {
3586  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3587 
3588  reldistinct *= varinfo2->ndistinct;
3589  if (relmaxndistinct < varinfo2->ndistinct)
3590  relmaxndistinct = varinfo2->ndistinct;
3591  relvarcount++;
3592 
3593  /*
3594  * When varinfo2's isdefault is set then we'd better set
3595  * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
3596  */
3597  if (estinfo != NULL && varinfo2->isdefault)
3598  estinfo->flags |= SELFLAG_USED_DEFAULT;
3599 
3600  }
3601 
3602  /* we're done with this relation */
3603  relvarinfos = NIL;
3604  }
3605  }
3606 
3607  /*
3608  * Sanity check --- don't divide by zero if empty relation.
3609  */
3610  Assert(IS_SIMPLE_REL(rel));
3611  if (rel->tuples > 0)
3612  {
3613  /*
3614  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3615  * fudge factor is because the Vars are probably correlated but we
3616  * don't know by how much. We should never clamp to less than the
3617  * largest ndistinct value for any of the Vars, though, since
3618  * there will surely be at least that many groups.
3619  */
3620  double clamp = rel->tuples;
3621 
3622  if (relvarcount > 1)
3623  {
3624  clamp *= 0.1;
3625  if (clamp < relmaxndistinct)
3626  {
3627  clamp = relmaxndistinct;
3628  /* for sanity in case some ndistinct is too large: */
3629  if (clamp > rel->tuples)
3630  clamp = rel->tuples;
3631  }
3632  }
3633  if (reldistinct > clamp)
3634  reldistinct = clamp;
3635 
3636  /*
3637  * Update the estimate based on the restriction selectivity,
3638  * guarding against division by zero when reldistinct is zero.
3639  * Also skip this if we know that we are returning all rows.
3640  */
3641  if (reldistinct > 0 && rel->rows < rel->tuples)
3642  {
3643  /*
3644  * Given a table containing N rows with n distinct values in a
3645  * uniform distribution, if we select p rows at random then
3646  * the expected number of distinct values selected is
3647  *
3648  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3649  *
3650  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3651  *
3652  * See "Approximating block accesses in database
3653  * organizations", S. B. Yao, Communications of the ACM,
3654  * Volume 20 Issue 4, April 1977 Pages 260-261.
3655  *
3656  * Alternatively, re-arranging the terms from the factorials,
3657  * this may be written as
3658  *
3659  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3660  *
3661  * This form of the formula is more efficient to compute in
3662  * the common case where p is larger than N/n. Additionally,
3663  * as pointed out by Dell'Era, if i << N for all terms in the
3664  * product, it can be approximated by
3665  *
3666  * n * (1 - ((N-p)/N)^(N/n))
3667  *
3668  * See "Expected distinct values when selecting from a bag
3669  * without replacement", Alberto Dell'Era,
3670  * http://www.adellera.it/investigations/distinct_balls/.
3671  *
3672  * The condition i << N is equivalent to n >> 1, so this is a
3673  * good approximation when the number of distinct values in
3674  * the table is large. It turns out that this formula also
3675  * works well even when n is small.
3676  */
3677  reldistinct *=
3678  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3679  rel->tuples / reldistinct));
3680  }
3681  reldistinct = clamp_row_est(reldistinct);
3682 
3683  /*
3684  * Update estimate of total distinct groups.
3685  */
3686  numdistinct *= reldistinct;
3687  }
3688 
3689  varinfos = newvarinfos;
3690  } while (varinfos != NIL);
3691 
3692  /* Now we can account for the effects of any SRFs */
3693  numdistinct *= srf_multiplier;
3694 
3695  /* Round off */
3696  numdistinct = ceil(numdistinct);
3697 
3698  /* Guard against out-of-range answers */
3699  if (numdistinct > input_rows)
3700  numdistinct = input_rows;
3701  if (numdistinct < 1.0)
3702  numdistinct = 1.0;
3703 
3704  return numdistinct;
3705 }
#define NIL
Definition: pg_list.h:65
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:283
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:191
HeapTuple statsTuple
Definition: selfuncs.h:91
double tuples
Definition: pathnodes.h:716
Definition: nodes.h:539
List * pull_var_clause(Node *node, int flags)
Definition: var.c:562
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:448
double ndistinct
Definition: selfuncs.c:3243
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:649
#define linitial(l)
Definition: pg_list.h:174
#define SELFLAG_USED_DEFAULT
Definition: selfuncs.h:76
bool list_member_int(const List *list, int datum)
Definition: list.c:669
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3907
List * lappend(List *list, void *datum)
Definition: list.c:336
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3248
double rows
Definition: pathnodes.h:679
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:149
#define for_each_from(cell, lst, N)
Definition: pg_list.h:393
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
bool isdefault
Definition: selfuncs.c:3244
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:188
int i
double clamp_row_est(double nrows)
Definition: costsize.c:199
Definition: pg_list.h:50
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:186
RelOptInfo * rel
Definition: selfuncs.c:3242
uint32 flags
Definition: selfuncs.h:82

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4954 of file selfuncs.c.

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, PlannerInfo::append_rel_array, arg, Assert, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, RangeTblEntry::checkAsUser, elog, equal(), ERROR, examine_simple_variable(), StatisticExtInfo::exprs, exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, StatisticExtInfo::kind, lfirst, list_head(), lnext(), MemSet, IndexOptInfo::ncolumns, NIL, IndexOptInfo::nkeycolumns, ObjectIdGetDatum, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, IndexOptInfo::rel, ReleaseDummy(), ReleaseSysCache(), RelOptInfo::relid, RangeTblEntry::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::securityQuals, statext_expressions_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, VariableStatData::vartype, Var::vartype, and Var::vartypmod.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

4956 {
4957  Node *basenode;
4958  Relids varnos;
4959  RelOptInfo *onerel;
4960 
4961  /* Make sure we don't return dangling pointers in vardata */
4962  MemSet(vardata, 0, sizeof(VariableStatData));
4963 
4964  /* Save the exposed type of the expression */
4965  vardata->vartype = exprType(node);
4966 
4967  /* Look inside any binary-compatible relabeling */
4968 
4969  if (IsA(node, RelabelType))
4970  basenode = (Node *) ((RelabelType *) node)->arg;
4971  else
4972  basenode = node;
4973 
4974  /* Fast path for a simple Var */
4975 
4976  if (IsA(basenode, Var) &&
4977  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4978  {
4979  Var *var = (Var *) basenode;
4980 
4981  /* Set up result fields other than the stats tuple */
4982  vardata->var = basenode; /* return Var without relabeling */
4983  vardata->rel = find_base_rel(root, var->varno);
4984  vardata->atttype = var->vartype;
4985  vardata->atttypmod = var->vartypmod;
4986  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4987 
4988  /* Try to locate some stats */
4989  examine_simple_variable(root, var, vardata);
4990 
4991  return;
4992  }
4993 
4994  /*
4995  * Okay, it's a more complicated expression. Determine variable
4996  * membership. Note that when varRelid isn't zero, only vars of that
4997  * relation are considered "real" vars.
4998  */
4999  varnos = pull_varnos(root, basenode);
5000 
5001  onerel = NULL;
5002 
5003  switch (bms_membership(varnos))
5004  {
5005  case BMS_EMPTY_SET:
5006  /* No Vars at all ... must be pseudo-constant clause */
5007  break;
5008  case BMS_SINGLETON:
5009  if (varRelid == 0 || bms_is_member(varRelid, varnos))
5010  {
5011  onerel = find_base_rel(root,
5012  (varRelid ? varRelid : bms_singleton_member(varnos)));
5013  vardata->rel = onerel;
5014  node = basenode; /* strip any relabeling */
5015  }
5016  /* else treat it as a constant */
5017  break;
5018  case BMS_MULTIPLE:
5019  if (varRelid == 0)
5020  {
5021  /* treat it as a variable of a join relation */
5022  vardata->rel = find_join_rel(root, varnos);
5023  node = basenode; /* strip any relabeling */
5024  }
5025  else if (bms_is_member(varRelid, varnos))
5026  {
5027  /* ignore the vars belonging to other relations */
5028  vardata->rel = find_base_rel(root, varRelid);
5029  node = basenode; /* strip any relabeling */
5030  /* note: no point in expressional-index search here */
5031  }
5032  /* else treat it as a constant */
5033  break;
5034  }
5035 
5036  bms_free(varnos);
5037 
5038  vardata->var = node;
5039  vardata->atttype = exprType(node);
5040  vardata->atttypmod = exprTypmod(node);
5041 
5042  if (onerel)
5043  {
5044  /*
5045  * We have an expression in vars of a single relation. Try to match
5046  * it to expressional index columns, in hopes of finding some
5047  * statistics.
5048  *
5049  * Note that we consider all index columns including INCLUDE columns,
5050  * since there could be stats for such columns. But the test for
5051  * uniqueness needs to be warier.
5052  *
5053  * XXX it's conceivable that there are multiple matches with different
5054  * index opfamilies; if so, we need to pick one that matches the
5055  * operator we are estimating for. FIXME later.
5056  */
5057  ListCell *ilist;
5058  ListCell *slist;
5059 
5060  foreach(ilist, onerel->indexlist)
5061  {
5062  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
5063  ListCell *indexpr_item;
5064  int pos;
5065 
5066  indexpr_item = list_head(index->indexprs);
5067  if (indexpr_item == NULL)
5068  continue; /* no expressions here... */
5069 
5070  for (pos = 0; pos < index->ncolumns; pos++)
5071  {
5072  if (index->indexkeys[pos] == 0)
5073  {
5074  Node *indexkey;
5075 
5076  if (indexpr_item == NULL)
5077  elog(ERROR, "too few entries in indexprs list");
5078  indexkey = (Node *) lfirst(indexpr_item);
5079  if (indexkey && IsA(indexkey, RelabelType))
5080  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
5081  if (equal(node, indexkey))
5082  {
5083  /*
5084  * Found a match ... is it a unique index? Tests here
5085  * should match has_unique_index().
5086  */
5087  if (index->unique &&
5088  index->nkeycolumns == 1 &&
5089  pos == 0 &&
5090  (index->indpred == NIL || index->predOK))
5091  vardata->isunique = true;
5092 
5093  /*
5094  * Has it got stats? We only consider stats for
5095  * non-partial indexes, since partial indexes probably
5096  * don't reflect whole-relation statistics; the above
5097  * check for uniqueness is the only info we take from
5098  * a partial index.
5099  *
5100  * An index stats hook, however, must make its own
5101  * decisions about what to do with partial indexes.
5102  */
5103  if (get_index_stats_hook &&
5104  (*get_index_stats_hook) (root, index->indexoid,
5105  pos + 1, vardata))
5106  {
5107  /*
5108  * The hook took control of acquiring a stats
5109  * tuple. If it did supply a tuple, it'd better
5110  * have supplied a freefunc.
5111  */
5112  if (HeapTupleIsValid(vardata->statsTuple) &&
5113  !vardata->freefunc)
5114  elog(ERROR, "no function provided to release variable stats with");
5115  }
5116  else if (index->indpred == NIL)
5117  {
5118  vardata->statsTuple =
5120  ObjectIdGetDatum(index->indexoid),
5121  Int16GetDatum(pos + 1),
5122  BoolGetDatum(false));
5123  vardata->freefunc = ReleaseSysCache;
5124 
5125  if (HeapTupleIsValid(vardata->statsTuple))
5126  {
5127  /* Get index's table for permission check */
5128  RangeTblEntry *rte;
5129  Oid userid;
5130 
5131  rte = planner_rt_fetch(index->rel->relid, root);
5132  Assert(rte->rtekind == RTE_RELATION);
5133 
5134  /*
5135  * Use checkAsUser if it's set, in case we're
5136  * accessing the table via a view.
5137  */
5138  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
5139 
5140  /*
5141  * For simplicity, we insist on the whole
5142  * table being selectable, rather than trying
5143  * to identify which column(s) the index
5144  * depends on. Also require all rows to be
5145  * selectable --- there must be no
5146  * securityQuals from security barrier views
5147  * or RLS policies.
5148  */
5149  vardata->acl_ok =
5150  rte->securityQuals == NIL &&
5151  (pg_class_aclcheck(rte->relid, userid,
5152  ACL_SELECT) == ACLCHECK_OK);
5153 
5154  /*
5155  * If the user doesn't have permissions to
5156  * access an inheritance child relation, check
5157  * the permissions of the table actually
5158  * mentioned in the query, since most likely
5159  * the user does have that permission. Note
5160  * that whole-table select privilege on the
5161  * parent doesn't quite guarantee that the
5162  * user could read all columns of the child.
5163  * But in practice it's unlikely that any
5164  * interesting security violation could result
5165  * from allowing access to the expression
5166  * index's stats, so we allow it anyway. See
5167  * similar code in examine_simple_variable()
5168  * for additional comments.
5169  */
5170  if (!vardata->acl_ok &&
5171  root->append_rel_array != NULL)
5172  {
5173  AppendRelInfo *appinfo;
5174  Index varno = index->rel->relid;
5175 
5176  appinfo = root->append_rel_array[varno];
5177  while (appinfo &&
5178  planner_rt_fetch(appinfo->parent_relid,
5179  root)->rtekind == RTE_RELATION)
5180  {
5181  varno = appinfo->parent_relid;
5182  appinfo = root->append_rel_array[varno];
5183  }
5184  if (varno != index->rel->relid)
5185  {
5186  /* Repeat access check on this rel */
5187  rte = planner_rt_fetch(varno, root);
5188  Assert(rte->rtekind == RTE_RELATION);
5189 
5190  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
5191 
5192  vardata->acl_ok =
5193  rte->securityQuals == NIL &&
5194  (pg_class_aclcheck(rte->relid,
5195  userid,
5196  ACL_SELECT) == ACLCHECK_OK);
5197  }
5198  }
5199  }
5200  else
5201  {
5202  /* suppress leakproofness checks later */
5203  vardata->acl_ok = true;
5204  }
5205  }
5206  if (vardata->statsTuple)
5207  break;
5208  }
5209  indexpr_item = lnext(index->indexprs, indexpr_item);
5210  }
5211  }
5212  if (vardata->statsTuple)
5213  break;
5214  }
5215 
5216  /*
5217  * Search extended statistics for one with a matching expression.
5218  * There might be multiple ones, so just grab the first one. In the
5219  * future, we might consider the statistics target (and pick the most
5220  * accurate statistics) and maybe some other parameters.
5221  */
5222  foreach(slist, onerel->statlist)
5223  {
5224  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
5225  ListCell *expr_item;
5226  int pos;
5227 
5228  /*
5229  * Stop once we've found statistics for the expression (either
5230  * from extended stats, or for an index in the preceding loop).
5231  */
5232  if (vardata->statsTuple)
5233  break;
5234 
5235  /* skip stats without per-expression stats */
5236  if (info->kind != STATS_EXT_EXPRESSIONS)
5237  continue;
5238 
5239  pos = 0;
5240  foreach(expr_item, info->exprs)
5241  {
5242  Node *expr = (Node *) lfirst(expr_item);
5243 
5244  Assert(expr);
5245 
5246  /* strip RelabelType before comparing it */
5247  if (expr && IsA(expr, RelabelType))
5248  expr = (Node *) ((RelabelType *) expr)->arg;
5249 
5250  /* found a match, see if we can extract pg_statistic row */
5251  if (equal(node, expr))
5252  {
5253  HeapTuple t = statext_expressions_load(info->statOid, pos);
5254 
5255  /* Get index's table for permission check */
5256  RangeTblEntry *rte;
5257  Oid userid;
5258 
5259  vardata->statsTuple = t;
5260 
5261  /*
5262  * XXX Not sure if we should cache the tuple somewhere.
5263  * Now we just create a new copy every time.
5264  */
5265  vardata->freefunc = ReleaseDummy;
5266 
5267  rte = planner_rt_fetch(onerel->relid, root);
5268  Assert(rte->rtekind == RTE_RELATION);
5269 
5270  /*
5271  * Use checkAsUser if it's set, in case we're accessing
5272  * the table via a view.
5273  */
5274  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
5275 
5276  /*
5277  * For simplicity, we insist on the whole table being
5278  * selectable, rather than trying to identify which
5279  * column(s) the statistics depends on. Also require all
5280  * rows to be selectable --- there must be no
5281  * securityQuals from security barrier views or RLS
5282  * policies.
5283  */
5284  vardata->acl_ok =
5285  rte->securityQuals == NIL &&
5286  (pg_class_aclcheck(rte->relid, userid,
5287  ACL_SELECT) == ACLCHECK_OK);
5288 
5289  /*
5290  * If the user doesn't have permissions to access an
5291  * inheritance child relation, check the permissions of
5292  * the table actually mentioned in the query, since most
5293  * likely the user does have that permission. Note that
5294  * whole-table select privilege on the parent doesn't
5295  * quite guarantee that the user could read all columns of
5296  * the child. But in practice it's unlikely that any
5297  * interesting security violation could result from
5298  * allowing access to the expression stats, so we allow it
5299  * anyway. See similar code in examine_simple_variable()
5300  * for additional comments.
5301  */
5302  if (!vardata->acl_ok &&
5303  root->append_rel_array != NULL)
5304  {
5305  AppendRelInfo *appinfo;
5306  Index varno = onerel->relid;
5307 
5308  appinfo = root->append_rel_array[varno];
5309  while (appinfo &&
5310  planner_rt_fetch(appinfo->parent_relid,
5311  root)->rtekind == RTE_RELATION)
5312  {
5313  varno = appinfo->parent_relid;
5314  appinfo = root->append_rel_array[varno];
5315  }
5316  if (varno != onerel->relid)
5317  {
5318  /* Repeat access check on this rel */
5319  rte = planner_rt_fetch(varno, root);
5320  Assert(rte->rtekind == RTE_RELATION);
5321 
5322  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
5323 
5324  vardata->acl_ok =
5325  rte->securityQuals == NIL &&
5326  (pg_class_aclcheck(rte->relid,
5327  userid,
5328  ACL_SELECT) == ACLCHECK_OK);
5329  }
5330  }
5331 
5332  break;
5333  }
5334 
5335  pos++;
5336  }
5337  }
5338  }
5339 }
#define NIL
Definition: pg_list.h:65
Relids pull_varnos(PlannerInfo *root, Node *node)
Definition: var.c:97
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
List * statlist
Definition: pathnodes.h:714
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:322
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:438
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3113
HeapTuple statsTuple
Definition: selfuncs.h:91
Oid GetUserId(void)
Definition: miscinit.c:478
HeapTuple statext_expressions_load(Oid stxoid, int idx)
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:267
List * securityQuals
Definition: parsenodes.h:1151
RelOptInfo * rel
Definition: selfuncs.h:90
#define Int16GetDatum(X)
Definition: postgres.h:495
Definition: nodes.h:539
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:93
#define MemSet(start, val, len)
Definition: c.h:1008
AttrNumber varattno
Definition: primnodes.h:191
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:186
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:5351
int32 atttypmod
Definition: selfuncs.h:96
Definition: type.h:89
RelOptInfo * rel
Definition: pathnodes.h:832
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2086
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
Oid vartype
Definition: primnodes.h:193
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:1149
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:383
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
Index relid
Definition: pathnodes.h:704
Index varno
Definition: primnodes.h:189
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1175
#define ACL_SELECT
Definition: parsenodes.h:83
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:577
struct AppendRelInfo ** append_rel_array
Definition: pathnodes.h:201
unsigned int Index
Definition: c.h:549
List * indexlist
Definition: pathnodes.h:713
#define BoolGetDatum(X)
Definition: postgres.h:446
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
int nkeycolumns
Definition: pathnodes.h:841
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:145
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4685
RTEKind rtekind
Definition: parsenodes.h:995
#define elog(elevel,...)
Definition: elog.h:232
void * arg
int * indexkeys
Definition: pathnodes.h:842
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:374
Index parent_relid
Definition: pathnodes.h:2295
List * indpred
Definition: pathnodes.h:856
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
static void ReleaseDummy(HeapTuple tuple)
Definition: selfuncs.c:4913
List * indexprs
Definition: pathnodes.h:855
int32 vartypmod
Definition: primnodes.h:194

◆ generic_restriction_selectivity()

double generic_restriction_selectivity ( PlannerInfo root,
Oid  oproid,
Oid  collation,
List args,
int  varRelid,
double  default_selectivity 
)

Definition at line 911 of file selfuncs.c.

References CLAMP_PROBABILITY, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT, HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), ReleaseVariableStats, and VariableStatData::statsTuple.

Referenced by ltreeparentsel(), and matchingsel().

914 {
915  double selec;
916  VariableStatData vardata;
917  Node *other;
918  bool varonleft;
919 
920  /*
921  * If expression is not variable OP something or something OP variable,
922  * then punt and return the default estimate.
923  */
924  if (!get_restriction_variable(root, args, varRelid,
925  &vardata, &other, &varonleft))
926  return default_selectivity;
927 
928  /*
929  * If the something is a NULL constant, assume operator is strict and
930  * return zero, ie, operator will never return TRUE.
931  */
932  if (IsA(other, Const) &&
933  ((Const *) other)->constisnull)
934  {
935  ReleaseVariableStats(vardata);
936  return 0.0;
937  }
938 
939  if (IsA(other, Const))
940  {
941  /* Variable is being compared to a known non-null constant */
942  Datum constval = ((Const *) other)->constvalue;
943  FmgrInfo opproc;
944  double mcvsum;
945  double mcvsel;
946  double nullfrac;
947  int hist_size;
948 
949  fmgr_info(get_opcode(oproid), &opproc);
950 
951  /*
952  * Calculate the selectivity for the column's most common values.
953  */
954  mcvsel = mcv_selectivity(&vardata, &opproc, collation,
955  constval, varonleft,
956  &mcvsum);
957 
958  /*
959  * If the histogram is large enough, see what fraction of it matches
960  * the query, and assume that's representative of the non-MCV
961  * population. Otherwise use the default selectivity for the non-MCV
962  * population.
963  */
964  selec = histogram_selectivity(&vardata, &opproc, collation,
965  constval, varonleft,
966  10, 1, &hist_size);
967  if (selec < 0)
968  {
969  /* Nope, fall back on default */
970  selec = default_selectivity;
971  }
972  else if (hist_size < 100)
973  {
974  /*
975  * For histogram sizes from 10 to 100, we combine the histogram
976  * and default selectivities, putting increasingly more trust in
977  * the histogram for larger sizes.
978  */
979  double hist_weight = hist_size / 100.0;
980 
981  selec = selec * hist_weight +
982  default_selectivity * (1.0 - hist_weight);
983  }
984 
985  /* In any case, don't believe extremely small or large estimates. */
986  if (selec < 0.0001)
987  selec = 0.0001;
988  else if (selec > 0.9999)
989  selec = 0.9999;
990 
991  /* Don't forget to account for nulls. */
992  if (HeapTupleIsValid(vardata.statsTuple))
993  nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
994  else
995  nullfrac = 0.0;
996 
997  /*
998  * Now merge the results from the MCV and histogram calculations,
999  * realizing that the histogram covers only the non-null values that
1000  * are not listed in MCV.
1001  */
1002  selec *= 1.0 - nullfrac - mcvsum;
1003  selec += mcvsel;
1004  }
1005  else
1006  {
1007  /* Comparison value is not constant, so we can't do anything */
1008  selec = default_selectivity;
1009  }
1010 
1011  ReleaseVariableStats(vardata);
1012 
1013  /* result should be in range, but make sure... */
1014  CLAMP_PROBABILITY(selec);
1015 
1016  return selec;
1017 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
HeapTuple statsTuple
Definition: selfuncs.h:91
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4825
Definition: nodes.h:539
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
uintptr_t Datum
Definition: postgres.h:411
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:729
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1256
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:820
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 6351 of file selfuncs.c.

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, IndexOptInfo::pages, IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, GenericCosts::spc_random_page_cost, RelOptInfo::tuples, and IndexOptInfo::tuples.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

6355 {
6356  IndexOptInfo *index = path->indexinfo;
6357  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6358  List *indexOrderBys = path->indexorderbys;
6359  Cost indexStartupCost;
6360  Cost indexTotalCost;
6361  Selectivity indexSelectivity;
6362  double indexCorrelation;
6363  double numIndexPages;
6364  double numIndexTuples;
6365  double spc_random_page_cost;
6366  double num_sa_scans;
6367  double num_outer_scans;
6368  double num_scans;
6369  double qual_op_cost;
6370  double qual_arg_cost;
6371  List *selectivityQuals;
6372  ListCell *l;
6373 
6374  /*
6375  * If the index is partial, AND the index predicate with the explicitly
6376  * given indexquals to produce a more accurate idea of the index
6377  * selectivity.
6378  */
6379  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
6380 
6381  /*
6382  * Check for ScalarArrayOpExpr index quals, and estimate the number of
6383  * index scans that will be performed.
6384  */
6385  num_sa_scans = 1;
6386  foreach(l, indexQuals)
6387  {
6388  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6389 
6390  if (IsA(rinfo->clause, ScalarArrayOpExpr))
6391  {
6392  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6393  int alength = estimate_array_length(lsecond(saop->args));
6394 
6395  if (alength > 1)
6396  num_sa_scans *= alength;
6397  }
6398  }
6399 
6400  /* Estimate the fraction of main-table tuples that will be visited */
6401  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6402  index->rel->relid,
6403  JOIN_INNER,
6404  NULL);
6405 
6406  /*
6407  * If caller didn't give us an estimate, estimate the number of index
6408  * tuples that will be visited. We do it in this rather peculiar-looking
6409  * way in order to get the right answer for partial indexes.
6410  */
6411  numIndexTuples = costs->numIndexTuples;
6412  if (numIndexTuples <= 0.0)
6413  {
6414  numIndexTuples = indexSelectivity * index->rel->tuples;
6415 
6416  /*
6417  * The above calculation counts all the tuples visited across all
6418  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
6419  * average per-indexscan number, so adjust. This is a handy place to
6420  * round to integer, too. (If caller supplied tuple estimate, it's
6421  * responsible for handling these considerations.)
6422  */
6423  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6424  }
6425 
6426  /*
6427  * We can bound the number of tuples by the index size in any case. Also,
6428  * always estimate at least one tuple is touched, even when
6429  * indexSelectivity estimate is tiny.
6430  */
6431  if (numIndexTuples > index->tuples)
6432  numIndexTuples = index->tuples;
6433  if (numIndexTuples < 1.0)
6434  numIndexTuples = 1.0;
6435 
6436  /*
6437  * Estimate the number of index pages that will be retrieved.
6438  *
6439  * We use the simplistic method of taking a pro-rata fraction of the total
6440  * number of index pages. In effect, this counts only leaf pages and not
6441  * any overhead such as index metapage or upper tree levels.
6442  *
6443  * In practice access to upper index levels is often nearly free because
6444  * those tend to stay in cache under load; moreover, the cost involved is
6445  * highly dependent on index type. We therefore ignore such costs here
6446  * and leave it to the caller to add a suitable charge if needed.
6447  */
6448  if (index->pages > 1 && index->tuples > 1)
6449  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6450  else
6451  numIndexPages = 1.0;
6452 
6453  /* fetch estimated page cost for tablespace containing index */
6455  &spc_random_page_cost,
6456  NULL);
6457 
6458  /*
6459  * Now compute the disk access costs.
6460  *
6461  * The above calculations are all per-index-scan. However, if we are in a
6462  * nestloop inner scan, we can expect the scan to be repeated (with
6463  * different search keys) for each row of the outer relation. Likewise,
6464  * ScalarArrayOpExpr quals result in multiple index scans. This creates
6465  * the potential for cache effects to reduce the number of disk page
6466  * fetches needed. We want to estimate the average per-scan I/O cost in
6467  * the presence of caching.
6468  *
6469  * We use the Mackert-Lohman formula (see costsize.c for details) to
6470  * estimate the total number of page fetches that occur. While this
6471  * wasn't what it was designed for, it seems a reasonable model anyway.
6472  * Note that we are counting pages not tuples anymore, so we take N = T =
6473  * index size, as if there were one "tuple" per page.
6474  */
6475  num_outer_scans = loop_count;
6476  num_scans = num_sa_scans * num_outer_scans;
6477 
6478  if (num_scans > 1)
6479  {
6480  double pages_fetched;
6481 
6482  /* total page fetches ignoring cache effects */
6483  pages_fetched = numIndexPages * num_scans;
6484 
6485  /* use Mackert and Lohman formula to adjust for cache effects */
6486  pages_fetched = index_pages_fetched(pages_fetched,
6487  index->pages,
6488  (double) index->pages,
6489  root);
6490 
6491  /*
6492  * Now compute the total disk access cost, and then report a pro-rated
6493  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
6494  * since that's internal to the indexscan.)
6495  */
6496  indexTotalCost = (pages_fetched * spc_random_page_cost)
6497  / num_outer_scans;
6498  }
6499  else
6500  {
6501  /*
6502  * For a single index scan, we just charge spc_random_page_cost per
6503  * page touched.
6504  */
6505  indexTotalCost = numIndexPages * spc_random_page_cost;
6506  }
6507 
6508  /*
6509  * CPU cost: any complex expressions in the indexquals will need to be
6510  * evaluated once at the start of the scan to reduce them to runtime keys
6511  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
6512  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6513  * indexqual operator. Because we have numIndexTuples as a per-scan
6514  * number, we have to multiply by num_sa_scans to get the correct result
6515  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
6516  * ORDER BY expressions.
6517  *
6518  * Note: this neglects the possible costs of rechecking lossy operators.
6519  * Detecting that that might be needed seems more expensive than it's
6520  * worth, though, considering all the other inaccuracies here ...
6521  */
6522  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
6523  index_other_operands_eval_cost(root, indexOrderBys);
6524  qual_op_cost = cpu_operator_cost *
6525  (list_length(indexQuals) + list_length(indexOrderBys));
6526 
6527  indexStartupCost = qual_arg_cost;
6528  indexTotalCost += qual_arg_cost;
6529  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6530 
6531  /*
6532  * Generic assumption about index correlation: there isn't any.
6533  */
6534  indexCorrelation = 0.0;
6535 
6536  /*
6537  * Return everything to caller.
6538  */
6539  costs->indexStartupCost = indexStartupCost;
6540  costs->indexTotalCost = indexTotalCost;
6541  costs->indexSelectivity = indexSelectivity;
6542  costs->indexCorrelation = indexCorrelation;
6543  costs->numIndexPages = numIndexPages;
6544  costs->numIndexTuples = numIndexTuples;
6545  costs->spc_random_page_cost = spc_random_page_cost;
6546  costs->num_sa_scans = num_sa_scans;
6547 }
Selectivity indexSelectivity
Definition: selfuncs.h:126
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
IndexOptInfo * indexinfo
Definition: pathnodes.h:1237
double tuples
Definition: pathnodes.h:716
Oid reltablespace
Definition: pathnodes.h:831
List * indexclauses
Definition: pathnodes.h:1238
double Selectivity
Definition: nodes.h:672
double tuples
Definition: pathnodes.h:836
#define lsecond(l)
Definition: pg_list.h:179
Definition: type.h:89
BlockNumber pages
Definition: pathnodes.h:835
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:2132
RelOptInfo * rel
Definition: pathnodes.h:832
double num_sa_scans
Definition: selfuncs.h:133
double cpu_operator_cost
Definition: costsize.c:123
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:6267
Cost indexTotalCost
Definition: selfuncs.h:125
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:181
Index relid
Definition: pathnodes.h:704
Expr * clause
Definition: pathnodes.h:2045
double indexCorrelation
Definition: selfuncs.h:127
List * indexorderbys
Definition: pathnodes.h:1239
double spc_random_page_cost
Definition: selfuncs.h:132
double numIndexTuples
Definition: selfuncs.h:131
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:6297
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
Cost indexStartupCost
Definition: selfuncs.h:124
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:102
Definition: pg_list.h:50
double cpu_index_tuple_cost
Definition: costsize.c:122
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:840
double Cost
Definition: nodes.h:673
double numIndexPages
Definition: selfuncs.h:130
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6569

◆ get_join_variables()

void get_join_variables ( PlannerInfo root,
List args,
SpecialJoinInfo sjinfo,
VariableStatData vardata1,
VariableStatData vardata2,
bool join_is_reversed 
)

Definition at line 4885 of file selfuncs.c.

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().

4888 {
4889  Node *left,
4890  *right;
4891 
4892  if (list_length(args) != 2)
4893  elog(ERROR, "join operator should take two arguments");
4894 
4895  left = (Node *) linitial(args);
4896  right = (Node *) lsecond(args);
4897 
4898  examine_variable(root, left, 0, vardata1);
4899  examine_variable(root, right, 0, vardata2);
4900 
4901  if (vardata1->rel &&
4902  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4903  *join_is_reversed = true; /* var1 is on RHS */
4904  else if (vardata2->rel &&
4905  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4906  *join_is_reversed = true; /* var2 is on LHS */
4907  else
4908  *join_is_reversed = false;
4909 }
RelOptInfo * rel
Definition: selfuncs.h:90
Definition: nodes.h:539
#define lsecond(l)
Definition: pg_list.h:179
Relids syn_lefthand
Definition: pathnodes.h:2243
Relids syn_righthand
Definition: pathnodes.h:2244
#define linitial(l)
Definition: pg_list.h:174
#define ERROR
Definition: elog.h:46
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
Relids relids
Definition: pathnodes.h:676
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
static int list_length(const List *l)
Definition: pg_list.h:149
#define elog(elevel,...)
Definition: elog.h:232

◆ get_quals_from_indexclauses()

List* get_quals_from_indexclauses ( List indexclauses)

Definition at line 6267 of file selfuncs.c.

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

6268 {
6269  List *result = NIL;
6270  ListCell *lc;
6271 
6272  foreach(lc, indexclauses)
6273  {
6274  IndexClause *iclause = lfirst_node(IndexClause, lc);
6275  ListCell *lc2;
6276 
6277  foreach(lc2, iclause->indexquals)
6278  {
6279  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6280 
6281  result = lappend(result, rinfo);
6282  }
6283  }
6284  return result;
6285 }
#define NIL
Definition: pg_list.h:65
#define lfirst_node(type, lc)
Definition: pg_list.h:172
List * indexquals
Definition: pathnodes.h:1284
List * lappend(List *list, void *datum)
Definition: list.c:336
Definition: pg_list.h:50

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo root,
List args,
int  varRelid,
VariableStatData vardata,
Node **  other,
bool varonleft 
)

Definition at line 4825 of file selfuncs.c.

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), generic_restriction_selectivity(), multirangesel(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().

4828 {
4829  Node *left,
4830  *right;
4831  VariableStatData rdata;
4832 
4833  /* Fail if not a binary opclause (probably shouldn't happen) */
4834  if (list_length(args) != 2)
4835  return false;
4836 
4837  left = (Node *) linitial(args);
4838  right = (Node *) lsecond(args);
4839 
4840  /*
4841  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4842  * relations will be treated as pseudoconstants.
4843  */
4844  examine_variable(root, left, varRelid, vardata);
4845  examine_variable(root, right, varRelid, &rdata);
4846 
4847  /*
4848  * If one side is a variable and the other not, we win.
4849  */
4850  if (vardata->rel && rdata.rel == NULL)
4851  {
4852  *varonleft = true;
4853  *other = estimate_expression_value(root, rdata.var);
4854  /* Assume we need no ReleaseVariableStats(rdata) here */
4855  return true;
4856  }
4857 
4858  if (vardata->rel == NULL && rdata.rel)
4859  {
4860  *varonleft = false;
4861  *other = estimate_expression_value(root, vardata->var);
4862  /* Assume we need no ReleaseVariableStats(*vardata) here */
4863  *vardata = rdata;
4864  return true;
4865  }
4866 
4867  /* Oops, clause has wrong structure (probably var op var) */
4868  ReleaseVariableStats(*vardata);
4869  ReleaseVariableStats(rdata);
4870 
4871  return false;
4872 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2186
RelOptInfo * rel
Definition: selfuncs.h:90
Definition: nodes.h:539
#define lsecond(l)
Definition: pg_list.h:179
#define linitial(l)
Definition: pg_list.h:174
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
static int list_length(const List *l)
Definition: pg_list.h:149
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData vardata,
bool isdefault 
)

Definition at line 5627 of file selfuncs.c.

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().

5628 {
5629  double stadistinct;
5630  double stanullfrac = 0.0;
5631  double ntuples;
5632 
5633  *isdefault = false;
5634 
5635  /*
5636  * Determine the stadistinct value to use. There are cases where we can
5637  * get an estimate even without a pg_statistic entry, or can get a better
5638  * value than is in pg_statistic. Grab stanullfrac too if we can find it
5639  * (otherwise, assume no nulls, for lack of any better idea).
5640  */
5641  if (HeapTupleIsValid(vardata->statsTuple))
5642  {
5643  /* Use the pg_statistic entry */
5644  Form_pg_statistic stats;
5645 
5646  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5647  stadistinct = stats->stadistinct;
5648  stanullfrac = stats->stanullfrac;
5649  }
5650  else if (vardata->vartype == BOOLOID)
5651  {
5652  /*
5653  * Special-case boolean columns: presumably, two distinct values.
5654  *
5655  * Are there any other datatypes we should wire in special estimates
5656  * for?
5657  */
5658  stadistinct = 2.0;
5659  }
5660  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5661  {
5662  /*
5663  * If the Var represents a column of a VALUES RTE, assume it's unique.
5664  * This could of course be very wrong, but it should tend to be true
5665  * in well-written queries. We could consider examining the VALUES'
5666  * contents to get some real statistics; but that only works if the
5667  * entries are all constants, and it would be pretty expensive anyway.
5668  */
5669  stadistinct = -1.0; /* unique (and all non null) */
5670  }
5671  else
5672  {
5673  /*
5674  * We don't keep statistics for system columns, but in some cases we
5675  * can infer distinctness anyway.
5676  */
5677  if (vardata->var && IsA(vardata->var, Var))
5678  {
5679  switch (((Var *) vardata->var)->varattno)
5680  {
5682  stadistinct = -1.0; /* unique (and all non null) */
5683  break;
5685  stadistinct = 1.0; /* only 1 value */
5686  break;
5687  default:
5688  stadistinct = 0.0; /* means "unknown" */
5689  break;
5690  }
5691  }
5692  else
5693  stadistinct = 0.0; /* means "unknown" */
5694 
5695  /*
5696  * XXX consider using estimate_num_groups on expressions?
5697  */
5698  }
5699 
5700  /*
5701  * If there is a unique index or DISTINCT clause for the variable, assume
5702  * it is unique no matter what pg_statistic says; the statistics could be
5703  * out of date, or we might have found a partial unique index that proves
5704  * the var is unique for this query. However, we'd better still believe
5705  * the null-fraction statistic.
5706  */
5707  if (vardata->isunique)
5708  stadistinct = -1.0 * (1.0 - stanullfrac);
5709 
5710  /*
5711  * If we had an absolute estimate, use that.
5712  */
5713  if (stadistinct > 0.0)
5714  return clamp_row_est(stadistinct);
5715 
5716  /*
5717  * Otherwise we need to get the relation size; punt if not available.
5718  */
5719  if (vardata->rel == NULL)
5720  {
5721  *isdefault = true;
5722  return DEFAULT_NUM_DISTINCT;
5723  }
5724  ntuples = vardata->rel->tuples;
5725  if (ntuples <= 0.0)
5726  {
5727  *isdefault = true;
5728  return DEFAULT_NUM_DISTINCT;
5729  }
5730 
5731  /*
5732  * If we had a relative estimate, use that.
5733  */
5734  if (stadistinct < 0.0)
5735  return clamp_row_est(-stadistinct * ntuples);
5736 
5737  /*
5738  * With no data, estimate ndistinct = ntuples if the table is small, else
5739  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5740  * that the behavior isn't discontinuous.
5741  */
5742  if (ntuples < DEFAULT_NUM_DISTINCT)
5743  return clamp_row_est(ntuples);
5744 
5745  *isdefault = true;
5746  return DEFAULT_NUM_DISTINCT;
5747 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
HeapTuple statsTuple
Definition: selfuncs.h:91
double tuples
Definition: pathnodes.h:716
RelOptInfo * rel
Definition: selfuncs.h:90
Definition: primnodes.h:186
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define TableOidAttributeNumber
Definition: sysattr.h:26
RTEKind rtekind
Definition: pathnodes.h:706
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:52
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21
double clamp_row_est(double nrows)
Definition: costsize.c:199

◆ histogram_selectivity()

double histogram_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Oid  collation,
Datum  constval,
bool  varonleft,
int  min_hist_size,
int  n_skip,
int *  hist_size 
)

Definition at line 820 of file selfuncs.c.

References Assert, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nvalues, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), and patternsel_common().

825 {
826  double result;
827  AttStatsSlot sslot;
828 
829  /* check sanity of parameters */
830  Assert(n_skip >= 0);
831  Assert(min_hist_size > 2 * n_skip);
832 
833  if (HeapTupleIsValid(vardata->statsTuple) &&
834  statistic_proc_security_check(vardata, opproc->fn_oid) &&
835  get_attstatsslot(&sslot, vardata->statsTuple,
836  STATISTIC_KIND_HISTOGRAM, InvalidOid,
838  {
839  *hist_size = sslot.nvalues;
840  if (sslot.nvalues >= min_hist_size)
841  {
842  LOCAL_FCINFO(fcinfo, 2);
843  int nmatch = 0;
844  int i;
845 
846  /*
847  * We invoke the opproc "by hand" so that we won't fail on NULL
848  * results. Such cases won't arise for normal comparison
849  * functions, but generic_restriction_selectivity could perhaps be
850  * used with operators that can return NULL. A small side benefit
851  * is to not need to re-initialize the fcinfo struct from scratch
852  * each time.
853  */
854  InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
855  NULL, NULL);
856  fcinfo->args[0].isnull = false;
857  fcinfo->args[1].isnull = false;
858  /* be careful to apply operator right way 'round */
859  if (varonleft)
860  fcinfo->args[1].value = constval;
861  else
862  fcinfo->args[0].value = constval;
863 
864  for (i = n_skip; i < sslot.nvalues - n_skip; i++)
865  {
866  Datum fresult;
867 
868  if (varonleft)
869  fcinfo->args[0].value = sslot.values[i];
870  else
871  fcinfo->args[1].value = sslot.values[i];
872  fcinfo->isnull = false;
873  fresult = FunctionCallInvoke(fcinfo);
874  if (!fcinfo->isnull && DatumGetBool(fresult))
875  nmatch++;
876  }
877  result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
878  }
879  else
880  result = -1;
881  free_attstatsslot(&sslot);
882  }
883  else
884  {
885  *hist_size = 0;
886  result = -1;
887  }
888 
889  return result;
890 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
HeapTuple statsTuple
Definition: selfuncs.h:91
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5598
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
#define DatumGetBool(X)
Definition: postgres.h:437
uintptr_t Datum
Definition: postgres.h:411
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
#define Assert(condition)
Definition: c.h:804
Datum * values
Definition: lsyscache.h:53
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ index_other_operands_eval_cost()

Cost index_other_operands_eval_cost ( PlannerInfo root,
List indexquals 
)

Definition at line 6297 of file selfuncs.c.

References OpExpr::args, ScalarArrayOpExpr::args, cost_qual_eval_node(), elog, ERROR, IsA, lfirst, lsecond, nodeTag, QualCost::per_tuple, RowCompareExpr::rargs, and QualCost::startup.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

6298 {
6299  Cost qual_arg_cost = 0;
6300  ListCell *lc;
6301 
6302  foreach(lc, indexquals)
6303  {
6304  Expr *clause = (Expr *) lfirst(lc);
6305  Node *other_operand;
6306  QualCost index_qual_cost;
6307 
6308  /*
6309  * Index quals will have RestrictInfos, indexorderbys won't. Look
6310  * through RestrictInfo if present.
6311  */
6312  if (IsA(clause, RestrictInfo))
6313  clause = ((RestrictInfo *) clause)->clause;
6314 
6315  if (IsA(clause, OpExpr))
6316  {
6317  OpExpr *op = (OpExpr *) clause;
6318 
6319  other_operand = (Node *) lsecond(op->args);
6320  }
6321  else if (IsA(clause, RowCompareExpr))
6322  {
6323  RowCompareExpr *rc = (RowCompareExpr *) clause;
6324 
6325  other_operand = (Node *) rc->rargs;
6326  }
6327  else if (IsA(clause, ScalarArrayOpExpr))
6328  {
6329  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6330 
6331  other_operand = (Node *) lsecond(saop->args);
6332  }
6333  else if (IsA(clause, NullTest))
6334  {
6335  other_operand = NULL;
6336  }
6337  else
6338  {
6339  elog(ERROR, "unsupported indexqual type: %d",
6340  (int) nodeTag(clause));
6341  other_operand = NULL; /* keep compiler quiet */
6342  }
6343 
6344  cost_qual_eval_node(&index_qual_cost, other_operand, root);
6345  qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6346  }
6347  return qual_arg_cost;
6348 }
void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
Definition: costsize.c:4337
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
Definition: nodes.h:539
#define lsecond(l)
Definition: pg_list.h:179
Cost startup
Definition: pathnodes.h:45
Cost per_tuple
Definition: pathnodes.h:46
#define ERROR
Definition: elog.h:46
#define lfirst(lc)
Definition: pg_list.h:169
#define nodeTag(nodeptr)
Definition: nodes.h:544
#define elog(elevel,...)
Definition: elog.h:232
List * args
Definition: primnodes.h:548
double Cost
Definition: nodes.h:673

◆ ineq_histogram_selectivity()

double ineq_histogram_selectivity ( PlannerInfo root,
VariableStatData vardata,
Oid  opoid,
FmgrInfo opproc,
bool  isgt,
bool  iseq,
Oid  collation,
Datum  constval,
Oid  consttype 
)

Definition at line 1038 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, comparison_ops_are_compatible(), convert_to_scalar(), DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), FunctionCallInvoke, get_actual_variable_range(), get_attstatsslot(), get_variable_numdistinct(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, AttStatsSlot::staop, statistic_proc_security_check(), VariableStatData::statsTuple, val, AttStatsSlot::values, and VariableStatData::vartype.

Referenced by prefix_selectivity(), and scalarineqsel().

1043 {
1044  double hist_selec;
1045  AttStatsSlot sslot;
1046 
1047  hist_selec = -1.0;
1048 
1049  /*
1050  * Someday, ANALYZE might store more than one histogram per rel/att,
1051  * corresponding to more than one possible sort ordering defined for the
1052  * column type. Right now, we know there is only one, so just grab it and
1053  * see if it matches the query.
1054  *
1055  * Note that we can't use opoid as search argument; the staop appearing in
1056  * pg_statistic will be for the relevant '<' operator, but what we have
1057  * might be some other inequality operator such as '>='. (Even if opoid
1058  * is a '<' operator, it could be cross-type.) Hence we must use
1059  * comparison_ops_are_compatible() to see if the operators match.
1060  */
1061  if (HeapTupleIsValid(vardata->statsTuple) &&
1062  statistic_proc_security_check(vardata, opproc->fn_oid) &&
1063  get_attstatsslot(&sslot, vardata->statsTuple,
1064  STATISTIC_KIND_HISTOGRAM, InvalidOid,
1066  {
1067  if (sslot.nvalues > 1 &&
1068  sslot.stacoll == collation &&
1069  comparison_ops_are_compatible(sslot.staop, opoid))
1070  {
1071  /*
1072  * Use binary search to find the desired location, namely the
1073  * right end of the histogram bin containing the comparison value,
1074  * which is the leftmost entry for which the comparison operator
1075  * succeeds (if isgt) or fails (if !isgt).
1076  *
1077  * In this loop, we pay no attention to whether the operator iseq
1078  * or not; that detail will be mopped up below. (We cannot tell,
1079  * anyway, whether the operator thinks the values are equal.)
1080  *
1081  * If the binary search accesses the first or last histogram
1082  * entry, we try to replace that endpoint with the true column min
1083  * or max as found by get_actual_variable_range(). This
1084  * ameliorates misestimates when the min or max is moving as a
1085  * result of changes since the last ANALYZE. Note that this could
1086  * result in effectively including MCVs into the histogram that
1087  * weren't there before, but we don't try to correct for that.
1088  */
1089  double histfrac;
1090  int lobound = 0; /* first possible slot to search */
1091  int hibound = sslot.nvalues; /* last+1 slot to search */
1092  bool have_end = false;
1093 
1094  /*
1095  * If there are only two histogram entries, we'll want up-to-date
1096  * values for both. (If there are more than two, we need at most
1097  * one of them to be updated, so we deal with that within the
1098  * loop.)
1099  */
1100  if (sslot.nvalues == 2)
1101  have_end = get_actual_variable_range(root,
1102  vardata,
1103  sslot.staop,
1104  collation,
1105  &sslot.values[0],
1106  &sslot.values[1]);
1107 
1108  while (lobound < hibound)
1109  {
1110  int probe = (lobound + hibound) / 2;
1111  bool ltcmp;
1112 
1113  /*
1114  * If we find ourselves about to compare to the first or last
1115  * histogram entry, first try to replace it with the actual
1116  * current min or max (unless we already did so above).
1117  */
1118  if (probe == 0 && sslot.nvalues > 2)
1119  have_end = get_actual_variable_range(root,
1120  vardata,
1121  sslot.staop,
1122  collation,
1123  &sslot.values[0],
1124  NULL);
1125  else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
1126  have_end = get_actual_variable_range(root,
1127  vardata,
1128  sslot.staop,
1129  collation,
1130  NULL,
1131  &sslot.values[probe]);
1132 
1133  ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
1134  collation,
1135  sslot.values[probe],
1136  constval));
1137  if (isgt)
1138  ltcmp = !ltcmp;
1139  if (ltcmp)
1140  lobound = probe + 1;
1141  else
1142  hibound = probe;
1143  }
1144 
1145  if (lobound <= 0)
1146  {
1147  /*
1148  * Constant is below lower histogram boundary. More
1149  * precisely, we have found that no entry in the histogram
1150  * satisfies the inequality clause (if !isgt) or they all do
1151  * (if isgt). We estimate that that's true of the entire
1152  * table, so set histfrac to 0.0 (which we'll flip to 1.0
1153  * below, if isgt).
1154  */
1155  histfrac = 0.0;
1156  }
1157  else if (lobound >= sslot.nvalues)
1158  {
1159  /*
1160  * Inverse case: constant is above upper histogram boundary.
1161  */
1162  histfrac = 1.0;
1163  }
1164  else
1165  {
1166  /* We have values[i-1] <= constant <= values[i]. */
1167  int i = lobound;
1168  double eq_selec = 0;
1169  double val,
1170  high,
1171  low;
1172  double binfrac;
1173 
1174  /*
1175  * In the cases where we'll need it below, obtain an estimate
1176  * of the selectivity of "x = constval". We use a calculation
1177  * similar to what var_eq_const() does for a non-MCV constant,
1178  * ie, estimate that all distinct non-MCV values occur equally
1179  * often. But multiplication by "1.0 - sumcommon - nullfrac"
1180  * will be done by our caller, so we shouldn't do that here.
1181  * Therefore we can't try to clamp the estimate by reference
1182  * to the least common MCV; the result would be too small.
1183  *
1184  * Note: since this is effectively assuming that constval
1185  * isn't an MCV, it's logically dubious if constval in fact is
1186  * one. But we have to apply *some* correction for equality,
1187  * and anyway we cannot tell if constval is an MCV, since we
1188  * don't have a suitable equality operator at hand.
1189  */
1190  if (i == 1 || isgt == iseq)
1191  {
1192  double otherdistinct;
1193  bool isdefault;
1194  AttStatsSlot mcvslot;
1195 
1196  /* Get estimated number of distinct values */
1197  otherdistinct = get_variable_numdistinct(vardata,
1198  &isdefault);
1199 
1200  /* Subtract off the number of known MCVs */
1201  if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1202  STATISTIC_KIND_MCV, InvalidOid,
1204  {
1205  otherdistinct -= mcvslot.nnumbers;
1206  free_attstatsslot(&mcvslot);
1207  }
1208 
1209  /* If result doesn't seem sane, leave eq_selec at 0 */
1210  if (otherdistinct > 1)
1211  eq_selec = 1.0 / otherdistinct;
1212  }
1213 
1214  /*
1215  * Convert the constant and the two nearest bin boundary
1216  * values to a uniform comparison scale, and do a linear
1217  * interpolation within this bin.
1218  */
1219  if (convert_to_scalar(constval, consttype, collation,
1220  &val,
1221  sslot.values[i - 1], sslot.values[i],
1222  vardata->vartype,
1223  &low, &high))
1224  {
1225  if (high <= low)
1226  {
1227  /* cope if bin boundaries appear identical */
1228  binfrac = 0.5;
1229  }
1230  else if (val <= low)
1231  binfrac = 0.0;
1232  else if (val >= high)
1233  binfrac = 1.0;
1234  else
1235  {
1236  binfrac = (val - low) / (high - low);
1237 
1238  /*
1239  * Watch out for the possibility that we got a NaN or
1240  * Infinity from the division. This can happen
1241  * despite the previous checks, if for example "low"
1242  * is -Infinity.
1243  */
1244  if (isnan(binfrac) ||
1245  binfrac < 0.0 || binfrac > 1.0)
1246  binfrac = 0.5;
1247  }
1248  }
1249  else
1250  {
1251  /*
1252  * Ideally we'd produce an error here, on the grounds that
1253  * the given operator shouldn't have scalarXXsel
1254  * registered as its selectivity func unless we can deal
1255  * with its operand types. But currently, all manner of
1256  * stuff is invoking scalarXXsel, so give a default
1257  * estimate until that can be fixed.
1258  */
1259  binfrac = 0.5;
1260  }
1261 
1262  /*
1263  * Now, compute the overall selectivity across the values
1264  * represented by the histogram. We have i-1 full bins and
1265  * binfrac partial bin below the constant.
1266  */
1267  histfrac = (double) (i - 1) + binfrac;
1268  histfrac /= (double) (sslot.nvalues - 1);
1269 
1270  /*
1271  * At this point, histfrac is an estimate of the fraction of
1272  * the population represented by the histogram that satisfies
1273  * "x <= constval". Somewhat remarkably, this statement is
1274  * true regardless of which operator we were doing the probes
1275  * with, so long as convert_to_scalar() delivers reasonable
1276  * results. If the probe constant is equal to some histogram
1277  * entry, we would have considered the bin to the left of that
1278  * entry if probing with "<" or ">=", or the bin to the right
1279  * if probing with "<=" or ">"; but binfrac would have come
1280  * out as 1.0 in the first case and 0.0 in the second, leading
1281  * to the same histfrac in either case. For probe constants
1282  * between histogram entries, we find the same bin and get the
1283  * same estimate with any operator.
1284  *
1285  * The fact that the estimate corresponds to "x <= constval"
1286  * and not "x < constval" is because of the way that ANALYZE
1287  * constructs the histogram: each entry is, effectively, the
1288  * rightmost value in its sample bucket. So selectivity
1289  * values that are exact multiples of 1/(histogram_size-1)
1290  * should be understood as estimates including a histogram
1291  * entry plus everything to its left.
1292  *
1293  * However, that breaks down for the first histogram entry,
1294  * which necessarily is the leftmost value in its sample
1295  * bucket. That means the first histogram bin is slightly
1296  * narrower than the rest, by an amount equal to eq_selec.
1297  * Another way to say that is that we want "x <= leftmost" to
1298  * be estimated as eq_selec not zero. So, if we're dealing
1299  * with the first bin (i==1), rescale to make that true while
1300  * adjusting the rest of that bin linearly.
1301  */
1302  if (i == 1)
1303  histfrac += eq_selec * (1.0 - binfrac);
1304 
1305  /*
1306  * "x <= constval" is good if we want an estimate for "<=" or
1307  * ">", but if we are estimating for "<" or ">=", we now need
1308  * to decrease the estimate by eq_selec.
1309  */
1310  if (isgt == iseq)
1311  histfrac -= eq_selec;
1312  }
1313 
1314  /*
1315  * Now the estimate is finished for "<" and "<=" cases. If we are
1316  * estimating for ">" or ">=", flip it.
1317  */
1318  hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1319 
1320  /*
1321  * The histogram boundaries are only approximate to begin with,
1322  * and may well be out of date anyway. Therefore, don't believe
1323  * extremely small or large selectivity estimates --- unless we
1324  * got actual current endpoint values from the table, in which
1325  * case just do the usual sanity clamp. Somewhat arbitrarily, we
1326  * set the cutoff for other cases at a hundredth of the histogram
1327  * resolution.
1328  */
1329  if (have_end)
1330  CLAMP_PROBABILITY(hist_selec);
1331  else
1332  {
1333  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1334 
1335  if (hist_selec < cutoff)
1336  hist_selec = cutoff;
1337  else if (hist_selec > 1.0 - cutoff)
1338  hist_selec = 1.0 - cutoff;
1339  }
1340  }
1341  else if (sslot.nvalues > 1)
1342  {
1343  /*
1344  * If we get here, we have a histogram but it's not sorted the way
1345  * we want. Do a brute-force search to see how many of the
1346  * entries satisfy the comparison condition, and take that
1347  * fraction as our estimate. (This is identical to the inner loop
1348  * of histogram_selectivity; maybe share code?)
1349  */
1350  LOCAL_FCINFO(fcinfo, 2);
1351  int nmatch = 0;
1352 
1353  InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
1354  NULL, NULL);
1355  fcinfo->args[0].isnull = false;
1356  fcinfo->args[1].isnull = false;
1357  fcinfo->args[1].value = constval;
1358  for (int i = 0; i < sslot.nvalues; i++)
1359  {
1360  Datum fresult;
1361 
1362  fcinfo->args[0].value = sslot.values[i];
1363  fcinfo->isnull = false;
1364  fresult = FunctionCallInvoke(fcinfo);
1365  if (!fcinfo->isnull && DatumGetBool(fresult))
1366  nmatch++;
1367  }
1368  hist_selec = ((double) nmatch) / ((double) sslot.nvalues);
1369 
1370  /*
1371  * As above, clamp to a hundredth of the histogram resolution.
1372  * This case is surely even less trustworthy than the normal one,
1373  * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the
1374  * clamp should be more restrictive in this case?)
1375  */
1376  {
1377  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1378 
1379  if (hist_selec < cutoff)
1380  hist_selec = cutoff;
1381  else if (hist_selec > 1.0 - cutoff)
1382  hist_selec = 1.0 - cutoff;
1383  }
1384  }
1385 
1386  free_attstatsslot(&sslot);
1387  }
1388 
1389  return hist_selec;
1390 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
HeapTuple statsTuple
Definition: selfuncs.h:91
int nnumbers
Definition: lsyscache.h:57
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5598
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1148
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
Definition: selfuncs.c:4253
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5627
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
#define DatumGetBool(X)
Definition: postgres.h:437
uintptr_t Datum
Definition: postgres.h:411
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:5930
bool comparison_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:747
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
Datum * values
Definition: lsyscache.h:53
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
long val
Definition: informix.c:664
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ mcv_selectivity()

double mcv_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Oid  collation,
Datum  constval,
bool  varonleft,
double *  sumcommonp 
)

Definition at line 729 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::numbers, AttStatsSlot::nvalues, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), networksel(), patternsel_common(), and scalarineqsel().

732 {
733  double mcv_selec,
734  sumcommon;
735  AttStatsSlot sslot;
736  int i;
737 
738  mcv_selec = 0.0;
739  sumcommon = 0.0;
740 
741  if (HeapTupleIsValid(vardata->statsTuple) &&
742  statistic_proc_security_check(vardata, opproc->fn_oid) &&
743  get_attstatsslot(&sslot, vardata->statsTuple,
744  STATISTIC_KIND_MCV, InvalidOid,
746  {
747  LOCAL_FCINFO(fcinfo, 2);
748 
749  /*
750  * We invoke the opproc "by hand" so that we won't fail on NULL
751  * results. Such cases won't arise for normal comparison functions,
752  * but generic_restriction_selectivity could perhaps be used with
753  * operators that can return NULL. A small side benefit is to not
754  * need to re-initialize the fcinfo struct from scratch each time.
755  */
756  InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
757  NULL, NULL);
758  fcinfo->args[0].isnull = false;
759  fcinfo->args[1].isnull = false;
760  /* be careful to apply operator right way 'round */
761  if (varonleft)
762  fcinfo->args[1].value = constval;
763  else
764  fcinfo->args[0].value = constval;
765 
766  for (i = 0; i < sslot.nvalues; i++)
767  {
768  Datum fresult;
769 
770  if (varonleft)
771  fcinfo->args[0].value = sslot.values[i];
772  else
773  fcinfo->args[1].value = sslot.values[i];
774  fcinfo->isnull = false;
775  fresult = FunctionCallInvoke(fcinfo);
776  if (!fcinfo->isnull && DatumGetBool(fresult))
777  mcv_selec += sslot.numbers[i];
778  sumcommon += sslot.numbers[i];
779  }
780  free_attstatsslot(&sslot);
781  }
782 
783  *sumcommonp = sumcommon;
784  return mcv_selec;
785 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
HeapTuple statsTuple
Definition: selfuncs.h:91
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5598
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
float4 * numbers
Definition: lsyscache.h:56
#define DatumGetBool(X)
Definition: postgres.h:437
uintptr_t Datum
Definition: postgres.h:411
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
Datum * values
Definition: lsyscache.h:53
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ mergejoinscansel()

void mergejoinscansel ( PlannerInfo root,
Node clause,
Oid  opfamily,
int  strategy,
bool  nulls_first,
Selectivity leftstart,
Selectivity leftend,
Selectivity rightstart,
Selectivity rightend 
)

Definition at line 2904 of file selfuncs.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause(), OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.

Referenced by cached_scansel().

2908 {
2909  Node *left,
2910  *right;
2911  VariableStatData leftvar,
2912  rightvar;
2913  int op_strategy;
2914  Oid op_lefttype;
2915  Oid op_righttype;
2916  Oid opno,
2917  collation,
2918  lsortop,
2919  rsortop,
2920  lstatop,
2921  rstatop,
2922  ltop,
2923  leop,
2924  revltop,
2925  revleop;
2926  bool isgt;
2927  Datum leftmin,
2928  leftmax,
2929  rightmin,
2930  rightmax;
2931  double selec;
2932 
2933  /* Set default results if we can't figure anything out. */
2934  /* XXX should default "start" fraction be a bit more than 0? */
2935  *leftstart = *rightstart = 0.0;
2936  *leftend = *rightend = 1.0;
2937 
2938  /* Deconstruct the merge clause */
2939  if (!is_opclause(clause))
2940  return; /* shouldn't happen */
2941  opno = ((OpExpr *) clause)->opno;
2942  collation = ((OpExpr *) clause)->inputcollid;
2943  left = get_leftop((Expr *) clause);
2944  right = get_rightop((Expr *) clause);
2945  if (!right)
2946  return; /* shouldn't happen */
2947 
2948  /* Look for stats for the inputs */
2949  examine_variable(root, left, 0, &leftvar);
2950  examine_variable(root, right, 0, &rightvar);
2951 
2952  /* Extract the operator's declared left/right datatypes */
2953  get_op_opfamily_properties(opno, opfamily, false,
2954  &op_strategy,
2955  &op_lefttype,
2956  &op_righttype);
2957  Assert(op_strategy == BTEqualStrategyNumber);
2958 
2959  /*
2960  * Look up the various operators we need. If we don't find them all, it
2961  * probably means the opfamily is broken, but we just fail silently.
2962  *
2963  * Note: we expect that pg_statistic histograms will be sorted by the '<'
2964  * operator, regardless of which sort direction we are considering.
2965  */
2966  switch (strategy)
2967  {
2968  case BTLessStrategyNumber:
2969  isgt = false;
2970  if (op_lefttype == op_righttype)
2971  {
2972  /* easy case */
2973  ltop = get_opfamily_member(opfamily,
2974  op_lefttype, op_righttype,
2976  leop = get_opfamily_member(opfamily,
2977  op_lefttype, op_righttype,
2979  lsortop = ltop;
2980  rsortop = ltop;
2981  lstatop = lsortop;
2982  rstatop = rsortop;
2983  revltop = ltop;
2984  revleop = leop;
2985  }
2986  else
2987  {
2988  ltop = get_opfamily_member(opfamily,
2989  op_lefttype, op_righttype,
2991  leop = get_opfamily_member(opfamily,
2992  op_lefttype, op_righttype,
2994  lsortop = get_opfamily_member(opfamily,
2995  op_lefttype, op_lefttype,
2997  rsortop = get_opfamily_member(opfamily,
2998  op_righttype, op_righttype,
3000  lstatop = lsortop;
3001  rstatop = rsortop;
3002  revltop = get_opfamily_member(opfamily,
3003  op_righttype, op_lefttype,
3005  revleop = get_opfamily_member(opfamily,
3006  op_righttype, op_lefttype,
3008  }
3009  break;
3011  /* descending-order case */
3012  isgt = true;
3013  if (op_lefttype == op_righttype)
3014  {
3015  /* easy case */
3016  ltop = get_opfamily_member(opfamily,
3017  op_lefttype, op_righttype,
3019  leop = get_opfamily_member(opfamily,
3020  op_lefttype, op_righttype,
3022  lsortop = ltop;
3023  rsortop = ltop;
3024  lstatop = get_opfamily_member(opfamily,
3025  op_lefttype, op_lefttype,
3027  rstatop = lstatop;
3028  revltop = ltop;
3029  revleop = leop;
3030  }
3031  else
3032  {
3033  ltop = get_opfamily_member(opfamily,
3034  op_lefttype, op_righttype,
3036  leop = get_opfamily_member(opfamily,
3037  op_lefttype, op_righttype,
3039  lsortop = get_opfamily_member(opfamily,
3040  op_lefttype, op_lefttype,
3042  rsortop = get_opfamily_member(opfamily,
3043  op_righttype, op_righttype,
3045  lstatop = get_opfamily_member(opfamily,
3046  op_lefttype, op_lefttype,
3048  rstatop = get_opfamily_member(opfamily,
3049  op_righttype, op_righttype,
3051  revltop = get_opfamily_member(opfamily,
3052  op_righttype, op_lefttype,
3054  revleop = get_opfamily_member(opfamily,
3055  op_righttype, op_lefttype,
3057  }
3058  break;
3059  default:
3060  goto fail; /* shouldn't get here */
3061  }
3062 
3063  if (!OidIsValid(lsortop) ||
3064  !OidIsValid(rsortop) ||
3065  !OidIsValid(lstatop) ||
3066  !OidIsValid(rstatop) ||
3067  !OidIsValid(ltop) ||
3068  !OidIsValid(leop) ||
3069  !OidIsValid(revltop) ||
3070  !OidIsValid(revleop))
3071  goto fail; /* insufficient info in catalogs */
3072 
3073  /* Try to get ranges of both inputs */
3074  if (!isgt)
3075  {
3076  if (!get_variable_range(root, &leftvar, lstatop, collation,
3077  &leftmin, &leftmax))
3078  goto fail; /* no range available from stats */
3079  if (!get_variable_range(root, &rightvar, rstatop, collation,
3080  &rightmin, &rightmax))
3081  goto fail; /* no range available from stats */
3082  }
3083  else
3084  {
3085  /* need to swap the max and min */
3086  if (!get_variable_range(root, &leftvar, lstatop, collation,
3087  &leftmax, &leftmin))
3088  goto fail; /* no range available from stats */
3089  if (!get_variable_range(root, &rightvar, rstatop, collation,
3090  &rightmax, &rightmin))
3091  goto fail; /* no range available from stats */
3092  }
3093 
3094  /*
3095  * Now, the fraction of the left variable that will be scanned is the
3096  * fraction that's <= the right-side maximum value. But only believe
3097  * non-default estimates, else stick with our 1.0.
3098  */
3099  selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
3100  rightmax, op_righttype);
3101  if (selec != DEFAULT_INEQ_SEL)
3102  *leftend = selec;
3103 
3104  /* And similarly for the right variable. */
3105  selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
3106  leftmax, op_lefttype);
3107  if (selec != DEFAULT_INEQ_SEL)
3108  *rightend = selec;
3109 
3110  /*
3111  * Only one of the two "end" fractions can really be less than 1.0;
3112  * believe the smaller estimate and reset the other one to exactly 1.0. If
3113  * we get exactly equal estimates (as can easily happen with self-joins),
3114  * believe neither.
3115  */
3116  if (*leftend > *rightend)
3117  *leftend = 1.0;
3118  else if (*leftend < *rightend)
3119  *rightend = 1.0;
3120  else
3121  *leftend = *rightend = 1.0;
3122 
3123  /*
3124  * Also, the fraction of the left variable that will be scanned before the
3125  * first join pair is found is the fraction that's < the right-side
3126  * minimum value. But only believe non-default estimates, else stick with
3127  * our own default.
3128  */
3129  selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
3130  rightmin, op_righttype);
3131  if (selec != DEFAULT_INEQ_SEL)
3132  *leftstart = selec;
3133 
3134  /* And similarly for the right variable. */
3135  selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
3136  leftmin, op_lefttype);
3137  if (selec != DEFAULT_INEQ_SEL)
3138  *rightstart = selec;
3139 
3140  /*
3141  * Only one of the two "start" fractions can really be more than zero;
3142  * believe the larger estimate and reset the other one to exactly 0.0. If
3143  * we get exactly equal estimates (as can easily happen with self-joins),
3144  * believe neither.
3145  */
3146  if (*leftstart < *rightstart)
3147  *leftstart = 0.0;
3148  else if (*leftstart > *rightstart)
3149  *rightstart = 0.0;
3150  else
3151  *leftstart = *rightstart = 0.0;
3152 
3153  /*
3154  * If the sort order is nulls-first, we're going to have to skip over any
3155  * nulls too. These would not have been counted by scalarineqsel, and we
3156  * can safely add in this fraction regardless of whether we believe
3157  * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
3158  */
3159  if (nulls_first)
3160  {
3161  Form_pg_statistic stats;
3162 
3163  if (HeapTupleIsValid(leftvar.statsTuple))
3164  {
3165  stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3166  *leftstart += stats->stanullfrac;
3167  CLAMP_PROBABILITY(*leftstart);
3168  *leftend += stats->stanullfrac;
3169  CLAMP_PROBABILITY(*leftend);
3170  }
3171  if (HeapTupleIsValid(rightvar.statsTuple))
3172  {
3173  stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3174  *rightstart += stats->stanullfrac;
3175  CLAMP_PROBABILITY(*rightstart);
3176  *rightend += stats->stanullfrac;
3177  CLAMP_PROBABILITY(*rightend);
3178  }
3179  }
3180 
3181  /* Disbelieve start >= end, just in case that can happen */
3182  if (*leftstart >= *leftend)
3183  {
3184  *leftstart = 0.0;
3185  *leftend = 1.0;
3186  }
3187  if (*rightstart >= *rightend)
3188  {
3189  *rightstart = 0.0;
3190  *rightend = 1.0;
3191  }
3192 
3193 fail:
3194  ReleaseVariableStats(leftvar);
3195  ReleaseVariableStats(rightvar);
3196 }
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:5760
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
HeapTuple statsTuple
Definition: selfuncs.h:91
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Definition: nodes.h:539
static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, Oid collation, VariableStatData *vardata, Datum constval, Oid consttype)
Definition: selfuncs.c:577
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define OidIsValid(objectId)
Definition: c.h:710
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:164
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:73
uintptr_t Datum
Definition: postgres.h:411
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:85
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
#define Assert(condition)
Definition: c.h:804
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:134
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:66
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32

◆ nulltestsel()

Selectivity nulltestsel ( PlannerInfo root,
NullTestType  nulltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1695 of file selfuncs.c.

References CLAMP_PROBABILITY, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, IsA, ReleaseVariableStats, VariableStatData::statsTuple, and VariableStatData::var.

Referenced by clause_selectivity_ext(), and clauselist_selectivity_ext().

1697 {
1698  VariableStatData vardata;
1699  double selec;
1700 
1701  examine_variable(root, arg, varRelid, &vardata);
1702 
1703  if (HeapTupleIsValid(vardata.statsTuple))
1704  {
1705  Form_pg_statistic stats;
1706  double freq_null;
1707 
1708  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1709  freq_null = stats->stanullfrac;
1710 
1711  switch (nulltesttype)
1712  {
1713  case IS_NULL:
1714 
1715  /*
1716  * Use freq_null directly.
1717  */
1718  selec = freq_null;
1719  break;
1720  case IS_NOT_NULL:
1721 
1722  /*
1723  * Select not unknown (not null) values. Calculate from
1724  * freq_null.
1725  */
1726  selec = 1.0 - freq_null;
1727  break;
1728  default:
1729  elog(ERROR, "unrecognized nulltesttype: %d",
1730  (int) nulltesttype);
1731  return (Selectivity) 0; /* keep compiler quiet */
1732  }
1733  }
1734  else if (vardata.var && IsA(vardata.var, Var) &&
1735  ((Var *) vardata.var)->varattno < 0)
1736  {
1737  /*
1738  * There are no stats for system columns, but we know they are never
1739  * NULL.
1740  */
1741  selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1742  }
1743  else
1744  {
1745  /*
1746  * No ANALYZE stats available, so make a guess
1747  */
1748  switch (nulltesttype)
1749  {
1750  case IS_NULL:
1751  selec = DEFAULT_UNK_SEL;
1752  break;
1753  case IS_NOT_NULL:
1754  selec = DEFAULT_NOT_UNK_SEL;
1755  break;
1756  default:
1757  elog(ERROR, "unrecognized nulltesttype: %d",
1758  (int) nulltesttype);
1759  return (Selectivity) 0; /* keep compiler quiet */
1760  }
1761  }
1762 
1763  ReleaseVariableStats(vardata);
1764 
1765  /* result should be in range, but make sure... */
1766  CLAMP_PROBABILITY(selec);
1767 
1768  return (Selectivity) selec;
1769 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
HeapTuple statsTuple
Definition: selfuncs.h:91
double Selectivity
Definition: nodes.h:672
Definition: primnodes.h:186
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:56
#define ERROR
Definition: elog.h:46
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:55
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define elog(elevel,...)
Definition: elog.h:232

◆ rowcomparesel()

Selectivity rowcomparesel ( PlannerInfo root,
RowCompareExpr clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 2170 of file selfuncs.c.

References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.

Referenced by clause_selectivity_ext().

2173 {
2174  Selectivity s1;
2175  Oid opno = linitial_oid(clause->opnos);
2176  Oid inputcollid = linitial_oid(clause->inputcollids);
2177  List *opargs;
2178  bool is_join_clause;
2179 
2180  /* Build equivalent arg list for single operator */
2181  opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2182 
2183  /*
2184  * Decide if it's a join clause. This should match clausesel.c's
2185  * treat_as_join_clause(), except that we intentionally consider only the
2186  * leading columns and not the rest of the clause.
2187  */
2188  if (varRelid != 0)
2189  {
2190  /*
2191  * Caller is forcing restriction mode (eg, because we are examining an
2192  * inner indexscan qual).
2193  */
2194  is_join_clause = false;
2195  }
2196  else if (sjinfo == NULL)
2197  {
2198  /*
2199  * It must be a restriction clause, since it's being evaluated at a
2200  * scan node.
2201  */
2202  is_join_clause = false;
2203  }
2204  else
2205  {
2206  /*
2207  * Otherwise, it's a join if there's more than one relation used.
2208  */
2209  is_join_clause = (NumRelids(root, (Node *) opargs) > 1);
2210  }
2211 
2212  if (is_join_clause)
2213  {
2214  /* Estimate selectivity for a join clause. */
2215  s1 = join_selectivity(root, opno,
2216  opargs,
2217  inputcollid,
2218  jointype,
2219  sjinfo);
2220  }
2221  else
2222  {
2223  /* Estimate selectivity for a restriction clause. */
2224  s1 = restriction_selectivity(root, opno,
2225  opargs,
2226  inputcollid,
2227  varRelid);
2228  }
2229 
2230  return s1;
2231 }
#define list_make2(x1, x2)
Definition: pg_list.h:208
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1825
Definition: nodes.h:539
double Selectivity
Definition: nodes.h:672
unsigned int Oid
Definition: postgres_ext.h:31
#define linitial(l)
Definition: pg_list.h:174
char * s1
int NumRelids(PlannerInfo *root, Node *clause)
Definition: clauses.c:1968
#define linitial_oid(l)
Definition: pg_list.h:176
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1864
List * inputcollids
Definition: primnodes.h:1109
Definition: pg_list.h:50

◆ scalararraysel()

Selectivity scalararraysel ( PlannerInfo root,
ScalarArrayOpExpr clause,
bool  is_join_clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1813 of file selfuncs.c.

References generate_unaccent_rules::args, ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, estimate_expression_value(), exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.

Referenced by clause_selectivity_ext().

1819 {
1820  Oid operator = clause->opno;
1821  bool useOr = clause->useOr;
1822  bool isEquality = false;
1823  bool isInequality = false;
1824  Node *leftop;
1825  Node *rightop;
1826  Oid nominal_element_type;
1827  Oid nominal_element_collation;
1828  TypeCacheEntry *typentry;
1829  RegProcedure oprsel;
1830  FmgrInfo oprselproc;
1831  Selectivity s1;
1832  Selectivity s1disjoint;
1833 
1834  /* First, deconstruct the expression */
1835  Assert(list_length(clause->args) == 2);
1836  leftop = (Node *) linitial(clause->args);
1837  rightop = (Node *) lsecond(clause->args);
1838 
1839  /* aggressively reduce both sides to constants */
1840  leftop = estimate_expression_value(root, leftop);
1841  rightop = estimate_expression_value(root, rightop);
1842 
1843  /* get nominal (after relabeling) element type of rightop */
1844  nominal_element_type = get_base_element_type(exprType(rightop));
1845  if (!OidIsValid(nominal_element_type))
1846  return (Selectivity) 0.5; /* probably shouldn't happen */
1847  /* get nominal collation, too, for generating constants */
1848  nominal_element_collation = exprCollation(rightop);
1849 
1850  /* look through any binary-compatible relabeling of rightop */
1851  rightop = strip_array_coercion(rightop);
1852 
1853  /*
1854  * Detect whether the operator is the default equality or inequality
1855  * operator of the array element type.
1856  */
1857  typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1858  if (OidIsValid(typentry->eq_opr))
1859  {
1860  if (operator == typentry->eq_opr)
1861  isEquality = true;
1862  else if (get_negator(operator) == typentry->eq_opr)
1863  isInequality = true;
1864  }
1865 
1866  /*
1867  * If it is equality or inequality, we might be able to estimate this as a
1868  * form of array containment; for instance "const = ANY(column)" can be
1869  * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1870  * that, and returns the selectivity estimate if successful, or -1 if not.
1871  */
1872  if ((isEquality || isInequality) && !is_join_clause)
1873  {
1874  s1 = scalararraysel_containment(root, leftop, rightop,
1875  nominal_element_type,
1876  isEquality, useOr, varRelid);
1877  if (s1 >= 0.0)
1878  return s1;
1879  }
1880 
1881  /*
1882  * Look up the underlying operator's selectivity estimator. Punt if it
1883  * hasn't got one.
1884  */
1885  if (is_join_clause)
1886  oprsel = get_oprjoin(operator);
1887  else
1888  oprsel = get_oprrest(operator);
1889  if (!oprsel)
1890  return (Selectivity) 0.5;
1891  fmgr_info(oprsel, &oprselproc);
1892 
1893  /*
1894  * In the array-containment check above, we must only believe that an
1895  * operator is equality or inequality if it is the default btree equality
1896  * operator (or its negator) for the element type, since those are the
1897  * operators that array containment will use. But in what follows, we can
1898  * be a little laxer, and also believe that any operators using eqsel() or
1899  * neqsel() as selectivity estimator act like equality or inequality.
1900  */
1901  if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1902  isEquality = true;
1903  else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1904  isInequality = true;
1905 
1906  /*
1907  * We consider three cases:
1908  *
1909  * 1. rightop is an Array constant: deconstruct the array, apply the
1910  * operator's selectivity function for each array element, and merge the
1911  * results in the same way that clausesel.c does for AND/OR combinations.
1912  *
1913  * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1914  * function for each element of the ARRAY[] construct, and merge.
1915  *
1916  * 3. otherwise, make a guess ...
1917  */
1918  if (rightop && IsA(rightop, Const))
1919  {
1920  Datum arraydatum = ((Const *) rightop)->constvalue;
1921  bool arrayisnull = ((Const *) rightop)->constisnull;
1922  ArrayType *arrayval;
1923  int16 elmlen;
1924  bool elmbyval;
1925  char elmalign;
1926  int num_elems;
1927  Datum *elem_values;
1928  bool *elem_nulls;
1929  int i;
1930 
1931  if (arrayisnull) /* qual can't succeed if null array */
1932  return (Selectivity) 0.0;
1933  arrayval = DatumGetArrayTypeP(arraydatum);
1935  &elmlen, &elmbyval, &elmalign);
1936  deconstruct_array(arrayval,
1937  ARR_ELEMTYPE(arrayval),
1938  elmlen, elmbyval, elmalign,
1939  &elem_values, &elem_nulls, &num_elems);
1940 
1941  /*
1942  * For generic operators, we assume the probability of success is
1943  * independent for each array element. But for "= ANY" or "<> ALL",
1944  * if the array elements are distinct (which'd typically be the case)
1945  * then the probabilities are disjoint, and we should just sum them.
1946  *
1947  * If we were being really tense we would try to confirm that the
1948  * elements are all distinct, but that would be expensive and it
1949  * doesn't seem to be worth the cycles; it would amount to penalizing
1950  * well-written queries in favor of poorly-written ones. However, we
1951  * do protect ourselves a little bit by checking whether the
1952  * disjointness assumption leads to an impossible (out of range)
1953  * probability; if so, we fall back to the normal calculation.
1954  */
1955  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1956 
1957  for (i = 0; i < num_elems; i++)
1958  {
1959  List *args;
1960  Selectivity s2;
1961 
1962  args = list_make2(leftop,
1963  makeConst(nominal_element_type,
1964  -1,
1965  nominal_element_collation,
1966  elmlen,
1967  elem_values[i],
1968  elem_nulls[i],
1969  elmbyval));
1970  if (is_join_clause)
1971  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1972  clause->inputcollid,
1973  PointerGetDatum(root),
1974  ObjectIdGetDatum(operator),
1975  PointerGetDatum(args),
1976  Int16GetDatum(jointype),
1977  PointerGetDatum(sjinfo)));
1978  else
1979  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1980  clause->inputcollid,
1981  PointerGetDatum(root),
1982  ObjectIdGetDatum(operator),
1983  PointerGetDatum(args),
1984  Int32GetDatum(varRelid)));
1985 
1986  if (useOr)
1987  {
1988  s1 = s1 + s2 - s1 * s2;
1989  if (isEquality)
1990  s1disjoint += s2;
1991  }
1992  else
1993  {
1994  s1 = s1 * s2;
1995  if (isInequality)
1996  s1disjoint += s2 - 1.0;
1997  }
1998  }
1999 
2000  /* accept disjoint-probability estimate if in range */
2001  if ((useOr ? isEquality : isInequality) &&
2002  s1disjoint >= 0.0 && s1disjoint <= 1.0)
2003  s1 = s1disjoint;
2004  }
2005  else if (rightop && IsA(rightop, ArrayExpr) &&
2006  !((ArrayExpr *) rightop)->multidims)
2007  {
2008  ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
2009  int16 elmlen;
2010  bool elmbyval;
2011  ListCell *l;
2012 
2013  get_typlenbyval(arrayexpr->element_typeid,
2014  &elmlen, &elmbyval);
2015 
2016  /*
2017  * We use the assumption of disjoint probabilities here too, although
2018  * the odds of equal array elements are rather higher if the elements
2019  * are not all constants (which they won't be, else constant folding
2020  * would have reduced the ArrayExpr to a Const). In this path it's
2021  * critical to have the sanity check on the s1disjoint estimate.
2022  */
2023  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
2024 
2025  foreach(l, arrayexpr->elements)
2026  {
2027  Node *elem = (Node *) lfirst(l);
2028  List *args;
2029  Selectivity s2;
2030 
2031  /*
2032  * Theoretically, if elem isn't of nominal_element_type we should
2033  * insert a RelabelType, but it seems unlikely that any operator
2034  * estimation function would really care ...
2035  */
2036  args = list_make2(leftop, elem);
2037  if (is_join_clause)
2038  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2039  clause->inputcollid,
2040  PointerGetDatum(root),
2041  ObjectIdGetDatum(operator),
2042  PointerGetDatum(args),
2043  Int16GetDatum(jointype),
2044  PointerGetDatum(sjinfo)));
2045  else
2046  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2047  clause->inputcollid,
2048  PointerGetDatum(root),
2049  ObjectIdGetDatum(operator),
2050  PointerGetDatum(args),
2051  Int32GetDatum(varRelid)));
2052 
2053  if (useOr)
2054  {
2055  s1 = s1 + s2 - s1 * s2;
2056  if (isEquality)
2057  s1disjoint += s2;
2058  }
2059  else
2060  {
2061  s1 = s1 * s2;
2062  if (isInequality)
2063  s1disjoint += s2 - 1.0;
2064  }
2065  }
2066 
2067  /* accept disjoint-probability estimate if in range */
2068  if ((useOr ? isEquality : isInequality) &&
2069  s1disjoint >= 0.0 && s1disjoint <= 1.0)
2070  s1 = s1disjoint;
2071  }
2072  else
2073  {
2074  CaseTestExpr *dummyexpr;
2075  List *args;
2076  Selectivity s2;
2077  int i;
2078 
2079  /*
2080  * We need a dummy rightop to pass to the operator selectivity
2081  * routine. It can be pretty much anything that doesn't look like a
2082  * constant; CaseTestExpr is a convenient choice.
2083  */
2084  dummyexpr = makeNode(CaseTestExpr);
2085  dummyexpr->typeId = nominal_element_type;
2086  dummyexpr->typeMod = -1;
2087  dummyexpr->collation = clause->inputcollid;
2088  args = list_make2(leftop, dummyexpr);
2089  if (is_join_clause)
2090  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2091  clause->inputcollid,
2092  PointerGetDatum(root),
2093  ObjectIdGetDatum(operator),
2094  PointerGetDatum(args),
2095  Int16GetDatum(jointype),
2096  PointerGetDatum(sjinfo)));
2097  else
2098  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2099  clause->inputcollid,
2100  PointerGetDatum(root),
2101  ObjectIdGetDatum(operator),
2102  PointerGetDatum(args),
2103  Int32GetDatum(varRelid)));
2104  s1 = useOr ? 0.0 : 1.0;
2105 
2106  /*
2107  * Arbitrarily assume 10 elements in the eventual array value (see
2108  * also estimate_array_length). We don't risk an assumption of
2109  * disjoint probabilities here.
2110  */
2111  for (i = 0; i < 10; i++)
2112  {
2113  if (useOr)
2114  s1 = s1 + s2 - s1 * s2;
2115  else
2116  s1 = s1 * s2;
2117  }
2118  }
2119 
2120  /* result should be in range, but make sure... */
2121  CLAMP_PROBABILITY(s1);
2122 
2123  return s1;
2124 }
#define list_make2(x1, x2)
Definition: pg_list.h:208
signed short int16
Definition: c.h:428
Definition: fmgr.h:56
RegProcedure get_oprjoin(Oid opno)
Definition: lsyscache.c:1552
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2186
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2218
#define PointerGetDatum(X)
Definition: postgres.h:600
Datum FunctionCall5Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:1222
regproc RegProcedure
Definition: c.h:585
#define Int16GetDatum(X)
Definition: postgres.h:495
Definition: nodes.h:539
#define TYPECACHE_EQ_OPR
Definition: typcache.h:136
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1195
double Selectivity
Definition: nodes.h:672
unsigned int Oid
Definition: postgres_ext.h:31
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:299
#define OidIsValid(objectId)
Definition: c.h:710
#define lsecond(l)
Definition: pg_list.h:179
int32 typeMod
Definition: primnodes.h:1009
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define linitial(l)
Definition: pg_list.h:174
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
char * s1
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
List * elements
Definition: primnodes.h:1027
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1528
char * s2
#define DatumGetFloat8(X)
Definition: postgres.h:758
uintptr_t Datum
Definition: postgres.h:411
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:338
#define makeNode(_type_)
Definition: nodes.h:587
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:149
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:759
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2198
Oid element_typeid
Definition: primnodes.h:1026
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3488
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2779
#define Int32GetDatum(X)
Definition: postgres.h:523
int i
Oid get_negator(Oid opno)
Definition: lsyscache.c:1504
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:285
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1780
#define DatumGetArrayTypeP(X)
Definition: array.h:254

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 82 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

86 {
87  Selectivity selec;
88  VariableStatData vardata;
89  Datum constval;
90  TypeCacheEntry *typentry;
91  FmgrInfo *cmpfunc;
92 
93  /*
94  * rightop must be a variable, else punt.
95  */
96  examine_variable(root, rightop, varRelid, &vardata);
97  if (!vardata.rel)
98  {
99  ReleaseVariableStats(vardata);
100  return -1.0;
101  }
102 
103  /*
104  * leftop must be a constant, else punt.
105  */
106  if (!IsA(leftop, Const))
107  {
108  ReleaseVariableStats(vardata);
109  return -1.0;
110  }
111  if (((Const *) leftop)->constisnull)
112  {
113  /* qual can't succeed if null on left */
114  ReleaseVariableStats(vardata);
115  return (Selectivity) 0.0;
116  }
117  constval = ((Const *) leftop)->constvalue;
118 
119  /* Get element type's default comparison function */
120  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
121  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
122  {
123  ReleaseVariableStats(vardata);
124  return -1.0;
125  }
126  cmpfunc = &typentry->cmp_proc_finfo;
127 
128  /*
129  * If the operator is <>, swap ANY/ALL, then invert the result later.
130  */
131  if (!isEquality)
132  useOr = !useOr;
133 
134  /* Get array element stats for var, if available */
135  if (HeapTupleIsValid(vardata.statsTuple) &&
136  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
137  {
138  Form_pg_statistic stats;
139  AttStatsSlot sslot;
140  AttStatsSlot hslot;
141 
142  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
143 
144  /* MCELEM will be an array of same type as element */
145  if (get_attstatsslot(&sslot, vardata.statsTuple,
146  STATISTIC_KIND_MCELEM, InvalidOid,
148  {
149  /* For ALL case, also get histogram of distinct-element counts */
150  if (useOr ||
151  !get_attstatsslot(&hslot, vardata.statsTuple,
152  STATISTIC_KIND_DECHIST, InvalidOid,
154  memset(&hslot, 0, sizeof(hslot));
155 
156  /*
157  * For = ANY, estimate as var @> ARRAY[const].
158  *
159  * For = ALL, estimate as var <@ ARRAY[const].
160  */
161  if (useOr)
163  sslot.nvalues,
164  sslot.numbers,
165  sslot.nnumbers,
166  &constval, 1,
167  OID_ARRAY_CONTAINS_OP,
168  typentry);
169  else
170  selec = mcelem_array_contained_selec(sslot.values,
171  sslot.nvalues,
172  sslot.numbers,
173  sslot.nnumbers,
174  &constval, 1,
175  hslot.numbers,
176  hslot.nnumbers,
177  OID_ARRAY_CONTAINED_OP,
178  typentry);
179 
180  free_attstatsslot(&hslot);
181  free_attstatsslot(&sslot);
182  }
183  else
184  {
185  /* No most-common-elements info, so do without */
186  if (useOr)
187  selec = mcelem_array_contain_overlap_selec(NULL, 0,
188  NULL, 0,
189  &constval, 1,
190  OID_ARRAY_CONTAINS_OP,
191  typentry);
192  else
193  selec = mcelem_array_contained_selec(NULL, 0,
194  NULL, 0,
195  &constval, 1,
196  NULL, 0,
197  OID_ARRAY_CONTAINED_OP,
198  typentry);
199  }
200 
201  /*
202  * MCE stats count only non-null rows, so adjust for null rows.
203  */
204  selec *= (1.0 - stats->stanullfrac);
205  }
206  else
207  {
208  /* No stats at all, so do without */
209  if (useOr)
210  selec = mcelem_array_contain_overlap_selec(NULL, 0,
211  NULL, 0,
212  &constval, 1,
213  OID_ARRAY_CONTAINS_OP,
214  typentry);
215  else
216  selec = mcelem_array_contained_selec(NULL, 0,
217  NULL, 0,
218  &constval, 1,
219  NULL, 0,
220  OID_ARRAY_CONTAINED_OP,
221  typentry);
222  /* we assume no nulls here, so no stanullfrac correction */
223  }
224 
225  ReleaseVariableStats(vardata);
226 
227  /*
228  * If the operator is <>, invert the results.
229  */
230  if (!isEquality)
231  selec = 1.0 - selec;
232 
233  CLAMP_PROBABILITY(selec);
234 
235  return selec;
236 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
HeapTuple statsTuple
Definition: selfuncs.h:91
int nnumbers
Definition: lsyscache.h:57
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5598
RelOptInfo * rel
Definition: selfuncs.h:90
double Selectivity
Definition: nodes.h:672
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define OidIsValid(objectId)
Definition: c.h:710
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
FmgrInfo cmp_proc_finfo
Definition: typcache.h:76
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
float4 * numbers
Definition: lsyscache.h:56
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
uintptr_t Datum
Definition: postgres.h:411
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:338
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4954
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
Datum * values
Definition: lsyscache.h:53
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:142
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ statistic_proc_security_check()

bool statistic_proc_security_check ( VariableStatData vardata,
Oid  func_oid 
)

Definition at line 5598 of file selfuncs.c.

References VariableStatData::acl_ok, DEBUG2, ereport, errmsg_internal(), get_func_leakproof(), get_func_name(), and OidIsValid.

Referenced by calc_arraycontsel(), calc_hist_selectivity(), eqjoinsel(), get_variable_range(), histogram_selectivity(), ineq_histogram_selectivity(), mcv_selectivity(), scalararraysel_containment(), and var_eq_const().

5599 {
5600  if (vardata->acl_ok)
5601  return true;
5602 
5603  if (!OidIsValid(func_oid))
5604  return false;
5605 
5606  if (get_func_leakproof(func_oid))
5607  return true;
5608 
5609  ereport(DEBUG2,
5610  (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
5611  get_func_name(func_oid))));
5612  return false;
5613 }
bool get_func_leakproof(Oid funcid)
Definition: lsyscache.c:1808
#define OidIsValid(objectId)
Definition: c.h:710
char * get_func_name(Oid funcid)
Definition: lsyscache.c:1579
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel,...)
Definition: elog.h:157
int errmsg_internal(const char *fmt,...)
Definition: elog.c:996

◆ var_eq_const()

double var_eq_const ( VariableStatData vardata,
Oid  oproid,
Oid  collation,
Datum  constval,
bool  constisnull,
bool  varonleft,
bool  negate 
)

Definition at line 292 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, DatumGetBool, fmgr_info(), free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, VariableStatData::isunique, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, VariableStatData::rel, statistic_proc_security_check(), VariableStatData::statsTuple, RelOptInfo::tuples, and AttStatsSlot::values.

Referenced by boolvarsel(), eqsel_internal(), patternsel_common(), and prefix_selectivity().

295 {
296  double selec;
297  double nullfrac = 0.0;
298  bool isdefault;
299  Oid opfuncoid;
300 
301  /*
302  * If the constant is NULL, assume operator is strict and return zero, ie,
303  * operator will never return TRUE. (It's zero even for a negator op.)
304  */
305  if (constisnull)
306  return 0.0;
307 
308  /*
309  * Grab the nullfrac for use below. Note we allow use of nullfrac
310  * regardless of security check.
311  */
312  if (HeapTupleIsValid(vardata->statsTuple))
313  {
314  Form_pg_statistic stats;
315 
316  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
317  nullfrac = stats->stanullfrac;
318  }
319 
320  /*
321  * If we matched the var to a unique index or DISTINCT clause, assume
322  * there is exactly one match regardless of anything else. (This is
323  * slightly bogus, since the index or clause's equality operator might be
324  * different from ours, but it's much more likely to be right than
325  * ignoring the information.)
326  */
327  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
328  {
329  selec = 1.0 / vardata->rel->tuples;
330  }
331  else if (HeapTupleIsValid(vardata->statsTuple) &&
333  (opfuncoid = get_opcode(operator))))
334  {
335  AttStatsSlot sslot;
336  bool match = false;
337  int i;
338 
339  /*
340  * Is the constant "=" to any of the column's most common values?
341  * (Although the given operator may not really be "=", we will assume
342  * that seeing whether it returns TRUE is an appropriate test. If you
343  * don't like this, maybe you shouldn't be using eqsel for your
344  * operator...)
345  */
346  if (get_attstatsslot(&sslot, vardata->statsTuple,
347  STATISTIC_KIND_MCV, InvalidOid,
349  {
350  LOCAL_FCINFO(fcinfo, 2);
351  FmgrInfo eqproc;
352 
353  fmgr_info(opfuncoid, &eqproc);
354 
355  /*
356  * Save a few cycles by setting up the fcinfo struct just once.
357  * Using FunctionCallInvoke directly also avoids failure if the
358  * eqproc returns NULL, though really equality functions should
359  * never do that.
360  */
361  InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
362  NULL, NULL);
363  fcinfo->args[0].isnull = false;
364  fcinfo->args[1].isnull = false;
365  /* be careful to apply operator right way 'round */
366  if (varonleft)
367  fcinfo->args[1].value = constval;
368  else
369  fcinfo->args[0].value = constval;
370 
371  for (i = 0; i < sslot.nvalues; i++)
372  {
373  Datum fresult;
374 
375  if (varonleft)
376  fcinfo->args[0].value = sslot.values[i];
377  else
378  fcinfo->args[1].value = sslot.values[i];
379  fcinfo->isnull = false;
380  fresult = FunctionCallInvoke(fcinfo);
381  if (!fcinfo->isnull && DatumGetBool(fresult))
382  {
383  match = true;
384  break;
385  }
386  }
387  }
388  else
389  {
390  /* no most-common-value info available */
391  i = 0; /* keep compiler quiet */
392  }
393 
394  if (match)
395  {
396  /*
397  * Constant is "=" to this common value. We know selectivity
398  * exactly (or as exactly as ANALYZE could calculate it, anyway).
399  */
400  selec = sslot.numbers[i];
401  }
402  else
403  {
404  /*
405  * Comparison is against a constant that is neither NULL nor any
406  * of the common values. Its selectivity cannot be more than
407  * this:
408  */
409  double sumcommon = 0.0;
410  double otherdistinct;
411 
412  for (i = 0; i < sslot.nnumbers; i++)
413  sumcommon += sslot.numbers[i];
414  selec = 1.0 - sumcommon - nullfrac;
415  CLAMP_PROBABILITY(selec);
416 
417  /*
418  * and in fact it's probably a good deal less. We approximate that
419  * all the not-common values share this remaining fraction
420  * equally, so we divide by the number of other distinct values.
421  */
422  otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
423  sslot.nnumbers;
424  if (otherdistinct > 1)
425  selec /= otherdistinct;
426 
427  /*
428  * Another cross-check: selectivity shouldn't be estimated as more
429  * than the least common "most common value".
430  */
431  if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
432  selec = sslot.numbers[sslot.nnumbers - 1];
433  }
434 
435  free_attstatsslot(&sslot);
436  }
437  else
438  {
439  /*
440  * No ANALYZE stats available, so make a guess using estimated number
441  * of distinct values and assuming they are equally common. (The guess
442  * is unlikely to be very good, but we do know a few special cases.)
443  */
444  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
445  }
446 
447  /* now adjust if we wanted <> rather than = */
448  if (negate)
449  selec = 1.0 - selec - nullfrac;
450 
451  /* result should be in range, but make sure... */
452  CLAMP_PROBABILITY(selec);
453 
454  return selec;
455 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
HeapTuple statsTuple
Definition: selfuncs.h:91
int nnumbers
Definition: lsyscache.h:57
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5598
double tuples
Definition: pathnodes.h:716
RelOptInfo * rel
Definition: selfuncs.h:90
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5627
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
float4 * numbers
Definition: lsyscache.h:56
#define DatumGetBool(X)
Definition: postgres.h:437
uintptr_t Datum
Definition: postgres.h:411
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1256
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
Datum * values
Definition: lsyscache.h:53
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

◆ var_eq_non_const()

double var_eq_non_const ( VariableStatData vardata,
Oid  oproid,
Oid  collation,
Node other,
bool  varonleft,
bool  negate 
)

Definition at line 463 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, CLAMP_PROBABILITY, free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by eqsel_internal().

466 {
467  double selec;
468  double nullfrac = 0.0;
469  bool isdefault;
470 
471  /*
472  * Grab the nullfrac for use below.
473  */
474  if (HeapTupleIsValid(vardata->statsTuple))
475  {
476  Form_pg_statistic stats;
477 
478  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
479  nullfrac = stats->stanullfrac;
480  }
481 
482  /*
483  * If we matched the var to a unique index or DISTINCT clause, assume
484  * there is exactly one match regardless of anything else. (This is
485  * slightly bogus, since the index or clause's equality operator might be
486  * different from ours, but it's much more likely to be right than
487  * ignoring the information.)
488  */
489  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
490  {
491  selec = 1.0 / vardata->rel->tuples;
492  }
493  else if (HeapTupleIsValid(vardata->statsTuple))
494  {
495  double ndistinct;
496  AttStatsSlot sslot;
497 
498  /*
499  * Search is for a value that we do not know a priori, but we will
500  * assume it is not NULL. Estimate the selectivity as non-null
501  * fraction divided by number of distinct values, so that we get a
502  * result averaged over all possible values whether common or
503  * uncommon. (Essentially, we are assuming that the not-yet-known
504  * comparison value is equally likely to be any of the possible
505  * values, regardless of their frequency in the table. Is that a good
506  * idea?)
507  */
508  selec = 1.0 - nullfrac;
509  ndistinct = get_variable_numdistinct(vardata, &isdefault);
510  if (ndistinct > 1)
511  selec /= ndistinct;
512 
513  /*
514  * Cross-check: selectivity should never be estimated as more than the
515  * most common value's.
516  */
517  if (get_attstatsslot(&sslot, vardata->statsTuple,
518  STATISTIC_KIND_MCV, InvalidOid,
520  {
521  if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
522  selec = sslot.numbers[0];
523  free_attstatsslot(&sslot);
524  }
525  }
526  else
527  {
528  /*
529  * No ANALYZE stats available, so make a guess using estimated number
530  * of distinct values and assuming they are equally common. (The guess
531  * is unlikely to be very good, but we do know a few special cases.)
532  */
533  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
534  }
535 
536  /* now adjust if we wanted <> rather than = */
537  if (negate)
538  selec = 1.0 - selec - nullfrac;
539 
540  /* result should be in range, but make sure... */
541  CLAMP_PROBABILITY(selec);
542 
543  return selec;
544 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:654
HeapTuple statsTuple
Definition: selfuncs.h:91
int nnumbers
Definition: lsyscache.h:57
double tuples
Definition: pathnodes.h:716
RelOptInfo * rel
Definition: selfuncs.h:90
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5627
float4 * numbers
Definition: lsyscache.h:56
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3177
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3294

Variable Documentation

◆ get_index_stats_hook

PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook

Definition at line 145 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_variable().

◆ get_relation_stats_hook

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook

Definition at line 144 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_simple_variable().