PostgreSQL Source Code  git master
selfuncs.h File Reference
#include "access/htup.h"
#include "fmgr.h"
#include "nodes/pathnodes.h"
Include dependency graph for selfuncs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  VariableStatData
 
struct  GenericCosts
 

Macros

#define DEFAULT_EQ_SEL   0.005
 
#define DEFAULT_INEQ_SEL   0.3333333333333333
 
#define DEFAULT_RANGE_INEQ_SEL   0.005
 
#define DEFAULT_MATCH_SEL   0.005
 
#define DEFAULT_MATCHING_SEL   0.010
 
#define DEFAULT_NUM_DISTINCT   200
 
#define DEFAULT_UNK_SEL   0.005
 
#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)
 
#define CLAMP_PROBABILITY(p)
 
#define ReleaseVariableStats(vardata)
 

Typedefs

typedef struct VariableStatData VariableStatData
 
typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)
 
typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)
 

Functions

void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double generic_restriction_selectivity (PlannerInfo *root, Oid oproid, Oid collation, List *args, int varRelid, double default_selectivity)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
Listget_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
Listadd_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 

Variables

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook
 
PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook
 

Macro Definition Documentation

◆ CLAMP_PROBABILITY

◆ DEFAULT_EQ_SEL

#define DEFAULT_EQ_SEL   0.005

Definition at line 34 of file selfuncs.h.

Referenced by _int_matchsel(), eqsel_internal(), int_query_opr_selec(), and neqjoinsel().

◆ DEFAULT_INEQ_SEL

◆ DEFAULT_MATCH_SEL

#define DEFAULT_MATCH_SEL   0.005

◆ DEFAULT_MATCHING_SEL

#define DEFAULT_MATCHING_SEL   0.010

Definition at line 46 of file selfuncs.h.

Referenced by matchingjoinsel(), and matchingsel().

◆ DEFAULT_NOT_UNK_SEL

#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)

Definition at line 53 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ DEFAULT_NUM_DISTINCT

#define DEFAULT_NUM_DISTINCT   200

Definition at line 49 of file selfuncs.h.

Referenced by cost_incremental_sort(), and get_variable_numdistinct().

◆ DEFAULT_RANGE_INEQ_SEL

#define DEFAULT_RANGE_INEQ_SEL   0.005

Definition at line 40 of file selfuncs.h.

Referenced by clauselist_selectivity_simple(), and default_range_selectivity().

◆ DEFAULT_UNK_SEL

#define DEFAULT_UNK_SEL   0.005

Definition at line 52 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ ReleaseVariableStats

Typedef Documentation

◆ get_index_stats_hook_type

typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Definition at line 125 of file selfuncs.h.

◆ get_relation_stats_hook_type

typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

Definition at line 120 of file selfuncs.h.

◆ VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

List* add_predicate_to_index_quals ( IndexOptInfo index,
List indexQuals 
)

Definition at line 6218 of file selfuncs.c.

References IndexOptInfo::indpred, lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().

6219 {
6220  List *predExtraQuals = NIL;
6221  ListCell *lc;
6222 
6223  if (index->indpred == NIL)
6224  return indexQuals;
6225 
6226  foreach(lc, index->indpred)
6227  {
6228  Node *predQual = (Node *) lfirst(lc);
6229  List *oneQual = list_make1(predQual);
6230 
6231  if (!predicate_implied_by(oneQual, indexQuals, false))
6232  predExtraQuals = list_concat(predExtraQuals, oneQual);
6233  }
6234  return list_concat(predExtraQuals, indexQuals);
6235 }
#define NIL
Definition: pg_list.h:65
Definition: nodes.h:529
List * list_concat(List *list1, const List *list2)
Definition: list.c:515
#define list_make1(x1)
Definition: pg_list.h:227
#define lfirst(lc)
Definition: pg_list.h:190
List * indpred
Definition: pathnodes.h:844
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:151
Definition: pg_list.h:50

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo root,
BoolTestType  booltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1537 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity().

1539 {
1540  VariableStatData vardata;
1541  double selec;
1542 
1543  examine_variable(root, arg, varRelid, &vardata);
1544 
1545  if (HeapTupleIsValid(vardata.statsTuple))
1546  {
1547  Form_pg_statistic stats;
1548  double freq_null;
1549  AttStatsSlot sslot;
1550 
1551  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1552  freq_null = stats->stanullfrac;
1553 
1554  if (get_attstatsslot(&sslot, vardata.statsTuple,
1555  STATISTIC_KIND_MCV, InvalidOid,
1557  && sslot.nnumbers > 0)
1558  {
1559  double freq_true;
1560  double freq_false;
1561 
1562  /*
1563  * Get first MCV frequency and derive frequency for true.
1564  */
1565  if (DatumGetBool(sslot.values[0]))
1566  freq_true = sslot.numbers[0];
1567  else
1568  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1569 
1570  /*
1571  * Next derive frequency for false. Then use these as appropriate
1572  * to derive frequency for each case.
1573  */
1574  freq_false = 1.0 - freq_true - freq_null;
1575 
1576  switch (booltesttype)
1577  {
1578  case IS_UNKNOWN:
1579  /* select only NULL values */
1580  selec = freq_null;
1581  break;
1582  case IS_NOT_UNKNOWN:
1583  /* select non-NULL values */
1584  selec = 1.0 - freq_null;
1585  break;
1586  case IS_TRUE:
1587  /* select only TRUE values */
1588  selec = freq_true;
1589  break;
1590  case IS_NOT_TRUE:
1591  /* select non-TRUE values */
1592  selec = 1.0 - freq_true;
1593  break;
1594  case IS_FALSE:
1595  /* select only FALSE values */
1596  selec = freq_false;
1597  break;
1598  case IS_NOT_FALSE:
1599  /* select non-FALSE values */
1600  selec = 1.0 - freq_false;
1601  break;
1602  default:
1603  elog(ERROR, "unrecognized booltesttype: %d",
1604  (int) booltesttype);
1605  selec = 0.0; /* Keep compiler quiet */
1606  break;
1607  }
1608 
1609  free_attstatsslot(&sslot);
1610  }
1611  else
1612  {
1613  /*
1614  * No most-common-value info available. Still have null fraction
1615  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1616  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1617  */
1618  switch (booltesttype)
1619  {
1620  case IS_UNKNOWN:
1621  /* select only NULL values */
1622  selec = freq_null;
1623  break;
1624  case IS_NOT_UNKNOWN:
1625  /* select non-NULL values */
1626  selec = 1.0 - freq_null;
1627  break;
1628  case IS_TRUE:
1629  case IS_FALSE:
1630  /* Assume we select half of the non-NULL values */
1631  selec = (1.0 - freq_null) / 2.0;
1632  break;
1633  case IS_NOT_TRUE:
1634  case IS_NOT_FALSE:
1635  /* Assume we select NULLs plus half of the non-NULLs */
1636  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1637  selec = (freq_null + 1.0) / 2.0;
1638  break;
1639  default:
1640  elog(ERROR, "unrecognized booltesttype: %d",
1641  (int) booltesttype);
1642  selec = 0.0; /* Keep compiler quiet */
1643  break;
1644  }
1645  }
1646  }
1647  else
1648  {
1649  /*
1650  * If we can't get variable statistics for the argument, perhaps
1651  * clause_selectivity can do something with it. We ignore the
1652  * possibility of a NULL value when using clause_selectivity, and just
1653  * assume the value is either TRUE or FALSE.
1654  */
1655  switch (booltesttype)
1656  {
1657  case IS_UNKNOWN:
1658  selec = DEFAULT_UNK_SEL;
1659  break;
1660  case IS_NOT_UNKNOWN:
1661  selec = DEFAULT_NOT_UNK_SEL;
1662  break;
1663  case IS_TRUE:
1664  case IS_NOT_FALSE:
1665  selec = (double) clause_selectivity(root, arg,
1666  varRelid,
1667  jointype, sjinfo);
1668  break;
1669  case IS_FALSE:
1670  case IS_NOT_TRUE:
1671  selec = 1.0 - (double) clause_selectivity(root, arg,
1672  varRelid,
1673  jointype, sjinfo);
1674  break;
1675  default:
1676  elog(ERROR, "unrecognized booltesttype: %d",
1677  (int) booltesttype);
1678  selec = 0.0; /* Keep compiler quiet */
1679  break;
1680  }
1681  }
1682 
1683  ReleaseVariableStats(vardata);
1684 
1685  /* result should be in range, but make sure... */
1686  CLAMP_PROBABILITY(selec);
1687 
1688  return (Selectivity) selec;
1689 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
double Selectivity
Definition: nodes.h:662
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:53
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
#define ERROR
Definition: elog.h:43
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:600
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:52
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define elog(elevel,...)
Definition: elog.h:214
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo root,
Node arg,
int  varRelid 
)

Definition at line 1509 of file selfuncs.c.

References BoolGetDatum, examine_variable(), HeapTupleIsValid, InvalidOid, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity().

1510 {
1511  VariableStatData vardata;
1512  double selec;
1513 
1514  examine_variable(root, arg, varRelid, &vardata);
1515  if (HeapTupleIsValid(vardata.statsTuple))
1516  {
1517  /*
1518  * A boolean variable V is equivalent to the clause V = 't', so we
1519  * compute the selectivity as if that is what we have.
1520  */
1521  selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
1522  BoolGetDatum(true), false, true, false);
1523  }
1524  else
1525  {
1526  /* Otherwise, the default estimate is 0.5 */
1527  selec = 0.5;
1528  }
1529  ReleaseVariableStats(vardata);
1530  return selec;
1531 }
HeapTuple statsTuple
Definition: selfuncs.h:74
#define BoolGetDatum(X)
Definition: postgres.h:402
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
double var_eq_const(VariableStatData *vardata, Oid operator, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:292

◆ estimate_array_length()

int estimate_array_length ( Node arrayexpr)

Definition at line 2132 of file selfuncs.c.

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

2133 {
2134  /* look through any binary-compatible relabeling of arrayexpr */
2135  arrayexpr = strip_array_coercion(arrayexpr);
2136 
2137  if (arrayexpr && IsA(arrayexpr, Const))
2138  {
2139  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2140  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2141  ArrayType *arrayval;
2142 
2143  if (arrayisnull)
2144  return 0;
2145  arrayval = DatumGetArrayTypeP(arraydatum);
2146  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2147  }
2148  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2149  !((ArrayExpr *) arrayexpr)->multidims)
2150  {
2151  return list_length(((ArrayExpr *) arrayexpr)->elements);
2152  }
2153  else
2154  {
2155  /* default guess --- see also scalararraysel */
2156  return 10;
2157  }
2158 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ARR_DIMS(a)
Definition: array.h:282
uintptr_t Datum
Definition: postgres.h:367
static int list_length(const List *l)
Definition: pg_list.h:169
#define ARR_NDIM(a)
Definition: array.h:278
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1780
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3723 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

3726 {
3727  VariableStatData vardata;
3728  double estfract,
3729  ndistinct,
3730  stanullfrac,
3731  avgfreq;
3732  bool isdefault;
3733  AttStatsSlot sslot;
3734 
3735  examine_variable(root, hashkey, 0, &vardata);
3736 
3737  /* Look up the frequency of the most common value, if available */
3738  *mcv_freq = 0.0;
3739 
3740  if (HeapTupleIsValid(vardata.statsTuple))
3741  {
3742  if (get_attstatsslot(&sslot, vardata.statsTuple,
3743  STATISTIC_KIND_MCV, InvalidOid,
3745  {
3746  /*
3747  * The first MCV stat is for the most common value.
3748  */
3749  if (sslot.nnumbers > 0)
3750  *mcv_freq = sslot.numbers[0];
3751  free_attstatsslot(&sslot);
3752  }
3753  }
3754 
3755  /* Get number of distinct values */
3756  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3757 
3758  /*
3759  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3760  * mcv_freq is known to be even higher than that, use it instead.
3761  */
3762  if (isdefault)
3763  {
3764  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3765  ReleaseVariableStats(vardata);
3766  return;
3767  }
3768 
3769  /* Get fraction that are null */
3770  if (HeapTupleIsValid(vardata.statsTuple))
3771  {
3772  Form_pg_statistic stats;
3773 
3774  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3775  stanullfrac = stats->stanullfrac;
3776  }
3777  else
3778  stanullfrac = 0.0;
3779 
3780  /* Compute avg freq of all distinct data values in raw relation */
3781  avgfreq = (1.0 - stanullfrac) / ndistinct;
3782 
3783  /*
3784  * Adjust ndistinct to account for restriction clauses. Observe we are
3785  * assuming that the data distribution is affected uniformly by the
3786  * restriction clauses!
3787  *
3788  * XXX Possibly better way, but much more expensive: multiply by
3789  * selectivity of rel's restriction clauses that mention the target Var.
3790  */
3791  if (vardata.rel && vardata.rel->tuples > 0)
3792  {
3793  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3794  ndistinct = clamp_row_est(ndistinct);
3795  }
3796 
3797  /*
3798  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3799  * number of buckets is less than the expected number of distinct values;
3800  * otherwise it is 1/ndistinct.
3801  */
3802  if (ndistinct > nbuckets)
3803  estfract = 1.0 / nbuckets;
3804  else
3805  estfract = 1.0 / ndistinct;
3806 
3807  /*
3808  * Adjust estimated bucketsize upward to account for skewed distribution.
3809  */
3810  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3811  estfract *= *mcv_freq / avgfreq;
3812 
3813  /*
3814  * Clamp bucketsize to sane range (the above adjustment could easily
3815  * produce an out-of-range result). We set the lower bound a little above
3816  * zero, since zero isn't a very sane result.
3817  */
3818  if (estfract < 1.0e-6)
3819  estfract = 1.0e-6;
3820  else if (estfract > 1.0)
3821  estfract = 1.0;
3822 
3823  *bucketsize_frac = (Selectivity) estfract;
3824 
3825  ReleaseVariableStats(vardata);
3826 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
double Selectivity
Definition: nodes.h:662
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5276
float4 * numbers
Definition: lsyscache.h:53
double rows
Definition: pathnodes.h:668
#define InvalidOid
Definition: postgres_ext.h:36
#define Max(x, y)
Definition: c.h:921
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
e
Definition: preproc-init.c:82
double clamp_row_est(double nrows)
Definition: costsize.c:189
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3842 of file selfuncs.c.

References hash_agg_entry_size(), AggClauseCosts::numAggs, Path::pathtarget, AggClauseCosts::transitionSpace, and PathTarget::width.

Referenced by consider_groupingsets_paths().

3844 {
3845  Size hashentrysize = hash_agg_entry_size(agg_costs->numAggs,
3846  path->pathtarget->width,
3847  agg_costs->transitionSpace);
3848 
3849  /*
3850  * Note that this disregards the effect of fill-factor and growth policy
3851  * of the hash table. That's probably ok, given that the default
3852  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3853  * "double-in-size" growth policies here.
3854  */
3855  return hashentrysize * dNumGroups;
3856 }
PathTarget * pathtarget
Definition: pathnodes.h:1145
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1695
size_t Size
Definition: c.h:473
Size transitionSpace
Definition: pathnodes.h:64

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset 
)

Definition at line 3360 of file selfuncs.c.

References add_unique_group_var(), Assert, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), for_each_cell, HeapTupleIsValid, i, IS_SIMPLE_REL, VariableStatData::isunique, lappend(), lfirst, linitial, list_length(), list_member_int(), list_second_cell(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), cost_incremental_sort(), create_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().

3362 {
3363  List *varinfos = NIL;
3364  double srf_multiplier = 1.0;
3365  double numdistinct;
3366  ListCell *l;
3367  int i;
3368 
3369  /*
3370  * We don't ever want to return an estimate of zero groups, as that tends
3371  * to lead to division-by-zero and other unpleasantness. The input_rows
3372  * estimate is usually already at least 1, but clamp it just in case it
3373  * isn't.
3374  */
3375  input_rows = clamp_row_est(input_rows);
3376 
3377  /*
3378  * If no grouping columns, there's exactly one group. (This can't happen
3379  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3380  * corner cases with set operations.)
3381  */
3382  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3383  return 1.0;
3384 
3385  /*
3386  * Count groups derived from boolean grouping expressions. For other
3387  * expressions, find the unique Vars used, treating an expression as a Var
3388  * if we can find stats for it. For each one, record the statistical
3389  * estimate of number of distinct values (total in its table, without
3390  * regard for filtering).
3391  */
3392  numdistinct = 1.0;
3393 
3394  i = 0;
3395  foreach(l, groupExprs)
3396  {
3397  Node *groupexpr = (Node *) lfirst(l);
3398  double this_srf_multiplier;
3399  VariableStatData vardata;
3400  List *varshere;
3401  ListCell *l2;
3402 
3403  /* is expression in this grouping set? */
3404  if (pgset && !list_member_int(*pgset, i++))
3405  continue;
3406 
3407  /*
3408  * Set-returning functions in grouping columns are a bit problematic.
3409  * The code below will effectively ignore their SRF nature and come up
3410  * with a numdistinct estimate as though they were scalar functions.
3411  * We compensate by scaling up the end result by the largest SRF
3412  * rowcount estimate. (This will be an overestimate if the SRF
3413  * produces multiple copies of any output value, but it seems best to
3414  * assume the SRF's outputs are distinct. In any case, it's probably
3415  * pointless to worry too much about this without much better
3416  * estimates for SRF output rowcounts than we have today.)
3417  */
3418  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3419  if (srf_multiplier < this_srf_multiplier)
3420  srf_multiplier = this_srf_multiplier;
3421 
3422  /* Short-circuit for expressions returning boolean */
3423  if (exprType(groupexpr) == BOOLOID)
3424  {
3425  numdistinct *= 2.0;
3426  continue;
3427  }
3428 
3429  /*
3430  * If examine_variable is able to deduce anything about the GROUP BY
3431  * expression, treat it as a single variable even if it's really more
3432  * complicated.
3433  */
3434  examine_variable(root, groupexpr, 0, &vardata);
3435  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3436  {
3437  varinfos = add_unique_group_var(root, varinfos,
3438  groupexpr, &vardata);
3439  ReleaseVariableStats(vardata);
3440  continue;
3441  }
3442  ReleaseVariableStats(vardata);
3443 
3444  /*
3445  * Else pull out the component Vars. Handle PlaceHolderVars by
3446  * recursing into their arguments (effectively assuming that the
3447  * PlaceHolderVar doesn't change the number of groups, which boils
3448  * down to ignoring the possible addition of nulls to the result set).
3449  */
3450  varshere = pull_var_clause(groupexpr,
3454 
3455  /*
3456  * If we find any variable-free GROUP BY item, then either it is a
3457  * constant (and we can ignore it) or it contains a volatile function;
3458  * in the latter case we punt and assume that each input row will
3459  * yield a distinct group.
3460  */
3461  if (varshere == NIL)
3462  {
3463  if (contain_volatile_functions(groupexpr))
3464  return input_rows;
3465  continue;
3466  }
3467 
3468  /*
3469  * Else add variables to varinfos list
3470  */
3471  foreach(l2, varshere)
3472  {
3473  Node *var = (Node *) lfirst(l2);
3474 
3475  examine_variable(root, var, 0, &vardata);
3476  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3477  ReleaseVariableStats(vardata);
3478  }
3479  }
3480 
3481  /*
3482  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3483  * list.
3484  */
3485  if (varinfos == NIL)
3486  {
3487  /* Apply SRF multiplier as we would do in the long path */
3488  numdistinct *= srf_multiplier;
3489  /* Round off */
3490  numdistinct = ceil(numdistinct);
3491  /* Guard against out-of-range answers */
3492  if (numdistinct > input_rows)
3493  numdistinct = input_rows;
3494  if (numdistinct < 1.0)
3495  numdistinct = 1.0;
3496  return numdistinct;
3497  }
3498 
3499  /*
3500  * Group Vars by relation and estimate total numdistinct.
3501  *
3502  * For each iteration of the outer loop, we process the frontmost Var in
3503  * varinfos, plus all other Vars in the same relation. We remove these
3504  * Vars from the newvarinfos list for the next iteration. This is the
3505  * easiest way to group Vars of same rel together.
3506  */
3507  do
3508  {
3509  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3510  RelOptInfo *rel = varinfo1->rel;
3511  double reldistinct = 1;
3512  double relmaxndistinct = reldistinct;
3513  int relvarcount = 0;
3514  List *newvarinfos = NIL;
3515  List *relvarinfos = NIL;
3516 
3517  /*
3518  * Split the list of varinfos in two - one for the current rel, one
3519  * for remaining Vars on other rels.
3520  */
3521  relvarinfos = lappend(relvarinfos, varinfo1);
3522  for_each_cell(l, varinfos, list_second_cell(varinfos))
3523  {
3524  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3525 
3526  if (varinfo2->rel == varinfo1->rel)
3527  {
3528  /* varinfos on current rel */
3529  relvarinfos = lappend(relvarinfos, varinfo2);
3530  }
3531  else
3532  {
3533  /* not time to process varinfo2 yet */
3534  newvarinfos = lappend(newvarinfos, varinfo2);
3535  }
3536  }
3537 
3538  /*
3539  * Get the numdistinct estimate for the Vars of this rel. We
3540  * iteratively search for multivariate n-distinct with maximum number
3541  * of vars; assuming that each var group is independent of the others,
3542  * we multiply them together. Any remaining relvarinfos after no more
3543  * multivariate matches are found are assumed independent too, so
3544  * their individual ndistinct estimates are multiplied also.
3545  *
3546  * While iterating, count how many separate numdistinct values we
3547  * apply. We apply a fudge factor below, but only if we multiplied
3548  * more than one such values.
3549  */
3550  while (relvarinfos)
3551  {
3552  double mvndistinct;
3553 
3554  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3555  &mvndistinct))
3556  {
3557  reldistinct *= mvndistinct;
3558  if (relmaxndistinct < mvndistinct)
3559  relmaxndistinct = mvndistinct;
3560  relvarcount++;
3561  }
3562  else
3563  {
3564  foreach(l, relvarinfos)
3565  {
3566  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3567 
3568  reldistinct *= varinfo2->ndistinct;
3569  if (relmaxndistinct < varinfo2->ndistinct)
3570  relmaxndistinct = varinfo2->ndistinct;
3571  relvarcount++;
3572  }
3573 
3574  /* we're done with this relation */
3575  relvarinfos = NIL;
3576  }
3577  }
3578 
3579  /*
3580  * Sanity check --- don't divide by zero if empty relation.
3581  */
3582  Assert(IS_SIMPLE_REL(rel));
3583  if (rel->tuples > 0)
3584  {
3585  /*
3586  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3587  * fudge factor is because the Vars are probably correlated but we
3588  * don't know by how much. We should never clamp to less than the
3589  * largest ndistinct value for any of the Vars, though, since
3590  * there will surely be at least that many groups.
3591  */
3592  double clamp = rel->tuples;
3593 
3594  if (relvarcount > 1)
3595  {
3596  clamp *= 0.1;
3597  if (clamp < relmaxndistinct)
3598  {
3599  clamp = relmaxndistinct;
3600  /* for sanity in case some ndistinct is too large: */
3601  if (clamp > rel->tuples)
3602  clamp = rel->tuples;
3603  }
3604  }
3605  if (reldistinct > clamp)
3606  reldistinct = clamp;
3607 
3608  /*
3609  * Update the estimate based on the restriction selectivity,
3610  * guarding against division by zero when reldistinct is zero.
3611  * Also skip this if we know that we are returning all rows.
3612  */
3613  if (reldistinct > 0 && rel->rows < rel->tuples)
3614  {
3615  /*
3616  * Given a table containing N rows with n distinct values in a
3617  * uniform distribution, if we select p rows at random then
3618  * the expected number of distinct values selected is
3619  *
3620  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3621  *
3622  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3623  *
3624  * See "Approximating block accesses in database
3625  * organizations", S. B. Yao, Communications of the ACM,
3626  * Volume 20 Issue 4, April 1977 Pages 260-261.
3627  *
3628  * Alternatively, re-arranging the terms from the factorials,
3629  * this may be written as
3630  *
3631  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3632  *
3633  * This form of the formula is more efficient to compute in
3634  * the common case where p is larger than N/n. Additionally,
3635  * as pointed out by Dell'Era, if i << N for all terms in the
3636  * product, it can be approximated by
3637  *
3638  * n * (1 - ((N-p)/N)^(N/n))
3639  *
3640  * See "Expected distinct values when selecting from a bag
3641  * without replacement", Alberto Dell'Era,
3642  * http://www.adellera.it/investigations/distinct_balls/.
3643  *
3644  * The condition i << N is equivalent to n >> 1, so this is a
3645  * good approximation when the number of distinct values in
3646  * the table is large. It turns out that this formula also
3647  * works well even when n is small.
3648  */
3649  reldistinct *=
3650  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3651  rel->tuples / reldistinct));
3652  }
3653  reldistinct = clamp_row_est(reldistinct);
3654 
3655  /*
3656  * Update estimate of total distinct groups.
3657  */
3658  numdistinct *= reldistinct;
3659  }
3660 
3661  varinfos = newvarinfos;
3662  } while (varinfos != NIL);
3663 
3664  /* Now we can account for the effects of any SRFs */
3665  numdistinct *= srf_multiplier;
3666 
3667  /* Round off */
3668  numdistinct = ceil(numdistinct);
3669 
3670  /* Guard against out-of-range answers */
3671  if (numdistinct > input_rows)
3672  numdistinct = input_rows;
3673  if (numdistinct < 1.0)
3674  numdistinct = 1.0;
3675 
3676  return numdistinct;
3677 }
#define NIL
Definition: pg_list.h:65
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:571
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:177
HeapTuple statsTuple
Definition: selfuncs.h:74
#define for_each_cell(cell, lst, initcell)
Definition: pg_list.h:390
double tuples
Definition: pathnodes.h:705
Definition: nodes.h:529
List * pull_var_clause(Node *node, int flags)
Definition: var.c:535
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:726
double ndistinct
Definition: selfuncs.c:3243
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:638
#define linitial(l)
Definition: pg_list.h:195
bool list_member_int(const List *list, int datum)
Definition: list.c:654
static ListCell * list_second_cell(const List *l)
Definition: pg_list.h:139
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3877
List * lappend(List *list, void *datum)
Definition: list.c:321
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3247
double rows
Definition: pathnodes.h:668
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
#define Assert(condition)
Definition: c.h:745
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:174
int i
double clamp_row_est(double nrows)
Definition: costsize.c:189
Definition: pg_list.h:50
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:172
RelOptInfo * rel
Definition: selfuncs.c:3242

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4727 of file selfuncs.c.

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, PlannerInfo::append_rel_array, arg, Assert, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, RangeTblEntry::checkAsUser, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext(), MemSet, IndexOptInfo::ncolumns, NIL, IndexOptInfo::nkeycolumns, ObjectIdGetDatum, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, IndexOptInfo::rel, ReleaseSysCache(), RelOptInfo::relid, RangeTblEntry::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::securityQuals, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, VariableStatData::vartype, Var::vartype, and Var::vartypmod.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

4729 {
4730  Node *basenode;
4731  Relids varnos;
4732  RelOptInfo *onerel;
4733 
4734  /* Make sure we don't return dangling pointers in vardata */
4735  MemSet(vardata, 0, sizeof(VariableStatData));
4736 
4737  /* Save the exposed type of the expression */
4738  vardata->vartype = exprType(node);
4739 
4740  /* Look inside any binary-compatible relabeling */
4741 
4742  if (IsA(node, RelabelType))
4743  basenode = (Node *) ((RelabelType *) node)->arg;
4744  else
4745  basenode = node;
4746 
4747  /* Fast path for a simple Var */
4748 
4749  if (IsA(basenode, Var) &&
4750  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4751  {
4752  Var *var = (Var *) basenode;
4753 
4754  /* Set up result fields other than the stats tuple */
4755  vardata->var = basenode; /* return Var without relabeling */
4756  vardata->rel = find_base_rel(root, var->varno);
4757  vardata->atttype = var->vartype;
4758  vardata->atttypmod = var->vartypmod;
4759  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4760 
4761  /* Try to locate some stats */
4762  examine_simple_variable(root, var, vardata);
4763 
4764  return;
4765  }
4766 
4767  /*
4768  * Okay, it's a more complicated expression. Determine variable
4769  * membership. Note that when varRelid isn't zero, only vars of that
4770  * relation are considered "real" vars.
4771  */
4772  varnos = pull_varnos(basenode);
4773 
4774  onerel = NULL;
4775 
4776  switch (bms_membership(varnos))
4777  {
4778  case BMS_EMPTY_SET:
4779  /* No Vars at all ... must be pseudo-constant clause */
4780  break;
4781  case BMS_SINGLETON:
4782  if (varRelid == 0 || bms_is_member(varRelid, varnos))
4783  {
4784  onerel = find_base_rel(root,
4785  (varRelid ? varRelid : bms_singleton_member(varnos)));
4786  vardata->rel = onerel;
4787  node = basenode; /* strip any relabeling */
4788  }
4789  /* else treat it as a constant */
4790  break;
4791  case BMS_MULTIPLE:
4792  if (varRelid == 0)
4793  {
4794  /* treat it as a variable of a join relation */
4795  vardata->rel = find_join_rel(root, varnos);
4796  node = basenode; /* strip any relabeling */
4797  }
4798  else if (bms_is_member(varRelid, varnos))
4799  {
4800  /* ignore the vars belonging to other relations */
4801  vardata->rel = find_base_rel(root, varRelid);
4802  node = basenode; /* strip any relabeling */
4803  /* note: no point in expressional-index search here */
4804  }
4805  /* else treat it as a constant */
4806  break;
4807  }
4808 
4809  bms_free(varnos);
4810 
4811  vardata->var = node;
4812  vardata->atttype = exprType(node);
4813  vardata->atttypmod = exprTypmod(node);
4814 
4815  if (onerel)
4816  {
4817  /*
4818  * We have an expression in vars of a single relation. Try to match
4819  * it to expressional index columns, in hopes of finding some
4820  * statistics.
4821  *
4822  * Note that we consider all index columns including INCLUDE columns,
4823  * since there could be stats for such columns. But the test for
4824  * uniqueness needs to be warier.
4825  *
4826  * XXX it's conceivable that there are multiple matches with different
4827  * index opfamilies; if so, we need to pick one that matches the
4828  * operator we are estimating for. FIXME later.
4829  */
4830  ListCell *ilist;
4831 
4832  foreach(ilist, onerel->indexlist)
4833  {
4834  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4835  ListCell *indexpr_item;
4836  int pos;
4837 
4838  indexpr_item = list_head(index->indexprs);
4839  if (indexpr_item == NULL)
4840  continue; /* no expressions here... */
4841 
4842  for (pos = 0; pos < index->ncolumns; pos++)
4843  {
4844  if (index->indexkeys[pos] == 0)
4845  {
4846  Node *indexkey;
4847 
4848  if (indexpr_item == NULL)
4849  elog(ERROR, "too few entries in indexprs list");
4850  indexkey = (Node *) lfirst(indexpr_item);
4851  if (indexkey && IsA(indexkey, RelabelType))
4852  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4853  if (equal(node, indexkey))
4854  {
4855  /*
4856  * Found a match ... is it a unique index? Tests here
4857  * should match has_unique_index().
4858  */
4859  if (index->unique &&
4860  index->nkeycolumns == 1 &&
4861  pos == 0 &&
4862  (index->indpred == NIL || index->predOK))
4863  vardata->isunique = true;
4864 
4865  /*
4866  * Has it got stats? We only consider stats for
4867  * non-partial indexes, since partial indexes probably
4868  * don't reflect whole-relation statistics; the above
4869  * check for uniqueness is the only info we take from
4870  * a partial index.
4871  *
4872  * An index stats hook, however, must make its own
4873  * decisions about what to do with partial indexes.
4874  */
4875  if (get_index_stats_hook &&
4876  (*get_index_stats_hook) (root, index->indexoid,
4877  pos + 1, vardata))
4878  {
4879  /*
4880  * The hook took control of acquiring a stats
4881  * tuple. If it did supply a tuple, it'd better
4882  * have supplied a freefunc.
4883  */
4884  if (HeapTupleIsValid(vardata->statsTuple) &&
4885  !vardata->freefunc)
4886  elog(ERROR, "no function provided to release variable stats with");
4887  }
4888  else if (index->indpred == NIL)
4889  {
4890  vardata->statsTuple =
4892  ObjectIdGetDatum(index->indexoid),
4893  Int16GetDatum(pos + 1),
4894  BoolGetDatum(false));
4895  vardata->freefunc = ReleaseSysCache;
4896 
4897  if (HeapTupleIsValid(vardata->statsTuple))
4898  {
4899  /* Get index's table for permission check */
4900  RangeTblEntry *rte;
4901  Oid userid;
4902 
4903  rte = planner_rt_fetch(index->rel->relid, root);
4904  Assert(rte->rtekind == RTE_RELATION);
4905 
4906  /*
4907  * Use checkAsUser if it's set, in case we're
4908  * accessing the table via a view.
4909  */
4910  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4911 
4912  /*
4913  * For simplicity, we insist on the whole
4914  * table being selectable, rather than trying
4915  * to identify which column(s) the index
4916  * depends on. Also require all rows to be
4917  * selectable --- there must be no
4918  * securityQuals from security barrier views
4919  * or RLS policies.
4920  */
4921  vardata->acl_ok =
4922  rte->securityQuals == NIL &&
4923  (pg_class_aclcheck(rte->relid, userid,
4924  ACL_SELECT) == ACLCHECK_OK);
4925 
4926  /*
4927  * If the user doesn't have permissions to
4928  * access an inheritance child relation, check
4929  * the permissions of the table actually
4930  * mentioned in the query, since most likely
4931  * the user does have that permission. Note
4932  * that whole-table select privilege on the
4933  * parent doesn't quite guarantee that the
4934  * user could read all columns of the child.
4935  * But in practice it's unlikely that any
4936  * interesting security violation could result
4937  * from allowing access to the expression
4938  * index's stats, so we allow it anyway. See
4939  * similar code in examine_simple_variable()
4940  * for additional comments.
4941  */
4942  if (!vardata->acl_ok &&
4943  root->append_rel_array != NULL)
4944  {
4945  AppendRelInfo *appinfo;
4946  Index varno = index->rel->relid;
4947 
4948  appinfo = root->append_rel_array[varno];
4949  while (appinfo &&
4950  planner_rt_fetch(appinfo->parent_relid,
4951  root)->rtekind == RTE_RELATION)
4952  {
4953  varno = appinfo->parent_relid;
4954  appinfo = root->append_rel_array[varno];
4955  }
4956  if (varno != index->rel->relid)
4957  {
4958  /* Repeat access check on this rel */
4959  rte = planner_rt_fetch(varno, root);
4960  Assert(rte->rtekind == RTE_RELATION);
4961 
4962  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4963 
4964  vardata->acl_ok =
4965  rte->securityQuals == NIL &&
4966  (pg_class_aclcheck(rte->relid,
4967  userid,
4968  ACL_SELECT) == ACLCHECK_OK);
4969  }
4970  }
4971  }
4972  else
4973  {
4974  /* suppress leakproofness checks later */
4975  vardata->acl_ok = true;
4976  }
4977  }
4978  if (vardata->statsTuple)
4979  break;
4980  }
4981  indexpr_item = lnext(index->indexprs, indexpr_item);
4982  }
4983  }
4984  if (vardata->statsTuple)
4985  break;
4986  }
4987  }
4988 }
#define NIL
Definition: pg_list.h:65
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:439
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3033
HeapTuple statsTuple
Definition: selfuncs.h:74
Oid GetUserId(void)
Definition: miscinit.c:450
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:275
List * securityQuals
Definition: parsenodes.h:1125
RelOptInfo * rel
Definition: selfuncs.h:73
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:529
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:76
#define MemSet(start, val, len)
Definition: c.h:949
AttrNumber varattno
Definition: primnodes.h:186
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:181
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:5000
int32 atttypmod
Definition: selfuncs.h:79
Definition: type.h:89
RelOptInfo * rel
Definition: pathnodes.h:820
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2027
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
Oid vartype
Definition: primnodes.h:188
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:1138
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:373
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
Relids pull_varnos(Node *node)
Definition: var.c:95
Index relid
Definition: pathnodes.h:693
Index varno
Definition: primnodes.h:184
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
#define ACL_SELECT
Definition: parsenodes.h:75
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:577
struct AppendRelInfo ** append_rel_array
Definition: pathnodes.h:219
unsigned int Index
Definition: c.h:482
List * indexlist
Definition: pathnodes.h:702
#define BoolGetDatum(X)
Definition: postgres.h:402
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:745
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
int nkeycolumns
Definition: pathnodes.h:829
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:145
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4563
RTEKind rtekind
Definition: parsenodes.h:976
#define elog(elevel,...)
Definition: elog.h:214
void * arg
int * indexkeys
Definition: pathnodes.h:830
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:374
Index parent_relid
Definition: pathnodes.h:2229
List * indpred
Definition: pathnodes.h:844
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
List * indexprs
Definition: pathnodes.h:843
int32 vartypmod
Definition: primnodes.h:189

◆ generic_restriction_selectivity()

double generic_restriction_selectivity ( PlannerInfo root,
Oid  oproid,
Oid  collation,
List args,
int  varRelid,
double  default_selectivity 
)

Definition at line 911 of file selfuncs.c.

References CLAMP_PROBABILITY, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT, HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), ReleaseVariableStats, and VariableStatData::statsTuple.

Referenced by ltreeparentsel(), and matchingsel().

914 {
915  double selec;
916  VariableStatData vardata;
917  Node *other;
918  bool varonleft;
919 
920  /*
921  * If expression is not variable OP something or something OP variable,
922  * then punt and return the default estimate.
923  */
924  if (!get_restriction_variable(root, args, varRelid,
925  &vardata, &other, &varonleft))
926  return default_selectivity;
927 
928  /*
929  * If the something is a NULL constant, assume operator is strict and
930  * return zero, ie, operator will never return TRUE.
931  */
932  if (IsA(other, Const) &&
933  ((Const *) other)->constisnull)
934  {
935  ReleaseVariableStats(vardata);
936  return 0.0;
937  }
938 
939  if (IsA(other, Const))
940  {
941  /* Variable is being compared to a known non-null constant */
942  Datum constval = ((Const *) other)->constvalue;
943  FmgrInfo opproc;
944  double mcvsum;
945  double mcvsel;
946  double nullfrac;
947  int hist_size;
948 
949  fmgr_info(get_opcode(oproid), &opproc);
950 
951  /*
952  * Calculate the selectivity for the column's most common values.
953  */
954  mcvsel = mcv_selectivity(&vardata, &opproc, collation,
955  constval, varonleft,
956  &mcvsum);
957 
958  /*
959  * If the histogram is large enough, see what fraction of it matches
960  * the query, and assume that's representative of the non-MCV
961  * population. Otherwise use the default selectivity for the non-MCV
962  * population.
963  */
964  selec = histogram_selectivity(&vardata, &opproc, collation,
965  constval, varonleft,
966  10, 1, &hist_size);
967  if (selec < 0)
968  {
969  /* Nope, fall back on default */
970  selec = default_selectivity;
971  }
972  else if (hist_size < 100)
973  {
974  /*
975  * For histogram sizes from 10 to 100, we combine the histogram
976  * and default selectivities, putting increasingly more trust in
977  * the histogram for larger sizes.
978  */
979  double hist_weight = hist_size / 100.0;
980 
981  selec = selec * hist_weight +
982  default_selectivity * (1.0 - hist_weight);
983  }
984 
985  /* In any case, don't believe extremely small or large estimates. */
986  if (selec < 0.0001)
987  selec = 0.0001;
988  else if (selec > 0.9999)
989  selec = 0.9999;
990 
991  /* Don't forget to account for nulls. */
992  if (HeapTupleIsValid(vardata.statsTuple))
993  nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
994  else
995  nullfrac = 0.0;
996 
997  /*
998  * Now merge the results from the MCV and histogram calculations,
999  * realizing that the histogram covers only the non-null values that
1000  * are not listed in MCV.
1001  */
1002  selec *= 1.0 - nullfrac - mcvsum;
1003  selec += mcvsel;
1004  }
1005  else
1006  {
1007  /* Comparison value is not constant, so we can't do anything */
1008  selec = default_selectivity;
1009  }
1010 
1011  ReleaseVariableStats(vardata);
1012 
1013  /* result should be in range, but make sure... */
1014  CLAMP_PROBABILITY(selec);
1015 
1016  return selec;
1017 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4605
Definition: nodes.h:529
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
uintptr_t Datum
Definition: postgres.h:367
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:729
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1202
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:820
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 6000 of file selfuncs.c.

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, IndexOptInfo::pages, IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, GenericCosts::spc_random_page_cost, RelOptInfo::tuples, and IndexOptInfo::tuples.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

6004 {
6005  IndexOptInfo *index = path->indexinfo;
6006  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6007  List *indexOrderBys = path->indexorderbys;
6008  Cost indexStartupCost;
6009  Cost indexTotalCost;
6010  Selectivity indexSelectivity;
6011  double indexCorrelation;
6012  double numIndexPages;
6013  double numIndexTuples;
6014  double spc_random_page_cost;
6015  double num_sa_scans;
6016  double num_outer_scans;
6017  double num_scans;
6018  double qual_op_cost;
6019  double qual_arg_cost;
6020  List *selectivityQuals;
6021  ListCell *l;
6022 
6023  /*
6024  * If the index is partial, AND the index predicate with the explicitly
6025  * given indexquals to produce a more accurate idea of the index
6026  * selectivity.
6027  */
6028  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
6029 
6030  /*
6031  * Check for ScalarArrayOpExpr index quals, and estimate the number of
6032  * index scans that will be performed.
6033  */
6034  num_sa_scans = 1;
6035  foreach(l, indexQuals)
6036  {
6037  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6038 
6039  if (IsA(rinfo->clause, ScalarArrayOpExpr))
6040  {
6041  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6042  int alength = estimate_array_length(lsecond(saop->args));
6043 
6044  if (alength > 1)
6045  num_sa_scans *= alength;
6046  }
6047  }
6048 
6049  /* Estimate the fraction of main-table tuples that will be visited */
6050  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6051  index->rel->relid,
6052  JOIN_INNER,
6053  NULL);
6054 
6055  /*
6056  * If caller didn't give us an estimate, estimate the number of index
6057  * tuples that will be visited. We do it in this rather peculiar-looking
6058  * way in order to get the right answer for partial indexes.
6059  */
6060  numIndexTuples = costs->numIndexTuples;
6061  if (numIndexTuples <= 0.0)
6062  {
6063  numIndexTuples = indexSelectivity * index->rel->tuples;
6064 
6065  /*
6066  * The above calculation counts all the tuples visited across all
6067  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
6068  * average per-indexscan number, so adjust. This is a handy place to
6069  * round to integer, too. (If caller supplied tuple estimate, it's
6070  * responsible for handling these considerations.)
6071  */
6072  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6073  }
6074 
6075  /*
6076  * We can bound the number of tuples by the index size in any case. Also,
6077  * always estimate at least one tuple is touched, even when
6078  * indexSelectivity estimate is tiny.
6079  */
6080  if (numIndexTuples > index->tuples)
6081  numIndexTuples = index->tuples;
6082  if (numIndexTuples < 1.0)
6083  numIndexTuples = 1.0;
6084 
6085  /*
6086  * Estimate the number of index pages that will be retrieved.
6087  *
6088  * We use the simplistic method of taking a pro-rata fraction of the total
6089  * number of index pages. In effect, this counts only leaf pages and not
6090  * any overhead such as index metapage or upper tree levels.
6091  *
6092  * In practice access to upper index levels is often nearly free because
6093  * those tend to stay in cache under load; moreover, the cost involved is
6094  * highly dependent on index type. We therefore ignore such costs here
6095  * and leave it to the caller to add a suitable charge if needed.
6096  */
6097  if (index->pages > 1 && index->tuples > 1)
6098  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6099  else
6100  numIndexPages = 1.0;
6101 
6102  /* fetch estimated page cost for tablespace containing index */
6104  &spc_random_page_cost,
6105  NULL);
6106 
6107  /*
6108  * Now compute the disk access costs.
6109  *
6110  * The above calculations are all per-index-scan. However, if we are in a
6111  * nestloop inner scan, we can expect the scan to be repeated (with
6112  * different search keys) for each row of the outer relation. Likewise,
6113  * ScalarArrayOpExpr quals result in multiple index scans. This creates
6114  * the potential for cache effects to reduce the number of disk page
6115  * fetches needed. We want to estimate the average per-scan I/O cost in
6116  * the presence of caching.
6117  *
6118  * We use the Mackert-Lohman formula (see costsize.c for details) to
6119  * estimate the total number of page fetches that occur. While this
6120  * wasn't what it was designed for, it seems a reasonable model anyway.
6121  * Note that we are counting pages not tuples anymore, so we take N = T =
6122  * index size, as if there were one "tuple" per page.
6123  */
6124  num_outer_scans = loop_count;
6125  num_scans = num_sa_scans * num_outer_scans;
6126 
6127  if (num_scans > 1)
6128  {
6129  double pages_fetched;
6130 
6131  /* total page fetches ignoring cache effects */
6132  pages_fetched = numIndexPages * num_scans;
6133 
6134  /* use Mackert and Lohman formula to adjust for cache effects */
6135  pages_fetched = index_pages_fetched(pages_fetched,
6136  index->pages,
6137  (double) index->pages,
6138  root);
6139 
6140  /*
6141  * Now compute the total disk access cost, and then report a pro-rated
6142  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
6143  * since that's internal to the indexscan.)
6144  */
6145  indexTotalCost = (pages_fetched * spc_random_page_cost)
6146  / num_outer_scans;
6147  }
6148  else
6149  {
6150  /*
6151  * For a single index scan, we just charge spc_random_page_cost per
6152  * page touched.
6153  */
6154  indexTotalCost = numIndexPages * spc_random_page_cost;
6155  }
6156 
6157  /*
6158  * CPU cost: any complex expressions in the indexquals will need to be
6159  * evaluated once at the start of the scan to reduce them to runtime keys
6160  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
6161  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6162  * indexqual operator. Because we have numIndexTuples as a per-scan
6163  * number, we have to multiply by num_sa_scans to get the correct result
6164  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
6165  * ORDER BY expressions.
6166  *
6167  * Note: this neglects the possible costs of rechecking lossy operators.
6168  * Detecting that that might be needed seems more expensive than it's
6169  * worth, though, considering all the other inaccuracies here ...
6170  */
6171  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
6172  index_other_operands_eval_cost(root, indexOrderBys);
6173  qual_op_cost = cpu_operator_cost *
6174  (list_length(indexQuals) + list_length(indexOrderBys));
6175 
6176  indexStartupCost = qual_arg_cost;
6177  indexTotalCost += qual_arg_cost;
6178  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6179 
6180  /*
6181  * Generic assumption about index correlation: there isn't any.
6182  */
6183  indexCorrelation = 0.0;
6184 
6185  /*
6186  * Return everything to caller.
6187  */
6188  costs->indexStartupCost = indexStartupCost;
6189  costs->indexTotalCost = indexTotalCost;
6190  costs->indexSelectivity = indexSelectivity;
6191  costs->indexCorrelation = indexCorrelation;
6192  costs->numIndexPages = numIndexPages;
6193  costs->numIndexTuples = numIndexTuples;
6194  costs->spc_random_page_cost = spc_random_page_cost;
6195  costs->num_sa_scans = num_sa_scans;
6196 }
Selectivity indexSelectivity
Definition: selfuncs.h:109
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
IndexOptInfo * indexinfo
Definition: pathnodes.h:1207
double tuples
Definition: pathnodes.h:705
Oid reltablespace
Definition: pathnodes.h:819
List * indexclauses
Definition: pathnodes.h:1208
double Selectivity
Definition: nodes.h:662
double tuples
Definition: pathnodes.h:824
#define lsecond(l)
Definition: pg_list.h:200
Definition: type.h:89
BlockNumber pages
Definition: pathnodes.h:823
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:2132
RelOptInfo * rel
Definition: pathnodes.h:820
double num_sa_scans
Definition: selfuncs.h:116
double cpu_operator_cost
Definition: costsize.c:115
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:5916
Cost indexTotalCost
Definition: selfuncs.h:108
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
Index relid
Definition: pathnodes.h:693
Expr * clause
Definition: pathnodes.h:1985
double indexCorrelation
Definition: selfuncs.h:110
List * indexorderbys
Definition: pathnodes.h:1209
double spc_random_page_cost
Definition: selfuncs.h:115
double numIndexTuples
Definition: selfuncs.h:114
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:5946
#define lfirst(lc)
Definition: pg_list.h:190
static int list_length(const List *l)
Definition: pg_list.h:169
Cost indexStartupCost
Definition: selfuncs.h:107
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:69
Definition: pg_list.h:50
double cpu_index_tuple_cost
Definition: costsize.c:114
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:827
double Cost
Definition: nodes.h:663
double numIndexPages
Definition: selfuncs.h:113
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6218

◆ get_join_variables()

void get_join_variables ( PlannerInfo root,
List args,
SpecialJoinInfo sjinfo,
VariableStatData vardata1,
VariableStatData vardata2,
bool join_is_reversed 
)

Definition at line 4665 of file selfuncs.c.

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().

4668 {
4669  Node *left,
4670  *right;
4671 
4672  if (list_length(args) != 2)
4673  elog(ERROR, "join operator should take two arguments");
4674 
4675  left = (Node *) linitial(args);
4676  right = (Node *) lsecond(args);
4677 
4678  examine_variable(root, left, 0, vardata1);
4679  examine_variable(root, right, 0, vardata2);
4680 
4681  if (vardata1->rel &&
4682  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4683  *join_is_reversed = true; /* var1 is on RHS */
4684  else if (vardata2->rel &&
4685  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4686  *join_is_reversed = true; /* var2 is on LHS */
4687  else
4688  *join_is_reversed = false;
4689 }
RelOptInfo * rel
Definition: selfuncs.h:73
Definition: nodes.h:529
#define lsecond(l)
Definition: pg_list.h:200
Relids syn_lefthand
Definition: pathnodes.h:2177
Relids syn_righthand
Definition: pathnodes.h:2178
#define linitial(l)
Definition: pg_list.h:195
#define ERROR
Definition: elog.h:43
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
Relids relids
Definition: pathnodes.h:665
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
static int list_length(const List *l)
Definition: pg_list.h:169
#define elog(elevel,...)
Definition: elog.h:214

◆ get_quals_from_indexclauses()

List* get_quals_from_indexclauses ( List indexclauses)

Definition at line 5916 of file selfuncs.c.

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5917 {
5918  List *result = NIL;
5919  ListCell *lc;
5920 
5921  foreach(lc, indexclauses)
5922  {
5923  IndexClause *iclause = lfirst_node(IndexClause, lc);
5924  ListCell *lc2;
5925 
5926  foreach(lc2, iclause->indexquals)
5927  {
5928  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5929 
5930  result = lappend(result, rinfo);
5931  }
5932  }
5933  return result;
5934 }
#define NIL
Definition: pg_list.h:65
#define lfirst_node(type, lc)
Definition: pg_list.h:193
List * indexquals
Definition: pathnodes.h:1254
List * lappend(List *list, void *datum)
Definition: list.c:321
Definition: pg_list.h:50

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo root,
List args,
int  varRelid,
VariableStatData vardata,
Node **  other,
bool varonleft 
)

Definition at line 4605 of file selfuncs.c.

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), generic_restriction_selectivity(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().

4608 {
4609  Node *left,
4610  *right;
4611  VariableStatData rdata;
4612 
4613  /* Fail if not a binary opclause (probably shouldn't happen) */
4614  if (list_length(args) != 2)
4615  return false;
4616 
4617  left = (Node *) linitial(args);
4618  right = (Node *) lsecond(args);
4619 
4620  /*
4621  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4622  * relations will be treated as pseudoconstants.
4623  */
4624  examine_variable(root, left, varRelid, vardata);
4625  examine_variable(root, right, varRelid, &rdata);
4626 
4627  /*
4628  * If one side is a variable and the other not, we win.
4629  */
4630  if (vardata->rel && rdata.rel == NULL)
4631  {
4632  *varonleft = true;
4633  *other = estimate_expression_value(root, rdata.var);
4634  /* Assume we need no ReleaseVariableStats(rdata) here */
4635  return true;
4636  }
4637 
4638  if (vardata->rel == NULL && rdata.rel)
4639  {
4640  *varonleft = false;
4641  *other = estimate_expression_value(root, vardata->var);
4642  /* Assume we need no ReleaseVariableStats(*vardata) here */
4643  *vardata = rdata;
4644  return true;
4645  }
4646 
4647  /* Oops, clause has wrong structure (probably var op var) */
4648  ReleaseVariableStats(*vardata);
4649  ReleaseVariableStats(rdata);
4650 
4651  return false;
4652 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2288
RelOptInfo * rel
Definition: selfuncs.h:73
Definition: nodes.h:529
#define lsecond(l)
Definition: pg_list.h:200
#define linitial(l)
Definition: pg_list.h:195
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData vardata,
bool isdefault 
)

Definition at line 5276 of file selfuncs.c.

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().

5277 {
5278  double stadistinct;
5279  double stanullfrac = 0.0;
5280  double ntuples;
5281 
5282  *isdefault = false;
5283 
5284  /*
5285  * Determine the stadistinct value to use. There are cases where we can
5286  * get an estimate even without a pg_statistic entry, or can get a better
5287  * value than is in pg_statistic. Grab stanullfrac too if we can find it
5288  * (otherwise, assume no nulls, for lack of any better idea).
5289  */
5290  if (HeapTupleIsValid(vardata->statsTuple))
5291  {
5292  /* Use the pg_statistic entry */
5293  Form_pg_statistic stats;
5294 
5295  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5296  stadistinct = stats->stadistinct;
5297  stanullfrac = stats->stanullfrac;
5298  }
5299  else if (vardata->vartype == BOOLOID)
5300  {
5301  /*
5302  * Special-case boolean columns: presumably, two distinct values.
5303  *
5304  * Are there any other datatypes we should wire in special estimates
5305  * for?
5306  */
5307  stadistinct = 2.0;
5308  }
5309  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5310  {
5311  /*
5312  * If the Var represents a column of a VALUES RTE, assume it's unique.
5313  * This could of course be very wrong, but it should tend to be true
5314  * in well-written queries. We could consider examining the VALUES'
5315  * contents to get some real statistics; but that only works if the
5316  * entries are all constants, and it would be pretty expensive anyway.
5317  */
5318  stadistinct = -1.0; /* unique (and all non null) */
5319  }
5320  else
5321  {
5322  /*
5323  * We don't keep statistics for system columns, but in some cases we
5324  * can infer distinctness anyway.
5325  */
5326  if (vardata->var && IsA(vardata->var, Var))
5327  {
5328  switch (((Var *) vardata->var)->varattno)
5329  {
5331  stadistinct = -1.0; /* unique (and all non null) */
5332  break;
5334  stadistinct = 1.0; /* only 1 value */
5335  break;
5336  default:
5337  stadistinct = 0.0; /* means "unknown" */
5338  break;
5339  }
5340  }
5341  else
5342  stadistinct = 0.0; /* means "unknown" */
5343 
5344  /*
5345  * XXX consider using estimate_num_groups on expressions?
5346  */
5347  }
5348 
5349  /*
5350  * If there is a unique index or DISTINCT clause for the variable, assume
5351  * it is unique no matter what pg_statistic says; the statistics could be
5352  * out of date, or we might have found a partial unique index that proves
5353  * the var is unique for this query. However, we'd better still believe
5354  * the null-fraction statistic.
5355  */
5356  if (vardata->isunique)
5357  stadistinct = -1.0 * (1.0 - stanullfrac);
5358 
5359  /*
5360  * If we had an absolute estimate, use that.
5361  */
5362  if (stadistinct > 0.0)
5363  return clamp_row_est(stadistinct);
5364 
5365  /*
5366  * Otherwise we need to get the relation size; punt if not available.
5367  */
5368  if (vardata->rel == NULL)
5369  {
5370  *isdefault = true;
5371  return DEFAULT_NUM_DISTINCT;
5372  }
5373  ntuples = vardata->rel->tuples;
5374  if (ntuples <= 0.0)
5375  {
5376  *isdefault = true;
5377  return DEFAULT_NUM_DISTINCT;
5378  }
5379 
5380  /*
5381  * If we had a relative estimate, use that.
5382  */
5383  if (stadistinct < 0.0)
5384  return clamp_row_est(-stadistinct * ntuples);
5385 
5386  /*
5387  * With no data, estimate ndistinct = ntuples if the table is small, else
5388  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5389  * that the behavior isn't discontinuous.
5390  */
5391  if (ntuples < DEFAULT_NUM_DISTINCT)
5392  return clamp_row_est(ntuples);
5393 
5394  *isdefault = true;
5395  return DEFAULT_NUM_DISTINCT;
5396 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
Definition: primnodes.h:181
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define TableOidAttributeNumber
Definition: sysattr.h:26
RTEKind rtekind
Definition: pathnodes.h:695
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:49
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21
double clamp_row_est(double nrows)
Definition: costsize.c:189

◆ histogram_selectivity()

double histogram_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Oid  collation,
Datum  constval,
bool  varonleft,
int  min_hist_size,
int  n_skip,
int *  hist_size 
)

Definition at line 820 of file selfuncs.c.

References Assert, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nvalues, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), and patternsel_common().

825 {
826  double result;
827  AttStatsSlot sslot;
828 
829  /* check sanity of parameters */
830  Assert(n_skip >= 0);
831  Assert(min_hist_size > 2 * n_skip);
832 
833  if (HeapTupleIsValid(vardata->statsTuple) &&
834  statistic_proc_security_check(vardata, opproc->fn_oid) &&
835  get_attstatsslot(&sslot, vardata->statsTuple,
836  STATISTIC_KIND_HISTOGRAM, InvalidOid,
838  {
839  *hist_size = sslot.nvalues;
840  if (sslot.nvalues >= min_hist_size)
841  {
842  LOCAL_FCINFO(fcinfo, 2);
843  int nmatch = 0;
844  int i;
845 
846  /*
847  * We invoke the opproc "by hand" so that we won't fail on NULL
848  * results. Such cases won't arise for normal comparison
849  * functions, but generic_restriction_selectivity could perhaps be
850  * used with operators that can return NULL. A small side benefit
851  * is to not need to re-initialize the fcinfo struct from scratch
852  * each time.
853  */
854  InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
855  NULL, NULL);
856  fcinfo->args[0].isnull = false;
857  fcinfo->args[1].isnull = false;
858  /* be careful to apply operator right way 'round */
859  if (varonleft)
860  fcinfo->args[1].value = constval;
861  else
862  fcinfo->args[0].value = constval;
863 
864  for (i = n_skip; i < sslot.nvalues - n_skip; i++)
865  {
866  Datum fresult;
867 
868  if (varonleft)
869  fcinfo->args[0].value = sslot.values[i];
870  else
871  fcinfo->args[1].value = sslot.values[i];
872  fcinfo->isnull = false;
873  fresult = FunctionCallInvoke(fcinfo);
874  if (!fcinfo->isnull && DatumGetBool(fresult))
875  nmatch++;
876  }
877  result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
878  }
879  else
880  result = -1;
881  free_attstatsslot(&sslot);
882  }
883  else
884  {
885  *hist_size = 0;
886  result = -1;
887  }
888 
889  return result;
890 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5247
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
#define Assert(condition)
Definition: c.h:745
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ index_other_operands_eval_cost()

Cost index_other_operands_eval_cost ( PlannerInfo root,
List indexquals 
)

Definition at line 5946 of file selfuncs.c.

References OpExpr::args, ScalarArrayOpExpr::args, cost_qual_eval_node(), elog, ERROR, IsA, lfirst, lsecond, nodeTag, QualCost::per_tuple, RowCompareExpr::rargs, and QualCost::startup.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5947 {
5948  Cost qual_arg_cost = 0;
5949  ListCell *lc;
5950 
5951  foreach(lc, indexquals)
5952  {
5953  Expr *clause = (Expr *) lfirst(lc);
5954  Node *other_operand;
5955  QualCost index_qual_cost;
5956 
5957  /*
5958  * Index quals will have RestrictInfos, indexorderbys won't. Look
5959  * through RestrictInfo if present.
5960  */
5961  if (IsA(clause, RestrictInfo))
5962  clause = ((RestrictInfo *) clause)->clause;
5963 
5964  if (IsA(clause, OpExpr))
5965  {
5966  OpExpr *op = (OpExpr *) clause;
5967 
5968  other_operand = (Node *) lsecond(op->args);
5969  }
5970  else if (IsA(clause, RowCompareExpr))
5971  {
5972  RowCompareExpr *rc = (RowCompareExpr *) clause;
5973 
5974  other_operand = (Node *) rc->rargs;
5975  }
5976  else if (IsA(clause, ScalarArrayOpExpr))
5977  {
5978  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5979 
5980  other_operand = (Node *) lsecond(saop->args);
5981  }
5982  else if (IsA(clause, NullTest))
5983  {
5984  other_operand = NULL;
5985  }
5986  else
5987  {
5988  elog(ERROR, "unsupported indexqual type: %d",
5989  (int) nodeTag(clause));
5990  other_operand = NULL; /* keep compiler quiet */
5991  }
5992 
5993  cost_qual_eval_node(&index_qual_cost, other_operand, root);
5994  qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5995  }
5996  return qual_arg_cost;
5997 }
void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
Definition: costsize.c:4069
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
Definition: nodes.h:529
#define lsecond(l)
Definition: pg_list.h:200
Cost startup
Definition: pathnodes.h:45
Cost per_tuple
Definition: pathnodes.h:46
#define ERROR
Definition: elog.h:43
#define lfirst(lc)
Definition: pg_list.h:190
#define nodeTag(nodeptr)
Definition: nodes.h:534
#define elog(elevel,...)
Definition: elog.h:214
List * args
Definition: primnodes.h:522
double Cost
Definition: nodes.h:663

◆ ineq_histogram_selectivity()

double ineq_histogram_selectivity ( PlannerInfo root,
VariableStatData vardata,
Oid  opoid,
FmgrInfo opproc,
bool  isgt,
bool  iseq,
Oid  collation,
Datum  constval,
Oid  consttype 
)

Definition at line 1038 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, comparison_ops_are_compatible(), convert_to_scalar(), DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), FunctionCallInvoke, get_actual_variable_range(), get_attstatsslot(), get_variable_numdistinct(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, AttStatsSlot::staop, statistic_proc_security_check(), VariableStatData::statsTuple, val, AttStatsSlot::values, and VariableStatData::vartype.

Referenced by prefix_selectivity(), and scalarineqsel().

1043 {
1044  double hist_selec;
1045  AttStatsSlot sslot;
1046 
1047  hist_selec = -1.0;
1048 
1049  /*
1050  * Someday, ANALYZE might store more than one histogram per rel/att,
1051  * corresponding to more than one possible sort ordering defined for the
1052  * column type. Right now, we know there is only one, so just grab it and
1053  * see if it matches the query.
1054  *
1055  * Note that we can't use opoid as search argument; the staop appearing in
1056  * pg_statistic will be for the relevant '<' operator, but what we have
1057  * might be some other inequality operator such as '>='. (Even if opoid
1058  * is a '<' operator, it could be cross-type.) Hence we must use
1059  * comparison_ops_are_compatible() to see if the operators match.
1060  */
1061  if (HeapTupleIsValid(vardata->statsTuple) &&
1062  statistic_proc_security_check(vardata, opproc->fn_oid) &&
1063  get_attstatsslot(&sslot, vardata->statsTuple,
1064  STATISTIC_KIND_HISTOGRAM, InvalidOid,
1066  {
1067  if (sslot.nvalues > 1 &&
1068  sslot.stacoll == collation &&
1069  comparison_ops_are_compatible(sslot.staop, opoid))
1070  {
1071  /*
1072  * Use binary search to find the desired location, namely the
1073  * right end of the histogram bin containing the comparison value,
1074  * which is the leftmost entry for which the comparison operator
1075  * succeeds (if isgt) or fails (if !isgt).
1076  *
1077  * In this loop, we pay no attention to whether the operator iseq
1078  * or not; that detail will be mopped up below. (We cannot tell,
1079  * anyway, whether the operator thinks the values are equal.)
1080  *
1081  * If the binary search accesses the first or last histogram
1082  * entry, we try to replace that endpoint with the true column min
1083  * or max as found by get_actual_variable_range(). This
1084  * ameliorates misestimates when the min or max is moving as a
1085  * result of changes since the last ANALYZE. Note that this could
1086  * result in effectively including MCVs into the histogram that
1087  * weren't there before, but we don't try to correct for that.
1088  */
1089  double histfrac;
1090  int lobound = 0; /* first possible slot to search */
1091  int hibound = sslot.nvalues; /* last+1 slot to search */
1092  bool have_end = false;
1093 
1094  /*
1095  * If there are only two histogram entries, we'll want up-to-date
1096  * values for both. (If there are more than two, we need at most
1097  * one of them to be updated, so we deal with that within the
1098  * loop.)
1099  */
1100  if (sslot.nvalues == 2)
1101  have_end = get_actual_variable_range(root,
1102  vardata,
1103  sslot.staop,
1104  collation,
1105  &sslot.values[0],
1106  &sslot.values[1]);
1107 
1108  while (lobound < hibound)
1109  {
1110  int probe = (lobound + hibound) / 2;
1111  bool ltcmp;
1112 
1113  /*
1114  * If we find ourselves about to compare to the first or last
1115  * histogram entry, first try to replace it with the actual
1116  * current min or max (unless we already did so above).
1117  */
1118  if (probe == 0 && sslot.nvalues > 2)
1119  have_end = get_actual_variable_range(root,
1120  vardata,
1121  sslot.staop,
1122  collation,
1123  &sslot.values[0],
1124  NULL);
1125  else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
1126  have_end = get_actual_variable_range(root,
1127  vardata,
1128  sslot.staop,
1129  collation,
1130  NULL,
1131  &sslot.values[probe]);
1132 
1133  ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
1134  collation,
1135  sslot.values[probe],
1136  constval));
1137  if (isgt)
1138  ltcmp = !ltcmp;
1139  if (ltcmp)
1140  lobound = probe + 1;
1141  else
1142  hibound = probe;
1143  }
1144 
1145  if (lobound <= 0)
1146  {
1147  /*
1148  * Constant is below lower histogram boundary. More
1149  * precisely, we have found that no entry in the histogram
1150  * satisfies the inequality clause (if !isgt) or they all do
1151  * (if isgt). We estimate that that's true of the entire
1152  * table, so set histfrac to 0.0 (which we'll flip to 1.0
1153  * below, if isgt).
1154  */
1155  histfrac = 0.0;
1156  }
1157  else if (lobound >= sslot.nvalues)
1158  {
1159  /*
1160  * Inverse case: constant is above upper histogram boundary.
1161  */
1162  histfrac = 1.0;
1163  }
1164  else
1165  {
1166  /* We have values[i-1] <= constant <= values[i]. */
1167  int i = lobound;
1168  double eq_selec = 0;
1169  double val,
1170  high,
1171  low;
1172  double binfrac;
1173 
1174  /*
1175  * In the cases where we'll need it below, obtain an estimate
1176  * of the selectivity of "x = constval". We use a calculation
1177  * similar to what var_eq_const() does for a non-MCV constant,
1178  * ie, estimate that all distinct non-MCV values occur equally
1179  * often. But multiplication by "1.0 - sumcommon - nullfrac"
1180  * will be done by our caller, so we shouldn't do that here.
1181  * Therefore we can't try to clamp the estimate by reference
1182  * to the least common MCV; the result would be too small.
1183  *
1184  * Note: since this is effectively assuming that constval
1185  * isn't an MCV, it's logically dubious if constval in fact is
1186  * one. But we have to apply *some* correction for equality,
1187  * and anyway we cannot tell if constval is an MCV, since we
1188  * don't have a suitable equality operator at hand.
1189  */
1190  if (i == 1 || isgt == iseq)
1191  {
1192  double otherdistinct;
1193  bool isdefault;
1194  AttStatsSlot mcvslot;
1195 
1196  /* Get estimated number of distinct values */
1197  otherdistinct = get_variable_numdistinct(vardata,
1198  &isdefault);
1199 
1200  /* Subtract off the number of known MCVs */
1201  if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1202  STATISTIC_KIND_MCV, InvalidOid,
1204  {
1205  otherdistinct -= mcvslot.nnumbers;
1206  free_attstatsslot(&mcvslot);
1207  }
1208 
1209  /* If result doesn't seem sane, leave eq_selec at 0 */
1210  if (otherdistinct > 1)
1211  eq_selec = 1.0 / otherdistinct;
1212  }
1213 
1214  /*
1215  * Convert the constant and the two nearest bin boundary
1216  * values to a uniform comparison scale, and do a linear
1217  * interpolation within this bin.
1218  */
1219  if (convert_to_scalar(constval, consttype, collation,
1220  &val,
1221  sslot.values[i - 1], sslot.values[i],
1222  vardata->vartype,
1223  &low, &high))
1224  {
1225  if (high <= low)
1226  {
1227  /* cope if bin boundaries appear identical */
1228  binfrac = 0.5;
1229  }
1230  else if (val <= low)
1231  binfrac = 0.0;
1232  else if (val >= high)
1233  binfrac = 1.0;
1234  else
1235  {
1236  binfrac = (val - low) / (high - low);
1237 
1238  /*
1239  * Watch out for the possibility that we got a NaN or
1240  * Infinity from the division. This can happen
1241  * despite the previous checks, if for example "low"
1242  * is -Infinity.
1243  */
1244  if (isnan(binfrac) ||
1245  binfrac < 0.0 || binfrac > 1.0)
1246  binfrac = 0.5;
1247  }
1248  }
1249  else
1250  {
1251  /*
1252  * Ideally we'd produce an error here, on the grounds that
1253  * the given operator shouldn't have scalarXXsel
1254  * registered as its selectivity func unless we can deal
1255  * with its operand types. But currently, all manner of
1256  * stuff is invoking scalarXXsel, so give a default
1257  * estimate until that can be fixed.
1258  */
1259  binfrac = 0.5;
1260  }
1261 
1262  /*
1263  * Now, compute the overall selectivity across the values
1264  * represented by the histogram. We have i-1 full bins and
1265  * binfrac partial bin below the constant.
1266  */
1267  histfrac = (double) (i - 1) + binfrac;
1268  histfrac /= (double) (sslot.nvalues - 1);
1269 
1270  /*
1271  * At this point, histfrac is an estimate of the fraction of
1272  * the population represented by the histogram that satisfies
1273  * "x <= constval". Somewhat remarkably, this statement is
1274  * true regardless of which operator we were doing the probes
1275  * with, so long as convert_to_scalar() delivers reasonable
1276  * results. If the probe constant is equal to some histogram
1277  * entry, we would have considered the bin to the left of that
1278  * entry if probing with "<" or ">=", or the bin to the right
1279  * if probing with "<=" or ">"; but binfrac would have come
1280  * out as 1.0 in the first case and 0.0 in the second, leading
1281  * to the same histfrac in either case. For probe constants
1282  * between histogram entries, we find the same bin and get the
1283  * same estimate with any operator.
1284  *
1285  * The fact that the estimate corresponds to "x <= constval"
1286  * and not "x < constval" is because of the way that ANALYZE
1287  * constructs the histogram: each entry is, effectively, the
1288  * rightmost value in its sample bucket. So selectivity
1289  * values that are exact multiples of 1/(histogram_size-1)
1290  * should be understood as estimates including a histogram
1291  * entry plus everything to its left.
1292  *
1293  * However, that breaks down for the first histogram entry,
1294  * which necessarily is the leftmost value in its sample
1295  * bucket. That means the first histogram bin is slightly
1296  * narrower than the rest, by an amount equal to eq_selec.
1297  * Another way to say that is that we want "x <= leftmost" to
1298  * be estimated as eq_selec not zero. So, if we're dealing
1299  * with the first bin (i==1), rescale to make that true while
1300  * adjusting the rest of that bin linearly.
1301  */
1302  if (i == 1)
1303  histfrac += eq_selec * (1.0 - binfrac);
1304 
1305  /*
1306  * "x <= constval" is good if we want an estimate for "<=" or
1307  * ">", but if we are estimating for "<" or ">=", we now need
1308  * to decrease the estimate by eq_selec.
1309  */
1310  if (isgt == iseq)
1311  histfrac -= eq_selec;
1312  }
1313 
1314  /*
1315  * Now the estimate is finished for "<" and "<=" cases. If we are
1316  * estimating for ">" or ">=", flip it.
1317  */
1318  hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1319 
1320  /*
1321  * The histogram boundaries are only approximate to begin with,
1322  * and may well be out of date anyway. Therefore, don't believe
1323  * extremely small or large selectivity estimates --- unless we
1324  * got actual current endpoint values from the table, in which
1325  * case just do the usual sanity clamp. Somewhat arbitrarily, we
1326  * set the cutoff for other cases at a hundredth of the histogram
1327  * resolution.
1328  */
1329  if (have_end)
1330  CLAMP_PROBABILITY(hist_selec);
1331  else
1332  {
1333  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1334 
1335  if (hist_selec < cutoff)
1336  hist_selec = cutoff;
1337  else if (hist_selec > 1.0 - cutoff)
1338  hist_selec = 1.0 - cutoff;
1339  }
1340  }
1341  else if (sslot.nvalues > 1)
1342  {
1343  /*
1344  * If we get here, we have a histogram but it's not sorted the way
1345  * we want. Do a brute-force search to see how many of the
1346  * entries satisfy the comparison condition, and take that
1347  * fraction as our estimate. (This is identical to the inner loop
1348  * of histogram_selectivity; maybe share code?)
1349  */
1350  LOCAL_FCINFO(fcinfo, 2);
1351  int nmatch = 0;
1352 
1353  InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
1354  NULL, NULL);
1355  fcinfo->args[0].isnull = false;
1356  fcinfo->args[1].isnull = false;
1357  fcinfo->args[1].value = constval;
1358  for (int i = 0; i < sslot.nvalues; i++)
1359  {
1360  Datum fresult;
1361 
1362  fcinfo->args[0].value = sslot.values[i];
1363  fcinfo->isnull = false;
1364  fresult = FunctionCallInvoke(fcinfo);
1365  if (!fcinfo->isnull && DatumGetBool(fresult))
1366  nmatch++;
1367  }
1368  hist_selec = ((double) nmatch) / ((double) sslot.nvalues);
1369 
1370  /*
1371  * As above, clamp to a hundredth of the histogram resolution.
1372  * This case is surely even less trustworthy than the normal one,
1373  * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the
1374  * clamp should be more restrictive in this case?)
1375  */
1376  {
1377  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1378 
1379  if (hist_selec < cutoff)
1380  hist_selec = cutoff;
1381  else if (hist_selec > 1.0 - cutoff)
1382  hist_selec = 1.0 - cutoff;
1383  }
1384  }
1385 
1386  free_attstatsslot(&sslot);
1387  }
1388 
1389  return hist_selec;
1390 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5247
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1152
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
Definition: selfuncs.c:4033
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5276
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:5579
bool comparison_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:747
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
long val
Definition: informix.c:664
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ mcv_selectivity()

double mcv_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Oid  collation,
Datum  constval,
bool  varonleft,
double *  sumcommonp 
)

Definition at line 729 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::numbers, AttStatsSlot::nvalues, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), networksel(), patternsel_common(), and scalarineqsel().

732 {
733  double mcv_selec,
734  sumcommon;
735  AttStatsSlot sslot;
736  int i;
737 
738  mcv_selec = 0.0;
739  sumcommon = 0.0;
740 
741  if (HeapTupleIsValid(vardata->statsTuple) &&
742  statistic_proc_security_check(vardata, opproc->fn_oid) &&
743  get_attstatsslot(&sslot, vardata->statsTuple,
744  STATISTIC_KIND_MCV, InvalidOid,
746  {
747  LOCAL_FCINFO(fcinfo, 2);
748 
749  /*
750  * We invoke the opproc "by hand" so that we won't fail on NULL
751  * results. Such cases won't arise for normal comparison functions,
752  * but generic_restriction_selectivity could perhaps be used with
753  * operators that can return NULL. A small side benefit is to not
754  * need to re-initialize the fcinfo struct from scratch each time.
755  */
756  InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
757  NULL, NULL);
758  fcinfo->args[0].isnull = false;
759  fcinfo->args[1].isnull = false;
760  /* be careful to apply operator right way 'round */
761  if (varonleft)
762  fcinfo->args[1].value = constval;
763  else
764  fcinfo->args[0].value = constval;
765 
766  for (i = 0; i < sslot.nvalues; i++)
767  {
768  Datum fresult;
769 
770  if (varonleft)
771  fcinfo->args[0].value = sslot.values[i];
772  else
773  fcinfo->args[1].value = sslot.values[i];
774  fcinfo->isnull = false;
775  fresult = FunctionCallInvoke(fcinfo);
776  if (!fcinfo->isnull && DatumGetBool(fresult))
777  mcv_selec += sslot.numbers[i];
778  sumcommon += sslot.numbers[i];
779  }
780  free_attstatsslot(&sslot);
781  }
782 
783  *sumcommonp = sumcommon;
784  return mcv_selec;
785 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5247
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ mergejoinscansel()

void mergejoinscansel ( PlannerInfo root,
Node clause,
Oid  opfamily,
int  strategy,
bool  nulls_first,
Selectivity leftstart,
Selectivity leftend,
Selectivity rightstart,
Selectivity rightend 
)

Definition at line 2904 of file selfuncs.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause(), OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.

Referenced by cached_scansel().

2908 {
2909  Node *left,
2910  *right;
2911  VariableStatData leftvar,
2912  rightvar;
2913  int op_strategy;
2914  Oid op_lefttype;
2915  Oid op_righttype;
2916  Oid opno,
2917  collation,
2918  lsortop,
2919  rsortop,
2920  lstatop,
2921  rstatop,
2922  ltop,
2923  leop,
2924  revltop,
2925  revleop;
2926  bool isgt;
2927  Datum leftmin,
2928  leftmax,
2929  rightmin,
2930  rightmax;
2931  double selec;
2932 
2933  /* Set default results if we can't figure anything out. */
2934  /* XXX should default "start" fraction be a bit more than 0? */
2935  *leftstart = *rightstart = 0.0;
2936  *leftend = *rightend = 1.0;
2937 
2938  /* Deconstruct the merge clause */
2939  if (!is_opclause(clause))
2940  return; /* shouldn't happen */
2941  opno = ((OpExpr *) clause)->opno;
2942  collation = ((OpExpr *) clause)->inputcollid;
2943  left = get_leftop((Expr *) clause);
2944  right = get_rightop((Expr *) clause);
2945  if (!right)
2946  return; /* shouldn't happen */
2947 
2948  /* Look for stats for the inputs */
2949  examine_variable(root, left, 0, &leftvar);
2950  examine_variable(root, right, 0, &rightvar);
2951 
2952  /* Extract the operator's declared left/right datatypes */
2953  get_op_opfamily_properties(opno, opfamily, false,
2954  &op_strategy,
2955  &op_lefttype,
2956  &op_righttype);
2957  Assert(op_strategy == BTEqualStrategyNumber);
2958 
2959  /*
2960  * Look up the various operators we need. If we don't find them all, it
2961  * probably means the opfamily is broken, but we just fail silently.
2962  *
2963  * Note: we expect that pg_statistic histograms will be sorted by the '<'
2964  * operator, regardless of which sort direction we are considering.
2965  */
2966  switch (strategy)
2967  {
2968  case BTLessStrategyNumber:
2969  isgt = false;
2970  if (op_lefttype == op_righttype)
2971  {
2972  /* easy case */
2973  ltop = get_opfamily_member(opfamily,
2974  op_lefttype, op_righttype,
2976  leop = get_opfamily_member(opfamily,
2977  op_lefttype, op_righttype,
2979  lsortop = ltop;
2980  rsortop = ltop;
2981  lstatop = lsortop;
2982  rstatop = rsortop;
2983  revltop = ltop;
2984  revleop = leop;
2985  }
2986  else
2987  {
2988  ltop = get_opfamily_member(opfamily,
2989  op_lefttype, op_righttype,
2991  leop = get_opfamily_member(opfamily,
2992  op_lefttype, op_righttype,
2994  lsortop = get_opfamily_member(opfamily,
2995  op_lefttype, op_lefttype,
2997  rsortop = get_opfamily_member(opfamily,
2998  op_righttype, op_righttype,
3000  lstatop = lsortop;
3001  rstatop = rsortop;
3002  revltop = get_opfamily_member(opfamily,
3003  op_righttype, op_lefttype,
3005  revleop = get_opfamily_member(opfamily,
3006  op_righttype, op_lefttype,
3008  }
3009  break;
3011  /* descending-order case */
3012  isgt = true;
3013  if (op_lefttype == op_righttype)
3014  {
3015  /* easy case */
3016  ltop = get_opfamily_member(opfamily,
3017  op_lefttype, op_righttype,
3019  leop = get_opfamily_member(opfamily,
3020  op_lefttype, op_righttype,
3022  lsortop = ltop;
3023  rsortop = ltop;
3024  lstatop = get_opfamily_member(opfamily,
3025  op_lefttype, op_lefttype,
3027  rstatop = lstatop;
3028  revltop = ltop;
3029  revleop = leop;
3030  }
3031  else
3032  {
3033  ltop = get_opfamily_member(opfamily,
3034  op_lefttype, op_righttype,
3036  leop = get_opfamily_member(opfamily,
3037  op_lefttype, op_righttype,
3039  lsortop = get_opfamily_member(opfamily,
3040  op_lefttype, op_lefttype,
3042  rsortop = get_opfamily_member(opfamily,
3043  op_righttype, op_righttype,
3045  lstatop = get_opfamily_member(opfamily,
3046  op_lefttype, op_lefttype,
3048  rstatop = get_opfamily_member(opfamily,
3049  op_righttype, op_righttype,
3051  revltop = get_opfamily_member(opfamily,
3052  op_righttype, op_lefttype,
3054  revleop = get_opfamily_member(opfamily,
3055  op_righttype, op_lefttype,
3057  }
3058  break;
3059  default:
3060  goto fail; /* shouldn't get here */
3061  }
3062 
3063  if (!OidIsValid(lsortop) ||
3064  !OidIsValid(rsortop) ||
3065  !OidIsValid(lstatop) ||
3066  !OidIsValid(rstatop) ||
3067  !OidIsValid(ltop) ||
3068  !OidIsValid(leop) ||
3069  !OidIsValid(revltop) ||
3070  !OidIsValid(revleop))
3071  goto fail; /* insufficient info in catalogs */
3072 
3073  /* Try to get ranges of both inputs */
3074  if (!isgt)
3075  {
3076  if (!get_variable_range(root, &leftvar, lstatop, collation,
3077  &leftmin, &leftmax))
3078  goto fail; /* no range available from stats */
3079  if (!get_variable_range(root, &rightvar, rstatop, collation,
3080  &rightmin, &rightmax))
3081  goto fail; /* no range available from stats */
3082  }
3083  else
3084  {
3085  /* need to swap the max and min */
3086  if (!get_variable_range(root, &leftvar, lstatop, collation,
3087  &leftmax, &leftmin))
3088  goto fail; /* no range available from stats */
3089  if (!get_variable_range(root, &rightvar, rstatop, collation,
3090  &rightmax, &rightmin))
3091  goto fail; /* no range available from stats */
3092  }
3093 
3094  /*
3095  * Now, the fraction of the left variable that will be scanned is the
3096  * fraction that's <= the right-side maximum value. But only believe
3097  * non-default estimates, else stick with our 1.0.
3098  */
3099  selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
3100  rightmax, op_righttype);
3101  if (selec != DEFAULT_INEQ_SEL)
3102  *leftend = selec;
3103 
3104  /* And similarly for the right variable. */
3105  selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
3106  leftmax, op_lefttype);
3107  if (selec != DEFAULT_INEQ_SEL)
3108  *rightend = selec;
3109 
3110  /*
3111  * Only one of the two "end" fractions can really be less than 1.0;
3112  * believe the smaller estimate and reset the other one to exactly 1.0. If
3113  * we get exactly equal estimates (as can easily happen with self-joins),
3114  * believe neither.
3115  */
3116  if (*leftend > *rightend)
3117  *leftend = 1.0;
3118  else if (*leftend < *rightend)
3119  *rightend = 1.0;
3120  else
3121  *leftend = *rightend = 1.0;
3122 
3123  /*
3124  * Also, the fraction of the left variable that will be scanned before the
3125  * first join pair is found is the fraction that's < the right-side
3126  * minimum value. But only believe non-default estimates, else stick with
3127  * our own default.
3128  */
3129  selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
3130  rightmin, op_righttype);
3131  if (selec != DEFAULT_INEQ_SEL)
3132  *leftstart = selec;
3133 
3134  /* And similarly for the right variable. */
3135  selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
3136  leftmin, op_lefttype);
3137  if (selec != DEFAULT_INEQ_SEL)
3138  *rightstart = selec;
3139 
3140  /*
3141  * Only one of the two "start" fractions can really be more than zero;
3142  * believe the larger estimate and reset the other one to exactly 0.0. If
3143  * we get exactly equal estimates (as can easily happen with self-joins),
3144  * believe neither.
3145  */
3146  if (*leftstart < *rightstart)
3147  *leftstart = 0.0;
3148  else if (*leftstart > *rightstart)
3149  *rightstart = 0.0;
3150  else
3151  *leftstart = *rightstart = 0.0;
3152 
3153  /*
3154  * If the sort order is nulls-first, we're going to have to skip over any
3155  * nulls too. These would not have been counted by scalarineqsel, and we
3156  * can safely add in this fraction regardless of whether we believe
3157  * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
3158  */
3159  if (nulls_first)
3160  {
3161  Form_pg_statistic stats;
3162 
3163  if (HeapTupleIsValid(leftvar.statsTuple))
3164  {
3165  stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3166  *leftstart += stats->stanullfrac;
3167  CLAMP_PROBABILITY(*leftstart);
3168  *leftend += stats->stanullfrac;
3169  CLAMP_PROBABILITY(*leftend);
3170  }
3171  if (HeapTupleIsValid(rightvar.statsTuple))
3172  {
3173  stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3174  *rightstart += stats->stanullfrac;
3175  CLAMP_PROBABILITY(*rightstart);
3176  *rightend += stats->stanullfrac;
3177  CLAMP_PROBABILITY(*rightend);
3178  }
3179  }
3180 
3181  /* Disbelieve start >= end, just in case that can happen */
3182  if (*leftstart >= *leftend)
3183  {
3184  *leftstart = 0.0;
3185  *leftend = 1.0;
3186  }
3187  if (*rightstart >= *rightend)
3188  {
3189  *rightstart = 0.0;
3190  *rightend = 1.0;
3191  }
3192 
3193 fail:
3194  ReleaseVariableStats(leftvar);
3195  ReleaseVariableStats(rightvar);
3196 }
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:5409
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Definition: nodes.h:529
static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, Oid collation, VariableStatData *vardata, Datum constval, Oid consttype)
Definition: selfuncs.c:577
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:651
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:164
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:70
uintptr_t Datum
Definition: postgres.h:367
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:82
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
#define Assert(condition)
Definition: c.h:745
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:134
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:63
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32

◆ nulltestsel()

Selectivity nulltestsel ( PlannerInfo root,
NullTestType  nulltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1695 of file selfuncs.c.

References CLAMP_PROBABILITY, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, IsA, ReleaseVariableStats, VariableStatData::statsTuple, and VariableStatData::var.

Referenced by clause_selectivity(), and clauselist_selectivity_simple().

1697 {
1698  VariableStatData vardata;
1699  double selec;
1700 
1701  examine_variable(root, arg, varRelid, &vardata);
1702 
1703  if (HeapTupleIsValid(vardata.statsTuple))
1704  {
1705  Form_pg_statistic stats;
1706  double freq_null;
1707 
1708  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1709  freq_null = stats->stanullfrac;
1710 
1711  switch (nulltesttype)
1712  {
1713  case IS_NULL:
1714 
1715  /*
1716  * Use freq_null directly.
1717  */
1718  selec = freq_null;
1719  break;
1720  case IS_NOT_NULL:
1721 
1722  /*
1723  * Select not unknown (not null) values. Calculate from
1724  * freq_null.
1725  */
1726  selec = 1.0 - freq_null;
1727  break;
1728  default:
1729  elog(ERROR, "unrecognized nulltesttype: %d",
1730  (int) nulltesttype);
1731  return (Selectivity) 0; /* keep compiler quiet */
1732  }
1733  }
1734  else if (vardata.var && IsA(vardata.var, Var) &&
1735  ((Var *) vardata.var)->varattno < 0)
1736  {
1737  /*
1738  * There are no stats for system columns, but we know they are never
1739  * NULL.
1740  */
1741  selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1742  }
1743  else
1744  {
1745  /*
1746  * No ANALYZE stats available, so make a guess
1747  */
1748  switch (nulltesttype)
1749  {
1750  case IS_NULL:
1751  selec = DEFAULT_UNK_SEL;
1752  break;
1753  case IS_NOT_NULL:
1754  selec = DEFAULT_NOT_UNK_SEL;
1755  break;
1756  default:
1757  elog(ERROR, "unrecognized nulltesttype: %d",
1758  (int) nulltesttype);
1759  return (Selectivity) 0; /* keep compiler quiet */
1760  }
1761  }
1762 
1763  ReleaseVariableStats(vardata);
1764 
1765  /* result should be in range, but make sure... */
1766  CLAMP_PROBABILITY(selec);
1767 
1768  return (Selectivity) selec;
1769 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
double Selectivity
Definition: nodes.h:662
Definition: primnodes.h:181
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:53
#define ERROR
Definition: elog.h:43
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:52
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define elog(elevel,...)
Definition: elog.h:214

◆ rowcomparesel()

Selectivity rowcomparesel ( PlannerInfo root,
RowCompareExpr clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 2170 of file selfuncs.c.

References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.

Referenced by clause_selectivity().

2173 {
2174  Selectivity s1;
2175  Oid opno = linitial_oid(clause->opnos);
2176  Oid inputcollid = linitial_oid(clause->inputcollids);
2177  List *opargs;
2178  bool is_join_clause;
2179 
2180  /* Build equivalent arg list for single operator */
2181  opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2182 
2183  /*
2184  * Decide if it's a join clause. This should match clausesel.c's
2185  * treat_as_join_clause(), except that we intentionally consider only the
2186  * leading columns and not the rest of the clause.
2187  */
2188  if (varRelid != 0)
2189  {
2190  /*
2191  * Caller is forcing restriction mode (eg, because we are examining an
2192  * inner indexscan qual).
2193  */
2194  is_join_clause = false;
2195  }
2196  else if (sjinfo == NULL)
2197  {
2198  /*
2199  * It must be a restriction clause, since it's being evaluated at a
2200  * scan node.
2201  */
2202  is_join_clause = false;
2203  }
2204  else
2205  {
2206  /*
2207  * Otherwise, it's a join if there's more than one relation used.
2208  */
2209  is_join_clause = (NumRelids((Node *) opargs) > 1);
2210  }
2211 
2212  if (is_join_clause)
2213  {
2214  /* Estimate selectivity for a join clause. */
2215  s1 = join_selectivity(root, opno,
2216  opargs,
2217  inputcollid,
2218  jointype,
2219  sjinfo);
2220  }
2221  else
2222  {
2223  /* Estimate selectivity for a restriction clause. */
2224  s1 = restriction_selectivity(root, opno,
2225  opargs,
2226  inputcollid,
2227  varRelid);
2228  }
2229 
2230  return s1;
2231 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1766
Definition: nodes.h:529
double Selectivity
Definition: nodes.h:662
unsigned int Oid
Definition: postgres_ext.h:31
#define linitial(l)
Definition: pg_list.h:195
char * s1
#define linitial_oid(l)
Definition: pg_list.h:197
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1805
List * inputcollids
Definition: primnodes.h:1073
Definition: pg_list.h:50
int NumRelids(Node *clause)
Definition: clauses.c:2133

◆ scalararraysel()

Selectivity scalararraysel ( PlannerInfo root,
ScalarArrayOpExpr clause,
bool  is_join_clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1813 of file selfuncs.c.

References generate_unaccent_rules::args, ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, estimate_expression_value(), exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.

Referenced by clause_selectivity().

1819 {
1820  Oid operator = clause->opno;
1821  bool useOr = clause->useOr;
1822  bool isEquality = false;
1823  bool isInequality = false;
1824  Node *leftop;
1825  Node *rightop;
1826  Oid nominal_element_type;
1827  Oid nominal_element_collation;
1828  TypeCacheEntry *typentry;
1829  RegProcedure oprsel;
1830  FmgrInfo oprselproc;
1831  Selectivity s1;
1832  Selectivity s1disjoint;
1833 
1834  /* First, deconstruct the expression */
1835  Assert(list_length(clause->args) == 2);
1836  leftop = (Node *) linitial(clause->args);
1837  rightop = (Node *) lsecond(clause->args);
1838 
1839  /* aggressively reduce both sides to constants */
1840  leftop = estimate_expression_value(root, leftop);
1841  rightop = estimate_expression_value(root, rightop);
1842 
1843  /* get nominal (after relabeling) element type of rightop */
1844  nominal_element_type = get_base_element_type(exprType(rightop));
1845  if (!OidIsValid(nominal_element_type))
1846  return (Selectivity) 0.5; /* probably shouldn't happen */
1847  /* get nominal collation, too, for generating constants */
1848  nominal_element_collation = exprCollation(rightop);
1849 
1850  /* look through any binary-compatible relabeling of rightop */
1851  rightop = strip_array_coercion(rightop);
1852 
1853  /*
1854  * Detect whether the operator is the default equality or inequality
1855  * operator of the array element type.
1856  */
1857  typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1858  if (OidIsValid(typentry->eq_opr))
1859  {
1860  if (operator == typentry->eq_opr)
1861  isEquality = true;
1862  else if (get_negator(operator) == typentry->eq_opr)
1863  isInequality = true;
1864  }
1865 
1866  /*
1867  * If it is equality or inequality, we might be able to estimate this as a
1868  * form of array containment; for instance "const = ANY(column)" can be
1869  * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1870  * that, and returns the selectivity estimate if successful, or -1 if not.
1871  */
1872  if ((isEquality || isInequality) && !is_join_clause)
1873  {
1874  s1 = scalararraysel_containment(root, leftop, rightop,
1875  nominal_element_type,
1876  isEquality, useOr, varRelid);
1877  if (s1 >= 0.0)
1878  return s1;
1879  }
1880 
1881  /*
1882  * Look up the underlying operator's selectivity estimator. Punt if it
1883  * hasn't got one.
1884  */
1885  if (is_join_clause)
1886  oprsel = get_oprjoin(operator);
1887  else
1888  oprsel = get_oprrest(operator);
1889  if (!oprsel)
1890  return (Selectivity) 0.5;
1891  fmgr_info(oprsel, &oprselproc);
1892 
1893  /*
1894  * In the array-containment check above, we must only believe that an
1895  * operator is equality or inequality if it is the default btree equality
1896  * operator (or its negator) for the element type, since those are the
1897  * operators that array containment will use. But in what follows, we can
1898  * be a little laxer, and also believe that any operators using eqsel() or
1899  * neqsel() as selectivity estimator act like equality or inequality.
1900  */
1901  if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1902  isEquality = true;
1903  else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1904  isInequality = true;
1905 
1906  /*
1907  * We consider three cases:
1908  *
1909  * 1. rightop is an Array constant: deconstruct the array, apply the
1910  * operator's selectivity function for each array element, and merge the
1911  * results in the same way that clausesel.c does for AND/OR combinations.
1912  *
1913  * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1914  * function for each element of the ARRAY[] construct, and merge.
1915  *
1916  * 3. otherwise, make a guess ...
1917  */
1918  if (rightop && IsA(rightop, Const))
1919  {
1920  Datum arraydatum = ((Const *) rightop)->constvalue;
1921  bool arrayisnull = ((Const *) rightop)->constisnull;
1922  ArrayType *arrayval;
1923  int16 elmlen;
1924  bool elmbyval;
1925  char elmalign;
1926  int num_elems;
1927  Datum *elem_values;
1928  bool *elem_nulls;
1929  int i;
1930 
1931  if (arrayisnull) /* qual can't succeed if null array */
1932  return (Selectivity) 0.0;
1933  arrayval = DatumGetArrayTypeP(arraydatum);
1935  &elmlen, &elmbyval, &elmalign);
1936  deconstruct_array(arrayval,
1937  ARR_ELEMTYPE(arrayval),
1938  elmlen, elmbyval, elmalign,
1939  &elem_values, &elem_nulls, &num_elems);
1940 
1941  /*
1942  * For generic operators, we assume the probability of success is
1943  * independent for each array element. But for "= ANY" or "<> ALL",
1944  * if the array elements are distinct (which'd typically be the case)
1945  * then the probabilities are disjoint, and we should just sum them.
1946  *
1947  * If we were being really tense we would try to confirm that the
1948  * elements are all distinct, but that would be expensive and it
1949  * doesn't seem to be worth the cycles; it would amount to penalizing
1950  * well-written queries in favor of poorly-written ones. However, we
1951  * do protect ourselves a little bit by checking whether the
1952  * disjointness assumption leads to an impossible (out of range)
1953  * probability; if so, we fall back to the normal calculation.
1954  */
1955  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1956 
1957  for (i = 0; i < num_elems; i++)
1958  {
1959  List *args;
1960  Selectivity s2;
1961 
1962  args = list_make2(leftop,
1963  makeConst(nominal_element_type,
1964  -1,
1965  nominal_element_collation,
1966  elmlen,
1967  elem_values[i],
1968  elem_nulls[i],
1969  elmbyval));
1970  if (is_join_clause)
1971  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1972  clause->inputcollid,
1973  PointerGetDatum(root),
1974  ObjectIdGetDatum(operator),
1975  PointerGetDatum(args),
1976  Int16GetDatum(jointype),
1977  PointerGetDatum(sjinfo)));
1978  else
1979  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1980  clause->inputcollid,
1981  PointerGetDatum(root),
1982  ObjectIdGetDatum(operator),
1983  PointerGetDatum(args),
1984  Int32GetDatum(varRelid)));
1985 
1986  if (useOr)
1987  {
1988  s1 = s1 + s2 - s1 * s2;
1989  if (isEquality)
1990  s1disjoint += s2;
1991  }
1992  else
1993  {
1994  s1 = s1 * s2;
1995  if (isInequality)
1996  s1disjoint += s2 - 1.0;
1997  }
1998  }
1999 
2000  /* accept disjoint-probability estimate if in range */
2001  if ((useOr ? isEquality : isInequality) &&
2002  s1disjoint >= 0.0 && s1disjoint <= 1.0)
2003  s1 = s1disjoint;
2004  }
2005  else if (rightop && IsA(rightop, ArrayExpr) &&
2006  !((ArrayExpr *) rightop)->multidims)
2007  {
2008  ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
2009  int16 elmlen;
2010  bool elmbyval;
2011  ListCell *l;
2012 
2013  get_typlenbyval(arrayexpr->element_typeid,
2014  &elmlen, &elmbyval);
2015 
2016  /*
2017  * We use the assumption of disjoint probabilities here too, although
2018  * the odds of equal array elements are rather higher if the elements
2019  * are not all constants (which they won't be, else constant folding
2020  * would have reduced the ArrayExpr to a Const). In this path it's
2021  * critical to have the sanity check on the s1disjoint estimate.
2022  */
2023  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
2024 
2025  foreach(l, arrayexpr->elements)
2026  {
2027  Node *elem = (Node *) lfirst(l);
2028  List *args;
2029  Selectivity s2;
2030 
2031  /*
2032  * Theoretically, if elem isn't of nominal_element_type we should
2033  * insert a RelabelType, but it seems unlikely that any operator
2034  * estimation function would really care ...
2035  */
2036  args = list_make2(leftop, elem);
2037  if (is_join_clause)
2038  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2039  clause->inputcollid,
2040  PointerGetDatum(root),
2041  ObjectIdGetDatum(operator),
2042  PointerGetDatum(args),
2043  Int16GetDatum(jointype),
2044  PointerGetDatum(sjinfo)));
2045  else
2046  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2047  clause->inputcollid,
2048  PointerGetDatum(root),
2049  ObjectIdGetDatum(operator),
2050  PointerGetDatum(args),
2051  Int32GetDatum(varRelid)));
2052 
2053  if (useOr)
2054  {
2055  s1 = s1 + s2 - s1 * s2;
2056  if (isEquality)
2057  s1disjoint += s2;
2058  }
2059  else
2060  {
2061  s1 = s1 * s2;
2062  if (isInequality)
2063  s1disjoint += s2 - 1.0;
2064  }
2065  }
2066 
2067  /* accept disjoint-probability estimate if in range */
2068  if ((useOr ? isEquality : isInequality) &&
2069  s1disjoint >= 0.0 && s1disjoint <= 1.0)
2070  s1 = s1disjoint;
2071  }
2072  else
2073  {
2074  CaseTestExpr *dummyexpr;
2075  List *args;
2076  Selectivity s2;
2077  int i;
2078 
2079  /*
2080  * We need a dummy rightop to pass to the operator selectivity
2081  * routine. It can be pretty much anything that doesn't look like a
2082  * constant; CaseTestExpr is a convenient choice.
2083  */
2084  dummyexpr = makeNode(CaseTestExpr);
2085  dummyexpr->typeId = nominal_element_type;
2086  dummyexpr->typeMod = -1;
2087  dummyexpr->collation = clause->inputcollid;
2088  args = list_make2(leftop, dummyexpr);
2089  if (is_join_clause)
2090  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2091  clause->inputcollid,
2092  PointerGetDatum(root),
2093  ObjectIdGetDatum(operator),
2094  PointerGetDatum(args),
2095  Int16GetDatum(jointype),
2096  PointerGetDatum(sjinfo)));
2097  else
2098  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2099  clause->inputcollid,
2100  PointerGetDatum(root),
2101  ObjectIdGetDatum(operator),
2102  PointerGetDatum(args),
2103  Int32GetDatum(varRelid)));
2104  s1 = useOr ? 0.0 : 1.0;
2105 
2106  /*
2107  * Arbitrarily assume 10 elements in the eventual array value (see
2108  * also estimate_array_length). We don't risk an assumption of
2109  * disjoint probabilities here.
2110  */
2111  for (i = 0; i < 10; i++)
2112  {
2113  if (useOr)
2114  s1 = s1 + s2 - s1 * s2;
2115  else
2116  s1 = s1 * s2;
2117  }
2118  }
2119 
2120  /* result should be in range, but make sure... */
2121  CLAMP_PROBABILITY(s1);
2122 
2123  return s1;
2124 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
signed short int16
Definition: c.h:361
Definition: fmgr.h:56
RegProcedure get_oprjoin(Oid opno)
Definition: lsyscache.c:1493
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2288
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2159
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum FunctionCall5Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:1226
regproc RegProcedure
Definition: c.h:518
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:529
#define TYPECACHE_EQ_OPR
Definition: typcache.h:130
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1199
double Selectivity
Definition: nodes.h:662
unsigned int Oid
Definition: postgres_ext.h:31
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:299
#define OidIsValid(objectId)
Definition: c.h:651
#define lsecond(l)
Definition: pg_list.h:200
int32 typeMod
Definition: primnodes.h:973
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define linitial(l)
Definition: pg_list.h:195
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
char * s1
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
List * elements
Definition: primnodes.h:991
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1469
char * s2
#define DatumGetFloat8(X)
Definition: postgres.h:714
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define makeNode(_type_)
Definition: nodes.h:577
#define Assert(condition)
Definition: c.h:745
#define lfirst(lc)
Definition: pg_list.h:190
Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:169
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:719
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2139
Oid element_typeid
Definition: primnodes.h:990
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3483
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2709
#define Int32GetDatum(X)
Definition: postgres.h:479
int i
Oid get_negator(Oid opno)
Definition: lsyscache.c:1445
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:280
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1780
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 82 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

86 {
87  Selectivity selec;
88  VariableStatData vardata;
89  Datum constval;
90  TypeCacheEntry *typentry;
91  FmgrInfo *cmpfunc;
92 
93  /*
94  * rightop must be a variable, else punt.
95  */
96  examine_variable(root, rightop, varRelid, &vardata);
97  if (!vardata.rel)
98  {
99  ReleaseVariableStats(vardata);
100  return -1.0;
101  }
102 
103  /*
104  * leftop must be a constant, else punt.
105  */
106  if (!IsA(leftop, Const))
107  {
108  ReleaseVariableStats(vardata);
109  return -1.0;
110  }
111  if (((Const *) leftop)->constisnull)
112  {
113  /* qual can't succeed if null on left */
114  ReleaseVariableStats(vardata);
115  return (Selectivity) 0.0;
116  }
117  constval = ((Const *) leftop)->constvalue;
118 
119  /* Get element type's default comparison function */
120  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
121  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
122  {
123  ReleaseVariableStats(vardata);
124  return -1.0;
125  }
126  cmpfunc = &typentry->cmp_proc_finfo;
127 
128  /*
129  * If the operator is <>, swap ANY/ALL, then invert the result later.
130  */
131  if (!isEquality)
132  useOr = !useOr;
133 
134  /* Get array element stats for var, if available */
135  if (HeapTupleIsValid(vardata.statsTuple) &&
136  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
137  {
138  Form_pg_statistic stats;
139  AttStatsSlot sslot;
140  AttStatsSlot hslot;
141 
142  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
143 
144  /* MCELEM will be an array of same type as element */
145  if (get_attstatsslot(&sslot, vardata.statsTuple,
146  STATISTIC_KIND_MCELEM, InvalidOid,
148  {
149  /* For ALL case, also get histogram of distinct-element counts */
150  if (useOr ||
151  !get_attstatsslot(&hslot, vardata.statsTuple,
152  STATISTIC_KIND_DECHIST, InvalidOid,
154  memset(&hslot, 0, sizeof(hslot));
155 
156  /*
157  * For = ANY, estimate as var @> ARRAY[const].
158  *
159  * For = ALL, estimate as var <@ ARRAY[const].
160  */
161  if (useOr)
163  sslot.nvalues,
164  sslot.numbers,
165  sslot.nnumbers,
166  &constval, 1,
167  OID_ARRAY_CONTAINS_OP,
168  typentry);
169  else
170  selec = mcelem_array_contained_selec(sslot.values,
171  sslot.nvalues,
172  sslot.numbers,
173  sslot.nnumbers,
174  &constval, 1,
175  hslot.numbers,
176  hslot.nnumbers,
177  OID_ARRAY_CONTAINED_OP,
178  typentry);
179 
180  free_attstatsslot(&hslot);
181  free_attstatsslot(&sslot);
182  }
183  else
184  {
185  /* No most-common-elements info, so do without */
186  if (useOr)
187  selec = mcelem_array_contain_overlap_selec(NULL, 0,
188  NULL, 0,
189  &constval, 1,
190  OID_ARRAY_CONTAINS_OP,
191  typentry);
192  else
193  selec = mcelem_array_contained_selec(NULL, 0,
194  NULL, 0,
195  &constval, 1,
196  NULL, 0,
197  OID_ARRAY_CONTAINED_OP,
198  typentry);
199  }
200 
201  /*
202  * MCE stats count only non-null rows, so adjust for null rows.
203  */
204  selec *= (1.0 - stats->stanullfrac);
205  }
206  else
207  {
208  /* No stats at all, so do without */
209  if (useOr)
210  selec = mcelem_array_contain_overlap_selec(NULL, 0,
211  NULL, 0,
212  &constval, 1,
213  OID_ARRAY_CONTAINS_OP,
214  typentry);
215  else
216  selec = mcelem_array_contained_selec(NULL, 0,
217  NULL, 0,
218  &constval, 1,
219  NULL, 0,
220  OID_ARRAY_CONTAINED_OP,
221  typentry);
222  /* we assume no nulls here, so no stanullfrac correction */
223  }
224 
225  ReleaseVariableStats(vardata);
226 
227  /*
228  * If the operator is <>, invert the results.
229  */
230  if (!isEquality)
231  selec = 1.0 - selec;
232 
233  CLAMP_PROBABILITY(selec);
234 
235  return selec;
236 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5247
RelOptInfo * rel
Definition: selfuncs.h:73
double Selectivity
Definition: nodes.h:662
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:651
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
FmgrInfo cmp_proc_finfo
Definition: typcache.h:75
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4727
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:136
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ statistic_proc_security_check()

bool statistic_proc_security_check ( VariableStatData vardata,
Oid  func_oid 
)

Definition at line 5247 of file selfuncs.c.

References VariableStatData::acl_ok, DEBUG2, ereport, errmsg_internal(), get_func_leakproof(), get_func_name(), and OidIsValid.

Referenced by calc_arraycontsel(), calc_hist_selectivity(), eqjoinsel(), get_variable_range(), histogram_selectivity(), ineq_histogram_selectivity(), mcv_selectivity(), scalararraysel_containment(), and var_eq_const().

5248 {
5249  if (vardata->acl_ok)
5250  return true;
5251 
5252  if (!OidIsValid(func_oid))
5253  return false;
5254 
5255  if (get_func_leakproof(func_oid))
5256  return true;
5257 
5258  ereport(DEBUG2,
5259  (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
5260  get_func_name(func_oid))));
5261  return false;
5262 }
bool get_func_leakproof(Oid funcid)
Definition: lsyscache.c:1749
#define OidIsValid(objectId)
Definition: c.h:651
char * get_func_name(Oid funcid)
Definition: lsyscache.c:1520
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911

◆ var_eq_const()

double var_eq_const ( VariableStatData vardata,
Oid  oproid,
Oid  collation,
Datum  constval,
bool  constisnull,
bool  varonleft,
bool  negate 
)

Definition at line 292 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, DatumGetBool, fmgr_info(), free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, VariableStatData::isunique, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, VariableStatData::rel, statistic_proc_security_check(), VariableStatData::statsTuple, RelOptInfo::tuples, and AttStatsSlot::values.

Referenced by boolvarsel(), eqsel_internal(), patternsel_common(), and prefix_selectivity().

295 {
296  double selec;
297  double nullfrac = 0.0;
298  bool isdefault;
299  Oid opfuncoid;
300 
301  /*
302  * If the constant is NULL, assume operator is strict and return zero, ie,
303  * operator will never return TRUE. (It's zero even for a negator op.)
304  */
305  if (constisnull)
306  return 0.0;
307 
308  /*
309  * Grab the nullfrac for use below. Note we allow use of nullfrac
310  * regardless of security check.
311  */
312  if (HeapTupleIsValid(vardata->statsTuple))
313  {
314  Form_pg_statistic stats;
315 
316  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
317  nullfrac = stats->stanullfrac;
318  }
319 
320  /*
321  * If we matched the var to a unique index or DISTINCT clause, assume
322  * there is exactly one match regardless of anything else. (This is
323  * slightly bogus, since the index or clause's equality operator might be
324  * different from ours, but it's much more likely to be right than
325  * ignoring the information.)
326  */
327  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
328  {
329  selec = 1.0 / vardata->rel->tuples;
330  }
331  else if (HeapTupleIsValid(vardata->statsTuple) &&
333  (opfuncoid = get_opcode(operator))))
334  {
335  AttStatsSlot sslot;
336  bool match = false;
337  int i;
338 
339  /*
340  * Is the constant "=" to any of the column's most common values?
341  * (Although the given operator may not really be "=", we will assume
342  * that seeing whether it returns TRUE is an appropriate test. If you
343  * don't like this, maybe you shouldn't be using eqsel for your
344  * operator...)
345  */
346  if (get_attstatsslot(&sslot, vardata->statsTuple,
347  STATISTIC_KIND_MCV, InvalidOid,
349  {
350  LOCAL_FCINFO(fcinfo, 2);
351  FmgrInfo eqproc;
352 
353  fmgr_info(opfuncoid, &eqproc);
354 
355  /*
356  * Save a few cycles by setting up the fcinfo struct just once.
357  * Using FunctionCallInvoke directly also avoids failure if the
358  * eqproc returns NULL, though really equality functions should
359  * never do that.
360  */
361  InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
362  NULL, NULL);
363  fcinfo->args[0].isnull = false;
364  fcinfo->args[1].isnull = false;
365  /* be careful to apply operator right way 'round */
366  if (varonleft)
367  fcinfo->args[1].value = constval;
368  else
369  fcinfo->args[0].value = constval;
370 
371  for (i = 0; i < sslot.nvalues; i++)
372  {
373  Datum fresult;
374 
375  if (varonleft)
376  fcinfo->args[0].value = sslot.values[i];
377  else
378  fcinfo->args[1].value = sslot.values[i];
379  fcinfo->isnull = false;
380  fresult = FunctionCallInvoke(fcinfo);
381  if (!fcinfo->isnull && DatumGetBool(fresult))
382  {
383  match = true;
384  break;
385  }
386  }
387  }
388  else
389  {
390  /* no most-common-value info available */
391  i = 0; /* keep compiler quiet */
392  }
393 
394  if (match)
395  {
396  /*
397  * Constant is "=" to this common value. We know selectivity
398  * exactly (or as exactly as ANALYZE could calculate it, anyway).
399  */
400  selec = sslot.numbers[i];
401  }
402  else
403  {
404  /*
405  * Comparison is against a constant that is neither NULL nor any
406  * of the common values. Its selectivity cannot be more than
407  * this:
408  */
409  double sumcommon = 0.0;
410  double otherdistinct;
411 
412  for (i = 0; i < sslot.nnumbers; i++)
413  sumcommon += sslot.numbers[i];
414  selec = 1.0 - sumcommon - nullfrac;
415  CLAMP_PROBABILITY(selec);
416 
417  /*
418  * and in fact it's probably a good deal less. We approximate that
419  * all the not-common values share this remaining fraction
420  * equally, so we divide by the number of other distinct values.
421  */
422  otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
423  sslot.nnumbers;
424  if (otherdistinct > 1)
425  selec /= otherdistinct;
426 
427  /*
428  * Another cross-check: selectivity shouldn't be estimated as more
429  * than the least common "most common value".
430  */
431  if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
432  selec = sslot.numbers[sslot.nnumbers - 1];
433  }
434 
435  free_attstatsslot(&sslot);
436  }
437  else
438  {
439  /*
440  * No ANALYZE stats available, so make a guess using estimated number
441  * of distinct values and assuming they are equally common. (The guess
442  * is unlikely to be very good, but we do know a few special cases.)
443  */
444  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
445  }
446 
447  /* now adjust if we wanted <> rather than = */
448  if (negate)
449  selec = 1.0 - selec - nullfrac;
450 
451  /* result should be in range, but make sure... */
452  CLAMP_PROBABILITY(selec);
453 
454  return selec;
455 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5247
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5276
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1202
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

◆ var_eq_non_const()

double var_eq_non_const ( VariableStatData vardata,
Oid  oproid,
Oid  collation,
Node other,
bool  varonleft,
bool  negate 
)

Definition at line 463 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, CLAMP_PROBABILITY, free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by eqsel_internal().

466 {
467  double selec;
468  double nullfrac = 0.0;
469  bool isdefault;
470 
471  /*
472  * Grab the nullfrac for use below.
473  */
474  if (HeapTupleIsValid(vardata->statsTuple))
475  {
476  Form_pg_statistic stats;
477 
478  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
479  nullfrac = stats->stanullfrac;
480  }
481 
482  /*
483  * If we matched the var to a unique index or DISTINCT clause, assume
484  * there is exactly one match regardless of anything else. (This is
485  * slightly bogus, since the index or clause's equality operator might be
486  * different from ours, but it's much more likely to be right than
487  * ignoring the information.)
488  */
489  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
490  {
491  selec = 1.0 / vardata->rel->tuples;
492  }
493  else if (HeapTupleIsValid(vardata->statsTuple))
494  {
495  double ndistinct;
496  AttStatsSlot sslot;
497 
498  /*
499  * Search is for a value that we do not know a priori, but we will
500  * assume it is not NULL. Estimate the selectivity as non-null
501  * fraction divided by number of distinct values, so that we get a
502  * result averaged over all possible values whether common or
503  * uncommon. (Essentially, we are assuming that the not-yet-known
504  * comparison value is equally likely to be any of the possible
505  * values, regardless of their frequency in the table. Is that a good
506  * idea?)
507  */
508  selec = 1.0 - nullfrac;
509  ndistinct = get_variable_numdistinct(vardata, &isdefault);
510  if (ndistinct > 1)
511  selec /= ndistinct;
512 
513  /*
514  * Cross-check: selectivity should never be estimated as more than the
515  * most common value's.
516  */
517  if (get_attstatsslot(&sslot, vardata->statsTuple,
518  STATISTIC_KIND_MCV, InvalidOid,
520  {
521  if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
522  selec = sslot.numbers[0];
523  free_attstatsslot(&sslot);
524  }
525  }
526  else
527  {
528  /*
529  * No ANALYZE stats available, so make a guess using estimated number
530  * of distinct values and assuming they are equally common. (The guess
531  * is unlikely to be very good, but we do know a few special cases.)
532  */
533  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
534  }
535 
536  /* now adjust if we wanted <> rather than = */
537  if (negate)
538  selec = 1.0 - selec - nullfrac;
539 
540  /* result should be in range, but make sure... */
541  CLAMP_PROBABILITY(selec);
542 
543  return selec;
544 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5276
float4 * numbers
Definition: lsyscache.h:53
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3052
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3169

Variable Documentation

◆ get_index_stats_hook

PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook

Definition at line 145 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_variable().

◆ get_relation_stats_hook

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook

Definition at line 144 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_simple_variable().