PostgreSQL Source Code  git master
selfuncs.h File Reference
#include "access/htup.h"
#include "fmgr.h"
#include "nodes/pathnodes.h"
Include dependency graph for selfuncs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  VariableStatData
 
struct  GenericCosts
 

Macros

#define DEFAULT_EQ_SEL   0.005
 
#define DEFAULT_INEQ_SEL   0.3333333333333333
 
#define DEFAULT_RANGE_INEQ_SEL   0.005
 
#define DEFAULT_MATCH_SEL   0.005
 
#define DEFAULT_MATCHING_SEL   0.010
 
#define DEFAULT_NUM_DISTINCT   200
 
#define DEFAULT_UNK_SEL   0.005
 
#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)
 
#define CLAMP_PROBABILITY(p)
 
#define ReleaseVariableStats(vardata)
 

Typedefs

typedef struct VariableStatData VariableStatData
 
typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)
 
typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)
 

Functions

void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double generic_restriction_selectivity (PlannerInfo *root, Oid oproid, List *args, int varRelid, double default_selectivity)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, bool iseq, Datum constval, Oid consttype)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Node *other, bool varonleft, bool negate)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
Listget_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
Listadd_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 

Variables

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook
 
PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook
 

Macro Definition Documentation

◆ CLAMP_PROBABILITY

◆ DEFAULT_EQ_SEL

#define DEFAULT_EQ_SEL   0.005

Definition at line 34 of file selfuncs.h.

Referenced by _int_matchsel(), eqsel_internal(), int_query_opr_selec(), and neqjoinsel().

◆ DEFAULT_INEQ_SEL

◆ DEFAULT_MATCH_SEL

#define DEFAULT_MATCH_SEL   0.005

◆ DEFAULT_MATCHING_SEL

#define DEFAULT_MATCHING_SEL   0.010

Definition at line 46 of file selfuncs.h.

Referenced by matchingjoinsel(), and matchingsel().

◆ DEFAULT_NOT_UNK_SEL

#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)

Definition at line 53 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ DEFAULT_NUM_DISTINCT

#define DEFAULT_NUM_DISTINCT   200

Definition at line 49 of file selfuncs.h.

Referenced by cost_incremental_sort(), and get_variable_numdistinct().

◆ DEFAULT_RANGE_INEQ_SEL

#define DEFAULT_RANGE_INEQ_SEL   0.005

Definition at line 40 of file selfuncs.h.

Referenced by clauselist_selectivity_simple(), and default_range_selectivity().

◆ DEFAULT_UNK_SEL

#define DEFAULT_UNK_SEL   0.005

Definition at line 52 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ ReleaseVariableStats

Typedef Documentation

◆ get_index_stats_hook_type

typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Definition at line 125 of file selfuncs.h.

◆ get_relation_stats_hook_type

typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

Definition at line 120 of file selfuncs.h.

◆ VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

List* add_predicate_to_index_quals ( IndexOptInfo index,
List indexQuals 
)

Definition at line 6111 of file selfuncs.c.

References IndexOptInfo::indpred, lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().

6112 {
6113  List *predExtraQuals = NIL;
6114  ListCell *lc;
6115 
6116  if (index->indpred == NIL)
6117  return indexQuals;
6118 
6119  foreach(lc, index->indpred)
6120  {
6121  Node *predQual = (Node *) lfirst(lc);
6122  List *oneQual = list_make1(predQual);
6123 
6124  if (!predicate_implied_by(oneQual, indexQuals, false))
6125  predExtraQuals = list_concat(predExtraQuals, oneQual);
6126  }
6127  return list_concat(predExtraQuals, indexQuals);
6128 }
#define NIL
Definition: pg_list.h:65
Definition: nodes.h:529
List * list_concat(List *list1, const List *list2)
Definition: list.c:515
#define list_make1(x1)
Definition: pg_list.h:227
#define lfirst(lc)
Definition: pg_list.h:190
List * indpred
Definition: pathnodes.h:844
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:151
Definition: pg_list.h:50

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo root,
BoolTestType  booltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1478 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity().

1480 {
1481  VariableStatData vardata;
1482  double selec;
1483 
1484  examine_variable(root, arg, varRelid, &vardata);
1485 
1486  if (HeapTupleIsValid(vardata.statsTuple))
1487  {
1488  Form_pg_statistic stats;
1489  double freq_null;
1490  AttStatsSlot sslot;
1491 
1492  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1493  freq_null = stats->stanullfrac;
1494 
1495  if (get_attstatsslot(&sslot, vardata.statsTuple,
1496  STATISTIC_KIND_MCV, InvalidOid,
1498  && sslot.nnumbers > 0)
1499  {
1500  double freq_true;
1501  double freq_false;
1502 
1503  /*
1504  * Get first MCV frequency and derive frequency for true.
1505  */
1506  if (DatumGetBool(sslot.values[0]))
1507  freq_true = sslot.numbers[0];
1508  else
1509  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1510 
1511  /*
1512  * Next derive frequency for false. Then use these as appropriate
1513  * to derive frequency for each case.
1514  */
1515  freq_false = 1.0 - freq_true - freq_null;
1516 
1517  switch (booltesttype)
1518  {
1519  case IS_UNKNOWN:
1520  /* select only NULL values */
1521  selec = freq_null;
1522  break;
1523  case IS_NOT_UNKNOWN:
1524  /* select non-NULL values */
1525  selec = 1.0 - freq_null;
1526  break;
1527  case IS_TRUE:
1528  /* select only TRUE values */
1529  selec = freq_true;
1530  break;
1531  case IS_NOT_TRUE:
1532  /* select non-TRUE values */
1533  selec = 1.0 - freq_true;
1534  break;
1535  case IS_FALSE:
1536  /* select only FALSE values */
1537  selec = freq_false;
1538  break;
1539  case IS_NOT_FALSE:
1540  /* select non-FALSE values */
1541  selec = 1.0 - freq_false;
1542  break;
1543  default:
1544  elog(ERROR, "unrecognized booltesttype: %d",
1545  (int) booltesttype);
1546  selec = 0.0; /* Keep compiler quiet */
1547  break;
1548  }
1549 
1550  free_attstatsslot(&sslot);
1551  }
1552  else
1553  {
1554  /*
1555  * No most-common-value info available. Still have null fraction
1556  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1557  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1558  */
1559  switch (booltesttype)
1560  {
1561  case IS_UNKNOWN:
1562  /* select only NULL values */
1563  selec = freq_null;
1564  break;
1565  case IS_NOT_UNKNOWN:
1566  /* select non-NULL values */
1567  selec = 1.0 - freq_null;
1568  break;
1569  case IS_TRUE:
1570  case IS_FALSE:
1571  /* Assume we select half of the non-NULL values */
1572  selec = (1.0 - freq_null) / 2.0;
1573  break;
1574  case IS_NOT_TRUE:
1575  case IS_NOT_FALSE:
1576  /* Assume we select NULLs plus half of the non-NULLs */
1577  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1578  selec = (freq_null + 1.0) / 2.0;
1579  break;
1580  default:
1581  elog(ERROR, "unrecognized booltesttype: %d",
1582  (int) booltesttype);
1583  selec = 0.0; /* Keep compiler quiet */
1584  break;
1585  }
1586  }
1587  }
1588  else
1589  {
1590  /*
1591  * If we can't get variable statistics for the argument, perhaps
1592  * clause_selectivity can do something with it. We ignore the
1593  * possibility of a NULL value when using clause_selectivity, and just
1594  * assume the value is either TRUE or FALSE.
1595  */
1596  switch (booltesttype)
1597  {
1598  case IS_UNKNOWN:
1599  selec = DEFAULT_UNK_SEL;
1600  break;
1601  case IS_NOT_UNKNOWN:
1602  selec = DEFAULT_NOT_UNK_SEL;
1603  break;
1604  case IS_TRUE:
1605  case IS_NOT_FALSE:
1606  selec = (double) clause_selectivity(root, arg,
1607  varRelid,
1608  jointype, sjinfo);
1609  break;
1610  case IS_FALSE:
1611  case IS_NOT_TRUE:
1612  selec = 1.0 - (double) clause_selectivity(root, arg,
1613  varRelid,
1614  jointype, sjinfo);
1615  break;
1616  default:
1617  elog(ERROR, "unrecognized booltesttype: %d",
1618  (int) booltesttype);
1619  selec = 0.0; /* Keep compiler quiet */
1620  break;
1621  }
1622  }
1623 
1624  ReleaseVariableStats(vardata);
1625 
1626  /* result should be in range, but make sure... */
1627  CLAMP_PROBABILITY(selec);
1628 
1629  return (Selectivity) selec;
1630 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
double Selectivity
Definition: nodes.h:662
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:53
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
#define ERROR
Definition: elog.h:43
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:600
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:52
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define elog(elevel,...)
Definition: elog.h:214
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo root,
Node arg,
int  varRelid 
)

Definition at line 1450 of file selfuncs.c.

References BoolGetDatum, examine_variable(), HeapTupleIsValid, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity().

1451 {
1452  VariableStatData vardata;
1453  double selec;
1454 
1455  examine_variable(root, arg, varRelid, &vardata);
1456  if (HeapTupleIsValid(vardata.statsTuple))
1457  {
1458  /*
1459  * A boolean variable V is equivalent to the clause V = 't', so we
1460  * compute the selectivity as if that is what we have.
1461  */
1462  selec = var_eq_const(&vardata, BooleanEqualOperator,
1463  BoolGetDatum(true), false, true, false);
1464  }
1465  else
1466  {
1467  /* Otherwise, the default estimate is 0.5 */
1468  selec = 0.5;
1469  }
1470  ReleaseVariableStats(vardata);
1471  return selec;
1472 }
HeapTuple statsTuple
Definition: selfuncs.h:74
double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:290
#define BoolGetDatum(X)
Definition: postgres.h:402
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84

◆ estimate_array_length()

int estimate_array_length ( Node arrayexpr)

Definition at line 2073 of file selfuncs.c.

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

2074 {
2075  /* look through any binary-compatible relabeling of arrayexpr */
2076  arrayexpr = strip_array_coercion(arrayexpr);
2077 
2078  if (arrayexpr && IsA(arrayexpr, Const))
2079  {
2080  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2081  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2082  ArrayType *arrayval;
2083 
2084  if (arrayisnull)
2085  return 0;
2086  arrayval = DatumGetArrayTypeP(arraydatum);
2087  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2088  }
2089  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2090  !((ArrayExpr *) arrayexpr)->multidims)
2091  {
2092  return list_length(((ArrayExpr *) arrayexpr)->elements);
2093  }
2094  else
2095  {
2096  /* default guess --- see also scalararraysel */
2097  return 10;
2098  }
2099 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ARR_DIMS(a)
Definition: array.h:282
uintptr_t Datum
Definition: postgres.h:367
static int list_length(const List *l)
Definition: pg_list.h:169
#define ARR_NDIM(a)
Definition: array.h:278
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1721
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3657 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

3660 {
3661  VariableStatData vardata;
3662  double estfract,
3663  ndistinct,
3664  stanullfrac,
3665  avgfreq;
3666  bool isdefault;
3667  AttStatsSlot sslot;
3668 
3669  examine_variable(root, hashkey, 0, &vardata);
3670 
3671  /* Look up the frequency of the most common value, if available */
3672  *mcv_freq = 0.0;
3673 
3674  if (HeapTupleIsValid(vardata.statsTuple))
3675  {
3676  if (get_attstatsslot(&sslot, vardata.statsTuple,
3677  STATISTIC_KIND_MCV, InvalidOid,
3679  {
3680  /*
3681  * The first MCV stat is for the most common value.
3682  */
3683  if (sslot.nnumbers > 0)
3684  *mcv_freq = sslot.numbers[0];
3685  free_attstatsslot(&sslot);
3686  }
3687  }
3688 
3689  /* Get number of distinct values */
3690  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3691 
3692  /*
3693  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3694  * mcv_freq is known to be even higher than that, use it instead.
3695  */
3696  if (isdefault)
3697  {
3698  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3699  ReleaseVariableStats(vardata);
3700  return;
3701  }
3702 
3703  /* Get fraction that are null */
3704  if (HeapTupleIsValid(vardata.statsTuple))
3705  {
3706  Form_pg_statistic stats;
3707 
3708  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3709  stanullfrac = stats->stanullfrac;
3710  }
3711  else
3712  stanullfrac = 0.0;
3713 
3714  /* Compute avg freq of all distinct data values in raw relation */
3715  avgfreq = (1.0 - stanullfrac) / ndistinct;
3716 
3717  /*
3718  * Adjust ndistinct to account for restriction clauses. Observe we are
3719  * assuming that the data distribution is affected uniformly by the
3720  * restriction clauses!
3721  *
3722  * XXX Possibly better way, but much more expensive: multiply by
3723  * selectivity of rel's restriction clauses that mention the target Var.
3724  */
3725  if (vardata.rel && vardata.rel->tuples > 0)
3726  {
3727  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3728  ndistinct = clamp_row_est(ndistinct);
3729  }
3730 
3731  /*
3732  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3733  * number of buckets is less than the expected number of distinct values;
3734  * otherwise it is 1/ndistinct.
3735  */
3736  if (ndistinct > nbuckets)
3737  estfract = 1.0 / nbuckets;
3738  else
3739  estfract = 1.0 / ndistinct;
3740 
3741  /*
3742  * Adjust estimated bucketsize upward to account for skewed distribution.
3743  */
3744  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3745  estfract *= *mcv_freq / avgfreq;
3746 
3747  /*
3748  * Clamp bucketsize to sane range (the above adjustment could easily
3749  * produce an out-of-range result). We set the lower bound a little above
3750  * zero, since zero isn't a very sane result.
3751  */
3752  if (estfract < 1.0e-6)
3753  estfract = 1.0e-6;
3754  else if (estfract > 1.0)
3755  estfract = 1.0;
3756 
3757  *bucketsize_frac = (Selectivity) estfract;
3758 
3759  ReleaseVariableStats(vardata);
3760 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
double Selectivity
Definition: nodes.h:662
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5210
float4 * numbers
Definition: lsyscache.h:53
double rows
Definition: pathnodes.h:668
#define InvalidOid
Definition: postgres_ext.h:36
#define Max(x, y)
Definition: c.h:914
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
e
Definition: preproc-init.c:82
double clamp_row_est(double nrows)
Definition: costsize.c:191
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3776 of file selfuncs.c.

References hash_agg_entry_size(), AggClauseCosts::numAggs, Path::pathtarget, AggClauseCosts::transitionSpace, and PathTarget::width.

Referenced by add_paths_to_grouping_rel(), consider_groupingsets_paths(), and create_partial_grouping_paths().

3778 {
3779  Size hashentrysize = hash_agg_entry_size(agg_costs->numAggs,
3780  path->pathtarget->width,
3781  agg_costs->transitionSpace);
3782 
3783  /*
3784  * Note that this disregards the effect of fill-factor and growth policy
3785  * of the hash table. That's probably ok, given that the default
3786  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3787  * "double-in-size" growth policies here.
3788  */
3789  return hashentrysize * dNumGroups;
3790 }
PathTarget * pathtarget
Definition: pathnodes.h:1145
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1649
size_t Size
Definition: c.h:466
Size transitionSpace
Definition: pathnodes.h:64

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset 
)

Definition at line 3294 of file selfuncs.c.

References add_unique_group_var(), Assert, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), for_each_cell, HeapTupleIsValid, i, IS_SIMPLE_REL, VariableStatData::isunique, lappend(), lfirst, linitial, list_length(), list_member_int(), list_second_cell(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), cost_incremental_sort(), create_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().

3296 {
3297  List *varinfos = NIL;
3298  double srf_multiplier = 1.0;
3299  double numdistinct;
3300  ListCell *l;
3301  int i;
3302 
3303  /*
3304  * We don't ever want to return an estimate of zero groups, as that tends
3305  * to lead to division-by-zero and other unpleasantness. The input_rows
3306  * estimate is usually already at least 1, but clamp it just in case it
3307  * isn't.
3308  */
3309  input_rows = clamp_row_est(input_rows);
3310 
3311  /*
3312  * If no grouping columns, there's exactly one group. (This can't happen
3313  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3314  * corner cases with set operations.)
3315  */
3316  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3317  return 1.0;
3318 
3319  /*
3320  * Count groups derived from boolean grouping expressions. For other
3321  * expressions, find the unique Vars used, treating an expression as a Var
3322  * if we can find stats for it. For each one, record the statistical
3323  * estimate of number of distinct values (total in its table, without
3324  * regard for filtering).
3325  */
3326  numdistinct = 1.0;
3327 
3328  i = 0;
3329  foreach(l, groupExprs)
3330  {
3331  Node *groupexpr = (Node *) lfirst(l);
3332  double this_srf_multiplier;
3333  VariableStatData vardata;
3334  List *varshere;
3335  ListCell *l2;
3336 
3337  /* is expression in this grouping set? */
3338  if (pgset && !list_member_int(*pgset, i++))
3339  continue;
3340 
3341  /*
3342  * Set-returning functions in grouping columns are a bit problematic.
3343  * The code below will effectively ignore their SRF nature and come up
3344  * with a numdistinct estimate as though they were scalar functions.
3345  * We compensate by scaling up the end result by the largest SRF
3346  * rowcount estimate. (This will be an overestimate if the SRF
3347  * produces multiple copies of any output value, but it seems best to
3348  * assume the SRF's outputs are distinct. In any case, it's probably
3349  * pointless to worry too much about this without much better
3350  * estimates for SRF output rowcounts than we have today.)
3351  */
3352  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3353  if (srf_multiplier < this_srf_multiplier)
3354  srf_multiplier = this_srf_multiplier;
3355 
3356  /* Short-circuit for expressions returning boolean */
3357  if (exprType(groupexpr) == BOOLOID)
3358  {
3359  numdistinct *= 2.0;
3360  continue;
3361  }
3362 
3363  /*
3364  * If examine_variable is able to deduce anything about the GROUP BY
3365  * expression, treat it as a single variable even if it's really more
3366  * complicated.
3367  */
3368  examine_variable(root, groupexpr, 0, &vardata);
3369  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3370  {
3371  varinfos = add_unique_group_var(root, varinfos,
3372  groupexpr, &vardata);
3373  ReleaseVariableStats(vardata);
3374  continue;
3375  }
3376  ReleaseVariableStats(vardata);
3377 
3378  /*
3379  * Else pull out the component Vars. Handle PlaceHolderVars by
3380  * recursing into their arguments (effectively assuming that the
3381  * PlaceHolderVar doesn't change the number of groups, which boils
3382  * down to ignoring the possible addition of nulls to the result set).
3383  */
3384  varshere = pull_var_clause(groupexpr,
3388 
3389  /*
3390  * If we find any variable-free GROUP BY item, then either it is a
3391  * constant (and we can ignore it) or it contains a volatile function;
3392  * in the latter case we punt and assume that each input row will
3393  * yield a distinct group.
3394  */
3395  if (varshere == NIL)
3396  {
3397  if (contain_volatile_functions(groupexpr))
3398  return input_rows;
3399  continue;
3400  }
3401 
3402  /*
3403  * Else add variables to varinfos list
3404  */
3405  foreach(l2, varshere)
3406  {
3407  Node *var = (Node *) lfirst(l2);
3408 
3409  examine_variable(root, var, 0, &vardata);
3410  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3411  ReleaseVariableStats(vardata);
3412  }
3413  }
3414 
3415  /*
3416  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3417  * list.
3418  */
3419  if (varinfos == NIL)
3420  {
3421  /* Apply SRF multiplier as we would do in the long path */
3422  numdistinct *= srf_multiplier;
3423  /* Round off */
3424  numdistinct = ceil(numdistinct);
3425  /* Guard against out-of-range answers */
3426  if (numdistinct > input_rows)
3427  numdistinct = input_rows;
3428  if (numdistinct < 1.0)
3429  numdistinct = 1.0;
3430  return numdistinct;
3431  }
3432 
3433  /*
3434  * Group Vars by relation and estimate total numdistinct.
3435  *
3436  * For each iteration of the outer loop, we process the frontmost Var in
3437  * varinfos, plus all other Vars in the same relation. We remove these
3438  * Vars from the newvarinfos list for the next iteration. This is the
3439  * easiest way to group Vars of same rel together.
3440  */
3441  do
3442  {
3443  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3444  RelOptInfo *rel = varinfo1->rel;
3445  double reldistinct = 1;
3446  double relmaxndistinct = reldistinct;
3447  int relvarcount = 0;
3448  List *newvarinfos = NIL;
3449  List *relvarinfos = NIL;
3450 
3451  /*
3452  * Split the list of varinfos in two - one for the current rel, one
3453  * for remaining Vars on other rels.
3454  */
3455  relvarinfos = lappend(relvarinfos, varinfo1);
3456  for_each_cell(l, varinfos, list_second_cell(varinfos))
3457  {
3458  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3459 
3460  if (varinfo2->rel == varinfo1->rel)
3461  {
3462  /* varinfos on current rel */
3463  relvarinfos = lappend(relvarinfos, varinfo2);
3464  }
3465  else
3466  {
3467  /* not time to process varinfo2 yet */
3468  newvarinfos = lappend(newvarinfos, varinfo2);
3469  }
3470  }
3471 
3472  /*
3473  * Get the numdistinct estimate for the Vars of this rel. We
3474  * iteratively search for multivariate n-distinct with maximum number
3475  * of vars; assuming that each var group is independent of the others,
3476  * we multiply them together. Any remaining relvarinfos after no more
3477  * multivariate matches are found are assumed independent too, so
3478  * their individual ndistinct estimates are multiplied also.
3479  *
3480  * While iterating, count how many separate numdistinct values we
3481  * apply. We apply a fudge factor below, but only if we multiplied
3482  * more than one such values.
3483  */
3484  while (relvarinfos)
3485  {
3486  double mvndistinct;
3487 
3488  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3489  &mvndistinct))
3490  {
3491  reldistinct *= mvndistinct;
3492  if (relmaxndistinct < mvndistinct)
3493  relmaxndistinct = mvndistinct;
3494  relvarcount++;
3495  }
3496  else
3497  {
3498  foreach(l, relvarinfos)
3499  {
3500  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3501 
3502  reldistinct *= varinfo2->ndistinct;
3503  if (relmaxndistinct < varinfo2->ndistinct)
3504  relmaxndistinct = varinfo2->ndistinct;
3505  relvarcount++;
3506  }
3507 
3508  /* we're done with this relation */
3509  relvarinfos = NIL;
3510  }
3511  }
3512 
3513  /*
3514  * Sanity check --- don't divide by zero if empty relation.
3515  */
3516  Assert(IS_SIMPLE_REL(rel));
3517  if (rel->tuples > 0)
3518  {
3519  /*
3520  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3521  * fudge factor is because the Vars are probably correlated but we
3522  * don't know by how much. We should never clamp to less than the
3523  * largest ndistinct value for any of the Vars, though, since
3524  * there will surely be at least that many groups.
3525  */
3526  double clamp = rel->tuples;
3527 
3528  if (relvarcount > 1)
3529  {
3530  clamp *= 0.1;
3531  if (clamp < relmaxndistinct)
3532  {
3533  clamp = relmaxndistinct;
3534  /* for sanity in case some ndistinct is too large: */
3535  if (clamp > rel->tuples)
3536  clamp = rel->tuples;
3537  }
3538  }
3539  if (reldistinct > clamp)
3540  reldistinct = clamp;
3541 
3542  /*
3543  * Update the estimate based on the restriction selectivity,
3544  * guarding against division by zero when reldistinct is zero.
3545  * Also skip this if we know that we are returning all rows.
3546  */
3547  if (reldistinct > 0 && rel->rows < rel->tuples)
3548  {
3549  /*
3550  * Given a table containing N rows with n distinct values in a
3551  * uniform distribution, if we select p rows at random then
3552  * the expected number of distinct values selected is
3553  *
3554  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3555  *
3556  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3557  *
3558  * See "Approximating block accesses in database
3559  * organizations", S. B. Yao, Communications of the ACM,
3560  * Volume 20 Issue 4, April 1977 Pages 260-261.
3561  *
3562  * Alternatively, re-arranging the terms from the factorials,
3563  * this may be written as
3564  *
3565  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3566  *
3567  * This form of the formula is more efficient to compute in
3568  * the common case where p is larger than N/n. Additionally,
3569  * as pointed out by Dell'Era, if i << N for all terms in the
3570  * product, it can be approximated by
3571  *
3572  * n * (1 - ((N-p)/N)^(N/n))
3573  *
3574  * See "Expected distinct values when selecting from a bag
3575  * without replacement", Alberto Dell'Era,
3576  * http://www.adellera.it/investigations/distinct_balls/.
3577  *
3578  * The condition i << N is equivalent to n >> 1, so this is a
3579  * good approximation when the number of distinct values in
3580  * the table is large. It turns out that this formula also
3581  * works well even when n is small.
3582  */
3583  reldistinct *=
3584  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3585  rel->tuples / reldistinct));
3586  }
3587  reldistinct = clamp_row_est(reldistinct);
3588 
3589  /*
3590  * Update estimate of total distinct groups.
3591  */
3592  numdistinct *= reldistinct;
3593  }
3594 
3595  varinfos = newvarinfos;
3596  } while (varinfos != NIL);
3597 
3598  /* Now we can account for the effects of any SRFs */
3599  numdistinct *= srf_multiplier;
3600 
3601  /* Round off */
3602  numdistinct = ceil(numdistinct);
3603 
3604  /* Guard against out-of-range answers */
3605  if (numdistinct > input_rows)
3606  numdistinct = input_rows;
3607  if (numdistinct < 1.0)
3608  numdistinct = 1.0;
3609 
3610  return numdistinct;
3611 }
#define NIL
Definition: pg_list.h:65
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:571
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:177
HeapTuple statsTuple
Definition: selfuncs.h:74
#define for_each_cell(cell, lst, initcell)
Definition: pg_list.h:390
double tuples
Definition: pathnodes.h:705
Definition: nodes.h:529
List * pull_var_clause(Node *node, int flags)
Definition: var.c:535
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:726
double ndistinct
Definition: selfuncs.c:3177
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:638
#define linitial(l)
Definition: pg_list.h:195
bool list_member_int(const List *list, int datum)
Definition: list.c:654
static ListCell * list_second_cell(const List *l)
Definition: pg_list.h:139
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3811
List * lappend(List *list, void *datum)
Definition: list.c:321
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3181
double rows
Definition: pathnodes.h:668
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
#define Assert(condition)
Definition: c.h:738
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:174
int i
double clamp_row_est(double nrows)
Definition: costsize.c:191
Definition: pg_list.h:50
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:172
RelOptInfo * rel
Definition: selfuncs.c:3176

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4661 of file selfuncs.c.

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, PlannerInfo::append_rel_array, arg, Assert, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, RangeTblEntry::checkAsUser, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext(), MemSet, IndexOptInfo::ncolumns, NIL, IndexOptInfo::nkeycolumns, ObjectIdGetDatum, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, IndexOptInfo::rel, ReleaseSysCache(), RelOptInfo::relid, RangeTblEntry::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::securityQuals, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, VariableStatData::vartype, Var::vartype, and Var::vartypmod.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

4663 {
4664  Node *basenode;
4665  Relids varnos;
4666  RelOptInfo *onerel;
4667 
4668  /* Make sure we don't return dangling pointers in vardata */
4669  MemSet(vardata, 0, sizeof(VariableStatData));
4670 
4671  /* Save the exposed type of the expression */
4672  vardata->vartype = exprType(node);
4673 
4674  /* Look inside any binary-compatible relabeling */
4675 
4676  if (IsA(node, RelabelType))
4677  basenode = (Node *) ((RelabelType *) node)->arg;
4678  else
4679  basenode = node;
4680 
4681  /* Fast path for a simple Var */
4682 
4683  if (IsA(basenode, Var) &&
4684  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4685  {
4686  Var *var = (Var *) basenode;
4687 
4688  /* Set up result fields other than the stats tuple */
4689  vardata->var = basenode; /* return Var without relabeling */
4690  vardata->rel = find_base_rel(root, var->varno);
4691  vardata->atttype = var->vartype;
4692  vardata->atttypmod = var->vartypmod;
4693  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4694 
4695  /* Try to locate some stats */
4696  examine_simple_variable(root, var, vardata);
4697 
4698  return;
4699  }
4700 
4701  /*
4702  * Okay, it's a more complicated expression. Determine variable
4703  * membership. Note that when varRelid isn't zero, only vars of that
4704  * relation are considered "real" vars.
4705  */
4706  varnos = pull_varnos(basenode);
4707 
4708  onerel = NULL;
4709 
4710  switch (bms_membership(varnos))
4711  {
4712  case BMS_EMPTY_SET:
4713  /* No Vars at all ... must be pseudo-constant clause */
4714  break;
4715  case BMS_SINGLETON:
4716  if (varRelid == 0 || bms_is_member(varRelid, varnos))
4717  {
4718  onerel = find_base_rel(root,
4719  (varRelid ? varRelid : bms_singleton_member(varnos)));
4720  vardata->rel = onerel;
4721  node = basenode; /* strip any relabeling */
4722  }
4723  /* else treat it as a constant */
4724  break;
4725  case BMS_MULTIPLE:
4726  if (varRelid == 0)
4727  {
4728  /* treat it as a variable of a join relation */
4729  vardata->rel = find_join_rel(root, varnos);
4730  node = basenode; /* strip any relabeling */
4731  }
4732  else if (bms_is_member(varRelid, varnos))
4733  {
4734  /* ignore the vars belonging to other relations */
4735  vardata->rel = find_base_rel(root, varRelid);
4736  node = basenode; /* strip any relabeling */
4737  /* note: no point in expressional-index search here */
4738  }
4739  /* else treat it as a constant */
4740  break;
4741  }
4742 
4743  bms_free(varnos);
4744 
4745  vardata->var = node;
4746  vardata->atttype = exprType(node);
4747  vardata->atttypmod = exprTypmod(node);
4748 
4749  if (onerel)
4750  {
4751  /*
4752  * We have an expression in vars of a single relation. Try to match
4753  * it to expressional index columns, in hopes of finding some
4754  * statistics.
4755  *
4756  * Note that we consider all index columns including INCLUDE columns,
4757  * since there could be stats for such columns. But the test for
4758  * uniqueness needs to be warier.
4759  *
4760  * XXX it's conceivable that there are multiple matches with different
4761  * index opfamilies; if so, we need to pick one that matches the
4762  * operator we are estimating for. FIXME later.
4763  */
4764  ListCell *ilist;
4765 
4766  foreach(ilist, onerel->indexlist)
4767  {
4768  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4769  ListCell *indexpr_item;
4770  int pos;
4771 
4772  indexpr_item = list_head(index->indexprs);
4773  if (indexpr_item == NULL)
4774  continue; /* no expressions here... */
4775 
4776  for (pos = 0; pos < index->ncolumns; pos++)
4777  {
4778  if (index->indexkeys[pos] == 0)
4779  {
4780  Node *indexkey;
4781 
4782  if (indexpr_item == NULL)
4783  elog(ERROR, "too few entries in indexprs list");
4784  indexkey = (Node *) lfirst(indexpr_item);
4785  if (indexkey && IsA(indexkey, RelabelType))
4786  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4787  if (equal(node, indexkey))
4788  {
4789  /*
4790  * Found a match ... is it a unique index? Tests here
4791  * should match has_unique_index().
4792  */
4793  if (index->unique &&
4794  index->nkeycolumns == 1 &&
4795  pos == 0 &&
4796  (index->indpred == NIL || index->predOK))
4797  vardata->isunique = true;
4798 
4799  /*
4800  * Has it got stats? We only consider stats for
4801  * non-partial indexes, since partial indexes probably
4802  * don't reflect whole-relation statistics; the above
4803  * check for uniqueness is the only info we take from
4804  * a partial index.
4805  *
4806  * An index stats hook, however, must make its own
4807  * decisions about what to do with partial indexes.
4808  */
4809  if (get_index_stats_hook &&
4810  (*get_index_stats_hook) (root, index->indexoid,
4811  pos + 1, vardata))
4812  {
4813  /*
4814  * The hook took control of acquiring a stats
4815  * tuple. If it did supply a tuple, it'd better
4816  * have supplied a freefunc.
4817  */
4818  if (HeapTupleIsValid(vardata->statsTuple) &&
4819  !vardata->freefunc)
4820  elog(ERROR, "no function provided to release variable stats with");
4821  }
4822  else if (index->indpred == NIL)
4823  {
4824  vardata->statsTuple =
4826  ObjectIdGetDatum(index->indexoid),
4827  Int16GetDatum(pos + 1),
4828  BoolGetDatum(false));
4829  vardata->freefunc = ReleaseSysCache;
4830 
4831  if (HeapTupleIsValid(vardata->statsTuple))
4832  {
4833  /* Get index's table for permission check */
4834  RangeTblEntry *rte;
4835  Oid userid;
4836 
4837  rte = planner_rt_fetch(index->rel->relid, root);
4838  Assert(rte->rtekind == RTE_RELATION);
4839 
4840  /*
4841  * Use checkAsUser if it's set, in case we're
4842  * accessing the table via a view.
4843  */
4844  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4845 
4846  /*
4847  * For simplicity, we insist on the whole
4848  * table being selectable, rather than trying
4849  * to identify which column(s) the index
4850  * depends on. Also require all rows to be
4851  * selectable --- there must be no
4852  * securityQuals from security barrier views
4853  * or RLS policies.
4854  */
4855  vardata->acl_ok =
4856  rte->securityQuals == NIL &&
4857  (pg_class_aclcheck(rte->relid, userid,
4858  ACL_SELECT) == ACLCHECK_OK);
4859 
4860  /*
4861  * If the user doesn't have permissions to
4862  * access an inheritance child relation, check
4863  * the permissions of the table actually
4864  * mentioned in the query, since most likely
4865  * the user does have that permission. Note
4866  * that whole-table select privilege on the
4867  * parent doesn't quite guarantee that the
4868  * user could read all columns of the child.
4869  * But in practice it's unlikely that any
4870  * interesting security violation could result
4871  * from allowing access to the expression
4872  * index's stats, so we allow it anyway. See
4873  * similar code in examine_simple_variable()
4874  * for additional comments.
4875  */
4876  if (!vardata->acl_ok &&
4877  root->append_rel_array != NULL)
4878  {
4879  AppendRelInfo *appinfo;
4880  Index varno = index->rel->relid;
4881 
4882  appinfo = root->append_rel_array[varno];
4883  while (appinfo &&
4884  planner_rt_fetch(appinfo->parent_relid,
4885  root)->rtekind == RTE_RELATION)
4886  {
4887  varno = appinfo->parent_relid;
4888  appinfo = root->append_rel_array[varno];
4889  }
4890  if (varno != index->rel->relid)
4891  {
4892  /* Repeat access check on this rel */
4893  rte = planner_rt_fetch(varno, root);
4894  Assert(rte->rtekind == RTE_RELATION);
4895 
4896  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4897 
4898  vardata->acl_ok =
4899  rte->securityQuals == NIL &&
4900  (pg_class_aclcheck(rte->relid,
4901  userid,
4902  ACL_SELECT) == ACLCHECK_OK);
4903  }
4904  }
4905  }
4906  else
4907  {
4908  /* suppress leakproofness checks later */
4909  vardata->acl_ok = true;
4910  }
4911  }
4912  if (vardata->statsTuple)
4913  break;
4914  }
4915  indexpr_item = lnext(index->indexprs, indexpr_item);
4916  }
4917  }
4918  if (vardata->statsTuple)
4919  break;
4920  }
4921  }
4922 }
#define NIL
Definition: pg_list.h:65
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:439
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3033
HeapTuple statsTuple
Definition: selfuncs.h:74
Oid GetUserId(void)
Definition: miscinit.c:448
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:275
List * securityQuals
Definition: parsenodes.h:1125
RelOptInfo * rel
Definition: selfuncs.h:73
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:529
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:76
#define MemSet(start, val, len)
Definition: c.h:971
AttrNumber varattno
Definition: primnodes.h:186
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:181
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:4934
int32 atttypmod
Definition: selfuncs.h:79
Definition: type.h:89
RelOptInfo * rel
Definition: pathnodes.h:820
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2027
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
Oid vartype
Definition: primnodes.h:188
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:1138
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:373
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
Relids pull_varnos(Node *node)
Definition: var.c:95
Index relid
Definition: pathnodes.h:693
Index varno
Definition: primnodes.h:184
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
#define ACL_SELECT
Definition: parsenodes.h:75
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:577
struct AppendRelInfo ** append_rel_array
Definition: pathnodes.h:219
unsigned int Index
Definition: c.h:475
List * indexlist
Definition: pathnodes.h:702
#define BoolGetDatum(X)
Definition: postgres.h:402
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:738
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
int nkeycolumns
Definition: pathnodes.h:829
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:149
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4596
RTEKind rtekind
Definition: parsenodes.h:976
#define elog(elevel,...)
Definition: elog.h:214
void * arg
int * indexkeys
Definition: pathnodes.h:830
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:374
Index parent_relid
Definition: pathnodes.h:2229
List * indpred
Definition: pathnodes.h:844
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
List * indexprs
Definition: pathnodes.h:843
int32 vartypmod
Definition: primnodes.h:189

◆ generic_restriction_selectivity()

double generic_restriction_selectivity ( PlannerInfo root,
Oid  oproid,
List args,
int  varRelid,
double  default_selectivity 
)

Definition at line 906 of file selfuncs.c.

References CLAMP_PROBABILITY, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT, HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), ReleaseVariableStats, and VariableStatData::statsTuple.

Referenced by ltreeparentsel(), and matchingsel().

909 {
910  double selec;
911  VariableStatData vardata;
912  Node *other;
913  bool varonleft;
914 
915  /*
916  * If expression is not variable OP something or something OP variable,
917  * then punt and return the default estimate.
918  */
919  if (!get_restriction_variable(root, args, varRelid,
920  &vardata, &other, &varonleft))
921  return default_selectivity;
922 
923  /*
924  * If the something is a NULL constant, assume operator is strict and
925  * return zero, ie, operator will never return TRUE.
926  */
927  if (IsA(other, Const) &&
928  ((Const *) other)->constisnull)
929  {
930  ReleaseVariableStats(vardata);
931  return 0.0;
932  }
933 
934  if (IsA(other, Const))
935  {
936  /* Variable is being compared to a known non-null constant */
937  Datum constval = ((Const *) other)->constvalue;
938  FmgrInfo opproc;
939  double mcvsum;
940  double mcvsel;
941  double nullfrac;
942  int hist_size;
943 
944  fmgr_info(get_opcode(oproid), &opproc);
945 
946  /*
947  * Calculate the selectivity for the column's most common values.
948  */
949  mcvsel = mcv_selectivity(&vardata, &opproc, constval, varonleft,
950  &mcvsum);
951 
952  /*
953  * If the histogram is large enough, see what fraction of it matches
954  * the query, and assume that's representative of the non-MCV
955  * population. Otherwise use the default selectivity for the non-MCV
956  * population.
957  */
958  selec = histogram_selectivity(&vardata, &opproc,
959  constval, varonleft,
960  10, 1, &hist_size);
961  if (selec < 0)
962  {
963  /* Nope, fall back on default */
964  selec = default_selectivity;
965  }
966  else if (hist_size < 100)
967  {
968  /*
969  * For histogram sizes from 10 to 100, we combine the histogram
970  * and default selectivities, putting increasingly more trust in
971  * the histogram for larger sizes.
972  */
973  double hist_weight = hist_size / 100.0;
974 
975  selec = selec * hist_weight +
976  default_selectivity * (1.0 - hist_weight);
977  }
978 
979  /* In any case, don't believe extremely small or large estimates. */
980  if (selec < 0.0001)
981  selec = 0.0001;
982  else if (selec > 0.9999)
983  selec = 0.9999;
984 
985  /* Don't forget to account for nulls. */
986  if (HeapTupleIsValid(vardata.statsTuple))
987  nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
988  else
989  nullfrac = 0.0;
990 
991  /*
992  * Now merge the results from the MCV and histogram calculations,
993  * realizing that the histogram covers only the non-null values that
994  * are not listed in MCV.
995  */
996  selec *= 1.0 - nullfrac - mcvsum;
997  selec += mcvsel;
998  }
999  else
1000  {
1001  /* Comparison value is not constant, so we can't do anything */
1002  selec = default_selectivity;
1003  }
1004 
1005  ReleaseVariableStats(vardata);
1006 
1007  /* result should be in range, but make sure... */
1008  CLAMP_PROBABILITY(selec);
1009 
1010  return selec;
1011 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4539
Definition: nodes.h:529
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
uintptr_t Datum
Definition: postgres.h:367
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:816
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:725
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1153
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 5893 of file selfuncs.c.

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, IndexOptInfo::pages, IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, GenericCosts::spc_random_page_cost, RelOptInfo::tuples, and IndexOptInfo::tuples.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

5897 {
5898  IndexOptInfo *index = path->indexinfo;
5899  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
5900  List *indexOrderBys = path->indexorderbys;
5901  Cost indexStartupCost;
5902  Cost indexTotalCost;
5903  Selectivity indexSelectivity;
5904  double indexCorrelation;
5905  double numIndexPages;
5906  double numIndexTuples;
5907  double spc_random_page_cost;
5908  double num_sa_scans;
5909  double num_outer_scans;
5910  double num_scans;
5911  double qual_op_cost;
5912  double qual_arg_cost;
5913  List *selectivityQuals;
5914  ListCell *l;
5915 
5916  /*
5917  * If the index is partial, AND the index predicate with the explicitly
5918  * given indexquals to produce a more accurate idea of the index
5919  * selectivity.
5920  */
5921  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
5922 
5923  /*
5924  * Check for ScalarArrayOpExpr index quals, and estimate the number of
5925  * index scans that will be performed.
5926  */
5927  num_sa_scans = 1;
5928  foreach(l, indexQuals)
5929  {
5930  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
5931 
5932  if (IsA(rinfo->clause, ScalarArrayOpExpr))
5933  {
5934  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
5935  int alength = estimate_array_length(lsecond(saop->args));
5936 
5937  if (alength > 1)
5938  num_sa_scans *= alength;
5939  }
5940  }
5941 
5942  /* Estimate the fraction of main-table tuples that will be visited */
5943  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
5944  index->rel->relid,
5945  JOIN_INNER,
5946  NULL);
5947 
5948  /*
5949  * If caller didn't give us an estimate, estimate the number of index
5950  * tuples that will be visited. We do it in this rather peculiar-looking
5951  * way in order to get the right answer for partial indexes.
5952  */
5953  numIndexTuples = costs->numIndexTuples;
5954  if (numIndexTuples <= 0.0)
5955  {
5956  numIndexTuples = indexSelectivity * index->rel->tuples;
5957 
5958  /*
5959  * The above calculation counts all the tuples visited across all
5960  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
5961  * average per-indexscan number, so adjust. This is a handy place to
5962  * round to integer, too. (If caller supplied tuple estimate, it's
5963  * responsible for handling these considerations.)
5964  */
5965  numIndexTuples = rint(numIndexTuples / num_sa_scans);
5966  }
5967 
5968  /*
5969  * We can bound the number of tuples by the index size in any case. Also,
5970  * always estimate at least one tuple is touched, even when
5971  * indexSelectivity estimate is tiny.
5972  */
5973  if (numIndexTuples > index->tuples)
5974  numIndexTuples = index->tuples;
5975  if (numIndexTuples < 1.0)
5976  numIndexTuples = 1.0;
5977 
5978  /*
5979  * Estimate the number of index pages that will be retrieved.
5980  *
5981  * We use the simplistic method of taking a pro-rata fraction of the total
5982  * number of index pages. In effect, this counts only leaf pages and not
5983  * any overhead such as index metapage or upper tree levels.
5984  *
5985  * In practice access to upper index levels is often nearly free because
5986  * those tend to stay in cache under load; moreover, the cost involved is
5987  * highly dependent on index type. We therefore ignore such costs here
5988  * and leave it to the caller to add a suitable charge if needed.
5989  */
5990  if (index->pages > 1 && index->tuples > 1)
5991  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
5992  else
5993  numIndexPages = 1.0;
5994 
5995  /* fetch estimated page cost for tablespace containing index */
5997  &spc_random_page_cost,
5998  NULL);
5999 
6000  /*
6001  * Now compute the disk access costs.
6002  *
6003  * The above calculations are all per-index-scan. However, if we are in a
6004  * nestloop inner scan, we can expect the scan to be repeated (with
6005  * different search keys) for each row of the outer relation. Likewise,
6006  * ScalarArrayOpExpr quals result in multiple index scans. This creates
6007  * the potential for cache effects to reduce the number of disk page
6008  * fetches needed. We want to estimate the average per-scan I/O cost in
6009  * the presence of caching.
6010  *
6011  * We use the Mackert-Lohman formula (see costsize.c for details) to
6012  * estimate the total number of page fetches that occur. While this
6013  * wasn't what it was designed for, it seems a reasonable model anyway.
6014  * Note that we are counting pages not tuples anymore, so we take N = T =
6015  * index size, as if there were one "tuple" per page.
6016  */
6017  num_outer_scans = loop_count;
6018  num_scans = num_sa_scans * num_outer_scans;
6019 
6020  if (num_scans > 1)
6021  {
6022  double pages_fetched;
6023 
6024  /* total page fetches ignoring cache effects */
6025  pages_fetched = numIndexPages * num_scans;
6026 
6027  /* use Mackert and Lohman formula to adjust for cache effects */
6028  pages_fetched = index_pages_fetched(pages_fetched,
6029  index->pages,
6030  (double) index->pages,
6031  root);
6032 
6033  /*
6034  * Now compute the total disk access cost, and then report a pro-rated
6035  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
6036  * since that's internal to the indexscan.)
6037  */
6038  indexTotalCost = (pages_fetched * spc_random_page_cost)
6039  / num_outer_scans;
6040  }
6041  else
6042  {
6043  /*
6044  * For a single index scan, we just charge spc_random_page_cost per
6045  * page touched.
6046  */
6047  indexTotalCost = numIndexPages * spc_random_page_cost;
6048  }
6049 
6050  /*
6051  * CPU cost: any complex expressions in the indexquals will need to be
6052  * evaluated once at the start of the scan to reduce them to runtime keys
6053  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
6054  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6055  * indexqual operator. Because we have numIndexTuples as a per-scan
6056  * number, we have to multiply by num_sa_scans to get the correct result
6057  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
6058  * ORDER BY expressions.
6059  *
6060  * Note: this neglects the possible costs of rechecking lossy operators.
6061  * Detecting that that might be needed seems more expensive than it's
6062  * worth, though, considering all the other inaccuracies here ...
6063  */
6064  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
6065  index_other_operands_eval_cost(root, indexOrderBys);
6066  qual_op_cost = cpu_operator_cost *
6067  (list_length(indexQuals) + list_length(indexOrderBys));
6068 
6069  indexStartupCost = qual_arg_cost;
6070  indexTotalCost += qual_arg_cost;
6071  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6072 
6073  /*
6074  * Generic assumption about index correlation: there isn't any.
6075  */
6076  indexCorrelation = 0.0;
6077 
6078  /*
6079  * Return everything to caller.
6080  */
6081  costs->indexStartupCost = indexStartupCost;
6082  costs->indexTotalCost = indexTotalCost;
6083  costs->indexSelectivity = indexSelectivity;
6084  costs->indexCorrelation = indexCorrelation;
6085  costs->numIndexPages = numIndexPages;
6086  costs->numIndexTuples = numIndexTuples;
6087  costs->spc_random_page_cost = spc_random_page_cost;
6088  costs->num_sa_scans = num_sa_scans;
6089 }
Selectivity indexSelectivity
Definition: selfuncs.h:109
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
IndexOptInfo * indexinfo
Definition: pathnodes.h:1207
double tuples
Definition: pathnodes.h:705
Oid reltablespace
Definition: pathnodes.h:819
List * indexclauses
Definition: pathnodes.h:1208
double Selectivity
Definition: nodes.h:662
double tuples
Definition: pathnodes.h:824
#define lsecond(l)
Definition: pg_list.h:200
Definition: type.h:89
BlockNumber pages
Definition: pathnodes.h:823
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:2073
RelOptInfo * rel
Definition: pathnodes.h:820
double num_sa_scans
Definition: selfuncs.h:116
double cpu_operator_cost
Definition: costsize.c:115
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:5809
Cost indexTotalCost
Definition: selfuncs.h:108
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
Index relid
Definition: pathnodes.h:693
Expr * clause
Definition: pathnodes.h:1985
double indexCorrelation
Definition: selfuncs.h:110
List * indexorderbys
Definition: pathnodes.h:1209
double spc_random_page_cost
Definition: selfuncs.h:115
double numIndexTuples
Definition: selfuncs.h:114
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:5839
#define lfirst(lc)
Definition: pg_list.h:190
static int list_length(const List *l)
Definition: pg_list.h:169
Cost indexStartupCost
Definition: selfuncs.h:107
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:69
Definition: pg_list.h:50
double cpu_index_tuple_cost
Definition: costsize.c:114
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:829
double Cost
Definition: nodes.h:663
double numIndexPages
Definition: selfuncs.h:113
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6111

◆ get_join_variables()

void get_join_variables ( PlannerInfo root,
List args,
SpecialJoinInfo sjinfo,
VariableStatData vardata1,
VariableStatData vardata2,
bool join_is_reversed 
)

Definition at line 4599 of file selfuncs.c.

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().

4602 {
4603  Node *left,
4604  *right;
4605 
4606  if (list_length(args) != 2)
4607  elog(ERROR, "join operator should take two arguments");
4608 
4609  left = (Node *) linitial(args);
4610  right = (Node *) lsecond(args);
4611 
4612  examine_variable(root, left, 0, vardata1);
4613  examine_variable(root, right, 0, vardata2);
4614 
4615  if (vardata1->rel &&
4616  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4617  *join_is_reversed = true; /* var1 is on RHS */
4618  else if (vardata2->rel &&
4619  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4620  *join_is_reversed = true; /* var2 is on LHS */
4621  else
4622  *join_is_reversed = false;
4623 }
RelOptInfo * rel
Definition: selfuncs.h:73
Definition: nodes.h:529
#define lsecond(l)
Definition: pg_list.h:200
Relids syn_lefthand
Definition: pathnodes.h:2177
Relids syn_righthand
Definition: pathnodes.h:2178
#define linitial(l)
Definition: pg_list.h:195
#define ERROR
Definition: elog.h:43
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
Relids relids
Definition: pathnodes.h:665
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
static int list_length(const List *l)
Definition: pg_list.h:169
#define elog(elevel,...)
Definition: elog.h:214

◆ get_quals_from_indexclauses()

List* get_quals_from_indexclauses ( List indexclauses)

Definition at line 5809 of file selfuncs.c.

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5810 {
5811  List *result = NIL;
5812  ListCell *lc;
5813 
5814  foreach(lc, indexclauses)
5815  {
5816  IndexClause *iclause = lfirst_node(IndexClause, lc);
5817  ListCell *lc2;
5818 
5819  foreach(lc2, iclause->indexquals)
5820  {
5821  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5822 
5823  result = lappend(result, rinfo);
5824  }
5825  }
5826  return result;
5827 }
#define NIL
Definition: pg_list.h:65
#define lfirst_node(type, lc)
Definition: pg_list.h:193
List * indexquals
Definition: pathnodes.h:1254
List * lappend(List *list, void *datum)
Definition: list.c:321
Definition: pg_list.h:50

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo root,
List args,
int  varRelid,
VariableStatData vardata,
Node **  other,
bool varonleft 
)

Definition at line 4539 of file selfuncs.c.

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), generic_restriction_selectivity(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().

4542 {
4543  Node *left,
4544  *right;
4545  VariableStatData rdata;
4546 
4547  /* Fail if not a binary opclause (probably shouldn't happen) */
4548  if (list_length(args) != 2)
4549  return false;
4550 
4551  left = (Node *) linitial(args);
4552  right = (Node *) lsecond(args);
4553 
4554  /*
4555  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4556  * relations will be treated as pseudoconstants.
4557  */
4558  examine_variable(root, left, varRelid, vardata);
4559  examine_variable(root, right, varRelid, &rdata);
4560 
4561  /*
4562  * If one side is a variable and the other not, we win.
4563  */
4564  if (vardata->rel && rdata.rel == NULL)
4565  {
4566  *varonleft = true;
4567  *other = estimate_expression_value(root, rdata.var);
4568  /* Assume we need no ReleaseVariableStats(rdata) here */
4569  return true;
4570  }
4571 
4572  if (vardata->rel == NULL && rdata.rel)
4573  {
4574  *varonleft = false;
4575  *other = estimate_expression_value(root, vardata->var);
4576  /* Assume we need no ReleaseVariableStats(*vardata) here */
4577  *vardata = rdata;
4578  return true;
4579  }
4580 
4581  /* Oops, clause has wrong structure (probably var op var) */
4582  ReleaseVariableStats(*vardata);
4583  ReleaseVariableStats(rdata);
4584 
4585  return false;
4586 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2288
RelOptInfo * rel
Definition: selfuncs.h:73
Definition: nodes.h:529
#define lsecond(l)
Definition: pg_list.h:200
#define linitial(l)
Definition: pg_list.h:195
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData vardata,
bool isdefault 
)

Definition at line 5210 of file selfuncs.c.

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().

5211 {
5212  double stadistinct;
5213  double stanullfrac = 0.0;
5214  double ntuples;
5215 
5216  *isdefault = false;
5217 
5218  /*
5219  * Determine the stadistinct value to use. There are cases where we can
5220  * get an estimate even without a pg_statistic entry, or can get a better
5221  * value than is in pg_statistic. Grab stanullfrac too if we can find it
5222  * (otherwise, assume no nulls, for lack of any better idea).
5223  */
5224  if (HeapTupleIsValid(vardata->statsTuple))
5225  {
5226  /* Use the pg_statistic entry */
5227  Form_pg_statistic stats;
5228 
5229  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5230  stadistinct = stats->stadistinct;
5231  stanullfrac = stats->stanullfrac;
5232  }
5233  else if (vardata->vartype == BOOLOID)
5234  {
5235  /*
5236  * Special-case boolean columns: presumably, two distinct values.
5237  *
5238  * Are there any other datatypes we should wire in special estimates
5239  * for?
5240  */
5241  stadistinct = 2.0;
5242  }
5243  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5244  {
5245  /*
5246  * If the Var represents a column of a VALUES RTE, assume it's unique.
5247  * This could of course be very wrong, but it should tend to be true
5248  * in well-written queries. We could consider examining the VALUES'
5249  * contents to get some real statistics; but that only works if the
5250  * entries are all constants, and it would be pretty expensive anyway.
5251  */
5252  stadistinct = -1.0; /* unique (and all non null) */
5253  }
5254  else
5255  {
5256  /*
5257  * We don't keep statistics for system columns, but in some cases we
5258  * can infer distinctness anyway.
5259  */
5260  if (vardata->var && IsA(vardata->var, Var))
5261  {
5262  switch (((Var *) vardata->var)->varattno)
5263  {
5265  stadistinct = -1.0; /* unique (and all non null) */
5266  break;
5268  stadistinct = 1.0; /* only 1 value */
5269  break;
5270  default:
5271  stadistinct = 0.0; /* means "unknown" */
5272  break;
5273  }
5274  }
5275  else
5276  stadistinct = 0.0; /* means "unknown" */
5277 
5278  /*
5279  * XXX consider using estimate_num_groups on expressions?
5280  */
5281  }
5282 
5283  /*
5284  * If there is a unique index or DISTINCT clause for the variable, assume
5285  * it is unique no matter what pg_statistic says; the statistics could be
5286  * out of date, or we might have found a partial unique index that proves
5287  * the var is unique for this query. However, we'd better still believe
5288  * the null-fraction statistic.
5289  */
5290  if (vardata->isunique)
5291  stadistinct = -1.0 * (1.0 - stanullfrac);
5292 
5293  /*
5294  * If we had an absolute estimate, use that.
5295  */
5296  if (stadistinct > 0.0)
5297  return clamp_row_est(stadistinct);
5298 
5299  /*
5300  * Otherwise we need to get the relation size; punt if not available.
5301  */
5302  if (vardata->rel == NULL)
5303  {
5304  *isdefault = true;
5305  return DEFAULT_NUM_DISTINCT;
5306  }
5307  ntuples = vardata->rel->tuples;
5308  if (ntuples <= 0.0)
5309  {
5310  *isdefault = true;
5311  return DEFAULT_NUM_DISTINCT;
5312  }
5313 
5314  /*
5315  * If we had a relative estimate, use that.
5316  */
5317  if (stadistinct < 0.0)
5318  return clamp_row_est(-stadistinct * ntuples);
5319 
5320  /*
5321  * With no data, estimate ndistinct = ntuples if the table is small, else
5322  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5323  * that the behavior isn't discontinuous.
5324  */
5325  if (ntuples < DEFAULT_NUM_DISTINCT)
5326  return clamp_row_est(ntuples);
5327 
5328  *isdefault = true;
5329  return DEFAULT_NUM_DISTINCT;
5330 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
Definition: primnodes.h:181
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define TableOidAttributeNumber
Definition: sysattr.h:26
RTEKind rtekind
Definition: pathnodes.h:695
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:49
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21
double clamp_row_est(double nrows)
Definition: costsize.c:191

◆ histogram_selectivity()

double histogram_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Datum  constval,
bool  varonleft,
int  min_hist_size,
int  n_skip,
int *  hist_size 
)

Definition at line 816 of file selfuncs.c.

References Assert, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nvalues, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), and patternsel_common().

820 {
821  double result;
822  AttStatsSlot sslot;
823 
824  /* check sanity of parameters */
825  Assert(n_skip >= 0);
826  Assert(min_hist_size > 2 * n_skip);
827 
828  if (HeapTupleIsValid(vardata->statsTuple) &&
829  statistic_proc_security_check(vardata, opproc->fn_oid) &&
830  get_attstatsslot(&sslot, vardata->statsTuple,
831  STATISTIC_KIND_HISTOGRAM, InvalidOid,
833  {
834  *hist_size = sslot.nvalues;
835  if (sslot.nvalues >= min_hist_size)
836  {
837  LOCAL_FCINFO(fcinfo, 2);
838  int nmatch = 0;
839  int i;
840 
841  /*
842  * We invoke the opproc "by hand" so that we won't fail on NULL
843  * results. Such cases won't arise for normal comparison
844  * functions, but generic_restriction_selectivity could perhaps be
845  * used with operators that can return NULL. A small side benefit
846  * is to not need to re-initialize the fcinfo struct from scratch
847  * each time.
848  */
849  InitFunctionCallInfoData(*fcinfo, opproc, 2, sslot.stacoll,
850  NULL, NULL);
851  fcinfo->args[0].isnull = false;
852  fcinfo->args[1].isnull = false;
853  /* be careful to apply operator right way 'round */
854  if (varonleft)
855  fcinfo->args[1].value = constval;
856  else
857  fcinfo->args[0].value = constval;
858 
859  for (i = n_skip; i < sslot.nvalues - n_skip; i++)
860  {
861  Datum fresult;
862 
863  if (varonleft)
864  fcinfo->args[0].value = sslot.values[i];
865  else
866  fcinfo->args[1].value = sslot.values[i];
867  fcinfo->isnull = false;
868  fresult = FunctionCallInvoke(fcinfo);
869  if (!fcinfo->isnull && DatumGetBool(fresult))
870  nmatch++;
871  }
872  result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
873  }
874  else
875  result = -1;
876  free_attstatsslot(&sslot);
877  }
878  else
879  {
880  *hist_size = 0;
881  result = -1;
882  }
883 
884  return result;
885 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5181
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
#define Assert(condition)
Definition: c.h:738
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ index_other_operands_eval_cost()

Cost index_other_operands_eval_cost ( PlannerInfo root,
List indexquals 
)

Definition at line 5839 of file selfuncs.c.

References OpExpr::args, ScalarArrayOpExpr::args, cost_qual_eval_node(), elog, ERROR, IsA, lfirst, lsecond, nodeTag, QualCost::per_tuple, RowCompareExpr::rargs, and QualCost::startup.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5840 {
5841  Cost qual_arg_cost = 0;
5842  ListCell *lc;
5843 
5844  foreach(lc, indexquals)
5845  {
5846  Expr *clause = (Expr *) lfirst(lc);
5847  Node *other_operand;
5848  QualCost index_qual_cost;
5849 
5850  /*
5851  * Index quals will have RestrictInfos, indexorderbys won't. Look
5852  * through RestrictInfo if present.
5853  */
5854  if (IsA(clause, RestrictInfo))
5855  clause = ((RestrictInfo *) clause)->clause;
5856 
5857  if (IsA(clause, OpExpr))
5858  {
5859  OpExpr *op = (OpExpr *) clause;
5860 
5861  other_operand = (Node *) lsecond(op->args);
5862  }
5863  else if (IsA(clause, RowCompareExpr))
5864  {
5865  RowCompareExpr *rc = (RowCompareExpr *) clause;
5866 
5867  other_operand = (Node *) rc->rargs;
5868  }
5869  else if (IsA(clause, ScalarArrayOpExpr))
5870  {
5871  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5872 
5873  other_operand = (Node *) lsecond(saop->args);
5874  }
5875  else if (IsA(clause, NullTest))
5876  {
5877  other_operand = NULL;
5878  }
5879  else
5880  {
5881  elog(ERROR, "unsupported indexqual type: %d",
5882  (int) nodeTag(clause));
5883  other_operand = NULL; /* keep compiler quiet */
5884  }
5885 
5886  cost_qual_eval_node(&index_qual_cost, other_operand, root);
5887  qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5888  }
5889  return qual_arg_cost;
5890 }
void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
Definition: costsize.c:4069
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
Definition: nodes.h:529
#define lsecond(l)
Definition: pg_list.h:200
Cost startup
Definition: pathnodes.h:45
Cost per_tuple
Definition: pathnodes.h:46
#define ERROR
Definition: elog.h:43
#define lfirst(lc)
Definition: pg_list.h:190
#define nodeTag(nodeptr)
Definition: nodes.h:534
#define elog(elevel,...)
Definition: elog.h:214
List * args
Definition: primnodes.h:522
double Cost
Definition: nodes.h:663

◆ ineq_histogram_selectivity()

double ineq_histogram_selectivity ( PlannerInfo root,
VariableStatData vardata,
FmgrInfo opproc,
bool  isgt,
bool  iseq,
Datum  constval,
Oid  consttype 
)

Definition at line 1029 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, convert_to_scalar(), DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_actual_variable_range(), get_attstatsslot(), get_variable_numdistinct(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::nnumbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, AttStatsSlot::staop, statistic_proc_security_check(), VariableStatData::statsTuple, val, AttStatsSlot::values, and VariableStatData::vartype.

Referenced by prefix_selectivity(), and scalarineqsel().

1033 {
1034  double hist_selec;
1035  AttStatsSlot sslot;
1036 
1037  hist_selec = -1.0;
1038 
1039  /*
1040  * Someday, ANALYZE might store more than one histogram per rel/att,
1041  * corresponding to more than one possible sort ordering defined for the
1042  * column type. However, to make that work we will need to figure out
1043  * which staop to search for --- it's not necessarily the one we have at
1044  * hand! (For example, we might have a '<=' operator rather than the '<'
1045  * operator that will appear in staop.) For now, assume that whatever
1046  * appears in pg_statistic is sorted the same way our operator sorts, or
1047  * the reverse way if isgt is true.
1048  */
1049  if (HeapTupleIsValid(vardata->statsTuple) &&
1050  statistic_proc_security_check(vardata, opproc->fn_oid) &&
1051  get_attstatsslot(&sslot, vardata->statsTuple,
1052  STATISTIC_KIND_HISTOGRAM, InvalidOid,
1054  {
1055  if (sslot.nvalues > 1)
1056  {
1057  /*
1058  * Use binary search to find the desired location, namely the
1059  * right end of the histogram bin containing the comparison value,
1060  * which is the leftmost entry for which the comparison operator
1061  * succeeds (if isgt) or fails (if !isgt). (If the given operator
1062  * isn't actually sort-compatible with the histogram, you'll get
1063  * garbage results ... but probably not any more garbage-y than
1064  * you would have from the old linear search.)
1065  *
1066  * In this loop, we pay no attention to whether the operator iseq
1067  * or not; that detail will be mopped up below. (We cannot tell,
1068  * anyway, whether the operator thinks the values are equal.)
1069  *
1070  * If the binary search accesses the first or last histogram
1071  * entry, we try to replace that endpoint with the true column min
1072  * or max as found by get_actual_variable_range(). This
1073  * ameliorates misestimates when the min or max is moving as a
1074  * result of changes since the last ANALYZE. Note that this could
1075  * result in effectively including MCVs into the histogram that
1076  * weren't there before, but we don't try to correct for that.
1077  */
1078  double histfrac;
1079  int lobound = 0; /* first possible slot to search */
1080  int hibound = sslot.nvalues; /* last+1 slot to search */
1081  bool have_end = false;
1082 
1083  /*
1084  * If there are only two histogram entries, we'll want up-to-date
1085  * values for both. (If there are more than two, we need at most
1086  * one of them to be updated, so we deal with that within the
1087  * loop.)
1088  */
1089  if (sslot.nvalues == 2)
1090  have_end = get_actual_variable_range(root,
1091  vardata,
1092  sslot.staop,
1093  &sslot.values[0],
1094  &sslot.values[1]);
1095 
1096  while (lobound < hibound)
1097  {
1098  int probe = (lobound + hibound) / 2;
1099  bool ltcmp;
1100 
1101  /*
1102  * If we find ourselves about to compare to the first or last
1103  * histogram entry, first try to replace it with the actual
1104  * current min or max (unless we already did so above).
1105  */
1106  if (probe == 0 && sslot.nvalues > 2)
1107  have_end = get_actual_variable_range(root,
1108  vardata,
1109  sslot.staop,
1110  &sslot.values[0],
1111  NULL);
1112  else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
1113  have_end = get_actual_variable_range(root,
1114  vardata,
1115  sslot.staop,
1116  NULL,
1117  &sslot.values[probe]);
1118 
1119  ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
1120  sslot.stacoll,
1121  sslot.values[probe],
1122  constval));
1123  if (isgt)
1124  ltcmp = !ltcmp;
1125  if (ltcmp)
1126  lobound = probe + 1;
1127  else
1128  hibound = probe;
1129  }
1130 
1131  if (lobound <= 0)
1132  {
1133  /*
1134  * Constant is below lower histogram boundary. More
1135  * precisely, we have found that no entry in the histogram
1136  * satisfies the inequality clause (if !isgt) or they all do
1137  * (if isgt). We estimate that that's true of the entire
1138  * table, so set histfrac to 0.0 (which we'll flip to 1.0
1139  * below, if isgt).
1140  */
1141  histfrac = 0.0;
1142  }
1143  else if (lobound >= sslot.nvalues)
1144  {
1145  /*
1146  * Inverse case: constant is above upper histogram boundary.
1147  */
1148  histfrac = 1.0;
1149  }
1150  else
1151  {
1152  /* We have values[i-1] <= constant <= values[i]. */
1153  int i = lobound;
1154  double eq_selec = 0;
1155  double val,
1156  high,
1157  low;
1158  double binfrac;
1159 
1160  /*
1161  * In the cases where we'll need it below, obtain an estimate
1162  * of the selectivity of "x = constval". We use a calculation
1163  * similar to what var_eq_const() does for a non-MCV constant,
1164  * ie, estimate that all distinct non-MCV values occur equally
1165  * often. But multiplication by "1.0 - sumcommon - nullfrac"
1166  * will be done by our caller, so we shouldn't do that here.
1167  * Therefore we can't try to clamp the estimate by reference
1168  * to the least common MCV; the result would be too small.
1169  *
1170  * Note: since this is effectively assuming that constval
1171  * isn't an MCV, it's logically dubious if constval in fact is
1172  * one. But we have to apply *some* correction for equality,
1173  * and anyway we cannot tell if constval is an MCV, since we
1174  * don't have a suitable equality operator at hand.
1175  */
1176  if (i == 1 || isgt == iseq)
1177  {
1178  double otherdistinct;
1179  bool isdefault;
1180  AttStatsSlot mcvslot;
1181 
1182  /* Get estimated number of distinct values */
1183  otherdistinct = get_variable_numdistinct(vardata,
1184  &isdefault);
1185 
1186  /* Subtract off the number of known MCVs */
1187  if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1188  STATISTIC_KIND_MCV, InvalidOid,
1190  {
1191  otherdistinct -= mcvslot.nnumbers;
1192  free_attstatsslot(&mcvslot);
1193  }
1194 
1195  /* If result doesn't seem sane, leave eq_selec at 0 */
1196  if (otherdistinct > 1)
1197  eq_selec = 1.0 / otherdistinct;
1198  }
1199 
1200  /*
1201  * Convert the constant and the two nearest bin boundary
1202  * values to a uniform comparison scale, and do a linear
1203  * interpolation within this bin.
1204  */
1205  if (convert_to_scalar(constval, consttype, sslot.stacoll,
1206  &val,
1207  sslot.values[i - 1], sslot.values[i],
1208  vardata->vartype,
1209  &low, &high))
1210  {
1211  if (high <= low)
1212  {
1213  /* cope if bin boundaries appear identical */
1214  binfrac = 0.5;
1215  }
1216  else if (val <= low)
1217  binfrac = 0.0;
1218  else if (val >= high)
1219  binfrac = 1.0;
1220  else
1221  {
1222  binfrac = (val - low) / (high - low);
1223 
1224  /*
1225  * Watch out for the possibility that we got a NaN or
1226  * Infinity from the division. This can happen
1227  * despite the previous checks, if for example "low"
1228  * is -Infinity.
1229  */
1230  if (isnan(binfrac) ||
1231  binfrac < 0.0 || binfrac > 1.0)
1232  binfrac = 0.5;
1233  }
1234  }
1235  else
1236  {
1237  /*
1238  * Ideally we'd produce an error here, on the grounds that
1239  * the given operator shouldn't have scalarXXsel
1240  * registered as its selectivity func unless we can deal
1241  * with its operand types. But currently, all manner of
1242  * stuff is invoking scalarXXsel, so give a default
1243  * estimate until that can be fixed.
1244  */
1245  binfrac = 0.5;
1246  }
1247 
1248  /*
1249  * Now, compute the overall selectivity across the values
1250  * represented by the histogram. We have i-1 full bins and
1251  * binfrac partial bin below the constant.
1252  */
1253  histfrac = (double) (i - 1) + binfrac;
1254  histfrac /= (double) (sslot.nvalues - 1);
1255 
1256  /*
1257  * At this point, histfrac is an estimate of the fraction of
1258  * the population represented by the histogram that satisfies
1259  * "x <= constval". Somewhat remarkably, this statement is
1260  * true regardless of which operator we were doing the probes
1261  * with, so long as convert_to_scalar() delivers reasonable
1262  * results. If the probe constant is equal to some histogram
1263  * entry, we would have considered the bin to the left of that
1264  * entry if probing with "<" or ">=", or the bin to the right
1265  * if probing with "<=" or ">"; but binfrac would have come
1266  * out as 1.0 in the first case and 0.0 in the second, leading
1267  * to the same histfrac in either case. For probe constants
1268  * between histogram entries, we find the same bin and get the
1269  * same estimate with any operator.
1270  *
1271  * The fact that the estimate corresponds to "x <= constval"
1272  * and not "x < constval" is because of the way that ANALYZE
1273  * constructs the histogram: each entry is, effectively, the
1274  * rightmost value in its sample bucket. So selectivity
1275  * values that are exact multiples of 1/(histogram_size-1)
1276  * should be understood as estimates including a histogram
1277  * entry plus everything to its left.
1278  *
1279  * However, that breaks down for the first histogram entry,
1280  * which necessarily is the leftmost value in its sample
1281  * bucket. That means the first histogram bin is slightly
1282  * narrower than the rest, by an amount equal to eq_selec.
1283  * Another way to say that is that we want "x <= leftmost" to
1284  * be estimated as eq_selec not zero. So, if we're dealing
1285  * with the first bin (i==1), rescale to make that true while
1286  * adjusting the rest of that bin linearly.
1287  */
1288  if (i == 1)
1289  histfrac += eq_selec * (1.0 - binfrac);
1290 
1291  /*
1292  * "x <= constval" is good if we want an estimate for "<=" or
1293  * ">", but if we are estimating for "<" or ">=", we now need
1294  * to decrease the estimate by eq_selec.
1295  */
1296  if (isgt == iseq)
1297  histfrac -= eq_selec;
1298  }
1299 
1300  /*
1301  * Now the estimate is finished for "<" and "<=" cases. If we are
1302  * estimating for ">" or ">=", flip it.
1303  */
1304  hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1305 
1306  /*
1307  * The histogram boundaries are only approximate to begin with,
1308  * and may well be out of date anyway. Therefore, don't believe
1309  * extremely small or large selectivity estimates --- unless we
1310  * got actual current endpoint values from the table, in which
1311  * case just do the usual sanity clamp. Somewhat arbitrarily, we
1312  * set the cutoff for other cases at a hundredth of the histogram
1313  * resolution.
1314  */
1315  if (have_end)
1316  CLAMP_PROBABILITY(hist_selec);
1317  else
1318  {
1319  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1320 
1321  if (hist_selec < cutoff)
1322  hist_selec = cutoff;
1323  else if (hist_selec > 1.0 - cutoff)
1324  hist_selec = 1.0 - cutoff;
1325  }
1326  }
1327 
1328  free_attstatsslot(&sslot);
1329  }
1330 
1331  return hist_selec;
1332 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5181
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1152
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
Definition: selfuncs.c:3967
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5210
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
Definition: selfuncs.c:5476
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
Datum * values
Definition: lsyscache.h:50
int i
long val
Definition: informix.c:664
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ mcv_selectivity()

double mcv_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Datum  constval,
bool  varonleft,
double *  sumcommonp 
)

Definition at line 725 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::numbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), networksel(), patternsel_common(), and scalarineqsel().

728 {
729  double mcv_selec,
730  sumcommon;
731  AttStatsSlot sslot;
732  int i;
733 
734  mcv_selec = 0.0;
735  sumcommon = 0.0;
736 
737  if (HeapTupleIsValid(vardata->statsTuple) &&
738  statistic_proc_security_check(vardata, opproc->fn_oid) &&
739  get_attstatsslot(&sslot, vardata->statsTuple,
740  STATISTIC_KIND_MCV, InvalidOid,
742  {
743  LOCAL_FCINFO(fcinfo, 2);
744 
745  /*
746  * We invoke the opproc "by hand" so that we won't fail on NULL
747  * results. Such cases won't arise for normal comparison functions,
748  * but generic_restriction_selectivity could perhaps be used with
749  * operators that can return NULL. A small side benefit is to not
750  * need to re-initialize the fcinfo struct from scratch each time.
751  */
752  InitFunctionCallInfoData(*fcinfo, opproc, 2, sslot.stacoll,
753  NULL, NULL);
754  fcinfo->args[0].isnull = false;
755  fcinfo->args[1].isnull = false;
756  /* be careful to apply operator right way 'round */
757  if (varonleft)
758  fcinfo->args[1].value = constval;
759  else
760  fcinfo->args[0].value = constval;
761 
762  for (i = 0; i < sslot.nvalues; i++)
763  {
764  Datum fresult;
765 
766  if (varonleft)
767  fcinfo->args[0].value = sslot.values[i];
768  else
769  fcinfo->args[1].value = sslot.values[i];
770  fcinfo->isnull = false;
771  fresult = FunctionCallInvoke(fcinfo);
772  if (!fcinfo->isnull && DatumGetBool(fresult))
773  mcv_selec += sslot.numbers[i];
774  sumcommon += sslot.numbers[i];
775  }
776  free_attstatsslot(&sslot);
777  }
778 
779  *sumcommonp = sumcommon;
780  return mcv_selec;
781 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5181
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ mergejoinscansel()

void mergejoinscansel ( PlannerInfo root,
Node clause,
Oid  opfamily,
int  strategy,
bool  nulls_first,
Selectivity leftstart,
Selectivity leftend,
Selectivity rightstart,
Selectivity rightend 
)

Definition at line 2841 of file selfuncs.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause(), OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.

Referenced by cached_scansel().

2845 {
2846  Node *left,
2847  *right;
2848  VariableStatData leftvar,
2849  rightvar;
2850  int op_strategy;
2851  Oid op_lefttype;
2852  Oid op_righttype;
2853  Oid opno,
2854  lsortop,
2855  rsortop,
2856  lstatop,
2857  rstatop,
2858  ltop,
2859  leop,
2860  revltop,
2861  revleop;
2862  bool isgt;
2863  Datum leftmin,
2864  leftmax,
2865  rightmin,
2866  rightmax;
2867  double selec;
2868 
2869  /* Set default results if we can't figure anything out. */
2870  /* XXX should default "start" fraction be a bit more than 0? */
2871  *leftstart = *rightstart = 0.0;
2872  *leftend = *rightend = 1.0;
2873 
2874  /* Deconstruct the merge clause */
2875  if (!is_opclause(clause))
2876  return; /* shouldn't happen */
2877  opno = ((OpExpr *) clause)->opno;
2878  left = get_leftop((Expr *) clause);
2879  right = get_rightop((Expr *) clause);
2880  if (!right)
2881  return; /* shouldn't happen */
2882 
2883  /* Look for stats for the inputs */
2884  examine_variable(root, left, 0, &leftvar);
2885  examine_variable(root, right, 0, &rightvar);
2886 
2887  /* Extract the operator's declared left/right datatypes */
2888  get_op_opfamily_properties(opno, opfamily, false,
2889  &op_strategy,
2890  &op_lefttype,
2891  &op_righttype);
2892  Assert(op_strategy == BTEqualStrategyNumber);
2893 
2894  /*
2895  * Look up the various operators we need. If we don't find them all, it
2896  * probably means the opfamily is broken, but we just fail silently.
2897  *
2898  * Note: we expect that pg_statistic histograms will be sorted by the '<'
2899  * operator, regardless of which sort direction we are considering.
2900  */
2901  switch (strategy)
2902  {
2903  case BTLessStrategyNumber:
2904  isgt = false;
2905  if (op_lefttype == op_righttype)
2906  {
2907  /* easy case */
2908  ltop = get_opfamily_member(opfamily,
2909  op_lefttype, op_righttype,
2911  leop = get_opfamily_member(opfamily,
2912  op_lefttype, op_righttype,
2914  lsortop = ltop;
2915  rsortop = ltop;
2916  lstatop = lsortop;
2917  rstatop = rsortop;
2918  revltop = ltop;
2919  revleop = leop;
2920  }
2921  else
2922  {
2923  ltop = get_opfamily_member(opfamily,
2924  op_lefttype, op_righttype,
2926  leop = get_opfamily_member(opfamily,
2927  op_lefttype, op_righttype,
2929  lsortop = get_opfamily_member(opfamily,
2930  op_lefttype, op_lefttype,
2932  rsortop = get_opfamily_member(opfamily,
2933  op_righttype, op_righttype,
2935  lstatop = lsortop;
2936  rstatop = rsortop;
2937  revltop = get_opfamily_member(opfamily,
2938  op_righttype, op_lefttype,
2940  revleop = get_opfamily_member(opfamily,
2941  op_righttype, op_lefttype,
2943  }
2944  break;
2946  /* descending-order case */
2947  isgt = true;
2948  if (op_lefttype == op_righttype)
2949  {
2950  /* easy case */
2951  ltop = get_opfamily_member(opfamily,
2952  op_lefttype, op_righttype,
2954  leop = get_opfamily_member(opfamily,
2955  op_lefttype, op_righttype,
2957  lsortop = ltop;
2958  rsortop = ltop;
2959  lstatop = get_opfamily_member(opfamily,
2960  op_lefttype, op_lefttype,
2962  rstatop = lstatop;
2963  revltop = ltop;
2964  revleop = leop;
2965  }
2966  else
2967  {
2968  ltop = get_opfamily_member(opfamily,
2969  op_lefttype, op_righttype,
2971  leop = get_opfamily_member(opfamily,
2972  op_lefttype, op_righttype,
2974  lsortop = get_opfamily_member(opfamily,
2975  op_lefttype, op_lefttype,
2977  rsortop = get_opfamily_member(opfamily,
2978  op_righttype, op_righttype,
2980  lstatop = get_opfamily_member(opfamily,
2981  op_lefttype, op_lefttype,
2983  rstatop = get_opfamily_member(opfamily,
2984  op_righttype, op_righttype,
2986  revltop = get_opfamily_member(opfamily,
2987  op_righttype, op_lefttype,
2989  revleop = get_opfamily_member(opfamily,
2990  op_righttype, op_lefttype,
2992  }
2993  break;
2994  default:
2995  goto fail; /* shouldn't get here */
2996  }
2997 
2998  if (!OidIsValid(lsortop) ||
2999  !OidIsValid(rsortop) ||
3000  !OidIsValid(lstatop) ||
3001  !OidIsValid(rstatop) ||
3002  !OidIsValid(ltop) ||
3003  !OidIsValid(leop) ||
3004  !OidIsValid(revltop) ||
3005  !OidIsValid(revleop))
3006  goto fail; /* insufficient info in catalogs */
3007 
3008  /* Try to get ranges of both inputs */
3009  if (!isgt)
3010  {
3011  if (!get_variable_range(root, &leftvar, lstatop,
3012  &leftmin, &leftmax))
3013  goto fail; /* no range available from stats */
3014  if (!get_variable_range(root, &rightvar, rstatop,
3015  &rightmin, &rightmax))
3016  goto fail; /* no range available from stats */
3017  }
3018  else
3019  {
3020  /* need to swap the max and min */
3021  if (!get_variable_range(root, &leftvar, lstatop,
3022  &leftmax, &leftmin))
3023  goto fail; /* no range available from stats */
3024  if (!get_variable_range(root, &rightvar, rstatop,
3025  &rightmax, &rightmin))
3026  goto fail; /* no range available from stats */
3027  }
3028 
3029  /*
3030  * Now, the fraction of the left variable that will be scanned is the
3031  * fraction that's <= the right-side maximum value. But only believe
3032  * non-default estimates, else stick with our 1.0.
3033  */
3034  selec = scalarineqsel(root, leop, isgt, true, &leftvar,
3035  rightmax, op_righttype);
3036  if (selec != DEFAULT_INEQ_SEL)
3037  *leftend = selec;
3038 
3039  /* And similarly for the right variable. */
3040  selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
3041  leftmax, op_lefttype);
3042  if (selec != DEFAULT_INEQ_SEL)
3043  *rightend = selec;
3044 
3045  /*
3046  * Only one of the two "end" fractions can really be less than 1.0;
3047  * believe the smaller estimate and reset the other one to exactly 1.0. If
3048  * we get exactly equal estimates (as can easily happen with self-joins),
3049  * believe neither.
3050  */
3051  if (*leftend > *rightend)
3052  *leftend = 1.0;
3053  else if (*leftend < *rightend)
3054  *rightend = 1.0;
3055  else
3056  *leftend = *rightend = 1.0;
3057 
3058  /*
3059  * Also, the fraction of the left variable that will be scanned before the
3060  * first join pair is found is the fraction that's < the right-side
3061  * minimum value. But only believe non-default estimates, else stick with
3062  * our own default.
3063  */
3064  selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
3065  rightmin, op_righttype);
3066  if (selec != DEFAULT_INEQ_SEL)
3067  *leftstart = selec;
3068 
3069  /* And similarly for the right variable. */
3070  selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
3071  leftmin, op_lefttype);
3072  if (selec != DEFAULT_INEQ_SEL)
3073  *rightstart = selec;
3074 
3075  /*
3076  * Only one of the two "start" fractions can really be more than zero;
3077  * believe the larger estimate and reset the other one to exactly 0.0. If
3078  * we get exactly equal estimates (as can easily happen with self-joins),
3079  * believe neither.
3080  */
3081  if (*leftstart < *rightstart)
3082  *leftstart = 0.0;
3083  else if (*leftstart > *rightstart)
3084  *rightstart = 0.0;
3085  else
3086  *leftstart = *rightstart = 0.0;
3087 
3088  /*
3089  * If the sort order is nulls-first, we're going to have to skip over any
3090  * nulls too. These would not have been counted by scalarineqsel, and we
3091  * can safely add in this fraction regardless of whether we believe
3092  * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
3093  */
3094  if (nulls_first)
3095  {
3096  Form_pg_statistic stats;
3097 
3098  if (HeapTupleIsValid(leftvar.statsTuple))
3099  {
3100  stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3101  *leftstart += stats->stanullfrac;
3102  CLAMP_PROBABILITY(*leftstart);
3103  *leftend += stats->stanullfrac;
3104  CLAMP_PROBABILITY(*leftend);
3105  }
3106  if (HeapTupleIsValid(rightvar.statsTuple))
3107  {
3108  stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3109  *rightstart += stats->stanullfrac;
3110  CLAMP_PROBABILITY(*rightstart);
3111  *rightend += stats->stanullfrac;
3112  CLAMP_PROBABILITY(*rightend);
3113  }
3114  }
3115 
3116  /* Disbelieve start >= end, just in case that can happen */
3117  if (*leftstart >= *leftend)
3118  {
3119  *leftstart = 0.0;
3120  *leftend = 1.0;
3121  }
3122  if (*rightstart >= *rightend)
3123  {
3124  *rightstart = 0.0;
3125  *rightend = 1.0;
3126  }
3127 
3128 fail:
3129  ReleaseVariableStats(leftvar);
3130  ReleaseVariableStats(rightvar);
3131 }
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Definition: nodes.h:529
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:644
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:164
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:70
uintptr_t Datum
Definition: postgres.h:367
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:82
static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, VariableStatData *vardata, Datum constval, Oid consttype)
Definition: selfuncs.c:575
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
#define Assert(condition)
Definition: c.h:738
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:134
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
Definition: selfuncs.c:5342
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:63
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32

◆ nulltestsel()

Selectivity nulltestsel ( PlannerInfo root,
NullTestType  nulltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1636 of file selfuncs.c.

References CLAMP_PROBABILITY, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, IsA, ReleaseVariableStats, VariableStatData::statsTuple, and VariableStatData::var.

Referenced by clause_selectivity(), and clauselist_selectivity_simple().

1638 {
1639  VariableStatData vardata;
1640  double selec;
1641 
1642  examine_variable(root, arg, varRelid, &vardata);
1643 
1644  if (HeapTupleIsValid(vardata.statsTuple))
1645  {
1646  Form_pg_statistic stats;
1647  double freq_null;
1648 
1649  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1650  freq_null = stats->stanullfrac;
1651 
1652  switch (nulltesttype)
1653  {
1654  case IS_NULL:
1655 
1656  /*
1657  * Use freq_null directly.
1658  */
1659  selec = freq_null;
1660  break;
1661  case IS_NOT_NULL:
1662 
1663  /*
1664  * Select not unknown (not null) values. Calculate from
1665  * freq_null.
1666  */
1667  selec = 1.0 - freq_null;
1668  break;
1669  default:
1670  elog(ERROR, "unrecognized nulltesttype: %d",
1671  (int) nulltesttype);
1672  return (Selectivity) 0; /* keep compiler quiet */
1673  }
1674  }
1675  else if (vardata.var && IsA(vardata.var, Var) &&
1676  ((Var *) vardata.var)->varattno < 0)
1677  {
1678  /*
1679  * There are no stats for system columns, but we know they are never
1680  * NULL.
1681  */
1682  selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1683  }
1684  else
1685  {
1686  /*
1687  * No ANALYZE stats available, so make a guess
1688  */
1689  switch (nulltesttype)
1690  {
1691  case IS_NULL:
1692  selec = DEFAULT_UNK_SEL;
1693  break;
1694  case IS_NOT_NULL:
1695  selec = DEFAULT_NOT_UNK_SEL;
1696  break;
1697  default:
1698  elog(ERROR, "unrecognized nulltesttype: %d",
1699  (int) nulltesttype);
1700  return (Selectivity) 0; /* keep compiler quiet */
1701  }
1702  }
1703 
1704  ReleaseVariableStats(vardata);
1705 
1706  /* result should be in range, but make sure... */
1707  CLAMP_PROBABILITY(selec);
1708 
1709  return (Selectivity) selec;
1710 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
double Selectivity
Definition: nodes.h:662
Definition: primnodes.h:181
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:53
#define ERROR
Definition: elog.h:43
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:52
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define elog(elevel,...)
Definition: elog.h:214

◆ rowcomparesel()

Selectivity rowcomparesel ( PlannerInfo root,
RowCompareExpr clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 2111 of file selfuncs.c.

References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.

Referenced by clause_selectivity().

2114 {
2115  Selectivity s1;
2116  Oid opno = linitial_oid(clause->opnos);
2117  Oid inputcollid = linitial_oid(clause->inputcollids);
2118  List *opargs;
2119  bool is_join_clause;
2120 
2121  /* Build equivalent arg list for single operator */
2122  opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2123 
2124  /*
2125  * Decide if it's a join clause. This should match clausesel.c's
2126  * treat_as_join_clause(), except that we intentionally consider only the
2127  * leading columns and not the rest of the clause.
2128  */
2129  if (varRelid != 0)
2130  {
2131  /*
2132  * Caller is forcing restriction mode (eg, because we are examining an
2133  * inner indexscan qual).
2134  */
2135  is_join_clause = false;
2136  }
2137  else if (sjinfo == NULL)
2138  {
2139  /*
2140  * It must be a restriction clause, since it's being evaluated at a
2141  * scan node.
2142  */
2143  is_join_clause = false;
2144  }
2145  else
2146  {
2147  /*
2148  * Otherwise, it's a join if there's more than one relation used.
2149  */
2150  is_join_clause = (NumRelids((Node *) opargs) > 1);
2151  }
2152 
2153  if (is_join_clause)
2154  {
2155  /* Estimate selectivity for a join clause. */
2156  s1 = join_selectivity(root, opno,
2157  opargs,
2158  inputcollid,
2159  jointype,
2160  sjinfo);
2161  }
2162  else
2163  {
2164  /* Estimate selectivity for a restriction clause. */
2165  s1 = restriction_selectivity(root, opno,
2166  opargs,
2167  inputcollid,
2168  varRelid);
2169  }
2170 
2171  return s1;
2172 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1766
Definition: nodes.h:529
double Selectivity
Definition: nodes.h:662
unsigned int Oid
Definition: postgres_ext.h:31
#define linitial(l)
Definition: pg_list.h:195
char * s1
#define linitial_oid(l)
Definition: pg_list.h:197
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1805
List * inputcollids
Definition: primnodes.h:1073
Definition: pg_list.h:50
int NumRelids(Node *clause)
Definition: clauses.c:2133

◆ scalararraysel()

Selectivity scalararraysel ( PlannerInfo root,
ScalarArrayOpExpr clause,
bool  is_join_clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1754 of file selfuncs.c.

References generate_unaccent_rules::args, ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, estimate_expression_value(), exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.

Referenced by clause_selectivity().

1760 {
1761  Oid operator = clause->opno;
1762  bool useOr = clause->useOr;
1763  bool isEquality = false;
1764  bool isInequality = false;
1765  Node *leftop;
1766  Node *rightop;
1767  Oid nominal_element_type;
1768  Oid nominal_element_collation;
1769  TypeCacheEntry *typentry;
1770  RegProcedure oprsel;
1771  FmgrInfo oprselproc;
1772  Selectivity s1;
1773  Selectivity s1disjoint;
1774 
1775  /* First, deconstruct the expression */
1776  Assert(list_length(clause->args) == 2);
1777  leftop = (Node *) linitial(clause->args);
1778  rightop = (Node *) lsecond(clause->args);
1779 
1780  /* aggressively reduce both sides to constants */
1781  leftop = estimate_expression_value(root, leftop);
1782  rightop = estimate_expression_value(root, rightop);
1783 
1784  /* get nominal (after relabeling) element type of rightop */
1785  nominal_element_type = get_base_element_type(exprType(rightop));
1786  if (!OidIsValid(nominal_element_type))
1787  return (Selectivity) 0.5; /* probably shouldn't happen */
1788  /* get nominal collation, too, for generating constants */
1789  nominal_element_collation = exprCollation(rightop);
1790 
1791  /* look through any binary-compatible relabeling of rightop */
1792  rightop = strip_array_coercion(rightop);
1793 
1794  /*
1795  * Detect whether the operator is the default equality or inequality
1796  * operator of the array element type.
1797  */
1798  typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1799  if (OidIsValid(typentry->eq_opr))
1800  {
1801  if (operator == typentry->eq_opr)
1802  isEquality = true;
1803  else if (get_negator(operator) == typentry->eq_opr)
1804  isInequality = true;
1805  }
1806 
1807  /*
1808  * If it is equality or inequality, we might be able to estimate this as a
1809  * form of array containment; for instance "const = ANY(column)" can be
1810  * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1811  * that, and returns the selectivity estimate if successful, or -1 if not.
1812  */
1813  if ((isEquality || isInequality) && !is_join_clause)
1814  {
1815  s1 = scalararraysel_containment(root, leftop, rightop,
1816  nominal_element_type,
1817  isEquality, useOr, varRelid);
1818  if (s1 >= 0.0)
1819  return s1;
1820  }
1821 
1822  /*
1823  * Look up the underlying operator's selectivity estimator. Punt if it
1824  * hasn't got one.
1825  */
1826  if (is_join_clause)
1827  oprsel = get_oprjoin(operator);
1828  else
1829  oprsel = get_oprrest(operator);
1830  if (!oprsel)
1831  return (Selectivity) 0.5;
1832  fmgr_info(oprsel, &oprselproc);
1833 
1834  /*
1835  * In the array-containment check above, we must only believe that an
1836  * operator is equality or inequality if it is the default btree equality
1837  * operator (or its negator) for the element type, since those are the
1838  * operators that array containment will use. But in what follows, we can
1839  * be a little laxer, and also believe that any operators using eqsel() or
1840  * neqsel() as selectivity estimator act like equality or inequality.
1841  */
1842  if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1843  isEquality = true;
1844  else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1845  isInequality = true;
1846 
1847  /*
1848  * We consider three cases:
1849  *
1850  * 1. rightop is an Array constant: deconstruct the array, apply the
1851  * operator's selectivity function for each array element, and merge the
1852  * results in the same way that clausesel.c does for AND/OR combinations.
1853  *
1854  * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1855  * function for each element of the ARRAY[] construct, and merge.
1856  *
1857  * 3. otherwise, make a guess ...
1858  */
1859  if (rightop && IsA(rightop, Const))
1860  {
1861  Datum arraydatum = ((Const *) rightop)->constvalue;
1862  bool arrayisnull = ((Const *) rightop)->constisnull;
1863  ArrayType *arrayval;
1864  int16 elmlen;
1865  bool elmbyval;
1866  char elmalign;
1867  int num_elems;
1868  Datum *elem_values;
1869  bool *elem_nulls;
1870  int i;
1871 
1872  if (arrayisnull) /* qual can't succeed if null array */
1873  return (Selectivity) 0.0;
1874  arrayval = DatumGetArrayTypeP(arraydatum);
1876  &elmlen, &elmbyval, &elmalign);
1877  deconstruct_array(arrayval,
1878  ARR_ELEMTYPE(arrayval),
1879  elmlen, elmbyval, elmalign,
1880  &elem_values, &elem_nulls, &num_elems);
1881 
1882  /*
1883  * For generic operators, we assume the probability of success is
1884  * independent for each array element. But for "= ANY" or "<> ALL",
1885  * if the array elements are distinct (which'd typically be the case)
1886  * then the probabilities are disjoint, and we should just sum them.
1887  *
1888  * If we were being really tense we would try to confirm that the
1889  * elements are all distinct, but that would be expensive and it
1890  * doesn't seem to be worth the cycles; it would amount to penalizing
1891  * well-written queries in favor of poorly-written ones. However, we
1892  * do protect ourselves a little bit by checking whether the
1893  * disjointness assumption leads to an impossible (out of range)
1894  * probability; if so, we fall back to the normal calculation.
1895  */
1896  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1897 
1898  for (i = 0; i < num_elems; i++)
1899  {
1900  List *args;
1901  Selectivity s2;
1902 
1903  args = list_make2(leftop,
1904  makeConst(nominal_element_type,
1905  -1,
1906  nominal_element_collation,
1907  elmlen,
1908  elem_values[i],
1909  elem_nulls[i],
1910  elmbyval));
1911  if (is_join_clause)
1912  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1913  clause->inputcollid,
1914  PointerGetDatum(root),
1915  ObjectIdGetDatum(operator),
1916  PointerGetDatum(args),
1917  Int16GetDatum(jointype),
1918  PointerGetDatum(sjinfo)));
1919  else
1920  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1921  clause->inputcollid,
1922  PointerGetDatum(root),
1923  ObjectIdGetDatum(operator),
1924  PointerGetDatum(args),
1925  Int32GetDatum(varRelid)));
1926 
1927  if (useOr)
1928  {
1929  s1 = s1 + s2 - s1 * s2;
1930  if (isEquality)
1931  s1disjoint += s2;
1932  }
1933  else
1934  {
1935  s1 = s1 * s2;
1936  if (isInequality)
1937  s1disjoint += s2 - 1.0;
1938  }
1939  }
1940 
1941  /* accept disjoint-probability estimate if in range */
1942  if ((useOr ? isEquality : isInequality) &&
1943  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1944  s1 = s1disjoint;
1945  }
1946  else if (rightop && IsA(rightop, ArrayExpr) &&
1947  !((ArrayExpr *) rightop)->multidims)
1948  {
1949  ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
1950  int16 elmlen;
1951  bool elmbyval;
1952  ListCell *l;
1953 
1954  get_typlenbyval(arrayexpr->element_typeid,
1955  &elmlen, &elmbyval);
1956 
1957  /*
1958  * We use the assumption of disjoint probabilities here too, although
1959  * the odds of equal array elements are rather higher if the elements
1960  * are not all constants (which they won't be, else constant folding
1961  * would have reduced the ArrayExpr to a Const). In this path it's
1962  * critical to have the sanity check on the s1disjoint estimate.
1963  */
1964  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1965 
1966  foreach(l, arrayexpr->elements)
1967  {
1968  Node *elem = (Node *) lfirst(l);
1969  List *args;
1970  Selectivity s2;
1971 
1972  /*
1973  * Theoretically, if elem isn't of nominal_element_type we should
1974  * insert a RelabelType, but it seems unlikely that any operator
1975  * estimation function would really care ...
1976  */
1977  args = list_make2(leftop, elem);
1978  if (is_join_clause)
1979  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1980  clause->inputcollid,
1981  PointerGetDatum(root),
1982  ObjectIdGetDatum(operator),
1983  PointerGetDatum(args),
1984  Int16GetDatum(jointype),
1985  PointerGetDatum(sjinfo)));
1986  else
1987  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1988  clause->inputcollid,
1989  PointerGetDatum(root),
1990  ObjectIdGetDatum(operator),
1991  PointerGetDatum(args),
1992  Int32GetDatum(varRelid)));
1993 
1994  if (useOr)
1995  {
1996  s1 = s1 + s2 - s1 * s2;
1997  if (isEquality)
1998  s1disjoint += s2;
1999  }
2000  else
2001  {
2002  s1 = s1 * s2;
2003  if (isInequality)
2004  s1disjoint += s2 - 1.0;
2005  }
2006  }
2007 
2008  /* accept disjoint-probability estimate if in range */
2009  if ((useOr ? isEquality : isInequality) &&
2010  s1disjoint >= 0.0 && s1disjoint <= 1.0)
2011  s1 = s1disjoint;
2012  }
2013  else
2014  {
2015  CaseTestExpr *dummyexpr;
2016  List *args;
2017  Selectivity s2;
2018  int i;
2019 
2020  /*
2021  * We need a dummy rightop to pass to the operator selectivity
2022  * routine. It can be pretty much anything that doesn't look like a
2023  * constant; CaseTestExpr is a convenient choice.
2024  */
2025  dummyexpr = makeNode(CaseTestExpr);
2026  dummyexpr->typeId = nominal_element_type;
2027  dummyexpr->typeMod = -1;
2028  dummyexpr->collation = clause->inputcollid;
2029  args = list_make2(leftop, dummyexpr);
2030  if (is_join_clause)
2031  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2032  clause->inputcollid,
2033  PointerGetDatum(root),
2034  ObjectIdGetDatum(operator),
2035  PointerGetDatum(args),
2036  Int16GetDatum(jointype),
2037  PointerGetDatum(sjinfo)));
2038  else
2039  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2040  clause->inputcollid,
2041  PointerGetDatum(root),
2042  ObjectIdGetDatum(operator),
2043  PointerGetDatum(args),
2044  Int32GetDatum(varRelid)));
2045  s1 = useOr ? 0.0 : 1.0;
2046 
2047  /*
2048  * Arbitrarily assume 10 elements in the eventual array value (see
2049  * also estimate_array_length). We don't risk an assumption of
2050  * disjoint probabilities here.
2051  */
2052  for (i = 0; i < 10; i++)
2053  {
2054  if (useOr)
2055  s1 = s1 + s2 - s1 * s2;
2056  else
2057  s1 = s1 * s2;
2058  }
2059  }
2060 
2061  /* result should be in range, but make sure... */
2062  CLAMP_PROBABILITY(s1);
2063 
2064  return s1;
2065 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
signed short int16
Definition: c.h:354
Definition: fmgr.h:56
RegProcedure get_oprjoin(Oid opno)
Definition: lsyscache.c:1444
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2288
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2110
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum FunctionCall5Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:1226
regproc RegProcedure
Definition: c.h:511
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:529
#define TYPECACHE_EQ_OPR
Definition: typcache.h:130
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1199
double Selectivity
Definition: nodes.h:662
unsigned int Oid
Definition: postgres_ext.h:31
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:297
#define OidIsValid(objectId)
Definition: c.h:644
#define lsecond(l)
Definition: pg_list.h:200
int32 typeMod
Definition: primnodes.h:973
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define linitial(l)
Definition: pg_list.h:195
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
char * s1
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
List * elements
Definition: primnodes.h:991
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1420
char * s2
#define DatumGetFloat8(X)
Definition: postgres.h:714
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define makeNode(_type_)
Definition: nodes.h:577
#define Assert(condition)
Definition: c.h:738
#define lfirst(lc)
Definition: pg_list.h:190
Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:169
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:719
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2090
Oid element_typeid
Definition: primnodes.h:990
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3462
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2660
#define Int32GetDatum(X)
Definition: postgres.h:479
int i
Oid get_negator(Oid opno)
Definition: lsyscache.c:1396
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:280
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1721
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 82 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

86 {
87  Selectivity selec;
88  VariableStatData vardata;
89  Datum constval;
90  TypeCacheEntry *typentry;
91  FmgrInfo *cmpfunc;
92 
93  /*
94  * rightop must be a variable, else punt.
95  */
96  examine_variable(root, rightop, varRelid, &vardata);
97  if (!vardata.rel)
98  {
99  ReleaseVariableStats(vardata);
100  return -1.0;
101  }
102 
103  /*
104  * leftop must be a constant, else punt.
105  */
106  if (!IsA(leftop, Const))
107  {
108  ReleaseVariableStats(vardata);
109  return -1.0;
110  }
111  if (((Const *) leftop)->constisnull)
112  {
113  /* qual can't succeed if null on left */
114  ReleaseVariableStats(vardata);
115  return (Selectivity) 0.0;
116  }
117  constval = ((Const *) leftop)->constvalue;
118 
119  /* Get element type's default comparison function */
120  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
121  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
122  {
123  ReleaseVariableStats(vardata);
124  return -1.0;
125  }
126  cmpfunc = &typentry->cmp_proc_finfo;
127 
128  /*
129  * If the operator is <>, swap ANY/ALL, then invert the result later.
130  */
131  if (!isEquality)
132  useOr = !useOr;
133 
134  /* Get array element stats for var, if available */
135  if (HeapTupleIsValid(vardata.statsTuple) &&
136  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
137  {
138  Form_pg_statistic stats;
139  AttStatsSlot sslot;
140  AttStatsSlot hslot;
141 
142  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
143 
144  /* MCELEM will be an array of same type as element */
145  if (get_attstatsslot(&sslot, vardata.statsTuple,
146  STATISTIC_KIND_MCELEM, InvalidOid,
148  {
149  /* For ALL case, also get histogram of distinct-element counts */
150  if (useOr ||
151  !get_attstatsslot(&hslot, vardata.statsTuple,
152  STATISTIC_KIND_DECHIST, InvalidOid,
154  memset(&hslot, 0, sizeof(hslot));
155 
156  /*
157  * For = ANY, estimate as var @> ARRAY[const].
158  *
159  * For = ALL, estimate as var <@ ARRAY[const].
160  */
161  if (useOr)
163  sslot.nvalues,
164  sslot.numbers,
165  sslot.nnumbers,
166  &constval, 1,
167  OID_ARRAY_CONTAINS_OP,
168  typentry);
169  else
170  selec = mcelem_array_contained_selec(sslot.values,
171  sslot.nvalues,
172  sslot.numbers,
173  sslot.nnumbers,
174  &constval, 1,
175  hslot.numbers,
176  hslot.nnumbers,
177  OID_ARRAY_CONTAINED_OP,
178  typentry);
179 
180  free_attstatsslot(&hslot);
181  free_attstatsslot(&sslot);
182  }
183  else
184  {
185  /* No most-common-elements info, so do without */
186  if (useOr)
187  selec = mcelem_array_contain_overlap_selec(NULL, 0,
188  NULL, 0,
189  &constval, 1,
190  OID_ARRAY_CONTAINS_OP,
191  typentry);
192  else
193  selec = mcelem_array_contained_selec(NULL, 0,
194  NULL, 0,
195  &constval, 1,
196  NULL, 0,
197  OID_ARRAY_CONTAINED_OP,
198  typentry);
199  }
200 
201  /*
202  * MCE stats count only non-null rows, so adjust for null rows.
203  */
204  selec *= (1.0 - stats->stanullfrac);
205  }
206  else
207  {
208  /* No stats at all, so do without */
209  if (useOr)
210  selec = mcelem_array_contain_overlap_selec(NULL, 0,
211  NULL, 0,
212  &constval, 1,
213  OID_ARRAY_CONTAINS_OP,
214  typentry);
215  else
216  selec = mcelem_array_contained_selec(NULL, 0,
217  NULL, 0,
218  &constval, 1,
219  NULL, 0,
220  OID_ARRAY_CONTAINED_OP,
221  typentry);
222  /* we assume no nulls here, so no stanullfrac correction */
223  }
224 
225  ReleaseVariableStats(vardata);
226 
227  /*
228  * If the operator is <>, invert the results.
229  */
230  if (!isEquality)
231  selec = 1.0 - selec;
232 
233  CLAMP_PROBABILITY(selec);
234 
235  return selec;
236 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5181
RelOptInfo * rel
Definition: selfuncs.h:73
double Selectivity
Definition: nodes.h:662
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:644
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
FmgrInfo cmp_proc_finfo
Definition: typcache.h:75
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4661
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:84
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:136
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ statistic_proc_security_check()

bool statistic_proc_security_check ( VariableStatData vardata,
Oid  func_oid 
)

Definition at line 5181 of file selfuncs.c.

References VariableStatData::acl_ok, DEBUG2, ereport, errmsg_internal(), get_func_leakproof(), get_func_name(), and OidIsValid.

Referenced by calc_arraycontsel(), calc_hist_selectivity(), eqjoinsel(), get_variable_range(), histogram_selectivity(), ineq_histogram_selectivity(), mcv_selectivity(), scalararraysel_containment(), and var_eq_const().

5182 {
5183  if (vardata->acl_ok)
5184  return true;
5185 
5186  if (!OidIsValid(func_oid))
5187  return false;
5188 
5189  if (get_func_leakproof(func_oid))
5190  return true;
5191 
5192  ereport(DEBUG2,
5193  (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
5194  get_func_name(func_oid))));
5195  return false;
5196 }
bool get_func_leakproof(Oid funcid)
Definition: lsyscache.c:1700
#define OidIsValid(objectId)
Definition: c.h:644
char * get_func_name(Oid funcid)
Definition: lsyscache.c:1471
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel,...)
Definition: elog.h:144
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911

◆ var_eq_const()

double var_eq_const ( VariableStatData vardata,
Oid  oproid,
Datum  constval,
bool  constisnull,
bool  varonleft,
bool  negate 
)

Definition at line 290 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, DatumGetBool, fmgr_info(), free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, VariableStatData::isunique, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, VariableStatData::rel, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, RelOptInfo::tuples, and AttStatsSlot::values.

Referenced by boolvarsel(), eqsel_internal(), patternsel_common(), and prefix_selectivity().

293 {
294  double selec;
295  double nullfrac = 0.0;
296  bool isdefault;
297  Oid opfuncoid;
298 
299  /*
300  * If the constant is NULL, assume operator is strict and return zero, ie,
301  * operator will never return TRUE. (It's zero even for a negator op.)
302  */
303  if (constisnull)
304  return 0.0;
305 
306  /*
307  * Grab the nullfrac for use below. Note we allow use of nullfrac
308  * regardless of security check.
309  */
310  if (HeapTupleIsValid(vardata->statsTuple))
311  {
312  Form_pg_statistic stats;
313 
314  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
315  nullfrac = stats->stanullfrac;
316  }
317 
318  /*
319  * If we matched the var to a unique index or DISTINCT clause, assume
320  * there is exactly one match regardless of anything else. (This is
321  * slightly bogus, since the index or clause's equality operator might be
322  * different from ours, but it's much more likely to be right than
323  * ignoring the information.)
324  */
325  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
326  {
327  selec = 1.0 / vardata->rel->tuples;
328  }
329  else if (HeapTupleIsValid(vardata->statsTuple) &&
331  (opfuncoid = get_opcode(operator))))
332  {
333  AttStatsSlot sslot;
334  bool match = false;
335  int i;
336 
337  /*
338  * Is the constant "=" to any of the column's most common values?
339  * (Although the given operator may not really be "=", we will assume
340  * that seeing whether it returns TRUE is an appropriate test. If you
341  * don't like this, maybe you shouldn't be using eqsel for your
342  * operator...)
343  */
344  if (get_attstatsslot(&sslot, vardata->statsTuple,
345  STATISTIC_KIND_MCV, InvalidOid,
347  {
348  LOCAL_FCINFO(fcinfo, 2);
349  FmgrInfo eqproc;
350 
351  fmgr_info(opfuncoid, &eqproc);
352 
353  /*
354  * Save a few cycles by setting up the fcinfo struct just once.
355  * Using FunctionCallInvoke directly also avoids failure if the
356  * eqproc returns NULL, though really equality functions should
357  * never do that.
358  */
359  InitFunctionCallInfoData(*fcinfo, &eqproc, 2, sslot.stacoll,
360  NULL, NULL);
361  fcinfo->args[0].isnull = false;
362  fcinfo->args[1].isnull = false;
363  /* be careful to apply operator right way 'round */
364  if (varonleft)
365  fcinfo->args[1].value = constval;
366  else
367  fcinfo->args[0].value = constval;
368 
369  for (i = 0; i < sslot.nvalues; i++)
370  {
371  Datum fresult;
372 
373  if (varonleft)
374  fcinfo->args[0].value = sslot.values[i];
375  else
376  fcinfo->args[1].value = sslot.values[i];
377  fcinfo->isnull = false;
378  fresult = FunctionCallInvoke(fcinfo);
379  if (!fcinfo->isnull && DatumGetBool(fresult))
380  {
381  match = true;
382  break;
383  }
384  }
385  }
386  else
387  {
388  /* no most-common-value info available */
389  i = 0; /* keep compiler quiet */
390  }
391 
392  if (match)
393  {
394  /*
395  * Constant is "=" to this common value. We know selectivity
396  * exactly (or as exactly as ANALYZE could calculate it, anyway).
397  */
398  selec = sslot.numbers[i];
399  }
400  else
401  {
402  /*
403  * Comparison is against a constant that is neither NULL nor any
404  * of the common values. Its selectivity cannot be more than
405  * this:
406  */
407  double sumcommon = 0.0;
408  double otherdistinct;
409 
410  for (i = 0; i < sslot.nnumbers; i++)
411  sumcommon += sslot.numbers[i];
412  selec = 1.0 - sumcommon - nullfrac;
413  CLAMP_PROBABILITY(selec);
414 
415  /*
416  * and in fact it's probably a good deal less. We approximate that
417  * all the not-common values share this remaining fraction
418  * equally, so we divide by the number of other distinct values.
419  */
420  otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
421  sslot.nnumbers;
422  if (otherdistinct > 1)
423  selec /= otherdistinct;
424 
425  /*
426  * Another cross-check: selectivity shouldn't be estimated as more
427  * than the least common "most common value".
428  */
429  if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
430  selec = sslot.numbers[sslot.nnumbers - 1];
431  }
432 
433  free_attstatsslot(&sslot);
434  }
435  else
436  {
437  /*
438  * No ANALYZE stats available, so make a guess using estimated number
439  * of distinct values and assuming they are equally common. (The guess
440  * is unlikely to be very good, but we do know a few special cases.)
441  */
442  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
443  }
444 
445  /* now adjust if we wanted <> rather than = */
446  if (negate)
447  selec = 1.0 - selec - nullfrac;
448 
449  /* result should be in range, but make sure... */
450  CLAMP_PROBABILITY(selec);
451 
452  return selec;
453 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5181
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5210
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
uintptr_t Datum
Definition: postgres.h:367
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1153
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
Datum * values
Definition: lsyscache.h:50
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

◆ var_eq_non_const()

double var_eq_non_const ( VariableStatData vardata,
Oid  oproid,
Node other,
bool  varonleft,
bool  negate 
)

Definition at line 461 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, CLAMP_PROBABILITY, free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by eqsel_internal().

464 {
465  double selec;
466  double nullfrac = 0.0;
467  bool isdefault;
468 
469  /*
470  * Grab the nullfrac for use below.
471  */
472  if (HeapTupleIsValid(vardata->statsTuple))
473  {
474  Form_pg_statistic stats;
475 
476  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
477  nullfrac = stats->stanullfrac;
478  }
479 
480  /*
481  * If we matched the var to a unique index or DISTINCT clause, assume
482  * there is exactly one match regardless of anything else. (This is
483  * slightly bogus, since the index or clause's equality operator might be
484  * different from ours, but it's much more likely to be right than
485  * ignoring the information.)
486  */
487  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
488  {
489  selec = 1.0 / vardata->rel->tuples;
490  }
491  else if (HeapTupleIsValid(vardata->statsTuple))
492  {
493  double ndistinct;
494  AttStatsSlot sslot;
495 
496  /*
497  * Search is for a value that we do not know a priori, but we will
498  * assume it is not NULL. Estimate the selectivity as non-null
499  * fraction divided by number of distinct values, so that we get a
500  * result averaged over all possible values whether common or
501  * uncommon. (Essentially, we are assuming that the not-yet-known
502  * comparison value is equally likely to be any of the possible
503  * values, regardless of their frequency in the table. Is that a good
504  * idea?)
505  */
506  selec = 1.0 - nullfrac;
507  ndistinct = get_variable_numdistinct(vardata, &isdefault);
508  if (ndistinct > 1)
509  selec /= ndistinct;
510 
511  /*
512  * Cross-check: selectivity should never be estimated as more than the
513  * most common value's.
514  */
515  if (get_attstatsslot(&sslot, vardata->statsTuple,
516  STATISTIC_KIND_MCV, InvalidOid,
518  {
519  if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
520  selec = sslot.numbers[0];
521  free_attstatsslot(&sslot);
522  }
523  }
524  else
525  {
526  /*
527  * No ANALYZE stats available, so make a guess using estimated number
528  * of distinct values and assuming they are equally common. (The guess
529  * is unlikely to be very good, but we do know a few special cases.)
530  */
531  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
532  }
533 
534  /* now adjust if we wanted <> rather than = */
535  if (negate)
536  selec = 1.0 - selec - nullfrac;
537 
538  /* result should be in range, but make sure... */
539  CLAMP_PROBABILITY(selec);
540 
541  return selec;
542 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:74
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:705
RelOptInfo * rel
Definition: selfuncs.h:73
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:60
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5210
float4 * numbers
Definition: lsyscache.h:53
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3003
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3133

Variable Documentation

◆ get_index_stats_hook

PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook

Definition at line 149 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_variable().

◆ get_relation_stats_hook

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook

Definition at line 148 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_simple_variable().