PostgreSQL Source Code  git master
selfuncs.h File Reference
#include "access/htup.h"
#include "fmgr.h"
#include "nodes/pathnodes.h"
Include dependency graph for selfuncs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  VariableStatData
 
struct  GenericCosts
 

Macros

#define DEFAULT_EQ_SEL   0.005
 
#define DEFAULT_INEQ_SEL   0.3333333333333333
 
#define DEFAULT_RANGE_INEQ_SEL   0.005
 
#define DEFAULT_MATCH_SEL   0.005
 
#define DEFAULT_NUM_DISTINCT   200
 
#define DEFAULT_UNK_SEL   0.005
 
#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)
 
#define CLAMP_PROBABILITY(p)
 
#define ReleaseVariableStats(vardata)
 

Typedefs

typedef struct VariableStatData VariableStatData
 
typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)
 
typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)
 

Functions

void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, bool iseq, Datum constval, Oid consttype)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Node *other, bool varonleft, bool negate)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
Listget_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
Listadd_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 

Variables

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook
 
PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook
 

Macro Definition Documentation

◆ CLAMP_PROBABILITY

◆ DEFAULT_EQ_SEL

#define DEFAULT_EQ_SEL   0.005

Definition at line 34 of file selfuncs.h.

Referenced by _int_matchsel(), eqsel_internal(), int_query_opr_selec(), and neqjoinsel().

◆ DEFAULT_INEQ_SEL

◆ DEFAULT_MATCH_SEL

#define DEFAULT_MATCH_SEL   0.005

◆ DEFAULT_NOT_UNK_SEL

#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)

Definition at line 50 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ DEFAULT_NUM_DISTINCT

#define DEFAULT_NUM_DISTINCT   200

Definition at line 46 of file selfuncs.h.

Referenced by get_variable_numdistinct().

◆ DEFAULT_RANGE_INEQ_SEL

#define DEFAULT_RANGE_INEQ_SEL   0.005

Definition at line 40 of file selfuncs.h.

Referenced by clauselist_selectivity_simple(), and default_range_selectivity().

◆ DEFAULT_UNK_SEL

#define DEFAULT_UNK_SEL   0.005

Definition at line 49 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ ReleaseVariableStats

Typedef Documentation

◆ get_index_stats_hook_type

typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Definition at line 122 of file selfuncs.h.

◆ get_relation_stats_hook_type

typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

Definition at line 117 of file selfuncs.h.

◆ VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

List* add_predicate_to_index_quals ( IndexOptInfo index,
List indexQuals 
)

Definition at line 5868 of file selfuncs.c.

References IndexOptInfo::indpred, lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().

5869 {
5870  List *predExtraQuals = NIL;
5871  ListCell *lc;
5872 
5873  if (index->indpred == NIL)
5874  return indexQuals;
5875 
5876  foreach(lc, index->indpred)
5877  {
5878  Node *predQual = (Node *) lfirst(lc);
5879  List *oneQual = list_make1(predQual);
5880 
5881  if (!predicate_implied_by(oneQual, indexQuals, false))
5882  predExtraQuals = list_concat(predExtraQuals, oneQual);
5883  }
5884  return list_concat(predExtraQuals, indexQuals);
5885 }
#define NIL
Definition: pg_list.h:65
Definition: nodes.h:525
List * list_concat(List *list1, const List *list2)
Definition: list.c:516
#define list_make1(x1)
Definition: pg_list.h:227
#define lfirst(lc)
Definition: pg_list.h:190
List * indpred
Definition: pathnodes.h:814
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:151
Definition: pg_list.h:50

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo root,
BoolTestType  booltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1296 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity().

1298 {
1299  VariableStatData vardata;
1300  double selec;
1301 
1302  examine_variable(root, arg, varRelid, &vardata);
1303 
1304  if (HeapTupleIsValid(vardata.statsTuple))
1305  {
1306  Form_pg_statistic stats;
1307  double freq_null;
1308  AttStatsSlot sslot;
1309 
1310  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1311  freq_null = stats->stanullfrac;
1312 
1313  if (get_attstatsslot(&sslot, vardata.statsTuple,
1314  STATISTIC_KIND_MCV, InvalidOid,
1316  && sslot.nnumbers > 0)
1317  {
1318  double freq_true;
1319  double freq_false;
1320 
1321  /*
1322  * Get first MCV frequency and derive frequency for true.
1323  */
1324  if (DatumGetBool(sslot.values[0]))
1325  freq_true = sslot.numbers[0];
1326  else
1327  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1328 
1329  /*
1330  * Next derive frequency for false. Then use these as appropriate
1331  * to derive frequency for each case.
1332  */
1333  freq_false = 1.0 - freq_true - freq_null;
1334 
1335  switch (booltesttype)
1336  {
1337  case IS_UNKNOWN:
1338  /* select only NULL values */
1339  selec = freq_null;
1340  break;
1341  case IS_NOT_UNKNOWN:
1342  /* select non-NULL values */
1343  selec = 1.0 - freq_null;
1344  break;
1345  case IS_TRUE:
1346  /* select only TRUE values */
1347  selec = freq_true;
1348  break;
1349  case IS_NOT_TRUE:
1350  /* select non-TRUE values */
1351  selec = 1.0 - freq_true;
1352  break;
1353  case IS_FALSE:
1354  /* select only FALSE values */
1355  selec = freq_false;
1356  break;
1357  case IS_NOT_FALSE:
1358  /* select non-FALSE values */
1359  selec = 1.0 - freq_false;
1360  break;
1361  default:
1362  elog(ERROR, "unrecognized booltesttype: %d",
1363  (int) booltesttype);
1364  selec = 0.0; /* Keep compiler quiet */
1365  break;
1366  }
1367 
1368  free_attstatsslot(&sslot);
1369  }
1370  else
1371  {
1372  /*
1373  * No most-common-value info available. Still have null fraction
1374  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1375  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1376  */
1377  switch (booltesttype)
1378  {
1379  case IS_UNKNOWN:
1380  /* select only NULL values */
1381  selec = freq_null;
1382  break;
1383  case IS_NOT_UNKNOWN:
1384  /* select non-NULL values */
1385  selec = 1.0 - freq_null;
1386  break;
1387  case IS_TRUE:
1388  case IS_FALSE:
1389  /* Assume we select half of the non-NULL values */
1390  selec = (1.0 - freq_null) / 2.0;
1391  break;
1392  case IS_NOT_TRUE:
1393  case IS_NOT_FALSE:
1394  /* Assume we select NULLs plus half of the non-NULLs */
1395  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1396  selec = (freq_null + 1.0) / 2.0;
1397  break;
1398  default:
1399  elog(ERROR, "unrecognized booltesttype: %d",
1400  (int) booltesttype);
1401  selec = 0.0; /* Keep compiler quiet */
1402  break;
1403  }
1404  }
1405  }
1406  else
1407  {
1408  /*
1409  * If we can't get variable statistics for the argument, perhaps
1410  * clause_selectivity can do something with it. We ignore the
1411  * possibility of a NULL value when using clause_selectivity, and just
1412  * assume the value is either TRUE or FALSE.
1413  */
1414  switch (booltesttype)
1415  {
1416  case IS_UNKNOWN:
1417  selec = DEFAULT_UNK_SEL;
1418  break;
1419  case IS_NOT_UNKNOWN:
1420  selec = DEFAULT_NOT_UNK_SEL;
1421  break;
1422  case IS_TRUE:
1423  case IS_NOT_FALSE:
1424  selec = (double) clause_selectivity(root, arg,
1425  varRelid,
1426  jointype, sjinfo);
1427  break;
1428  case IS_FALSE:
1429  case IS_NOT_TRUE:
1430  selec = 1.0 - (double) clause_selectivity(root, arg,
1431  varRelid,
1432  jointype, sjinfo);
1433  break;
1434  default:
1435  elog(ERROR, "unrecognized booltesttype: %d",
1436  (int) booltesttype);
1437  selec = 0.0; /* Keep compiler quiet */
1438  break;
1439  }
1440  }
1441 
1442  ReleaseVariableStats(vardata);
1443 
1444  /* result should be in range, but make sure... */
1445  CLAMP_PROBABILITY(selec);
1446 
1447  return (Selectivity) selec;
1448 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:50
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
#define ERROR
Definition: elog.h:43
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:600
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:49
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define elog(elevel,...)
Definition: elog.h:228
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo root,
Node arg,
int  varRelid 
)

Definition at line 1268 of file selfuncs.c.

References BoolGetDatum, examine_variable(), HeapTupleIsValid, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity().

1269 {
1270  VariableStatData vardata;
1271  double selec;
1272 
1273  examine_variable(root, arg, varRelid, &vardata);
1274  if (HeapTupleIsValid(vardata.statsTuple))
1275  {
1276  /*
1277  * A boolean variable V is equivalent to the clause V = 't', so we
1278  * compute the selectivity as if that is what we have.
1279  */
1280  selec = var_eq_const(&vardata, BooleanEqualOperator,
1281  BoolGetDatum(true), false, true, false);
1282  }
1283  else
1284  {
1285  /* Otherwise, the default estimate is 0.5 */
1286  selec = 0.5;
1287  }
1288  ReleaseVariableStats(vardata);
1289  return selec;
1290 }
HeapTuple statsTuple
Definition: selfuncs.h:71
double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:289
#define BoolGetDatum(X)
Definition: postgres.h:402
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81

◆ estimate_array_length()

int estimate_array_length ( Node arrayexpr)

Definition at line 1891 of file selfuncs.c.

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

1892 {
1893  /* look through any binary-compatible relabeling of arrayexpr */
1894  arrayexpr = strip_array_coercion(arrayexpr);
1895 
1896  if (arrayexpr && IsA(arrayexpr, Const))
1897  {
1898  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
1899  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
1900  ArrayType *arrayval;
1901 
1902  if (arrayisnull)
1903  return 0;
1904  arrayval = DatumGetArrayTypeP(arraydatum);
1905  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
1906  }
1907  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
1908  !((ArrayExpr *) arrayexpr)->multidims)
1909  {
1910  return list_length(((ArrayExpr *) arrayexpr)->elements);
1911  }
1912  else
1913  {
1914  /* default guess --- see also scalararraysel */
1915  return 10;
1916  }
1917 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ARR_DIMS(a)
Definition: array.h:282
uintptr_t Datum
Definition: postgres.h:367
static int list_length(const List *l)
Definition: pg_list.h:169
#define ARR_NDIM(a)
Definition: array.h:278
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1539
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3407 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

3410 {
3411  VariableStatData vardata;
3412  double estfract,
3413  ndistinct,
3414  stanullfrac,
3415  avgfreq;
3416  bool isdefault;
3417  AttStatsSlot sslot;
3418 
3419  examine_variable(root, hashkey, 0, &vardata);
3420 
3421  /* Look up the frequency of the most common value, if available */
3422  *mcv_freq = 0.0;
3423 
3424  if (HeapTupleIsValid(vardata.statsTuple))
3425  {
3426  if (get_attstatsslot(&sslot, vardata.statsTuple,
3427  STATISTIC_KIND_MCV, InvalidOid,
3429  {
3430  /*
3431  * The first MCV stat is for the most common value.
3432  */
3433  if (sslot.nnumbers > 0)
3434  *mcv_freq = sslot.numbers[0];
3435  free_attstatsslot(&sslot);
3436  }
3437  }
3438 
3439  /* Get number of distinct values */
3440  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3441 
3442  /*
3443  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3444  * mcv_freq is known to be even higher than that, use it instead.
3445  */
3446  if (isdefault)
3447  {
3448  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3449  ReleaseVariableStats(vardata);
3450  return;
3451  }
3452 
3453  /* Get fraction that are null */
3454  if (HeapTupleIsValid(vardata.statsTuple))
3455  {
3456  Form_pg_statistic stats;
3457 
3458  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3459  stanullfrac = stats->stanullfrac;
3460  }
3461  else
3462  stanullfrac = 0.0;
3463 
3464  /* Compute avg freq of all distinct data values in raw relation */
3465  avgfreq = (1.0 - stanullfrac) / ndistinct;
3466 
3467  /*
3468  * Adjust ndistinct to account for restriction clauses. Observe we are
3469  * assuming that the data distribution is affected uniformly by the
3470  * restriction clauses!
3471  *
3472  * XXX Possibly better way, but much more expensive: multiply by
3473  * selectivity of rel's restriction clauses that mention the target Var.
3474  */
3475  if (vardata.rel && vardata.rel->tuples > 0)
3476  {
3477  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3478  ndistinct = clamp_row_est(ndistinct);
3479  }
3480 
3481  /*
3482  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3483  * number of buckets is less than the expected number of distinct values;
3484  * otherwise it is 1/ndistinct.
3485  */
3486  if (ndistinct > nbuckets)
3487  estfract = 1.0 / nbuckets;
3488  else
3489  estfract = 1.0 / ndistinct;
3490 
3491  /*
3492  * Adjust estimated bucketsize upward to account for skewed distribution.
3493  */
3494  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3495  estfract *= *mcv_freq / avgfreq;
3496 
3497  /*
3498  * Clamp bucketsize to sane range (the above adjustment could easily
3499  * produce an out-of-range result). We set the lower bound a little above
3500  * zero, since zero isn't a very sane result.
3501  */
3502  if (estfract < 1.0e-6)
3503  estfract = 1.0e-6;
3504  else if (estfract > 1.0)
3505  estfract = 1.0;
3506 
3507  *bucketsize_frac = (Selectivity) estfract;
3508 
3509  ReleaseVariableStats(vardata);
3510 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4967
float4 * numbers
Definition: lsyscache.h:53
double rows
Definition: pathnodes.h:644
#define InvalidOid
Definition: postgres_ext.h:36
#define Max(x, y)
Definition: c.h:905
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
e
Definition: preproc-init.c:82
double clamp_row_est(double nrows)
Definition: costsize.c:187
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3526 of file selfuncs.c.

References hash_agg_entry_size(), MAXALIGN, AggClauseCosts::numAggs, Path::pathtarget, SizeofMinimalTupleHeader, AggClauseCosts::transitionSpace, and PathTarget::width.

Referenced by add_paths_to_grouping_rel(), consider_groupingsets_paths(), and create_partial_grouping_paths().

3528 {
3529  Size hashentrysize;
3530 
3531  /* Estimate per-hash-entry space at tuple width... */
3532  hashentrysize = MAXALIGN(path->pathtarget->width) +
3534 
3535  /* plus space for pass-by-ref transition values... */
3536  hashentrysize += agg_costs->transitionSpace;
3537  /* plus the per-hash-entry overhead */
3538  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3539 
3540  /*
3541  * Note that this disregards the effect of fill-factor and growth policy
3542  * of the hash table. That's probably ok, given that the default
3543  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3544  * "double-in-size" growth policies here.
3545  */
3546  return hashentrysize * dNumGroups;
3547 }
PathTarget * pathtarget
Definition: pathnodes.h:1115
#define SizeofMinimalTupleHeader
Definition: htup_details.h:649
size_t Size
Definition: c.h:467
Size hash_agg_entry_size(int numAggs)
Definition: nodeAgg.c:1445
#define MAXALIGN(LEN)
Definition: c.h:692
Size transitionSpace
Definition: pathnodes.h:64

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset 
)

Definition at line 3044 of file selfuncs.c.

References add_unique_group_var(), Assert, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), for_each_cell, HeapTupleIsValid, i, IS_SIMPLE_REL, VariableStatData::isunique, lappend(), lfirst, linitial, list_length(), list_member_int(), list_second_cell(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), create_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().

3046 {
3047  List *varinfos = NIL;
3048  double srf_multiplier = 1.0;
3049  double numdistinct;
3050  ListCell *l;
3051  int i;
3052 
3053  /*
3054  * We don't ever want to return an estimate of zero groups, as that tends
3055  * to lead to division-by-zero and other unpleasantness. The input_rows
3056  * estimate is usually already at least 1, but clamp it just in case it
3057  * isn't.
3058  */
3059  input_rows = clamp_row_est(input_rows);
3060 
3061  /*
3062  * If no grouping columns, there's exactly one group. (This can't happen
3063  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3064  * corner cases with set operations.)
3065  */
3066  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3067  return 1.0;
3068 
3069  /*
3070  * Count groups derived from boolean grouping expressions. For other
3071  * expressions, find the unique Vars used, treating an expression as a Var
3072  * if we can find stats for it. For each one, record the statistical
3073  * estimate of number of distinct values (total in its table, without
3074  * regard for filtering).
3075  */
3076  numdistinct = 1.0;
3077 
3078  i = 0;
3079  foreach(l, groupExprs)
3080  {
3081  Node *groupexpr = (Node *) lfirst(l);
3082  double this_srf_multiplier;
3083  VariableStatData vardata;
3084  List *varshere;
3085  ListCell *l2;
3086 
3087  /* is expression in this grouping set? */
3088  if (pgset && !list_member_int(*pgset, i++))
3089  continue;
3090 
3091  /*
3092  * Set-returning functions in grouping columns are a bit problematic.
3093  * The code below will effectively ignore their SRF nature and come up
3094  * with a numdistinct estimate as though they were scalar functions.
3095  * We compensate by scaling up the end result by the largest SRF
3096  * rowcount estimate. (This will be an overestimate if the SRF
3097  * produces multiple copies of any output value, but it seems best to
3098  * assume the SRF's outputs are distinct. In any case, it's probably
3099  * pointless to worry too much about this without much better
3100  * estimates for SRF output rowcounts than we have today.)
3101  */
3102  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3103  if (srf_multiplier < this_srf_multiplier)
3104  srf_multiplier = this_srf_multiplier;
3105 
3106  /* Short-circuit for expressions returning boolean */
3107  if (exprType(groupexpr) == BOOLOID)
3108  {
3109  numdistinct *= 2.0;
3110  continue;
3111  }
3112 
3113  /*
3114  * If examine_variable is able to deduce anything about the GROUP BY
3115  * expression, treat it as a single variable even if it's really more
3116  * complicated.
3117  */
3118  examine_variable(root, groupexpr, 0, &vardata);
3119  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3120  {
3121  varinfos = add_unique_group_var(root, varinfos,
3122  groupexpr, &vardata);
3123  ReleaseVariableStats(vardata);
3124  continue;
3125  }
3126  ReleaseVariableStats(vardata);
3127 
3128  /*
3129  * Else pull out the component Vars. Handle PlaceHolderVars by
3130  * recursing into their arguments (effectively assuming that the
3131  * PlaceHolderVar doesn't change the number of groups, which boils
3132  * down to ignoring the possible addition of nulls to the result set).
3133  */
3134  varshere = pull_var_clause(groupexpr,
3138 
3139  /*
3140  * If we find any variable-free GROUP BY item, then either it is a
3141  * constant (and we can ignore it) or it contains a volatile function;
3142  * in the latter case we punt and assume that each input row will
3143  * yield a distinct group.
3144  */
3145  if (varshere == NIL)
3146  {
3147  if (contain_volatile_functions(groupexpr))
3148  return input_rows;
3149  continue;
3150  }
3151 
3152  /*
3153  * Else add variables to varinfos list
3154  */
3155  foreach(l2, varshere)
3156  {
3157  Node *var = (Node *) lfirst(l2);
3158 
3159  examine_variable(root, var, 0, &vardata);
3160  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3161  ReleaseVariableStats(vardata);
3162  }
3163  }
3164 
3165  /*
3166  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3167  * list.
3168  */
3169  if (varinfos == NIL)
3170  {
3171  /* Apply SRF multiplier as we would do in the long path */
3172  numdistinct *= srf_multiplier;
3173  /* Round off */
3174  numdistinct = ceil(numdistinct);
3175  /* Guard against out-of-range answers */
3176  if (numdistinct > input_rows)
3177  numdistinct = input_rows;
3178  if (numdistinct < 1.0)
3179  numdistinct = 1.0;
3180  return numdistinct;
3181  }
3182 
3183  /*
3184  * Group Vars by relation and estimate total numdistinct.
3185  *
3186  * For each iteration of the outer loop, we process the frontmost Var in
3187  * varinfos, plus all other Vars in the same relation. We remove these
3188  * Vars from the newvarinfos list for the next iteration. This is the
3189  * easiest way to group Vars of same rel together.
3190  */
3191  do
3192  {
3193  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3194  RelOptInfo *rel = varinfo1->rel;
3195  double reldistinct = 1;
3196  double relmaxndistinct = reldistinct;
3197  int relvarcount = 0;
3198  List *newvarinfos = NIL;
3199  List *relvarinfos = NIL;
3200 
3201  /*
3202  * Split the list of varinfos in two - one for the current rel, one
3203  * for remaining Vars on other rels.
3204  */
3205  relvarinfos = lappend(relvarinfos, varinfo1);
3206  for_each_cell(l, varinfos, list_second_cell(varinfos))
3207  {
3208  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3209 
3210  if (varinfo2->rel == varinfo1->rel)
3211  {
3212  /* varinfos on current rel */
3213  relvarinfos = lappend(relvarinfos, varinfo2);
3214  }
3215  else
3216  {
3217  /* not time to process varinfo2 yet */
3218  newvarinfos = lappend(newvarinfos, varinfo2);
3219  }
3220  }
3221 
3222  /*
3223  * Get the numdistinct estimate for the Vars of this rel. We
3224  * iteratively search for multivariate n-distinct with maximum number
3225  * of vars; assuming that each var group is independent of the others,
3226  * we multiply them together. Any remaining relvarinfos after no more
3227  * multivariate matches are found are assumed independent too, so
3228  * their individual ndistinct estimates are multiplied also.
3229  *
3230  * While iterating, count how many separate numdistinct values we
3231  * apply. We apply a fudge factor below, but only if we multiplied
3232  * more than one such values.
3233  */
3234  while (relvarinfos)
3235  {
3236  double mvndistinct;
3237 
3238  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3239  &mvndistinct))
3240  {
3241  reldistinct *= mvndistinct;
3242  if (relmaxndistinct < mvndistinct)
3243  relmaxndistinct = mvndistinct;
3244  relvarcount++;
3245  }
3246  else
3247  {
3248  foreach(l, relvarinfos)
3249  {
3250  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3251 
3252  reldistinct *= varinfo2->ndistinct;
3253  if (relmaxndistinct < varinfo2->ndistinct)
3254  relmaxndistinct = varinfo2->ndistinct;
3255  relvarcount++;
3256  }
3257 
3258  /* we're done with this relation */
3259  relvarinfos = NIL;
3260  }
3261  }
3262 
3263  /*
3264  * Sanity check --- don't divide by zero if empty relation.
3265  */
3266  Assert(IS_SIMPLE_REL(rel));
3267  if (rel->tuples > 0)
3268  {
3269  /*
3270  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3271  * fudge factor is because the Vars are probably correlated but we
3272  * don't know by how much. We should never clamp to less than the
3273  * largest ndistinct value for any of the Vars, though, since
3274  * there will surely be at least that many groups.
3275  */
3276  double clamp = rel->tuples;
3277 
3278  if (relvarcount > 1)
3279  {
3280  clamp *= 0.1;
3281  if (clamp < relmaxndistinct)
3282  {
3283  clamp = relmaxndistinct;
3284  /* for sanity in case some ndistinct is too large: */
3285  if (clamp > rel->tuples)
3286  clamp = rel->tuples;
3287  }
3288  }
3289  if (reldistinct > clamp)
3290  reldistinct = clamp;
3291 
3292  /*
3293  * Update the estimate based on the restriction selectivity,
3294  * guarding against division by zero when reldistinct is zero.
3295  * Also skip this if we know that we are returning all rows.
3296  */
3297  if (reldistinct > 0 && rel->rows < rel->tuples)
3298  {
3299  /*
3300  * Given a table containing N rows with n distinct values in a
3301  * uniform distribution, if we select p rows at random then
3302  * the expected number of distinct values selected is
3303  *
3304  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3305  *
3306  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3307  *
3308  * See "Approximating block accesses in database
3309  * organizations", S. B. Yao, Communications of the ACM,
3310  * Volume 20 Issue 4, April 1977 Pages 260-261.
3311  *
3312  * Alternatively, re-arranging the terms from the factorials,
3313  * this may be written as
3314  *
3315  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3316  *
3317  * This form of the formula is more efficient to compute in
3318  * the common case where p is larger than N/n. Additionally,
3319  * as pointed out by Dell'Era, if i << N for all terms in the
3320  * product, it can be approximated by
3321  *
3322  * n * (1 - ((N-p)/N)^(N/n))
3323  *
3324  * See "Expected distinct values when selecting from a bag
3325  * without replacement", Alberto Dell'Era,
3326  * http://www.adellera.it/investigations/distinct_balls/.
3327  *
3328  * The condition i << N is equivalent to n >> 1, so this is a
3329  * good approximation when the number of distinct values in
3330  * the table is large. It turns out that this formula also
3331  * works well even when n is small.
3332  */
3333  reldistinct *=
3334  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3335  rel->tuples / reldistinct));
3336  }
3337  reldistinct = clamp_row_est(reldistinct);
3338 
3339  /*
3340  * Update estimate of total distinct groups.
3341  */
3342  numdistinct *= reldistinct;
3343  }
3344 
3345  varinfos = newvarinfos;
3346  } while (varinfos != NIL);
3347 
3348  /* Now we can account for the effects of any SRFs */
3349  numdistinct *= srf_multiplier;
3350 
3351  /* Round off */
3352  numdistinct = ceil(numdistinct);
3353 
3354  /* Guard against out-of-range answers */
3355  if (numdistinct > input_rows)
3356  numdistinct = input_rows;
3357  if (numdistinct < 1.0)
3358  numdistinct = 1.0;
3359 
3360  return numdistinct;
3361 }
#define NIL
Definition: pg_list.h:65
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:569
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:176
HeapTuple statsTuple
Definition: selfuncs.h:71
#define for_each_cell(cell, lst, initcell)
Definition: pg_list.h:390
double tuples
Definition: pathnodes.h:681
Definition: nodes.h:525
List * pull_var_clause(Node *node, int flags)
Definition: var.c:535
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:724
double ndistinct
Definition: selfuncs.c:2927
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:614
#define linitial(l)
Definition: pg_list.h:195
bool list_member_int(const List *list, int datum)
Definition: list.c:655
static ListCell * list_second_cell(const List *l)
Definition: pg_list.h:139
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3568
List * lappend(List *list, void *datum)
Definition: list.c:322
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:2931
double rows
Definition: pathnodes.h:644
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:173
int i
double clamp_row_est(double nrows)
Definition: costsize.c:187
Definition: pg_list.h:50
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:171
RelOptInfo * rel
Definition: selfuncs.c:2926

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4418 of file selfuncs.c.

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, PlannerInfo::append_rel_array, arg, Assert, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, RangeTblEntry::checkAsUser, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext(), MemSet, IndexOptInfo::ncolumns, NIL, IndexOptInfo::nkeycolumns, ObjectIdGetDatum, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, IndexOptInfo::rel, ReleaseSysCache(), RelOptInfo::relid, RangeTblEntry::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::securityQuals, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, VariableStatData::vartype, Var::vartype, and Var::vartypmod.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

4420 {
4421  Node *basenode;
4422  Relids varnos;
4423  RelOptInfo *onerel;
4424 
4425  /* Make sure we don't return dangling pointers in vardata */
4426  MemSet(vardata, 0, sizeof(VariableStatData));
4427 
4428  /* Save the exposed type of the expression */
4429  vardata->vartype = exprType(node);
4430 
4431  /* Look inside any binary-compatible relabeling */
4432 
4433  if (IsA(node, RelabelType))
4434  basenode = (Node *) ((RelabelType *) node)->arg;
4435  else
4436  basenode = node;
4437 
4438  /* Fast path for a simple Var */
4439 
4440  if (IsA(basenode, Var) &&
4441  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4442  {
4443  Var *var = (Var *) basenode;
4444 
4445  /* Set up result fields other than the stats tuple */
4446  vardata->var = basenode; /* return Var without relabeling */
4447  vardata->rel = find_base_rel(root, var->varno);
4448  vardata->atttype = var->vartype;
4449  vardata->atttypmod = var->vartypmod;
4450  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4451 
4452  /* Try to locate some stats */
4453  examine_simple_variable(root, var, vardata);
4454 
4455  return;
4456  }
4457 
4458  /*
4459  * Okay, it's a more complicated expression. Determine variable
4460  * membership. Note that when varRelid isn't zero, only vars of that
4461  * relation are considered "real" vars.
4462  */
4463  varnos = pull_varnos(basenode);
4464 
4465  onerel = NULL;
4466 
4467  switch (bms_membership(varnos))
4468  {
4469  case BMS_EMPTY_SET:
4470  /* No Vars at all ... must be pseudo-constant clause */
4471  break;
4472  case BMS_SINGLETON:
4473  if (varRelid == 0 || bms_is_member(varRelid, varnos))
4474  {
4475  onerel = find_base_rel(root,
4476  (varRelid ? varRelid : bms_singleton_member(varnos)));
4477  vardata->rel = onerel;
4478  node = basenode; /* strip any relabeling */
4479  }
4480  /* else treat it as a constant */
4481  break;
4482  case BMS_MULTIPLE:
4483  if (varRelid == 0)
4484  {
4485  /* treat it as a variable of a join relation */
4486  vardata->rel = find_join_rel(root, varnos);
4487  node = basenode; /* strip any relabeling */
4488  }
4489  else if (bms_is_member(varRelid, varnos))
4490  {
4491  /* ignore the vars belonging to other relations */
4492  vardata->rel = find_base_rel(root, varRelid);
4493  node = basenode; /* strip any relabeling */
4494  /* note: no point in expressional-index search here */
4495  }
4496  /* else treat it as a constant */
4497  break;
4498  }
4499 
4500  bms_free(varnos);
4501 
4502  vardata->var = node;
4503  vardata->atttype = exprType(node);
4504  vardata->atttypmod = exprTypmod(node);
4505 
4506  if (onerel)
4507  {
4508  /*
4509  * We have an expression in vars of a single relation. Try to match
4510  * it to expressional index columns, in hopes of finding some
4511  * statistics.
4512  *
4513  * Note that we consider all index columns including INCLUDE columns,
4514  * since there could be stats for such columns. But the test for
4515  * uniqueness needs to be warier.
4516  *
4517  * XXX it's conceivable that there are multiple matches with different
4518  * index opfamilies; if so, we need to pick one that matches the
4519  * operator we are estimating for. FIXME later.
4520  */
4521  ListCell *ilist;
4522 
4523  foreach(ilist, onerel->indexlist)
4524  {
4525  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4526  ListCell *indexpr_item;
4527  int pos;
4528 
4529  indexpr_item = list_head(index->indexprs);
4530  if (indexpr_item == NULL)
4531  continue; /* no expressions here... */
4532 
4533  for (pos = 0; pos < index->ncolumns; pos++)
4534  {
4535  if (index->indexkeys[pos] == 0)
4536  {
4537  Node *indexkey;
4538 
4539  if (indexpr_item == NULL)
4540  elog(ERROR, "too few entries in indexprs list");
4541  indexkey = (Node *) lfirst(indexpr_item);
4542  if (indexkey && IsA(indexkey, RelabelType))
4543  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4544  if (equal(node, indexkey))
4545  {
4546  /*
4547  * Found a match ... is it a unique index? Tests here
4548  * should match has_unique_index().
4549  */
4550  if (index->unique &&
4551  index->nkeycolumns == 1 &&
4552  pos == 0 &&
4553  (index->indpred == NIL || index->predOK))
4554  vardata->isunique = true;
4555 
4556  /*
4557  * Has it got stats? We only consider stats for
4558  * non-partial indexes, since partial indexes probably
4559  * don't reflect whole-relation statistics; the above
4560  * check for uniqueness is the only info we take from
4561  * a partial index.
4562  *
4563  * An index stats hook, however, must make its own
4564  * decisions about what to do with partial indexes.
4565  */
4566  if (get_index_stats_hook &&
4567  (*get_index_stats_hook) (root, index->indexoid,
4568  pos + 1, vardata))
4569  {
4570  /*
4571  * The hook took control of acquiring a stats
4572  * tuple. If it did supply a tuple, it'd better
4573  * have supplied a freefunc.
4574  */
4575  if (HeapTupleIsValid(vardata->statsTuple) &&
4576  !vardata->freefunc)
4577  elog(ERROR, "no function provided to release variable stats with");
4578  }
4579  else if (index->indpred == NIL)
4580  {
4581  vardata->statsTuple =
4583  ObjectIdGetDatum(index->indexoid),
4584  Int16GetDatum(pos + 1),
4585  BoolGetDatum(false));
4586  vardata->freefunc = ReleaseSysCache;
4587 
4588  if (HeapTupleIsValid(vardata->statsTuple))
4589  {
4590  /* Get index's table for permission check */
4591  RangeTblEntry *rte;
4592  Oid userid;
4593 
4594  rte = planner_rt_fetch(index->rel->relid, root);
4595  Assert(rte->rtekind == RTE_RELATION);
4596 
4597  /*
4598  * Use checkAsUser if it's set, in case we're
4599  * accessing the table via a view.
4600  */
4601  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4602 
4603  /*
4604  * For simplicity, we insist on the whole
4605  * table being selectable, rather than trying
4606  * to identify which column(s) the index
4607  * depends on. Also require all rows to be
4608  * selectable --- there must be no
4609  * securityQuals from security barrier views
4610  * or RLS policies.
4611  */
4612  vardata->acl_ok =
4613  rte->securityQuals == NIL &&
4614  (pg_class_aclcheck(rte->relid, userid,
4615  ACL_SELECT) == ACLCHECK_OK);
4616 
4617  /*
4618  * If the user doesn't have permissions to
4619  * access an inheritance child relation, check
4620  * the permissions of the table actually
4621  * mentioned in the query, since most likely
4622  * the user does have that permission. Note
4623  * that whole-table select privilege on the
4624  * parent doesn't quite guarantee that the
4625  * user could read all columns of the child.
4626  * But in practice it's unlikely that any
4627  * interesting security violation could result
4628  * from allowing access to the expression
4629  * index's stats, so we allow it anyway. See
4630  * similar code in examine_simple_variable()
4631  * for additional comments.
4632  */
4633  if (!vardata->acl_ok &&
4634  root->append_rel_array != NULL)
4635  {
4636  AppendRelInfo *appinfo;
4637  Index varno = index->rel->relid;
4638 
4639  appinfo = root->append_rel_array[varno];
4640  while (appinfo &&
4641  planner_rt_fetch(appinfo->parent_relid,
4642  root)->rtekind == RTE_RELATION)
4643  {
4644  varno = appinfo->parent_relid;
4645  appinfo = root->append_rel_array[varno];
4646  }
4647  if (varno != index->rel->relid)
4648  {
4649  /* Repeat access check on this rel */
4650  rte = planner_rt_fetch(varno, root);
4651  Assert(rte->rtekind == RTE_RELATION);
4652 
4653  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4654 
4655  vardata->acl_ok =
4656  rte->securityQuals == NIL &&
4657  (pg_class_aclcheck(rte->relid,
4658  userid,
4659  ACL_SELECT) == ACLCHECK_OK);
4660  }
4661  }
4662  }
4663  else
4664  {
4665  /* suppress leakproofness checks later */
4666  vardata->acl_ok = true;
4667  }
4668  }
4669  if (vardata->statsTuple)
4670  break;
4671  }
4672  indexpr_item = lnext(index->indexprs, indexpr_item);
4673  }
4674  }
4675  if (vardata->statsTuple)
4676  break;
4677  }
4678  }
4679 }
#define NIL
Definition: pg_list.h:65
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:428
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3011
HeapTuple statsTuple
Definition: selfuncs.h:71
Oid GetUserId(void)
Definition: miscinit.c:380
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:275
List * securityQuals
Definition: parsenodes.h:1102
RelOptInfo * rel
Definition: selfuncs.h:70
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:525
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:73
#define MemSet(start, val, len)
Definition: c.h:962
AttrNumber varattno
Definition: primnodes.h:172
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:167
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:4691
int32 atttypmod
Definition: selfuncs.h:76
Definition: type.h:89
RelOptInfo * rel
Definition: pathnodes.h:791
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2024
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
Oid vartype
Definition: primnodes.h:174
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:1138
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:371
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
Relids pull_varnos(Node *node)
Definition: var.c:95
Index relid
Definition: pathnodes.h:669
Index varno
Definition: primnodes.h:170
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
#define ACL_SELECT
Definition: parsenodes.h:75
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:577
struct AppendRelInfo ** append_rel_array
Definition: pathnodes.h:217
unsigned int Index
Definition: c.h:476
List * indexlist
Definition: pathnodes.h:678
#define BoolGetDatum(X)
Definition: postgres.h:402
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
int nkeycolumns
Definition: pathnodes.h:800
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:148
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4629
RTEKind rtekind
Definition: parsenodes.h:974
#define elog(elevel,...)
Definition: elog.h:228
void * arg
int * indexkeys
Definition: pathnodes.h:801
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:363
Index parent_relid
Definition: pathnodes.h:2187
List * indpred
Definition: pathnodes.h:814
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
List * indexprs
Definition: pathnodes.h:813
int32 vartypmod
Definition: primnodes.h:175

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 5650 of file selfuncs.c.

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, IndexOptInfo::pages, IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, rint(), GenericCosts::spc_random_page_cost, RelOptInfo::tuples, and IndexOptInfo::tuples.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

5654 {
5655  IndexOptInfo *index = path->indexinfo;
5656  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
5657  List *indexOrderBys = path->indexorderbys;
5658  Cost indexStartupCost;
5659  Cost indexTotalCost;
5660  Selectivity indexSelectivity;
5661  double indexCorrelation;
5662  double numIndexPages;
5663  double numIndexTuples;
5664  double spc_random_page_cost;
5665  double num_sa_scans;
5666  double num_outer_scans;
5667  double num_scans;
5668  double qual_op_cost;
5669  double qual_arg_cost;
5670  List *selectivityQuals;
5671  ListCell *l;
5672 
5673  /*
5674  * If the index is partial, AND the index predicate with the explicitly
5675  * given indexquals to produce a more accurate idea of the index
5676  * selectivity.
5677  */
5678  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
5679 
5680  /*
5681  * Check for ScalarArrayOpExpr index quals, and estimate the number of
5682  * index scans that will be performed.
5683  */
5684  num_sa_scans = 1;
5685  foreach(l, indexQuals)
5686  {
5687  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
5688 
5689  if (IsA(rinfo->clause, ScalarArrayOpExpr))
5690  {
5691  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
5692  int alength = estimate_array_length(lsecond(saop->args));
5693 
5694  if (alength > 1)
5695  num_sa_scans *= alength;
5696  }
5697  }
5698 
5699  /* Estimate the fraction of main-table tuples that will be visited */
5700  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
5701  index->rel->relid,
5702  JOIN_INNER,
5703  NULL);
5704 
5705  /*
5706  * If caller didn't give us an estimate, estimate the number of index
5707  * tuples that will be visited. We do it in this rather peculiar-looking
5708  * way in order to get the right answer for partial indexes.
5709  */
5710  numIndexTuples = costs->numIndexTuples;
5711  if (numIndexTuples <= 0.0)
5712  {
5713  numIndexTuples = indexSelectivity * index->rel->tuples;
5714 
5715  /*
5716  * The above calculation counts all the tuples visited across all
5717  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
5718  * average per-indexscan number, so adjust. This is a handy place to
5719  * round to integer, too. (If caller supplied tuple estimate, it's
5720  * responsible for handling these considerations.)
5721  */
5722  numIndexTuples = rint(numIndexTuples / num_sa_scans);
5723  }
5724 
5725  /*
5726  * We can bound the number of tuples by the index size in any case. Also,
5727  * always estimate at least one tuple is touched, even when
5728  * indexSelectivity estimate is tiny.
5729  */
5730  if (numIndexTuples > index->tuples)
5731  numIndexTuples = index->tuples;
5732  if (numIndexTuples < 1.0)
5733  numIndexTuples = 1.0;
5734 
5735  /*
5736  * Estimate the number of index pages that will be retrieved.
5737  *
5738  * We use the simplistic method of taking a pro-rata fraction of the total
5739  * number of index pages. In effect, this counts only leaf pages and not
5740  * any overhead such as index metapage or upper tree levels.
5741  *
5742  * In practice access to upper index levels is often nearly free because
5743  * those tend to stay in cache under load; moreover, the cost involved is
5744  * highly dependent on index type. We therefore ignore such costs here
5745  * and leave it to the caller to add a suitable charge if needed.
5746  */
5747  if (index->pages > 1 && index->tuples > 1)
5748  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
5749  else
5750  numIndexPages = 1.0;
5751 
5752  /* fetch estimated page cost for tablespace containing index */
5754  &spc_random_page_cost,
5755  NULL);
5756 
5757  /*
5758  * Now compute the disk access costs.
5759  *
5760  * The above calculations are all per-index-scan. However, if we are in a
5761  * nestloop inner scan, we can expect the scan to be repeated (with
5762  * different search keys) for each row of the outer relation. Likewise,
5763  * ScalarArrayOpExpr quals result in multiple index scans. This creates
5764  * the potential for cache effects to reduce the number of disk page
5765  * fetches needed. We want to estimate the average per-scan I/O cost in
5766  * the presence of caching.
5767  *
5768  * We use the Mackert-Lohman formula (see costsize.c for details) to
5769  * estimate the total number of page fetches that occur. While this
5770  * wasn't what it was designed for, it seems a reasonable model anyway.
5771  * Note that we are counting pages not tuples anymore, so we take N = T =
5772  * index size, as if there were one "tuple" per page.
5773  */
5774  num_outer_scans = loop_count;
5775  num_scans = num_sa_scans * num_outer_scans;
5776 
5777  if (num_scans > 1)
5778  {
5779  double pages_fetched;
5780 
5781  /* total page fetches ignoring cache effects */
5782  pages_fetched = numIndexPages * num_scans;
5783 
5784  /* use Mackert and Lohman formula to adjust for cache effects */
5785  pages_fetched = index_pages_fetched(pages_fetched,
5786  index->pages,
5787  (double) index->pages,
5788  root);
5789 
5790  /*
5791  * Now compute the total disk access cost, and then report a pro-rated
5792  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
5793  * since that's internal to the indexscan.)
5794  */
5795  indexTotalCost = (pages_fetched * spc_random_page_cost)
5796  / num_outer_scans;
5797  }
5798  else
5799  {
5800  /*
5801  * For a single index scan, we just charge spc_random_page_cost per
5802  * page touched.
5803  */
5804  indexTotalCost = numIndexPages * spc_random_page_cost;
5805  }
5806 
5807  /*
5808  * CPU cost: any complex expressions in the indexquals will need to be
5809  * evaluated once at the start of the scan to reduce them to runtime keys
5810  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
5811  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
5812  * indexqual operator. Because we have numIndexTuples as a per-scan
5813  * number, we have to multiply by num_sa_scans to get the correct result
5814  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
5815  * ORDER BY expressions.
5816  *
5817  * Note: this neglects the possible costs of rechecking lossy operators.
5818  * Detecting that that might be needed seems more expensive than it's
5819  * worth, though, considering all the other inaccuracies here ...
5820  */
5821  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
5822  index_other_operands_eval_cost(root, indexOrderBys);
5823  qual_op_cost = cpu_operator_cost *
5824  (list_length(indexQuals) + list_length(indexOrderBys));
5825 
5826  indexStartupCost = qual_arg_cost;
5827  indexTotalCost += qual_arg_cost;
5828  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
5829 
5830  /*
5831  * Generic assumption about index correlation: there isn't any.
5832  */
5833  indexCorrelation = 0.0;
5834 
5835  /*
5836  * Return everything to caller.
5837  */
5838  costs->indexStartupCost = indexStartupCost;
5839  costs->indexTotalCost = indexTotalCost;
5840  costs->indexSelectivity = indexSelectivity;
5841  costs->indexCorrelation = indexCorrelation;
5842  costs->numIndexPages = numIndexPages;
5843  costs->numIndexTuples = numIndexTuples;
5844  costs->spc_random_page_cost = spc_random_page_cost;
5845  costs->num_sa_scans = num_sa_scans;
5846 }
Selectivity indexSelectivity
Definition: selfuncs.h:106
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
IndexOptInfo * indexinfo
Definition: pathnodes.h:1177
double tuples
Definition: pathnodes.h:681
Oid reltablespace
Definition: pathnodes.h:790
List * indexclauses
Definition: pathnodes.h:1178
double Selectivity
Definition: nodes.h:658
double tuples
Definition: pathnodes.h:795
#define lsecond(l)
Definition: pg_list.h:200
Definition: type.h:89
BlockNumber pages
Definition: pathnodes.h:794
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:1891
RelOptInfo * rel
Definition: pathnodes.h:791
double num_sa_scans
Definition: selfuncs.h:113
double cpu_operator_cost
Definition: costsize.c:114
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:5566
Cost indexTotalCost
Definition: selfuncs.h:105
double rint(double x)
Definition: rint.c:21
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
Index relid
Definition: pathnodes.h:669
Expr * clause
Definition: pathnodes.h:1943
double indexCorrelation
Definition: selfuncs.h:107
List * indexorderbys
Definition: pathnodes.h:1179
double spc_random_page_cost
Definition: selfuncs.h:112
double numIndexTuples
Definition: selfuncs.h:111
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:5596
#define lfirst(lc)
Definition: pg_list.h:190
static int list_length(const List *l)
Definition: pg_list.h:169
Cost indexStartupCost
Definition: selfuncs.h:104
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:69
Definition: pg_list.h:50
double cpu_index_tuple_cost
Definition: costsize.c:113
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:825
double Cost
Definition: nodes.h:659
double numIndexPages
Definition: selfuncs.h:110
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:5868

◆ get_join_variables()

void get_join_variables ( PlannerInfo root,
List args,
SpecialJoinInfo sjinfo,
VariableStatData vardata1,
VariableStatData vardata2,
bool join_is_reversed 
)

Definition at line 4356 of file selfuncs.c.

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().

4359 {
4360  Node *left,
4361  *right;
4362 
4363  if (list_length(args) != 2)
4364  elog(ERROR, "join operator should take two arguments");
4365 
4366  left = (Node *) linitial(args);
4367  right = (Node *) lsecond(args);
4368 
4369  examine_variable(root, left, 0, vardata1);
4370  examine_variable(root, right, 0, vardata2);
4371 
4372  if (vardata1->rel &&
4373  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4374  *join_is_reversed = true; /* var1 is on RHS */
4375  else if (vardata2->rel &&
4376  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4377  *join_is_reversed = true; /* var2 is on LHS */
4378  else
4379  *join_is_reversed = false;
4380 }
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: nodes.h:525
#define lsecond(l)
Definition: pg_list.h:200
Relids syn_lefthand
Definition: pathnodes.h:2135
Relids syn_righthand
Definition: pathnodes.h:2136
#define linitial(l)
Definition: pg_list.h:195
#define ERROR
Definition: elog.h:43
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
Relids relids
Definition: pathnodes.h:641
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
static int list_length(const List *l)
Definition: pg_list.h:169
#define elog(elevel,...)
Definition: elog.h:228

◆ get_quals_from_indexclauses()

List* get_quals_from_indexclauses ( List indexclauses)

Definition at line 5566 of file selfuncs.c.

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5567 {
5568  List *result = NIL;
5569  ListCell *lc;
5570 
5571  foreach(lc, indexclauses)
5572  {
5573  IndexClause *iclause = lfirst_node(IndexClause, lc);
5574  ListCell *lc2;
5575 
5576  foreach(lc2, iclause->indexquals)
5577  {
5578  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5579 
5580  result = lappend(result, rinfo);
5581  }
5582  }
5583  return result;
5584 }
#define NIL
Definition: pg_list.h:65
#define lfirst_node(type, lc)
Definition: pg_list.h:193
List * indexquals
Definition: pathnodes.h:1224
List * lappend(List *list, void *datum)
Definition: list.c:322
Definition: pg_list.h:50

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo root,
List args,
int  varRelid,
VariableStatData vardata,
Node **  other,
bool varonleft 
)

Definition at line 4296 of file selfuncs.c.

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), ltreeparentsel(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().

4299 {
4300  Node *left,
4301  *right;
4302  VariableStatData rdata;
4303 
4304  /* Fail if not a binary opclause (probably shouldn't happen) */
4305  if (list_length(args) != 2)
4306  return false;
4307 
4308  left = (Node *) linitial(args);
4309  right = (Node *) lsecond(args);
4310 
4311  /*
4312  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4313  * relations will be treated as pseudoconstants.
4314  */
4315  examine_variable(root, left, varRelid, vardata);
4316  examine_variable(root, right, varRelid, &rdata);
4317 
4318  /*
4319  * If one side is a variable and the other not, we win.
4320  */
4321  if (vardata->rel && rdata.rel == NULL)
4322  {
4323  *varonleft = true;
4324  *other = estimate_expression_value(root, rdata.var);
4325  /* Assume we need no ReleaseVariableStats(rdata) here */
4326  return true;
4327  }
4328 
4329  if (vardata->rel == NULL && rdata.rel)
4330  {
4331  *varonleft = false;
4332  *other = estimate_expression_value(root, vardata->var);
4333  /* Assume we need no ReleaseVariableStats(*vardata) here */
4334  *vardata = rdata;
4335  return true;
4336  }
4337 
4338  /* Oops, clause has wrong structure (probably var op var) */
4339  ReleaseVariableStats(*vardata);
4340  ReleaseVariableStats(rdata);
4341 
4342  return false;
4343 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2286
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: nodes.h:525
#define lsecond(l)
Definition: pg_list.h:200
#define linitial(l)
Definition: pg_list.h:195
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData vardata,
bool isdefault 
)

Definition at line 4967 of file selfuncs.c.

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().

4968 {
4969  double stadistinct;
4970  double stanullfrac = 0.0;
4971  double ntuples;
4972 
4973  *isdefault = false;
4974 
4975  /*
4976  * Determine the stadistinct value to use. There are cases where we can
4977  * get an estimate even without a pg_statistic entry, or can get a better
4978  * value than is in pg_statistic. Grab stanullfrac too if we can find it
4979  * (otherwise, assume no nulls, for lack of any better idea).
4980  */
4981  if (HeapTupleIsValid(vardata->statsTuple))
4982  {
4983  /* Use the pg_statistic entry */
4984  Form_pg_statistic stats;
4985 
4986  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4987  stadistinct = stats->stadistinct;
4988  stanullfrac = stats->stanullfrac;
4989  }
4990  else if (vardata->vartype == BOOLOID)
4991  {
4992  /*
4993  * Special-case boolean columns: presumably, two distinct values.
4994  *
4995  * Are there any other datatypes we should wire in special estimates
4996  * for?
4997  */
4998  stadistinct = 2.0;
4999  }
5000  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5001  {
5002  /*
5003  * If the Var represents a column of a VALUES RTE, assume it's unique.
5004  * This could of course be very wrong, but it should tend to be true
5005  * in well-written queries. We could consider examining the VALUES'
5006  * contents to get some real statistics; but that only works if the
5007  * entries are all constants, and it would be pretty expensive anyway.
5008  */
5009  stadistinct = -1.0; /* unique (and all non null) */
5010  }
5011  else
5012  {
5013  /*
5014  * We don't keep statistics for system columns, but in some cases we
5015  * can infer distinctness anyway.
5016  */
5017  if (vardata->var && IsA(vardata->var, Var))
5018  {
5019  switch (((Var *) vardata->var)->varattno)
5020  {
5022  stadistinct = -1.0; /* unique (and all non null) */
5023  break;
5025  stadistinct = 1.0; /* only 1 value */
5026  break;
5027  default:
5028  stadistinct = 0.0; /* means "unknown" */
5029  break;
5030  }
5031  }
5032  else
5033  stadistinct = 0.0; /* means "unknown" */
5034 
5035  /*
5036  * XXX consider using estimate_num_groups on expressions?
5037  */
5038  }
5039 
5040  /*
5041  * If there is a unique index or DISTINCT clause for the variable, assume
5042  * it is unique no matter what pg_statistic says; the statistics could be
5043  * out of date, or we might have found a partial unique index that proves
5044  * the var is unique for this query. However, we'd better still believe
5045  * the null-fraction statistic.
5046  */
5047  if (vardata->isunique)
5048  stadistinct = -1.0 * (1.0 - stanullfrac);
5049 
5050  /*
5051  * If we had an absolute estimate, use that.
5052  */
5053  if (stadistinct > 0.0)
5054  return clamp_row_est(stadistinct);
5055 
5056  /*
5057  * Otherwise we need to get the relation size; punt if not available.
5058  */
5059  if (vardata->rel == NULL)
5060  {
5061  *isdefault = true;
5062  return DEFAULT_NUM_DISTINCT;
5063  }
5064  ntuples = vardata->rel->tuples;
5065  if (ntuples <= 0.0)
5066  {
5067  *isdefault = true;
5068  return DEFAULT_NUM_DISTINCT;
5069  }
5070 
5071  /*
5072  * If we had a relative estimate, use that.
5073  */
5074  if (stadistinct < 0.0)
5075  return clamp_row_est(-stadistinct * ntuples);
5076 
5077  /*
5078  * With no data, estimate ndistinct = ntuples if the table is small, else
5079  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5080  * that the behavior isn't discontinuous.
5081  */
5082  if (ntuples < DEFAULT_NUM_DISTINCT)
5083  return clamp_row_est(ntuples);
5084 
5085  *isdefault = true;
5086  return DEFAULT_NUM_DISTINCT;
5087 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: primnodes.h:167
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define TableOidAttributeNumber
Definition: sysattr.h:26
RTEKind rtekind
Definition: pathnodes.h:671
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:46
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21
double clamp_row_est(double nrows)
Definition: costsize.c:187

◆ histogram_selectivity()

double histogram_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Datum  constval,
bool  varonleft,
int  min_hist_size,
int  n_skip,
int *  hist_size 
)

Definition at line 779 of file selfuncs.c.

References Assert, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::nvalues, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by ltreeparentsel(), and patternsel_common().

783 {
784  double result;
785  AttStatsSlot sslot;
786 
787  /* check sanity of parameters */
788  Assert(n_skip >= 0);
789  Assert(min_hist_size > 2 * n_skip);
790 
791  if (HeapTupleIsValid(vardata->statsTuple) &&
792  statistic_proc_security_check(vardata, opproc->fn_oid) &&
793  get_attstatsslot(&sslot, vardata->statsTuple,
794  STATISTIC_KIND_HISTOGRAM, InvalidOid,
796  {
797  *hist_size = sslot.nvalues;
798  if (sslot.nvalues >= min_hist_size)
799  {
800  int nmatch = 0;
801  int i;
802 
803  for (i = n_skip; i < sslot.nvalues - n_skip; i++)
804  {
805  if (varonleft ?
807  sslot.stacoll,
808  sslot.values[i],
809  constval)) :
811  sslot.stacoll,
812  constval,
813  sslot.values[i])))
814  nmatch++;
815  }
816  result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
817  }
818  else
819  result = -1;
820  free_attstatsslot(&sslot);
821  }
822  else
823  {
824  *hist_size = 0;
825  result = -1;
826  }
827 
828  return result;
829 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4938
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
#define Assert(condition)
Definition: c.h:739
Datum * values
Definition: lsyscache.h:50
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ index_other_operands_eval_cost()

Cost index_other_operands_eval_cost ( PlannerInfo root,
List indexquals 
)

Definition at line 5596 of file selfuncs.c.

References OpExpr::args, ScalarArrayOpExpr::args, cost_qual_eval_node(), elog, ERROR, IsA, lfirst, lsecond, nodeTag, QualCost::per_tuple, RowCompareExpr::rargs, and QualCost::startup.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5597 {
5598  Cost qual_arg_cost = 0;
5599  ListCell *lc;
5600 
5601  foreach(lc, indexquals)
5602  {
5603  Expr *clause = (Expr *) lfirst(lc);
5604  Node *other_operand;
5605  QualCost index_qual_cost;
5606 
5607  /*
5608  * Index quals will have RestrictInfos, indexorderbys won't. Look
5609  * through RestrictInfo if present.
5610  */
5611  if (IsA(clause, RestrictInfo))
5612  clause = ((RestrictInfo *) clause)->clause;
5613 
5614  if (IsA(clause, OpExpr))
5615  {
5616  OpExpr *op = (OpExpr *) clause;
5617 
5618  other_operand = (Node *) lsecond(op->args);
5619  }
5620  else if (IsA(clause, RowCompareExpr))
5621  {
5622  RowCompareExpr *rc = (RowCompareExpr *) clause;
5623 
5624  other_operand = (Node *) rc->rargs;
5625  }
5626  else if (IsA(clause, ScalarArrayOpExpr))
5627  {
5628  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5629 
5630  other_operand = (Node *) lsecond(saop->args);
5631  }
5632  else if (IsA(clause, NullTest))
5633  {
5634  other_operand = NULL;
5635  }
5636  else
5637  {
5638  elog(ERROR, "unsupported indexqual type: %d",
5639  (int) nodeTag(clause));
5640  other_operand = NULL; /* keep compiler quiet */
5641  }
5642 
5643  cost_qual_eval_node(&index_qual_cost, other_operand, root);
5644  qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5645  }
5646  return qual_arg_cost;
5647 }
void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
Definition: costsize.c:3845
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
Definition: nodes.h:525
#define lsecond(l)
Definition: pg_list.h:200
Cost startup
Definition: pathnodes.h:45
Cost per_tuple
Definition: pathnodes.h:46
#define ERROR
Definition: elog.h:43
#define lfirst(lc)
Definition: pg_list.h:190
#define nodeTag(nodeptr)
Definition: nodes.h:530
#define elog(elevel,...)
Definition: elog.h:228
List * args
Definition: primnodes.h:508
double Cost
Definition: nodes.h:659

◆ ineq_histogram_selectivity()

double ineq_histogram_selectivity ( PlannerInfo root,
VariableStatData vardata,
FmgrInfo opproc,
bool  isgt,
bool  iseq,
Datum  constval,
Oid  consttype 
)

Definition at line 847 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, convert_to_scalar(), DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_actual_variable_range(), get_attstatsslot(), get_variable_numdistinct(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::nnumbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, AttStatsSlot::staop, statistic_proc_security_check(), VariableStatData::statsTuple, val, AttStatsSlot::values, and VariableStatData::vartype.

Referenced by prefix_selectivity(), and scalarineqsel().

851 {
852  double hist_selec;
853  AttStatsSlot sslot;
854 
855  hist_selec = -1.0;
856 
857  /*
858  * Someday, ANALYZE might store more than one histogram per rel/att,
859  * corresponding to more than one possible sort ordering defined for the
860  * column type. However, to make that work we will need to figure out
861  * which staop to search for --- it's not necessarily the one we have at
862  * hand! (For example, we might have a '<=' operator rather than the '<'
863  * operator that will appear in staop.) For now, assume that whatever
864  * appears in pg_statistic is sorted the same way our operator sorts, or
865  * the reverse way if isgt is true.
866  */
867  if (HeapTupleIsValid(vardata->statsTuple) &&
868  statistic_proc_security_check(vardata, opproc->fn_oid) &&
869  get_attstatsslot(&sslot, vardata->statsTuple,
870  STATISTIC_KIND_HISTOGRAM, InvalidOid,
872  {
873  if (sslot.nvalues > 1)
874  {
875  /*
876  * Use binary search to find the desired location, namely the
877  * right end of the histogram bin containing the comparison value,
878  * which is the leftmost entry for which the comparison operator
879  * succeeds (if isgt) or fails (if !isgt). (If the given operator
880  * isn't actually sort-compatible with the histogram, you'll get
881  * garbage results ... but probably not any more garbage-y than
882  * you would have from the old linear search.)
883  *
884  * In this loop, we pay no attention to whether the operator iseq
885  * or not; that detail will be mopped up below. (We cannot tell,
886  * anyway, whether the operator thinks the values are equal.)
887  *
888  * If the binary search accesses the first or last histogram
889  * entry, we try to replace that endpoint with the true column min
890  * or max as found by get_actual_variable_range(). This
891  * ameliorates misestimates when the min or max is moving as a
892  * result of changes since the last ANALYZE. Note that this could
893  * result in effectively including MCVs into the histogram that
894  * weren't there before, but we don't try to correct for that.
895  */
896  double histfrac;
897  int lobound = 0; /* first possible slot to search */
898  int hibound = sslot.nvalues; /* last+1 slot to search */
899  bool have_end = false;
900 
901  /*
902  * If there are only two histogram entries, we'll want up-to-date
903  * values for both. (If there are more than two, we need at most
904  * one of them to be updated, so we deal with that within the
905  * loop.)
906  */
907  if (sslot.nvalues == 2)
908  have_end = get_actual_variable_range(root,
909  vardata,
910  sslot.staop,
911  &sslot.values[0],
912  &sslot.values[1]);
913 
914  while (lobound < hibound)
915  {
916  int probe = (lobound + hibound) / 2;
917  bool ltcmp;
918 
919  /*
920  * If we find ourselves about to compare to the first or last
921  * histogram entry, first try to replace it with the actual
922  * current min or max (unless we already did so above).
923  */
924  if (probe == 0 && sslot.nvalues > 2)
925  have_end = get_actual_variable_range(root,
926  vardata,
927  sslot.staop,
928  &sslot.values[0],
929  NULL);
930  else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
931  have_end = get_actual_variable_range(root,
932  vardata,
933  sslot.staop,
934  NULL,
935  &sslot.values[probe]);
936 
937  ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
938  sslot.stacoll,
939  sslot.values[probe],
940  constval));
941  if (isgt)
942  ltcmp = !ltcmp;
943  if (ltcmp)
944  lobound = probe + 1;
945  else
946  hibound = probe;
947  }
948 
949  if (lobound <= 0)
950  {
951  /*
952  * Constant is below lower histogram boundary. More
953  * precisely, we have found that no entry in the histogram
954  * satisfies the inequality clause (if !isgt) or they all do
955  * (if isgt). We estimate that that's true of the entire
956  * table, so set histfrac to 0.0 (which we'll flip to 1.0
957  * below, if isgt).
958  */
959  histfrac = 0.0;
960  }
961  else if (lobound >= sslot.nvalues)
962  {
963  /*
964  * Inverse case: constant is above upper histogram boundary.
965  */
966  histfrac = 1.0;
967  }
968  else
969  {
970  /* We have values[i-1] <= constant <= values[i]. */
971  int i = lobound;
972  double eq_selec = 0;
973  double val,
974  high,
975  low;
976  double binfrac;
977 
978  /*
979  * In the cases where we'll need it below, obtain an estimate
980  * of the selectivity of "x = constval". We use a calculation
981  * similar to what var_eq_const() does for a non-MCV constant,
982  * ie, estimate that all distinct non-MCV values occur equally
983  * often. But multiplication by "1.0 - sumcommon - nullfrac"
984  * will be done by our caller, so we shouldn't do that here.
985  * Therefore we can't try to clamp the estimate by reference
986  * to the least common MCV; the result would be too small.
987  *
988  * Note: since this is effectively assuming that constval
989  * isn't an MCV, it's logically dubious if constval in fact is
990  * one. But we have to apply *some* correction for equality,
991  * and anyway we cannot tell if constval is an MCV, since we
992  * don't have a suitable equality operator at hand.
993  */
994  if (i == 1 || isgt == iseq)
995  {
996  double otherdistinct;
997  bool isdefault;
998  AttStatsSlot mcvslot;
999 
1000  /* Get estimated number of distinct values */
1001  otherdistinct = get_variable_numdistinct(vardata,
1002  &isdefault);
1003 
1004  /* Subtract off the number of known MCVs */
1005  if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1006  STATISTIC_KIND_MCV, InvalidOid,
1008  {
1009  otherdistinct -= mcvslot.nnumbers;
1010  free_attstatsslot(&mcvslot);
1011  }
1012 
1013  /* If result doesn't seem sane, leave eq_selec at 0 */
1014  if (otherdistinct > 1)
1015  eq_selec = 1.0 / otherdistinct;
1016  }
1017 
1018  /*
1019  * Convert the constant and the two nearest bin boundary
1020  * values to a uniform comparison scale, and do a linear
1021  * interpolation within this bin.
1022  */
1023  if (convert_to_scalar(constval, consttype, sslot.stacoll,
1024  &val,
1025  sslot.values[i - 1], sslot.values[i],
1026  vardata->vartype,
1027  &low, &high))
1028  {
1029  if (high <= low)
1030  {
1031  /* cope if bin boundaries appear identical */
1032  binfrac = 0.5;
1033  }
1034  else if (val <= low)
1035  binfrac = 0.0;
1036  else if (val >= high)
1037  binfrac = 1.0;
1038  else
1039  {
1040  binfrac = (val - low) / (high - low);
1041 
1042  /*
1043  * Watch out for the possibility that we got a NaN or
1044  * Infinity from the division. This can happen
1045  * despite the previous checks, if for example "low"
1046  * is -Infinity.
1047  */
1048  if (isnan(binfrac) ||
1049  binfrac < 0.0 || binfrac > 1.0)
1050  binfrac = 0.5;
1051  }
1052  }
1053  else
1054  {
1055  /*
1056  * Ideally we'd produce an error here, on the grounds that
1057  * the given operator shouldn't have scalarXXsel
1058  * registered as its selectivity func unless we can deal
1059  * with its operand types. But currently, all manner of
1060  * stuff is invoking scalarXXsel, so give a default
1061  * estimate until that can be fixed.
1062  */
1063  binfrac = 0.5;
1064  }
1065 
1066  /*
1067  * Now, compute the overall selectivity across the values
1068  * represented by the histogram. We have i-1 full bins and
1069  * binfrac partial bin below the constant.
1070  */
1071  histfrac = (double) (i - 1) + binfrac;
1072  histfrac /= (double) (sslot.nvalues - 1);
1073 
1074  /*
1075  * At this point, histfrac is an estimate of the fraction of
1076  * the population represented by the histogram that satisfies
1077  * "x <= constval". Somewhat remarkably, this statement is
1078  * true regardless of which operator we were doing the probes
1079  * with, so long as convert_to_scalar() delivers reasonable
1080  * results. If the probe constant is equal to some histogram
1081  * entry, we would have considered the bin to the left of that
1082  * entry if probing with "<" or ">=", or the bin to the right
1083  * if probing with "<=" or ">"; but binfrac would have come
1084  * out as 1.0 in the first case and 0.0 in the second, leading
1085  * to the same histfrac in either case. For probe constants
1086  * between histogram entries, we find the same bin and get the
1087  * same estimate with any operator.
1088  *
1089  * The fact that the estimate corresponds to "x <= constval"
1090  * and not "x < constval" is because of the way that ANALYZE
1091  * constructs the histogram: each entry is, effectively, the
1092  * rightmost value in its sample bucket. So selectivity
1093  * values that are exact multiples of 1/(histogram_size-1)
1094  * should be understood as estimates including a histogram
1095  * entry plus everything to its left.
1096  *
1097  * However, that breaks down for the first histogram entry,
1098  * which necessarily is the leftmost value in its sample
1099  * bucket. That means the first histogram bin is slightly
1100  * narrower than the rest, by an amount equal to eq_selec.
1101  * Another way to say that is that we want "x <= leftmost" to
1102  * be estimated as eq_selec not zero. So, if we're dealing
1103  * with the first bin (i==1), rescale to make that true while
1104  * adjusting the rest of that bin linearly.
1105  */
1106  if (i == 1)
1107  histfrac += eq_selec * (1.0 - binfrac);
1108 
1109  /*
1110  * "x <= constval" is good if we want an estimate for "<=" or
1111  * ">", but if we are estimating for "<" or ">=", we now need
1112  * to decrease the estimate by eq_selec.
1113  */
1114  if (isgt == iseq)
1115  histfrac -= eq_selec;
1116  }
1117 
1118  /*
1119  * Now the estimate is finished for "<" and "<=" cases. If we are
1120  * estimating for ">" or ">=", flip it.
1121  */
1122  hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1123 
1124  /*
1125  * The histogram boundaries are only approximate to begin with,
1126  * and may well be out of date anyway. Therefore, don't believe
1127  * extremely small or large selectivity estimates --- unless we
1128  * got actual current endpoint values from the table, in which
1129  * case just do the usual sanity clamp. Somewhat arbitrarily, we
1130  * set the cutoff for other cases at a hundredth of the histogram
1131  * resolution.
1132  */
1133  if (have_end)
1134  CLAMP_PROBABILITY(hist_selec);
1135  else
1136  {
1137  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1138 
1139  if (hist_selec < cutoff)
1140  hist_selec = cutoff;
1141  else if (hist_selec > 1.0 - cutoff)
1142  hist_selec = 1.0 - cutoff;
1143  }
1144  }
1145 
1146  free_attstatsslot(&sslot);
1147  }
1148 
1149  return hist_selec;
1150 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4938
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
Definition: selfuncs.c:3724
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4967
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
Definition: selfuncs.c:5233
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
int i
long val
Definition: informix.c:664
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ mcv_selectivity()

double mcv_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Datum  constval,
bool  varonleft,
double *  sumcommonp 
)

Definition at line 707 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::numbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by ltreeparentsel(), networksel(), patternsel_common(), and scalarineqsel().

710 {
711  double mcv_selec,
712  sumcommon;
713  AttStatsSlot sslot;
714  int i;
715 
716  mcv_selec = 0.0;
717  sumcommon = 0.0;
718 
719  if (HeapTupleIsValid(vardata->statsTuple) &&
720  statistic_proc_security_check(vardata, opproc->fn_oid) &&
721  get_attstatsslot(&sslot, vardata->statsTuple,
722  STATISTIC_KIND_MCV, InvalidOid,
724  {
725  for (i = 0; i < sslot.nvalues; i++)
726  {
727  if (varonleft ?
729  sslot.stacoll,
730  sslot.values[i],
731  constval)) :
733  sslot.stacoll,
734  constval,
735  sslot.values[i])))
736  mcv_selec += sslot.numbers[i];
737  sumcommon += sslot.numbers[i];
738  }
739  free_attstatsslot(&sslot);
740  }
741 
742  *sumcommonp = sumcommon;
743  return mcv_selec;
744 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4938
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ mergejoinscansel()

void mergejoinscansel ( PlannerInfo root,
Node clause,
Oid  opfamily,
int  strategy,
bool  nulls_first,
Selectivity leftstart,
Selectivity leftend,
Selectivity rightstart,
Selectivity rightend 
)

Definition at line 2625 of file selfuncs.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause(), OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.

Referenced by cached_scansel().

2629 {
2630  Node *left,
2631  *right;
2632  VariableStatData leftvar,
2633  rightvar;
2634  int op_strategy;
2635  Oid op_lefttype;
2636  Oid op_righttype;
2637  Oid opno,
2638  lsortop,
2639  rsortop,
2640  lstatop,
2641  rstatop,
2642  ltop,
2643  leop,
2644  revltop,
2645  revleop;
2646  bool isgt;
2647  Datum leftmin,
2648  leftmax,
2649  rightmin,
2650  rightmax;
2651  double selec;
2652 
2653  /* Set default results if we can't figure anything out. */
2654  /* XXX should default "start" fraction be a bit more than 0? */
2655  *leftstart = *rightstart = 0.0;
2656  *leftend = *rightend = 1.0;
2657 
2658  /* Deconstruct the merge clause */
2659  if (!is_opclause(clause))
2660  return; /* shouldn't happen */
2661  opno = ((OpExpr *) clause)->opno;
2662  left = get_leftop((Expr *) clause);
2663  right = get_rightop((Expr *) clause);
2664  if (!right)
2665  return; /* shouldn't happen */
2666 
2667  /* Look for stats for the inputs */
2668  examine_variable(root, left, 0, &leftvar);
2669  examine_variable(root, right, 0, &rightvar);
2670 
2671  /* Extract the operator's declared left/right datatypes */
2672  get_op_opfamily_properties(opno, opfamily, false,
2673  &op_strategy,
2674  &op_lefttype,
2675  &op_righttype);
2676  Assert(op_strategy == BTEqualStrategyNumber);
2677 
2678  /*
2679  * Look up the various operators we need. If we don't find them all, it
2680  * probably means the opfamily is broken, but we just fail silently.
2681  *
2682  * Note: we expect that pg_statistic histograms will be sorted by the '<'
2683  * operator, regardless of which sort direction we are considering.
2684  */
2685  switch (strategy)
2686  {
2687  case BTLessStrategyNumber:
2688  isgt = false;
2689  if (op_lefttype == op_righttype)
2690  {
2691  /* easy case */
2692  ltop = get_opfamily_member(opfamily,
2693  op_lefttype, op_righttype,
2695  leop = get_opfamily_member(opfamily,
2696  op_lefttype, op_righttype,
2698  lsortop = ltop;
2699  rsortop = ltop;
2700  lstatop = lsortop;
2701  rstatop = rsortop;
2702  revltop = ltop;
2703  revleop = leop;
2704  }
2705  else
2706  {
2707  ltop = get_opfamily_member(opfamily,
2708  op_lefttype, op_righttype,
2710  leop = get_opfamily_member(opfamily,
2711  op_lefttype, op_righttype,
2713  lsortop = get_opfamily_member(opfamily,
2714  op_lefttype, op_lefttype,
2716  rsortop = get_opfamily_member(opfamily,
2717  op_righttype, op_righttype,
2719  lstatop = lsortop;
2720  rstatop = rsortop;
2721  revltop = get_opfamily_member(opfamily,
2722  op_righttype, op_lefttype,
2724  revleop = get_opfamily_member(opfamily,
2725  op_righttype, op_lefttype,
2727  }
2728  break;
2730  /* descending-order case */
2731  isgt = true;
2732  if (op_lefttype == op_righttype)
2733  {
2734  /* easy case */
2735  ltop = get_opfamily_member(opfamily,
2736  op_lefttype, op_righttype,
2738  leop = get_opfamily_member(opfamily,
2739  op_lefttype, op_righttype,
2741  lsortop = ltop;
2742  rsortop = ltop;
2743  lstatop = get_opfamily_member(opfamily,
2744  op_lefttype, op_lefttype,
2746  rstatop = lstatop;
2747  revltop = ltop;
2748  revleop = leop;
2749  }
2750  else
2751  {
2752  ltop = get_opfamily_member(opfamily,
2753  op_lefttype, op_righttype,
2755  leop = get_opfamily_member(opfamily,
2756  op_lefttype, op_righttype,
2758  lsortop = get_opfamily_member(opfamily,
2759  op_lefttype, op_lefttype,
2761  rsortop = get_opfamily_member(opfamily,
2762  op_righttype, op_righttype,
2764  lstatop = get_opfamily_member(opfamily,
2765  op_lefttype, op_lefttype,
2767  rstatop = get_opfamily_member(opfamily,
2768  op_righttype, op_righttype,
2770  revltop = get_opfamily_member(opfamily,
2771  op_righttype, op_lefttype,
2773  revleop = get_opfamily_member(opfamily,
2774  op_righttype, op_lefttype,
2776  }
2777  break;
2778  default:
2779  goto fail; /* shouldn't get here */
2780  }
2781 
2782  if (!OidIsValid(lsortop) ||
2783  !OidIsValid(rsortop) ||
2784  !OidIsValid(lstatop) ||
2785  !OidIsValid(rstatop) ||
2786  !OidIsValid(ltop) ||
2787  !OidIsValid(leop) ||
2788  !OidIsValid(revltop) ||
2789  !OidIsValid(revleop))
2790  goto fail; /* insufficient info in catalogs */
2791 
2792  /* Try to get ranges of both inputs */
2793  if (!isgt)
2794  {
2795  if (!get_variable_range(root, &leftvar, lstatop,
2796  &leftmin, &leftmax))
2797  goto fail; /* no range available from stats */
2798  if (!get_variable_range(root, &rightvar, rstatop,
2799  &rightmin, &rightmax))
2800  goto fail; /* no range available from stats */
2801  }
2802  else
2803  {
2804  /* need to swap the max and min */
2805  if (!get_variable_range(root, &leftvar, lstatop,
2806  &leftmax, &leftmin))
2807  goto fail; /* no range available from stats */
2808  if (!get_variable_range(root, &rightvar, rstatop,
2809  &rightmax, &rightmin))
2810  goto fail; /* no range available from stats */
2811  }
2812 
2813  /*
2814  * Now, the fraction of the left variable that will be scanned is the
2815  * fraction that's <= the right-side maximum value. But only believe
2816  * non-default estimates, else stick with our 1.0.
2817  */
2818  selec = scalarineqsel(root, leop, isgt, true, &leftvar,
2819  rightmax, op_righttype);
2820  if (selec != DEFAULT_INEQ_SEL)
2821  *leftend = selec;
2822 
2823  /* And similarly for the right variable. */
2824  selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
2825  leftmax, op_lefttype);
2826  if (selec != DEFAULT_INEQ_SEL)
2827  *rightend = selec;
2828 
2829  /*
2830  * Only one of the two "end" fractions can really be less than 1.0;
2831  * believe the smaller estimate and reset the other one to exactly 1.0. If
2832  * we get exactly equal estimates (as can easily happen with self-joins),
2833  * believe neither.
2834  */
2835  if (*leftend > *rightend)
2836  *leftend = 1.0;
2837  else if (*leftend < *rightend)
2838  *rightend = 1.0;
2839  else
2840  *leftend = *rightend = 1.0;
2841 
2842  /*
2843  * Also, the fraction of the left variable that will be scanned before the
2844  * first join pair is found is the fraction that's < the right-side
2845  * minimum value. But only believe non-default estimates, else stick with
2846  * our own default.
2847  */
2848  selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
2849  rightmin, op_righttype);
2850  if (selec != DEFAULT_INEQ_SEL)
2851  *leftstart = selec;
2852 
2853  /* And similarly for the right variable. */
2854  selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
2855  leftmin, op_lefttype);
2856  if (selec != DEFAULT_INEQ_SEL)
2857  *rightstart = selec;
2858 
2859  /*
2860  * Only one of the two "start" fractions can really be more than zero;
2861  * believe the larger estimate and reset the other one to exactly 0.0. If
2862  * we get exactly equal estimates (as can easily happen with self-joins),
2863  * believe neither.
2864  */
2865  if (*leftstart < *rightstart)
2866  *leftstart = 0.0;
2867  else if (*leftstart > *rightstart)
2868  *rightstart = 0.0;
2869  else
2870  *leftstart = *rightstart = 0.0;
2871 
2872  /*
2873  * If the sort order is nulls-first, we're going to have to skip over any
2874  * nulls too. These would not have been counted by scalarineqsel, and we
2875  * can safely add in this fraction regardless of whether we believe
2876  * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
2877  */
2878  if (nulls_first)
2879  {
2880  Form_pg_statistic stats;
2881 
2882  if (HeapTupleIsValid(leftvar.statsTuple))
2883  {
2884  stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
2885  *leftstart += stats->stanullfrac;
2886  CLAMP_PROBABILITY(*leftstart);
2887  *leftend += stats->stanullfrac;
2888  CLAMP_PROBABILITY(*leftend);
2889  }
2890  if (HeapTupleIsValid(rightvar.statsTuple))
2891  {
2892  stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
2893  *rightstart += stats->stanullfrac;
2894  CLAMP_PROBABILITY(*rightstart);
2895  *rightend += stats->stanullfrac;
2896  CLAMP_PROBABILITY(*rightend);
2897  }
2898  }
2899 
2900  /* Disbelieve start >= end, just in case that can happen */
2901  if (*leftstart >= *leftend)
2902  {
2903  *leftstart = 0.0;
2904  *leftend = 1.0;
2905  }
2906  if (*rightstart >= *rightend)
2907  {
2908  *rightstart = 0.0;
2909  *rightend = 1.0;
2910  }
2911 
2912 fail:
2913  ReleaseVariableStats(leftvar);
2914  ReleaseVariableStats(rightvar);
2915 }
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Definition: nodes.h:525
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:645
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:163
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:70
uintptr_t Datum
Definition: postgres.h:367
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:82
static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, VariableStatData *vardata, Datum constval, Oid consttype)
Definition: selfuncs.c:557
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
#define Assert(condition)
Definition: c.h:739
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:133
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
Definition: selfuncs.c:5099
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:63
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32

◆ nulltestsel()

Selectivity nulltestsel ( PlannerInfo root,
NullTestType  nulltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1454 of file selfuncs.c.

References CLAMP_PROBABILITY, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, IsA, ReleaseVariableStats, VariableStatData::statsTuple, and VariableStatData::var.

Referenced by clause_selectivity(), and clauselist_selectivity_simple().

1456 {
1457  VariableStatData vardata;
1458  double selec;
1459 
1460  examine_variable(root, arg, varRelid, &vardata);
1461 
1462  if (HeapTupleIsValid(vardata.statsTuple))
1463  {
1464  Form_pg_statistic stats;
1465  double freq_null;
1466 
1467  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1468  freq_null = stats->stanullfrac;
1469 
1470  switch (nulltesttype)
1471  {
1472  case IS_NULL:
1473 
1474  /*
1475  * Use freq_null directly.
1476  */
1477  selec = freq_null;
1478  break;
1479  case IS_NOT_NULL:
1480 
1481  /*
1482  * Select not unknown (not null) values. Calculate from
1483  * freq_null.
1484  */
1485  selec = 1.0 - freq_null;
1486  break;
1487  default:
1488  elog(ERROR, "unrecognized nulltesttype: %d",
1489  (int) nulltesttype);
1490  return (Selectivity) 0; /* keep compiler quiet */
1491  }
1492  }
1493  else if (vardata.var && IsA(vardata.var, Var) &&
1494  ((Var *) vardata.var)->varattno < 0)
1495  {
1496  /*
1497  * There are no stats for system columns, but we know they are never
1498  * NULL.
1499  */
1500  selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1501  }
1502  else
1503  {
1504  /*
1505  * No ANALYZE stats available, so make a guess
1506  */
1507  switch (nulltesttype)
1508  {
1509  case IS_NULL:
1510  selec = DEFAULT_UNK_SEL;
1511  break;
1512  case IS_NOT_NULL:
1513  selec = DEFAULT_NOT_UNK_SEL;
1514  break;
1515  default:
1516  elog(ERROR, "unrecognized nulltesttype: %d",
1517  (int) nulltesttype);
1518  return (Selectivity) 0; /* keep compiler quiet */
1519  }
1520  }
1521 
1522  ReleaseVariableStats(vardata);
1523 
1524  /* result should be in range, but make sure... */
1525  CLAMP_PROBABILITY(selec);
1526 
1527  return (Selectivity) selec;
1528 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
double Selectivity
Definition: nodes.h:658
Definition: primnodes.h:167
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:50
#define ERROR
Definition: elog.h:43
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:49
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define elog(elevel,...)
Definition: elog.h:228

◆ rowcomparesel()

Selectivity rowcomparesel ( PlannerInfo root,
RowCompareExpr clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1929 of file selfuncs.c.

References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.

Referenced by clause_selectivity().

1932 {
1933  Selectivity s1;
1934  Oid opno = linitial_oid(clause->opnos);
1935  Oid inputcollid = linitial_oid(clause->inputcollids);
1936  List *opargs;
1937  bool is_join_clause;
1938 
1939  /* Build equivalent arg list for single operator */
1940  opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
1941 
1942  /*
1943  * Decide if it's a join clause. This should match clausesel.c's
1944  * treat_as_join_clause(), except that we intentionally consider only the
1945  * leading columns and not the rest of the clause.
1946  */
1947  if (varRelid != 0)
1948  {
1949  /*
1950  * Caller is forcing restriction mode (eg, because we are examining an
1951  * inner indexscan qual).
1952  */
1953  is_join_clause = false;
1954  }
1955  else if (sjinfo == NULL)
1956  {
1957  /*
1958  * It must be a restriction clause, since it's being evaluated at a
1959  * scan node.
1960  */
1961  is_join_clause = false;
1962  }
1963  else
1964  {
1965  /*
1966  * Otherwise, it's a join if there's more than one relation used.
1967  */
1968  is_join_clause = (NumRelids((Node *) opargs) > 1);
1969  }
1970 
1971  if (is_join_clause)
1972  {
1973  /* Estimate selectivity for a join clause. */
1974  s1 = join_selectivity(root, opno,
1975  opargs,
1976  inputcollid,
1977  jointype,
1978  sjinfo);
1979  }
1980  else
1981  {
1982  /* Estimate selectivity for a restriction clause. */
1983  s1 = restriction_selectivity(root, opno,
1984  opargs,
1985  inputcollid,
1986  varRelid);
1987  }
1988 
1989  return s1;
1990 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1763
Definition: nodes.h:525
double Selectivity
Definition: nodes.h:658
unsigned int Oid
Definition: postgres_ext.h:31
#define linitial(l)
Definition: pg_list.h:195
char * s1
#define linitial_oid(l)
Definition: pg_list.h:197
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1802
List * inputcollids
Definition: primnodes.h:1059
Definition: pg_list.h:50
int NumRelids(Node *clause)
Definition: clauses.c:2131

◆ scalararraysel()

Selectivity scalararraysel ( PlannerInfo root,
ScalarArrayOpExpr clause,
bool  is_join_clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1572 of file selfuncs.c.

References generate_unaccent_rules::args, ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, estimate_expression_value(), exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.

Referenced by clause_selectivity().

1578 {
1579  Oid operator = clause->opno;
1580  bool useOr = clause->useOr;
1581  bool isEquality = false;
1582  bool isInequality = false;
1583  Node *leftop;
1584  Node *rightop;
1585  Oid nominal_element_type;
1586  Oid nominal_element_collation;
1587  TypeCacheEntry *typentry;
1588  RegProcedure oprsel;
1589  FmgrInfo oprselproc;
1590  Selectivity s1;
1591  Selectivity s1disjoint;
1592 
1593  /* First, deconstruct the expression */
1594  Assert(list_length(clause->args) == 2);
1595  leftop = (Node *) linitial(clause->args);
1596  rightop = (Node *) lsecond(clause->args);
1597 
1598  /* aggressively reduce both sides to constants */
1599  leftop = estimate_expression_value(root, leftop);
1600  rightop = estimate_expression_value(root, rightop);
1601 
1602  /* get nominal (after relabeling) element type of rightop */
1603  nominal_element_type = get_base_element_type(exprType(rightop));
1604  if (!OidIsValid(nominal_element_type))
1605  return (Selectivity) 0.5; /* probably shouldn't happen */
1606  /* get nominal collation, too, for generating constants */
1607  nominal_element_collation = exprCollation(rightop);
1608 
1609  /* look through any binary-compatible relabeling of rightop */
1610  rightop = strip_array_coercion(rightop);
1611 
1612  /*
1613  * Detect whether the operator is the default equality or inequality
1614  * operator of the array element type.
1615  */
1616  typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1617  if (OidIsValid(typentry->eq_opr))
1618  {
1619  if (operator == typentry->eq_opr)
1620  isEquality = true;
1621  else if (get_negator(operator) == typentry->eq_opr)
1622  isInequality = true;
1623  }
1624 
1625  /*
1626  * If it is equality or inequality, we might be able to estimate this as a
1627  * form of array containment; for instance "const = ANY(column)" can be
1628  * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1629  * that, and returns the selectivity estimate if successful, or -1 if not.
1630  */
1631  if ((isEquality || isInequality) && !is_join_clause)
1632  {
1633  s1 = scalararraysel_containment(root, leftop, rightop,
1634  nominal_element_type,
1635  isEquality, useOr, varRelid);
1636  if (s1 >= 0.0)
1637  return s1;
1638  }
1639 
1640  /*
1641  * Look up the underlying operator's selectivity estimator. Punt if it
1642  * hasn't got one.
1643  */
1644  if (is_join_clause)
1645  oprsel = get_oprjoin(operator);
1646  else
1647  oprsel = get_oprrest(operator);
1648  if (!oprsel)
1649  return (Selectivity) 0.5;
1650  fmgr_info(oprsel, &oprselproc);
1651 
1652  /*
1653  * In the array-containment check above, we must only believe that an
1654  * operator is equality or inequality if it is the default btree equality
1655  * operator (or its negator) for the element type, since those are the
1656  * operators that array containment will use. But in what follows, we can
1657  * be a little laxer, and also believe that any operators using eqsel() or
1658  * neqsel() as selectivity estimator act like equality or inequality.
1659  */
1660  if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1661  isEquality = true;
1662  else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1663  isInequality = true;
1664 
1665  /*
1666  * We consider three cases:
1667  *
1668  * 1. rightop is an Array constant: deconstruct the array, apply the
1669  * operator's selectivity function for each array element, and merge the
1670  * results in the same way that clausesel.c does for AND/OR combinations.
1671  *
1672  * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1673  * function for each element of the ARRAY[] construct, and merge.
1674  *
1675  * 3. otherwise, make a guess ...
1676  */
1677  if (rightop && IsA(rightop, Const))
1678  {
1679  Datum arraydatum = ((Const *) rightop)->constvalue;
1680  bool arrayisnull = ((Const *) rightop)->constisnull;
1681  ArrayType *arrayval;
1682  int16 elmlen;
1683  bool elmbyval;
1684  char elmalign;
1685  int num_elems;
1686  Datum *elem_values;
1687  bool *elem_nulls;
1688  int i;
1689 
1690  if (arrayisnull) /* qual can't succeed if null array */
1691  return (Selectivity) 0.0;
1692  arrayval = DatumGetArrayTypeP(arraydatum);
1694  &elmlen, &elmbyval, &elmalign);
1695  deconstruct_array(arrayval,
1696  ARR_ELEMTYPE(arrayval),
1697  elmlen, elmbyval, elmalign,
1698  &elem_values, &elem_nulls, &num_elems);
1699 
1700  /*
1701  * For generic operators, we assume the probability of success is
1702  * independent for each array element. But for "= ANY" or "<> ALL",
1703  * if the array elements are distinct (which'd typically be the case)
1704  * then the probabilities are disjoint, and we should just sum them.
1705  *
1706  * If we were being really tense we would try to confirm that the
1707  * elements are all distinct, but that would be expensive and it
1708  * doesn't seem to be worth the cycles; it would amount to penalizing
1709  * well-written queries in favor of poorly-written ones. However, we
1710  * do protect ourselves a little bit by checking whether the
1711  * disjointness assumption leads to an impossible (out of range)
1712  * probability; if so, we fall back to the normal calculation.
1713  */
1714  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1715 
1716  for (i = 0; i < num_elems; i++)
1717  {
1718  List *args;
1719  Selectivity s2;
1720 
1721  args = list_make2(leftop,
1722  makeConst(nominal_element_type,
1723  -1,
1724  nominal_element_collation,
1725  elmlen,
1726  elem_values[i],
1727  elem_nulls[i],
1728  elmbyval));
1729  if (is_join_clause)
1730  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1731  clause->inputcollid,
1732  PointerGetDatum(root),
1733  ObjectIdGetDatum(operator),
1734  PointerGetDatum(args),
1735  Int16GetDatum(jointype),
1736  PointerGetDatum(sjinfo)));
1737  else
1738  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1739  clause->inputcollid,
1740  PointerGetDatum(root),
1741  ObjectIdGetDatum(operator),
1742  PointerGetDatum(args),
1743  Int32GetDatum(varRelid)));
1744 
1745  if (useOr)
1746  {
1747  s1 = s1 + s2 - s1 * s2;
1748  if (isEquality)
1749  s1disjoint += s2;
1750  }
1751  else
1752  {
1753  s1 = s1 * s2;
1754  if (isInequality)
1755  s1disjoint += s2 - 1.0;
1756  }
1757  }
1758 
1759  /* accept disjoint-probability estimate if in range */
1760  if ((useOr ? isEquality : isInequality) &&
1761  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1762  s1 = s1disjoint;
1763  }
1764  else if (rightop && IsA(rightop, ArrayExpr) &&
1765  !((ArrayExpr *) rightop)->multidims)
1766  {
1767  ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
1768  int16 elmlen;
1769  bool elmbyval;
1770  ListCell *l;
1771 
1772  get_typlenbyval(arrayexpr->element_typeid,
1773  &elmlen, &elmbyval);
1774 
1775  /*
1776  * We use the assumption of disjoint probabilities here too, although
1777  * the odds of equal array elements are rather higher if the elements
1778  * are not all constants (which they won't be, else constant folding
1779  * would have reduced the ArrayExpr to a Const). In this path it's
1780  * critical to have the sanity check on the s1disjoint estimate.
1781  */
1782  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1783 
1784  foreach(l, arrayexpr->elements)
1785  {
1786  Node *elem = (Node *) lfirst(l);
1787  List *args;
1788  Selectivity s2;
1789 
1790  /*
1791  * Theoretically, if elem isn't of nominal_element_type we should
1792  * insert a RelabelType, but it seems unlikely that any operator
1793  * estimation function would really care ...
1794  */
1795  args = list_make2(leftop, elem);
1796  if (is_join_clause)
1797  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1798  clause->inputcollid,
1799  PointerGetDatum(root),
1800  ObjectIdGetDatum(operator),
1801  PointerGetDatum(args),
1802  Int16GetDatum(jointype),
1803  PointerGetDatum(sjinfo)));
1804  else
1805  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1806  clause->inputcollid,
1807  PointerGetDatum(root),
1808  ObjectIdGetDatum(operator),
1809  PointerGetDatum(args),
1810  Int32GetDatum(varRelid)));
1811 
1812  if (useOr)
1813  {
1814  s1 = s1 + s2 - s1 * s2;
1815  if (isEquality)
1816  s1disjoint += s2;
1817  }
1818  else
1819  {
1820  s1 = s1 * s2;
1821  if (isInequality)
1822  s1disjoint += s2 - 1.0;
1823  }
1824  }
1825 
1826  /* accept disjoint-probability estimate if in range */
1827  if ((useOr ? isEquality : isInequality) &&
1828  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1829  s1 = s1disjoint;
1830  }
1831  else
1832  {
1833  CaseTestExpr *dummyexpr;
1834  List *args;
1835  Selectivity s2;
1836  int i;
1837 
1838  /*
1839  * We need a dummy rightop to pass to the operator selectivity
1840  * routine. It can be pretty much anything that doesn't look like a
1841  * constant; CaseTestExpr is a convenient choice.
1842  */
1843  dummyexpr = makeNode(CaseTestExpr);
1844  dummyexpr->typeId = nominal_element_type;
1845  dummyexpr->typeMod = -1;
1846  dummyexpr->collation = clause->inputcollid;
1847  args = list_make2(leftop, dummyexpr);
1848  if (is_join_clause)
1849  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1850  clause->inputcollid,
1851  PointerGetDatum(root),
1852  ObjectIdGetDatum(operator),
1853  PointerGetDatum(args),
1854  Int16GetDatum(jointype),
1855  PointerGetDatum(sjinfo)));
1856  else
1857  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1858  clause->inputcollid,
1859  PointerGetDatum(root),
1860  ObjectIdGetDatum(operator),
1861  PointerGetDatum(args),
1862  Int32GetDatum(varRelid)));
1863  s1 = useOr ? 0.0 : 1.0;
1864 
1865  /*
1866  * Arbitrarily assume 10 elements in the eventual array value (see
1867  * also estimate_array_length). We don't risk an assumption of
1868  * disjoint probabilities here.
1869  */
1870  for (i = 0; i < 10; i++)
1871  {
1872  if (useOr)
1873  s1 = s1 + s2 - s1 * s2;
1874  else
1875  s1 = s1 * s2;
1876  }
1877  }
1878 
1879  /* result should be in range, but make sure... */
1880  CLAMP_PROBABILITY(s1);
1881 
1882  return s1;
1883 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
signed short int16
Definition: c.h:346
Definition: fmgr.h:56
RegProcedure get_oprjoin(Oid opno)
Definition: lsyscache.c:1383
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2286
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2049
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum FunctionCall5Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:1224
regproc RegProcedure
Definition: c.h:512
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:525
#define TYPECACHE_EQ_OPR
Definition: typcache.h:128
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1197
double Selectivity
Definition: nodes.h:658
unsigned int Oid
Definition: postgres_ext.h:31
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:297
#define OidIsValid(objectId)
Definition: c.h:645
#define lsecond(l)
Definition: pg_list.h:200
int32 typeMod
Definition: primnodes.h:959
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define linitial(l)
Definition: pg_list.h:195
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
char * s1
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:124
List * elements
Definition: primnodes.h:977
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1359
char * s2
#define DatumGetFloat8(X)
Definition: postgres.h:714
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define makeNode(_type_)
Definition: nodes.h:573
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:169
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:719
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2029
Oid element_typeid
Definition: primnodes.h:976
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3461
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2599
#define Int32GetDatum(X)
Definition: postgres.h:479
int i
Oid get_negator(Oid opno)
Definition: lsyscache.c:1335
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:280
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1539
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 82 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

86 {
87  Selectivity selec;
88  VariableStatData vardata;
89  Datum constval;
90  TypeCacheEntry *typentry;
91  FmgrInfo *cmpfunc;
92 
93  /*
94  * rightop must be a variable, else punt.
95  */
96  examine_variable(root, rightop, varRelid, &vardata);
97  if (!vardata.rel)
98  {
99  ReleaseVariableStats(vardata);
100  return -1.0;
101  }
102 
103  /*
104  * leftop must be a constant, else punt.
105  */
106  if (!IsA(leftop, Const))
107  {
108  ReleaseVariableStats(vardata);
109  return -1.0;
110  }
111  if (((Const *) leftop)->constisnull)
112  {
113  /* qual can't succeed if null on left */
114  ReleaseVariableStats(vardata);
115  return (Selectivity) 0.0;
116  }
117  constval = ((Const *) leftop)->constvalue;
118 
119  /* Get element type's default comparison function */
120  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
121  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
122  {
123  ReleaseVariableStats(vardata);
124  return -1.0;
125  }
126  cmpfunc = &typentry->cmp_proc_finfo;
127 
128  /*
129  * If the operator is <>, swap ANY/ALL, then invert the result later.
130  */
131  if (!isEquality)
132  useOr = !useOr;
133 
134  /* Get array element stats for var, if available */
135  if (HeapTupleIsValid(vardata.statsTuple) &&
136  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
137  {
138  Form_pg_statistic stats;
139  AttStatsSlot sslot;
140  AttStatsSlot hslot;
141 
142  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
143 
144  /* MCELEM will be an array of same type as element */
145  if (get_attstatsslot(&sslot, vardata.statsTuple,
146  STATISTIC_KIND_MCELEM, InvalidOid,
148  {
149  /* For ALL case, also get histogram of distinct-element counts */
150  if (useOr ||
151  !get_attstatsslot(&hslot, vardata.statsTuple,
152  STATISTIC_KIND_DECHIST, InvalidOid,
154  memset(&hslot, 0, sizeof(hslot));
155 
156  /*
157  * For = ANY, estimate as var @> ARRAY[const].
158  *
159  * For = ALL, estimate as var <@ ARRAY[const].
160  */
161  if (useOr)
163  sslot.nvalues,
164  sslot.numbers,
165  sslot.nnumbers,
166  &constval, 1,
167  OID_ARRAY_CONTAINS_OP,
168  typentry);
169  else
170  selec = mcelem_array_contained_selec(sslot.values,
171  sslot.nvalues,
172  sslot.numbers,
173  sslot.nnumbers,
174  &constval, 1,
175  hslot.numbers,
176  hslot.nnumbers,
177  OID_ARRAY_CONTAINED_OP,
178  typentry);
179 
180  free_attstatsslot(&hslot);
181  free_attstatsslot(&sslot);
182  }
183  else
184  {
185  /* No most-common-elements info, so do without */
186  if (useOr)
187  selec = mcelem_array_contain_overlap_selec(NULL, 0,
188  NULL, 0,
189  &constval, 1,
190  OID_ARRAY_CONTAINS_OP,
191  typentry);
192  else
193  selec = mcelem_array_contained_selec(NULL, 0,
194  NULL, 0,
195  &constval, 1,
196  NULL, 0,
197  OID_ARRAY_CONTAINED_OP,
198  typentry);
199  }
200 
201  /*
202  * MCE stats count only non-null rows, so adjust for null rows.
203  */
204  selec *= (1.0 - stats->stanullfrac);
205  }
206  else
207  {
208  /* No stats at all, so do without */
209  if (useOr)
210  selec = mcelem_array_contain_overlap_selec(NULL, 0,
211  NULL, 0,
212  &constval, 1,
213  OID_ARRAY_CONTAINS_OP,
214  typentry);
215  else
216  selec = mcelem_array_contained_selec(NULL, 0,
217  NULL, 0,
218  &constval, 1,
219  NULL, 0,
220  OID_ARRAY_CONTAINED_OP,
221  typentry);
222  /* we assume no nulls here, so no stanullfrac correction */
223  }
224 
225  ReleaseVariableStats(vardata);
226 
227  /*
228  * If the operator is <>, invert the results.
229  */
230  if (!isEquality)
231  selec = 1.0 - selec;
232 
233  CLAMP_PROBABILITY(selec);
234 
235  return selec;
236 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4938
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:645
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
FmgrInfo cmp_proc_finfo
Definition: typcache.h:73
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4418
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:134
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ statistic_proc_security_check()

bool statistic_proc_security_check ( VariableStatData vardata,
Oid  func_oid 
)

Definition at line 4938 of file selfuncs.c.

References VariableStatData::acl_ok, DEBUG2, ereport, errmsg_internal(), get_func_leakproof(), get_func_name(), and OidIsValid.

Referenced by calc_arraycontsel(), calc_hist_selectivity(), eqjoinsel(), get_variable_range(), histogram_selectivity(), ineq_histogram_selectivity(), mcv_selectivity(), scalararraysel_containment(), and var_eq_const().

4939 {
4940  if (vardata->acl_ok)
4941  return true;
4942 
4943  if (!OidIsValid(func_oid))
4944  return false;
4945 
4946  if (get_func_leakproof(func_oid))
4947  return true;
4948 
4949  ereport(DEBUG2,
4950  (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
4951  get_func_name(func_oid))));
4952  return false;
4953 }
bool get_func_leakproof(Oid funcid)
Definition: lsyscache.c:1639
#define OidIsValid(objectId)
Definition: c.h:645
char * get_func_name(Oid funcid)
Definition: lsyscache.c:1410
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel, rest)
Definition: elog.h:141
int errmsg_internal(const char *fmt,...)
Definition: elog.c:909

◆ var_eq_const()

double var_eq_const ( VariableStatData vardata,
Oid  oproid,
Datum  constval,
bool  constisnull,
bool  varonleft,
bool  negate 
)

Definition at line 289 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, DatumGetBool, fmgr_info(), free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, VariableStatData::rel, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, RelOptInfo::tuples, and AttStatsSlot::values.

Referenced by boolvarsel(), eqsel_internal(), patternsel_common(), and prefix_selectivity().

292 {
293  double selec;
294  double nullfrac = 0.0;
295  bool isdefault;
296  Oid opfuncoid;
297 
298  /*
299  * If the constant is NULL, assume operator is strict and return zero, ie,
300  * operator will never return TRUE. (It's zero even for a negator op.)
301  */
302  if (constisnull)
303  return 0.0;
304 
305  /*
306  * Grab the nullfrac for use below. Note we allow use of nullfrac
307  * regardless of security check.
308  */
309  if (HeapTupleIsValid(vardata->statsTuple))
310  {
311  Form_pg_statistic stats;
312 
313  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
314  nullfrac = stats->stanullfrac;
315  }
316 
317  /*
318  * If we matched the var to a unique index or DISTINCT clause, assume
319  * there is exactly one match regardless of anything else. (This is
320  * slightly bogus, since the index or clause's equality operator might be
321  * different from ours, but it's much more likely to be right than
322  * ignoring the information.)
323  */
324  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
325  {
326  selec = 1.0 / vardata->rel->tuples;
327  }
328  else if (HeapTupleIsValid(vardata->statsTuple) &&
330  (opfuncoid = get_opcode(operator))))
331  {
332  AttStatsSlot sslot;
333  bool match = false;
334  int i;
335 
336  /*
337  * Is the constant "=" to any of the column's most common values?
338  * (Although the given operator may not really be "=", we will assume
339  * that seeing whether it returns TRUE is an appropriate test. If you
340  * don't like this, maybe you shouldn't be using eqsel for your
341  * operator...)
342  */
343  if (get_attstatsslot(&sslot, vardata->statsTuple,
344  STATISTIC_KIND_MCV, InvalidOid,
346  {
347  FmgrInfo eqproc;
348 
349  fmgr_info(opfuncoid, &eqproc);
350 
351  for (i = 0; i < sslot.nvalues; i++)
352  {
353  /* be careful to apply operator right way 'round */
354  if (varonleft)
355  match = DatumGetBool(FunctionCall2Coll(&eqproc,
356  sslot.stacoll,
357  sslot.values[i],
358  constval));
359  else
360  match = DatumGetBool(FunctionCall2Coll(&eqproc,
361  sslot.stacoll,
362  constval,
363  sslot.values[i]));
364  if (match)
365  break;
366  }
367  }
368  else
369  {
370  /* no most-common-value info available */
371  i = 0; /* keep compiler quiet */
372  }
373 
374  if (match)
375  {
376  /*
377  * Constant is "=" to this common value. We know selectivity
378  * exactly (or as exactly as ANALYZE could calculate it, anyway).
379  */
380  selec = sslot.numbers[i];
381  }
382  else
383  {
384  /*
385  * Comparison is against a constant that is neither NULL nor any
386  * of the common values. Its selectivity cannot be more than
387  * this:
388  */
389  double sumcommon = 0.0;
390  double otherdistinct;
391 
392  for (i = 0; i < sslot.nnumbers; i++)
393  sumcommon += sslot.numbers[i];
394  selec = 1.0 - sumcommon - nullfrac;
395  CLAMP_PROBABILITY(selec);
396 
397  /*
398  * and in fact it's probably a good deal less. We approximate that
399  * all the not-common values share this remaining fraction
400  * equally, so we divide by the number of other distinct values.
401  */
402  otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
403  sslot.nnumbers;
404  if (otherdistinct > 1)
405  selec /= otherdistinct;
406 
407  /*
408  * Another cross-check: selectivity shouldn't be estimated as more
409  * than the least common "most common value".
410  */
411  if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
412  selec = sslot.numbers[sslot.nnumbers - 1];
413  }
414 
415  free_attstatsslot(&sslot);
416  }
417  else
418  {
419  /*
420  * No ANALYZE stats available, so make a guess using estimated number
421  * of distinct values and assuming they are equally common. (The guess
422  * is unlikely to be very good, but we do know a few special cases.)
423  */
424  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
425  }
426 
427  /* now adjust if we wanted <> rather than = */
428  if (negate)
429  selec = 1.0 - selec - nullfrac;
430 
431  /* result should be in range, but make sure... */
432  CLAMP_PROBABILITY(selec);
433 
434  return selec;
435 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4938
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4967
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:124
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1092
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ var_eq_non_const()

double var_eq_non_const ( VariableStatData vardata,
Oid  oproid,
Node other,
bool  varonleft,
bool  negate 
)

Definition at line 443 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, CLAMP_PROBABILITY, free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by eqsel_internal().

446 {
447  double selec;
448  double nullfrac = 0.0;
449  bool isdefault;
450 
451  /*
452  * Grab the nullfrac for use below.
453  */
454  if (HeapTupleIsValid(vardata->statsTuple))
455  {
456  Form_pg_statistic stats;
457 
458  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
459  nullfrac = stats->stanullfrac;
460  }
461 
462  /*
463  * If we matched the var to a unique index or DISTINCT clause, assume
464  * there is exactly one match regardless of anything else. (This is
465  * slightly bogus, since the index or clause's equality operator might be
466  * different from ours, but it's much more likely to be right than
467  * ignoring the information.)
468  */
469  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
470  {
471  selec = 1.0 / vardata->rel->tuples;
472  }
473  else if (HeapTupleIsValid(vardata->statsTuple))
474  {
475  double ndistinct;
476  AttStatsSlot sslot;
477 
478  /*
479  * Search is for a value that we do not know a priori, but we will
480  * assume it is not NULL. Estimate the selectivity as non-null
481  * fraction divided by number of distinct values, so that we get a
482  * result averaged over all possible values whether common or
483  * uncommon. (Essentially, we are assuming that the not-yet-known
484  * comparison value is equally likely to be any of the possible
485  * values, regardless of their frequency in the table. Is that a good
486  * idea?)
487  */
488  selec = 1.0 - nullfrac;
489  ndistinct = get_variable_numdistinct(vardata, &isdefault);
490  if (ndistinct > 1)
491  selec /= ndistinct;
492 
493  /*
494  * Cross-check: selectivity should never be estimated as more than the
495  * most common value's.
496  */
497  if (get_attstatsslot(&sslot, vardata->statsTuple,
498  STATISTIC_KIND_MCV, InvalidOid,
500  {
501  if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
502  selec = sslot.numbers[0];
503  free_attstatsslot(&sslot);
504  }
505  }
506  else
507  {
508  /*
509  * No ANALYZE stats available, so make a guess using estimated number
510  * of distinct values and assuming they are equally common. (The guess
511  * is unlikely to be very good, but we do know a few special cases.)
512  */
513  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
514  }
515 
516  /* now adjust if we wanted <> rather than = */
517  if (negate)
518  selec = 1.0 - selec - nullfrac;
519 
520  /* result should be in range, but make sure... */
521  CLAMP_PROBABILITY(selec);
522 
523  return selec;
524 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4967
float4 * numbers
Definition: lsyscache.h:53
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

Variable Documentation

◆ get_index_stats_hook

PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook

Definition at line 148 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_variable().

◆ get_relation_stats_hook

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook

Definition at line 147 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_simple_variable().