PostgreSQL Source Code  git master
selfuncs.h File Reference
#include "access/htup.h"
#include "fmgr.h"
#include "nodes/pathnodes.h"
Include dependency graph for selfuncs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  VariableStatData
 
struct  GenericCosts
 

Macros

#define DEFAULT_EQ_SEL   0.005
 
#define DEFAULT_INEQ_SEL   0.3333333333333333
 
#define DEFAULT_RANGE_INEQ_SEL   0.005
 
#define DEFAULT_MATCH_SEL   0.005
 
#define DEFAULT_NUM_DISTINCT   200
 
#define DEFAULT_UNK_SEL   0.005
 
#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)
 
#define CLAMP_PROBABILITY(p)
 
#define ReleaseVariableStats(vardata)
 

Typedefs

typedef struct VariableStatData VariableStatData
 
typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)
 
typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)
 

Functions

void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, bool iseq, Datum constval, Oid consttype)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Node *other, bool varonleft, bool negate)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
Listget_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
Listadd_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 

Variables

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook
 
PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook
 

Macro Definition Documentation

◆ CLAMP_PROBABILITY

◆ DEFAULT_EQ_SEL

#define DEFAULT_EQ_SEL   0.005

Definition at line 34 of file selfuncs.h.

Referenced by _int_matchsel(), eqsel_internal(), int_query_opr_selec(), and neqjoinsel().

◆ DEFAULT_INEQ_SEL

◆ DEFAULT_MATCH_SEL

#define DEFAULT_MATCH_SEL   0.005

◆ DEFAULT_NOT_UNK_SEL

#define DEFAULT_NOT_UNK_SEL   (1.0 - DEFAULT_UNK_SEL)

Definition at line 50 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ DEFAULT_NUM_DISTINCT

#define DEFAULT_NUM_DISTINCT   200

Definition at line 46 of file selfuncs.h.

Referenced by get_variable_numdistinct().

◆ DEFAULT_RANGE_INEQ_SEL

#define DEFAULT_RANGE_INEQ_SEL   0.005

Definition at line 40 of file selfuncs.h.

Referenced by clauselist_selectivity_simple(), and default_range_selectivity().

◆ DEFAULT_UNK_SEL

#define DEFAULT_UNK_SEL   0.005

Definition at line 49 of file selfuncs.h.

Referenced by booltestsel(), and nulltestsel().

◆ ReleaseVariableStats

Typedef Documentation

◆ get_index_stats_hook_type

typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Definition at line 122 of file selfuncs.h.

◆ get_relation_stats_hook_type

typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

Definition at line 117 of file selfuncs.h.

◆ VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

List* add_predicate_to_index_quals ( IndexOptInfo index,
List indexQuals 
)

Definition at line 5742 of file selfuncs.c.

References IndexOptInfo::indpred, lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().

5743 {
5744  List *predExtraQuals = NIL;
5745  ListCell *lc;
5746 
5747  if (index->indpred == NIL)
5748  return indexQuals;
5749 
5750  foreach(lc, index->indpred)
5751  {
5752  Node *predQual = (Node *) lfirst(lc);
5753  List *oneQual = list_make1(predQual);
5754 
5755  if (!predicate_implied_by(oneQual, indexQuals, false))
5756  predExtraQuals = list_concat(predExtraQuals, oneQual);
5757  }
5758  return list_concat(predExtraQuals, indexQuals);
5759 }
#define NIL
Definition: pg_list.h:65
Definition: nodes.h:525
List * list_concat(List *list1, const List *list2)
Definition: list.c:516
#define list_make1(x1)
Definition: pg_list.h:227
#define lfirst(lc)
Definition: pg_list.h:190
List * indpred
Definition: pathnodes.h:814
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:151
Definition: pg_list.h:50

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo root,
BoolTestType  booltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1295 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity().

1297 {
1298  VariableStatData vardata;
1299  double selec;
1300 
1301  examine_variable(root, arg, varRelid, &vardata);
1302 
1303  if (HeapTupleIsValid(vardata.statsTuple))
1304  {
1305  Form_pg_statistic stats;
1306  double freq_null;
1307  AttStatsSlot sslot;
1308 
1309  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1310  freq_null = stats->stanullfrac;
1311 
1312  if (get_attstatsslot(&sslot, vardata.statsTuple,
1313  STATISTIC_KIND_MCV, InvalidOid,
1315  && sslot.nnumbers > 0)
1316  {
1317  double freq_true;
1318  double freq_false;
1319 
1320  /*
1321  * Get first MCV frequency and derive frequency for true.
1322  */
1323  if (DatumGetBool(sslot.values[0]))
1324  freq_true = sslot.numbers[0];
1325  else
1326  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1327 
1328  /*
1329  * Next derive frequency for false. Then use these as appropriate
1330  * to derive frequency for each case.
1331  */
1332  freq_false = 1.0 - freq_true - freq_null;
1333 
1334  switch (booltesttype)
1335  {
1336  case IS_UNKNOWN:
1337  /* select only NULL values */
1338  selec = freq_null;
1339  break;
1340  case IS_NOT_UNKNOWN:
1341  /* select non-NULL values */
1342  selec = 1.0 - freq_null;
1343  break;
1344  case IS_TRUE:
1345  /* select only TRUE values */
1346  selec = freq_true;
1347  break;
1348  case IS_NOT_TRUE:
1349  /* select non-TRUE values */
1350  selec = 1.0 - freq_true;
1351  break;
1352  case IS_FALSE:
1353  /* select only FALSE values */
1354  selec = freq_false;
1355  break;
1356  case IS_NOT_FALSE:
1357  /* select non-FALSE values */
1358  selec = 1.0 - freq_false;
1359  break;
1360  default:
1361  elog(ERROR, "unrecognized booltesttype: %d",
1362  (int) booltesttype);
1363  selec = 0.0; /* Keep compiler quiet */
1364  break;
1365  }
1366 
1367  free_attstatsslot(&sslot);
1368  }
1369  else
1370  {
1371  /*
1372  * No most-common-value info available. Still have null fraction
1373  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1374  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1375  */
1376  switch (booltesttype)
1377  {
1378  case IS_UNKNOWN:
1379  /* select only NULL values */
1380  selec = freq_null;
1381  break;
1382  case IS_NOT_UNKNOWN:
1383  /* select non-NULL values */
1384  selec = 1.0 - freq_null;
1385  break;
1386  case IS_TRUE:
1387  case IS_FALSE:
1388  /* Assume we select half of the non-NULL values */
1389  selec = (1.0 - freq_null) / 2.0;
1390  break;
1391  case IS_NOT_TRUE:
1392  case IS_NOT_FALSE:
1393  /* Assume we select NULLs plus half of the non-NULLs */
1394  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1395  selec = (freq_null + 1.0) / 2.0;
1396  break;
1397  default:
1398  elog(ERROR, "unrecognized booltesttype: %d",
1399  (int) booltesttype);
1400  selec = 0.0; /* Keep compiler quiet */
1401  break;
1402  }
1403  }
1404  }
1405  else
1406  {
1407  /*
1408  * If we can't get variable statistics for the argument, perhaps
1409  * clause_selectivity can do something with it. We ignore the
1410  * possibility of a NULL value when using clause_selectivity, and just
1411  * assume the value is either TRUE or FALSE.
1412  */
1413  switch (booltesttype)
1414  {
1415  case IS_UNKNOWN:
1416  selec = DEFAULT_UNK_SEL;
1417  break;
1418  case IS_NOT_UNKNOWN:
1419  selec = DEFAULT_NOT_UNK_SEL;
1420  break;
1421  case IS_TRUE:
1422  case IS_NOT_FALSE:
1423  selec = (double) clause_selectivity(root, arg,
1424  varRelid,
1425  jointype, sjinfo);
1426  break;
1427  case IS_FALSE:
1428  case IS_NOT_TRUE:
1429  selec = 1.0 - (double) clause_selectivity(root, arg,
1430  varRelid,
1431  jointype, sjinfo);
1432  break;
1433  default:
1434  elog(ERROR, "unrecognized booltesttype: %d",
1435  (int) booltesttype);
1436  selec = 0.0; /* Keep compiler quiet */
1437  break;
1438  }
1439  }
1440 
1441  ReleaseVariableStats(vardata);
1442 
1443  /* result should be in range, but make sure... */
1444  CLAMP_PROBABILITY(selec);
1445 
1446  return (Selectivity) selec;
1447 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:50
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
#define ERROR
Definition: elog.h:43
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:601
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:49
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define elog(elevel,...)
Definition: elog.h:226
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo root,
Node arg,
int  varRelid 
)

Definition at line 1267 of file selfuncs.c.

References BoolGetDatum, examine_variable(), HeapTupleIsValid, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity().

1268 {
1269  VariableStatData vardata;
1270  double selec;
1271 
1272  examine_variable(root, arg, varRelid, &vardata);
1273  if (HeapTupleIsValid(vardata.statsTuple))
1274  {
1275  /*
1276  * A boolean variable V is equivalent to the clause V = 't', so we
1277  * compute the selectivity as if that is what we have.
1278  */
1279  selec = var_eq_const(&vardata, BooleanEqualOperator,
1280  BoolGetDatum(true), false, true, false);
1281  }
1282  else
1283  {
1284  /* Otherwise, the default estimate is 0.5 */
1285  selec = 0.5;
1286  }
1287  ReleaseVariableStats(vardata);
1288  return selec;
1289 }
HeapTuple statsTuple
Definition: selfuncs.h:71
double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:288
#define BoolGetDatum(X)
Definition: postgres.h:402
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81

◆ estimate_array_length()

int estimate_array_length ( Node arrayexpr)

Definition at line 1890 of file selfuncs.c.

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

1891 {
1892  /* look through any binary-compatible relabeling of arrayexpr */
1893  arrayexpr = strip_array_coercion(arrayexpr);
1894 
1895  if (arrayexpr && IsA(arrayexpr, Const))
1896  {
1897  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
1898  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
1899  ArrayType *arrayval;
1900 
1901  if (arrayisnull)
1902  return 0;
1903  arrayval = DatumGetArrayTypeP(arraydatum);
1904  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
1905  }
1906  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
1907  !((ArrayExpr *) arrayexpr)->multidims)
1908  {
1909  return list_length(((ArrayExpr *) arrayexpr)->elements);
1910  }
1911  else
1912  {
1913  /* default guess --- see also scalararraysel */
1914  return 10;
1915  }
1916 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ARR_DIMS(a)
Definition: array.h:282
uintptr_t Datum
Definition: postgres.h:367
static int list_length(const List *l)
Definition: pg_list.h:169
#define ARR_NDIM(a)
Definition: array.h:278
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1538
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3406 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

3409 {
3410  VariableStatData vardata;
3411  double estfract,
3412  ndistinct,
3413  stanullfrac,
3414  avgfreq;
3415  bool isdefault;
3416  AttStatsSlot sslot;
3417 
3418  examine_variable(root, hashkey, 0, &vardata);
3419 
3420  /* Look up the frequency of the most common value, if available */
3421  *mcv_freq = 0.0;
3422 
3423  if (HeapTupleIsValid(vardata.statsTuple))
3424  {
3425  if (get_attstatsslot(&sslot, vardata.statsTuple,
3426  STATISTIC_KIND_MCV, InvalidOid,
3428  {
3429  /*
3430  * The first MCV stat is for the most common value.
3431  */
3432  if (sslot.nnumbers > 0)
3433  *mcv_freq = sslot.numbers[0];
3434  free_attstatsslot(&sslot);
3435  }
3436  }
3437 
3438  /* Get number of distinct values */
3439  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3440 
3441  /*
3442  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3443  * mcv_freq is known to be even higher than that, use it instead.
3444  */
3445  if (isdefault)
3446  {
3447  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3448  ReleaseVariableStats(vardata);
3449  return;
3450  }
3451 
3452  /* Get fraction that are null */
3453  if (HeapTupleIsValid(vardata.statsTuple))
3454  {
3455  Form_pg_statistic stats;
3456 
3457  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3458  stanullfrac = stats->stanullfrac;
3459  }
3460  else
3461  stanullfrac = 0.0;
3462 
3463  /* Compute avg freq of all distinct data values in raw relation */
3464  avgfreq = (1.0 - stanullfrac) / ndistinct;
3465 
3466  /*
3467  * Adjust ndistinct to account for restriction clauses. Observe we are
3468  * assuming that the data distribution is affected uniformly by the
3469  * restriction clauses!
3470  *
3471  * XXX Possibly better way, but much more expensive: multiply by
3472  * selectivity of rel's restriction clauses that mention the target Var.
3473  */
3474  if (vardata.rel && vardata.rel->tuples > 0)
3475  {
3476  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3477  ndistinct = clamp_row_est(ndistinct);
3478  }
3479 
3480  /*
3481  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3482  * number of buckets is less than the expected number of distinct values;
3483  * otherwise it is 1/ndistinct.
3484  */
3485  if (ndistinct > nbuckets)
3486  estfract = 1.0 / nbuckets;
3487  else
3488  estfract = 1.0 / ndistinct;
3489 
3490  /*
3491  * Adjust estimated bucketsize upward to account for skewed distribution.
3492  */
3493  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3494  estfract *= *mcv_freq / avgfreq;
3495 
3496  /*
3497  * Clamp bucketsize to sane range (the above adjustment could easily
3498  * produce an out-of-range result). We set the lower bound a little above
3499  * zero, since zero isn't a very sane result.
3500  */
3501  if (estfract < 1.0e-6)
3502  estfract = 1.0e-6;
3503  else if (estfract > 1.0)
3504  estfract = 1.0;
3505 
3506  *bucketsize_frac = (Selectivity) estfract;
3507 
3508  ReleaseVariableStats(vardata);
3509 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4841
float4 * numbers
Definition: lsyscache.h:53
double rows
Definition: pathnodes.h:644
#define InvalidOid
Definition: postgres_ext.h:36
#define Max(x, y)
Definition: c.h:898
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
e
Definition: preproc-init.c:82
double clamp_row_est(double nrows)
Definition: costsize.c:187
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3525 of file selfuncs.c.

References hash_agg_entry_size(), MAXALIGN, AggClauseCosts::numAggs, Path::pathtarget, SizeofMinimalTupleHeader, AggClauseCosts::transitionSpace, and PathTarget::width.

Referenced by add_paths_to_grouping_rel(), consider_groupingsets_paths(), and create_partial_grouping_paths().

3527 {
3528  Size hashentrysize;
3529 
3530  /* Estimate per-hash-entry space at tuple width... */
3531  hashentrysize = MAXALIGN(path->pathtarget->width) +
3533 
3534  /* plus space for pass-by-ref transition values... */
3535  hashentrysize += agg_costs->transitionSpace;
3536  /* plus the per-hash-entry overhead */
3537  hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3538 
3539  /*
3540  * Note that this disregards the effect of fill-factor and growth policy
3541  * of the hash table. That's probably ok, given that the default
3542  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3543  * "double-in-size" growth policies here.
3544  */
3545  return hashentrysize * dNumGroups;
3546 }
PathTarget * pathtarget
Definition: pathnodes.h:1115
#define SizeofMinimalTupleHeader
Definition: htup_details.h:649
size_t Size
Definition: c.h:466
Size hash_agg_entry_size(int numAggs)
Definition: nodeAgg.c:1446
#define MAXALIGN(LEN)
Definition: c.h:685
Size transitionSpace
Definition: pathnodes.h:64

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset 
)

Definition at line 3043 of file selfuncs.c.

References add_unique_group_var(), Assert, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), for_each_cell, HeapTupleIsValid, i, IS_SIMPLE_REL, VariableStatData::isunique, lappend(), lfirst, linitial, list_length(), list_member_int(), list_second_cell(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), create_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().

3045 {
3046  List *varinfos = NIL;
3047  double srf_multiplier = 1.0;
3048  double numdistinct;
3049  ListCell *l;
3050  int i;
3051 
3052  /*
3053  * We don't ever want to return an estimate of zero groups, as that tends
3054  * to lead to division-by-zero and other unpleasantness. The input_rows
3055  * estimate is usually already at least 1, but clamp it just in case it
3056  * isn't.
3057  */
3058  input_rows = clamp_row_est(input_rows);
3059 
3060  /*
3061  * If no grouping columns, there's exactly one group. (This can't happen
3062  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3063  * corner cases with set operations.)
3064  */
3065  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3066  return 1.0;
3067 
3068  /*
3069  * Count groups derived from boolean grouping expressions. For other
3070  * expressions, find the unique Vars used, treating an expression as a Var
3071  * if we can find stats for it. For each one, record the statistical
3072  * estimate of number of distinct values (total in its table, without
3073  * regard for filtering).
3074  */
3075  numdistinct = 1.0;
3076 
3077  i = 0;
3078  foreach(l, groupExprs)
3079  {
3080  Node *groupexpr = (Node *) lfirst(l);
3081  double this_srf_multiplier;
3082  VariableStatData vardata;
3083  List *varshere;
3084  ListCell *l2;
3085 
3086  /* is expression in this grouping set? */
3087  if (pgset && !list_member_int(*pgset, i++))
3088  continue;
3089 
3090  /*
3091  * Set-returning functions in grouping columns are a bit problematic.
3092  * The code below will effectively ignore their SRF nature and come up
3093  * with a numdistinct estimate as though they were scalar functions.
3094  * We compensate by scaling up the end result by the largest SRF
3095  * rowcount estimate. (This will be an overestimate if the SRF
3096  * produces multiple copies of any output value, but it seems best to
3097  * assume the SRF's outputs are distinct. In any case, it's probably
3098  * pointless to worry too much about this without much better
3099  * estimates for SRF output rowcounts than we have today.)
3100  */
3101  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3102  if (srf_multiplier < this_srf_multiplier)
3103  srf_multiplier = this_srf_multiplier;
3104 
3105  /* Short-circuit for expressions returning boolean */
3106  if (exprType(groupexpr) == BOOLOID)
3107  {
3108  numdistinct *= 2.0;
3109  continue;
3110  }
3111 
3112  /*
3113  * If examine_variable is able to deduce anything about the GROUP BY
3114  * expression, treat it as a single variable even if it's really more
3115  * complicated.
3116  */
3117  examine_variable(root, groupexpr, 0, &vardata);
3118  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3119  {
3120  varinfos = add_unique_group_var(root, varinfos,
3121  groupexpr, &vardata);
3122  ReleaseVariableStats(vardata);
3123  continue;
3124  }
3125  ReleaseVariableStats(vardata);
3126 
3127  /*
3128  * Else pull out the component Vars. Handle PlaceHolderVars by
3129  * recursing into their arguments (effectively assuming that the
3130  * PlaceHolderVar doesn't change the number of groups, which boils
3131  * down to ignoring the possible addition of nulls to the result set).
3132  */
3133  varshere = pull_var_clause(groupexpr,
3137 
3138  /*
3139  * If we find any variable-free GROUP BY item, then either it is a
3140  * constant (and we can ignore it) or it contains a volatile function;
3141  * in the latter case we punt and assume that each input row will
3142  * yield a distinct group.
3143  */
3144  if (varshere == NIL)
3145  {
3146  if (contain_volatile_functions(groupexpr))
3147  return input_rows;
3148  continue;
3149  }
3150 
3151  /*
3152  * Else add variables to varinfos list
3153  */
3154  foreach(l2, varshere)
3155  {
3156  Node *var = (Node *) lfirst(l2);
3157 
3158  examine_variable(root, var, 0, &vardata);
3159  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3160  ReleaseVariableStats(vardata);
3161  }
3162  }
3163 
3164  /*
3165  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3166  * list.
3167  */
3168  if (varinfos == NIL)
3169  {
3170  /* Apply SRF multiplier as we would do in the long path */
3171  numdistinct *= srf_multiplier;
3172  /* Round off */
3173  numdistinct = ceil(numdistinct);
3174  /* Guard against out-of-range answers */
3175  if (numdistinct > input_rows)
3176  numdistinct = input_rows;
3177  if (numdistinct < 1.0)
3178  numdistinct = 1.0;
3179  return numdistinct;
3180  }
3181 
3182  /*
3183  * Group Vars by relation and estimate total numdistinct.
3184  *
3185  * For each iteration of the outer loop, we process the frontmost Var in
3186  * varinfos, plus all other Vars in the same relation. We remove these
3187  * Vars from the newvarinfos list for the next iteration. This is the
3188  * easiest way to group Vars of same rel together.
3189  */
3190  do
3191  {
3192  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3193  RelOptInfo *rel = varinfo1->rel;
3194  double reldistinct = 1;
3195  double relmaxndistinct = reldistinct;
3196  int relvarcount = 0;
3197  List *newvarinfos = NIL;
3198  List *relvarinfos = NIL;
3199 
3200  /*
3201  * Split the list of varinfos in two - one for the current rel, one
3202  * for remaining Vars on other rels.
3203  */
3204  relvarinfos = lappend(relvarinfos, varinfo1);
3205  for_each_cell(l, varinfos, list_second_cell(varinfos))
3206  {
3207  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3208 
3209  if (varinfo2->rel == varinfo1->rel)
3210  {
3211  /* varinfos on current rel */
3212  relvarinfos = lappend(relvarinfos, varinfo2);
3213  }
3214  else
3215  {
3216  /* not time to process varinfo2 yet */
3217  newvarinfos = lappend(newvarinfos, varinfo2);
3218  }
3219  }
3220 
3221  /*
3222  * Get the numdistinct estimate for the Vars of this rel. We
3223  * iteratively search for multivariate n-distinct with maximum number
3224  * of vars; assuming that each var group is independent of the others,
3225  * we multiply them together. Any remaining relvarinfos after no more
3226  * multivariate matches are found are assumed independent too, so
3227  * their individual ndistinct estimates are multiplied also.
3228  *
3229  * While iterating, count how many separate numdistinct values we
3230  * apply. We apply a fudge factor below, but only if we multiplied
3231  * more than one such values.
3232  */
3233  while (relvarinfos)
3234  {
3235  double mvndistinct;
3236 
3237  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3238  &mvndistinct))
3239  {
3240  reldistinct *= mvndistinct;
3241  if (relmaxndistinct < mvndistinct)
3242  relmaxndistinct = mvndistinct;
3243  relvarcount++;
3244  }
3245  else
3246  {
3247  foreach(l, relvarinfos)
3248  {
3249  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3250 
3251  reldistinct *= varinfo2->ndistinct;
3252  if (relmaxndistinct < varinfo2->ndistinct)
3253  relmaxndistinct = varinfo2->ndistinct;
3254  relvarcount++;
3255  }
3256 
3257  /* we're done with this relation */
3258  relvarinfos = NIL;
3259  }
3260  }
3261 
3262  /*
3263  * Sanity check --- don't divide by zero if empty relation.
3264  */
3265  Assert(IS_SIMPLE_REL(rel));
3266  if (rel->tuples > 0)
3267  {
3268  /*
3269  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3270  * fudge factor is because the Vars are probably correlated but we
3271  * don't know by how much. We should never clamp to less than the
3272  * largest ndistinct value for any of the Vars, though, since
3273  * there will surely be at least that many groups.
3274  */
3275  double clamp = rel->tuples;
3276 
3277  if (relvarcount > 1)
3278  {
3279  clamp *= 0.1;
3280  if (clamp < relmaxndistinct)
3281  {
3282  clamp = relmaxndistinct;
3283  /* for sanity in case some ndistinct is too large: */
3284  if (clamp > rel->tuples)
3285  clamp = rel->tuples;
3286  }
3287  }
3288  if (reldistinct > clamp)
3289  reldistinct = clamp;
3290 
3291  /*
3292  * Update the estimate based on the restriction selectivity,
3293  * guarding against division by zero when reldistinct is zero.
3294  * Also skip this if we know that we are returning all rows.
3295  */
3296  if (reldistinct > 0 && rel->rows < rel->tuples)
3297  {
3298  /*
3299  * Given a table containing N rows with n distinct values in a
3300  * uniform distribution, if we select p rows at random then
3301  * the expected number of distinct values selected is
3302  *
3303  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3304  *
3305  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3306  *
3307  * See "Approximating block accesses in database
3308  * organizations", S. B. Yao, Communications of the ACM,
3309  * Volume 20 Issue 4, April 1977 Pages 260-261.
3310  *
3311  * Alternatively, re-arranging the terms from the factorials,
3312  * this may be written as
3313  *
3314  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3315  *
3316  * This form of the formula is more efficient to compute in
3317  * the common case where p is larger than N/n. Additionally,
3318  * as pointed out by Dell'Era, if i << N for all terms in the
3319  * product, it can be approximated by
3320  *
3321  * n * (1 - ((N-p)/N)^(N/n))
3322  *
3323  * See "Expected distinct values when selecting from a bag
3324  * without replacement", Alberto Dell'Era,
3325  * http://www.adellera.it/investigations/distinct_balls/.
3326  *
3327  * The condition i << N is equivalent to n >> 1, so this is a
3328  * good approximation when the number of distinct values in
3329  * the table is large. It turns out that this formula also
3330  * works well even when n is small.
3331  */
3332  reldistinct *=
3333  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3334  rel->tuples / reldistinct));
3335  }
3336  reldistinct = clamp_row_est(reldistinct);
3337 
3338  /*
3339  * Update estimate of total distinct groups.
3340  */
3341  numdistinct *= reldistinct;
3342  }
3343 
3344  varinfos = newvarinfos;
3345  } while (varinfos != NIL);
3346 
3347  /* Now we can account for the effects of any SRFs */
3348  numdistinct *= srf_multiplier;
3349 
3350  /* Round off */
3351  numdistinct = ceil(numdistinct);
3352 
3353  /* Guard against out-of-range answers */
3354  if (numdistinct > input_rows)
3355  numdistinct = input_rows;
3356  if (numdistinct < 1.0)
3357  numdistinct = 1.0;
3358 
3359  return numdistinct;
3360 }
#define NIL
Definition: pg_list.h:65
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:569
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:176
HeapTuple statsTuple
Definition: selfuncs.h:71
#define for_each_cell(cell, lst, initcell)
Definition: pg_list.h:390
double tuples
Definition: pathnodes.h:681
Definition: nodes.h:525
List * pull_var_clause(Node *node, int flags)
Definition: var.c:535
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:724
double ndistinct
Definition: selfuncs.c:2926
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:614
#define linitial(l)
Definition: pg_list.h:195
bool list_member_int(const List *list, int datum)
Definition: list.c:655
static ListCell * list_second_cell(const List *l)
Definition: pg_list.h:139
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3567
List * lappend(List *list, void *datum)
Definition: list.c:322
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:2930
double rows
Definition: pathnodes.h:644
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
#define Assert(condition)
Definition: c.h:732
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:173
int i
double clamp_row_est(double nrows)
Definition: costsize.c:187
Definition: pg_list.h:50
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:171
RelOptInfo * rel
Definition: selfuncs.c:2925

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4408 of file selfuncs.c.

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, arg, Assert, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, RangeTblEntry::checkAsUser, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext(), MemSet, IndexOptInfo::ncolumns, NIL, IndexOptInfo::nkeycolumns, ObjectIdGetDatum, pg_class_aclcheck(), planner_rt_fetch, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, IndexOptInfo::rel, ReleaseSysCache(), RelOptInfo::relid, RangeTblEntry::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::securityQuals, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, VariableStatData::vartype, Var::vartype, and Var::vartypmod.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

4410 {
4411  Node *basenode;
4412  Relids varnos;
4413  RelOptInfo *onerel;
4414 
4415  /* Make sure we don't return dangling pointers in vardata */
4416  MemSet(vardata, 0, sizeof(VariableStatData));
4417 
4418  /* Save the exposed type of the expression */
4419  vardata->vartype = exprType(node);
4420 
4421  /* Look inside any binary-compatible relabeling */
4422 
4423  if (IsA(node, RelabelType))
4424  basenode = (Node *) ((RelabelType *) node)->arg;
4425  else
4426  basenode = node;
4427 
4428  /* Fast path for a simple Var */
4429 
4430  if (IsA(basenode, Var) &&
4431  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4432  {
4433  Var *var = (Var *) basenode;
4434 
4435  /* Set up result fields other than the stats tuple */
4436  vardata->var = basenode; /* return Var without relabeling */
4437  vardata->rel = find_base_rel(root, var->varno);
4438  vardata->atttype = var->vartype;
4439  vardata->atttypmod = var->vartypmod;
4440  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4441 
4442  /* Try to locate some stats */
4443  examine_simple_variable(root, var, vardata);
4444 
4445  return;
4446  }
4447 
4448  /*
4449  * Okay, it's a more complicated expression. Determine variable
4450  * membership. Note that when varRelid isn't zero, only vars of that
4451  * relation are considered "real" vars.
4452  */
4453  varnos = pull_varnos(basenode);
4454 
4455  onerel = NULL;
4456 
4457  switch (bms_membership(varnos))
4458  {
4459  case BMS_EMPTY_SET:
4460  /* No Vars at all ... must be pseudo-constant clause */
4461  break;
4462  case BMS_SINGLETON:
4463  if (varRelid == 0 || bms_is_member(varRelid, varnos))
4464  {
4465  onerel = find_base_rel(root,
4466  (varRelid ? varRelid : bms_singleton_member(varnos)));
4467  vardata->rel = onerel;
4468  node = basenode; /* strip any relabeling */
4469  }
4470  /* else treat it as a constant */
4471  break;
4472  case BMS_MULTIPLE:
4473  if (varRelid == 0)
4474  {
4475  /* treat it as a variable of a join relation */
4476  vardata->rel = find_join_rel(root, varnos);
4477  node = basenode; /* strip any relabeling */
4478  }
4479  else if (bms_is_member(varRelid, varnos))
4480  {
4481  /* ignore the vars belonging to other relations */
4482  vardata->rel = find_base_rel(root, varRelid);
4483  node = basenode; /* strip any relabeling */
4484  /* note: no point in expressional-index search here */
4485  }
4486  /* else treat it as a constant */
4487  break;
4488  }
4489 
4490  bms_free(varnos);
4491 
4492  vardata->var = node;
4493  vardata->atttype = exprType(node);
4494  vardata->atttypmod = exprTypmod(node);
4495 
4496  if (onerel)
4497  {
4498  /*
4499  * We have an expression in vars of a single relation. Try to match
4500  * it to expressional index columns, in hopes of finding some
4501  * statistics.
4502  *
4503  * Note that we consider all index columns including INCLUDE columns,
4504  * since there could be stats for such columns. But the test for
4505  * uniqueness needs to be warier.
4506  *
4507  * XXX it's conceivable that there are multiple matches with different
4508  * index opfamilies; if so, we need to pick one that matches the
4509  * operator we are estimating for. FIXME later.
4510  */
4511  ListCell *ilist;
4512 
4513  foreach(ilist, onerel->indexlist)
4514  {
4515  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4516  ListCell *indexpr_item;
4517  int pos;
4518 
4519  indexpr_item = list_head(index->indexprs);
4520  if (indexpr_item == NULL)
4521  continue; /* no expressions here... */
4522 
4523  for (pos = 0; pos < index->ncolumns; pos++)
4524  {
4525  if (index->indexkeys[pos] == 0)
4526  {
4527  Node *indexkey;
4528 
4529  if (indexpr_item == NULL)
4530  elog(ERROR, "too few entries in indexprs list");
4531  indexkey = (Node *) lfirst(indexpr_item);
4532  if (indexkey && IsA(indexkey, RelabelType))
4533  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4534  if (equal(node, indexkey))
4535  {
4536  /*
4537  * Found a match ... is it a unique index? Tests here
4538  * should match has_unique_index().
4539  */
4540  if (index->unique &&
4541  index->nkeycolumns == 1 &&
4542  pos == 0 &&
4543  (index->indpred == NIL || index->predOK))
4544  vardata->isunique = true;
4545 
4546  /*
4547  * Has it got stats? We only consider stats for
4548  * non-partial indexes, since partial indexes probably
4549  * don't reflect whole-relation statistics; the above
4550  * check for uniqueness is the only info we take from
4551  * a partial index.
4552  *
4553  * An index stats hook, however, must make its own
4554  * decisions about what to do with partial indexes.
4555  */
4556  if (get_index_stats_hook &&
4557  (*get_index_stats_hook) (root, index->indexoid,
4558  pos + 1, vardata))
4559  {
4560  /*
4561  * The hook took control of acquiring a stats
4562  * tuple. If it did supply a tuple, it'd better
4563  * have supplied a freefunc.
4564  */
4565  if (HeapTupleIsValid(vardata->statsTuple) &&
4566  !vardata->freefunc)
4567  elog(ERROR, "no function provided to release variable stats with");
4568  }
4569  else if (index->indpred == NIL)
4570  {
4571  vardata->statsTuple =
4573  ObjectIdGetDatum(index->indexoid),
4574  Int16GetDatum(pos + 1),
4575  BoolGetDatum(false));
4576  vardata->freefunc = ReleaseSysCache;
4577 
4578  if (HeapTupleIsValid(vardata->statsTuple))
4579  {
4580  /* Get index's table for permission check */
4581  RangeTblEntry *rte;
4582  Oid userid;
4583 
4584  rte = planner_rt_fetch(index->rel->relid, root);
4585  Assert(rte->rtekind == RTE_RELATION);
4586 
4587  /*
4588  * Use checkAsUser if it's set, in case we're
4589  * accessing the table via a view.
4590  */
4591  userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4592 
4593  /*
4594  * For simplicity, we insist on the whole
4595  * table being selectable, rather than trying
4596  * to identify which column(s) the index
4597  * depends on. Also require all rows to be
4598  * selectable --- there must be no
4599  * securityQuals from security barrier views
4600  * or RLS policies.
4601  */
4602  vardata->acl_ok =
4603  rte->securityQuals == NIL &&
4604  (pg_class_aclcheck(rte->relid, userid,
4605  ACL_SELECT) == ACLCHECK_OK);
4606  }
4607  else
4608  {
4609  /* suppress leakproofness checks later */
4610  vardata->acl_ok = true;
4611  }
4612  }
4613  if (vardata->statsTuple)
4614  break;
4615  }
4616  indexpr_item = lnext(index->indexprs, indexpr_item);
4617  }
4618  }
4619  if (vardata->statsTuple)
4620  break;
4621  }
4622  }
4623 }
#define NIL
Definition: pg_list.h:65
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:321
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:428
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3008
HeapTuple statsTuple
Definition: selfuncs.h:71
Oid GetUserId(void)
Definition: miscinit.c:380
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:276
List * securityQuals
Definition: parsenodes.h:1102
RelOptInfo * rel
Definition: selfuncs.h:70
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:525
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:73
#define MemSet(start, val, len)
Definition: c.h:955
AttrNumber varattno
Definition: primnodes.h:172
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:167
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:4635
int32 atttypmod
Definition: selfuncs.h:76
Definition: type.h:89
RelOptInfo * rel
Definition: pathnodes.h:791
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2025
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
Oid vartype
Definition: primnodes.h:174
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:1146
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:371
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
Relids pull_varnos(Node *node)
Definition: var.c:95
Index relid
Definition: pathnodes.h:669
Index varno
Definition: primnodes.h:170
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1172
#define ACL_SELECT
Definition: parsenodes.h:75
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:577
List * indexlist
Definition: pathnodes.h:678
#define BoolGetDatum(X)
Definition: postgres.h:402
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define Assert(condition)
Definition: c.h:732
#define lfirst(lc)
Definition: pg_list.h:190
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
int nkeycolumns
Definition: pathnodes.h:800
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:147
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4631
RTEKind rtekind
Definition: parsenodes.h:974
#define elog(elevel,...)
Definition: elog.h:226
void * arg
int * indexkeys
Definition: pathnodes.h:801
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:363
List * indpred
Definition: pathnodes.h:814
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
List * indexprs
Definition: pathnodes.h:813
int32 vartypmod
Definition: primnodes.h:175

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 5524 of file selfuncs.c.

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, IndexOptInfo::pages, IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, rint(), GenericCosts::spc_random_page_cost, RelOptInfo::tuples, and IndexOptInfo::tuples.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

5528 {
5529  IndexOptInfo *index = path->indexinfo;
5530  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
5531  List *indexOrderBys = path->indexorderbys;
5532  Cost indexStartupCost;
5533  Cost indexTotalCost;
5534  Selectivity indexSelectivity;
5535  double indexCorrelation;
5536  double numIndexPages;
5537  double numIndexTuples;
5538  double spc_random_page_cost;
5539  double num_sa_scans;
5540  double num_outer_scans;
5541  double num_scans;
5542  double qual_op_cost;
5543  double qual_arg_cost;
5544  List *selectivityQuals;
5545  ListCell *l;
5546 
5547  /*
5548  * If the index is partial, AND the index predicate with the explicitly
5549  * given indexquals to produce a more accurate idea of the index
5550  * selectivity.
5551  */
5552  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
5553 
5554  /*
5555  * Check for ScalarArrayOpExpr index quals, and estimate the number of
5556  * index scans that will be performed.
5557  */
5558  num_sa_scans = 1;
5559  foreach(l, indexQuals)
5560  {
5561  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
5562 
5563  if (IsA(rinfo->clause, ScalarArrayOpExpr))
5564  {
5565  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
5566  int alength = estimate_array_length(lsecond(saop->args));
5567 
5568  if (alength > 1)
5569  num_sa_scans *= alength;
5570  }
5571  }
5572 
5573  /* Estimate the fraction of main-table tuples that will be visited */
5574  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
5575  index->rel->relid,
5576  JOIN_INNER,
5577  NULL);
5578 
5579  /*
5580  * If caller didn't give us an estimate, estimate the number of index
5581  * tuples that will be visited. We do it in this rather peculiar-looking
5582  * way in order to get the right answer for partial indexes.
5583  */
5584  numIndexTuples = costs->numIndexTuples;
5585  if (numIndexTuples <= 0.0)
5586  {
5587  numIndexTuples = indexSelectivity * index->rel->tuples;
5588 
5589  /*
5590  * The above calculation counts all the tuples visited across all
5591  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
5592  * average per-indexscan number, so adjust. This is a handy place to
5593  * round to integer, too. (If caller supplied tuple estimate, it's
5594  * responsible for handling these considerations.)
5595  */
5596  numIndexTuples = rint(numIndexTuples / num_sa_scans);
5597  }
5598 
5599  /*
5600  * We can bound the number of tuples by the index size in any case. Also,
5601  * always estimate at least one tuple is touched, even when
5602  * indexSelectivity estimate is tiny.
5603  */
5604  if (numIndexTuples > index->tuples)
5605  numIndexTuples = index->tuples;
5606  if (numIndexTuples < 1.0)
5607  numIndexTuples = 1.0;
5608 
5609  /*
5610  * Estimate the number of index pages that will be retrieved.
5611  *
5612  * We use the simplistic method of taking a pro-rata fraction of the total
5613  * number of index pages. In effect, this counts only leaf pages and not
5614  * any overhead such as index metapage or upper tree levels.
5615  *
5616  * In practice access to upper index levels is often nearly free because
5617  * those tend to stay in cache under load; moreover, the cost involved is
5618  * highly dependent on index type. We therefore ignore such costs here
5619  * and leave it to the caller to add a suitable charge if needed.
5620  */
5621  if (index->pages > 1 && index->tuples > 1)
5622  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
5623  else
5624  numIndexPages = 1.0;
5625 
5626  /* fetch estimated page cost for tablespace containing index */
5628  &spc_random_page_cost,
5629  NULL);
5630 
5631  /*
5632  * Now compute the disk access costs.
5633  *
5634  * The above calculations are all per-index-scan. However, if we are in a
5635  * nestloop inner scan, we can expect the scan to be repeated (with
5636  * different search keys) for each row of the outer relation. Likewise,
5637  * ScalarArrayOpExpr quals result in multiple index scans. This creates
5638  * the potential for cache effects to reduce the number of disk page
5639  * fetches needed. We want to estimate the average per-scan I/O cost in
5640  * the presence of caching.
5641  *
5642  * We use the Mackert-Lohman formula (see costsize.c for details) to
5643  * estimate the total number of page fetches that occur. While this
5644  * wasn't what it was designed for, it seems a reasonable model anyway.
5645  * Note that we are counting pages not tuples anymore, so we take N = T =
5646  * index size, as if there were one "tuple" per page.
5647  */
5648  num_outer_scans = loop_count;
5649  num_scans = num_sa_scans * num_outer_scans;
5650 
5651  if (num_scans > 1)
5652  {
5653  double pages_fetched;
5654 
5655  /* total page fetches ignoring cache effects */
5656  pages_fetched = numIndexPages * num_scans;
5657 
5658  /* use Mackert and Lohman formula to adjust for cache effects */
5659  pages_fetched = index_pages_fetched(pages_fetched,
5660  index->pages,
5661  (double) index->pages,
5662  root);
5663 
5664  /*
5665  * Now compute the total disk access cost, and then report a pro-rated
5666  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
5667  * since that's internal to the indexscan.)
5668  */
5669  indexTotalCost = (pages_fetched * spc_random_page_cost)
5670  / num_outer_scans;
5671  }
5672  else
5673  {
5674  /*
5675  * For a single index scan, we just charge spc_random_page_cost per
5676  * page touched.
5677  */
5678  indexTotalCost = numIndexPages * spc_random_page_cost;
5679  }
5680 
5681  /*
5682  * CPU cost: any complex expressions in the indexquals will need to be
5683  * evaluated once at the start of the scan to reduce them to runtime keys
5684  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
5685  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
5686  * indexqual operator. Because we have numIndexTuples as a per-scan
5687  * number, we have to multiply by num_sa_scans to get the correct result
5688  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
5689  * ORDER BY expressions.
5690  *
5691  * Note: this neglects the possible costs of rechecking lossy operators.
5692  * Detecting that that might be needed seems more expensive than it's
5693  * worth, though, considering all the other inaccuracies here ...
5694  */
5695  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
5696  index_other_operands_eval_cost(root, indexOrderBys);
5697  qual_op_cost = cpu_operator_cost *
5698  (list_length(indexQuals) + list_length(indexOrderBys));
5699 
5700  indexStartupCost = qual_arg_cost;
5701  indexTotalCost += qual_arg_cost;
5702  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
5703 
5704  /*
5705  * Generic assumption about index correlation: there isn't any.
5706  */
5707  indexCorrelation = 0.0;
5708 
5709  /*
5710  * Return everything to caller.
5711  */
5712  costs->indexStartupCost = indexStartupCost;
5713  costs->indexTotalCost = indexTotalCost;
5714  costs->indexSelectivity = indexSelectivity;
5715  costs->indexCorrelation = indexCorrelation;
5716  costs->numIndexPages = numIndexPages;
5717  costs->numIndexTuples = numIndexTuples;
5718  costs->spc_random_page_cost = spc_random_page_cost;
5719  costs->num_sa_scans = num_sa_scans;
5720 }
Selectivity indexSelectivity
Definition: selfuncs.h:106
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
IndexOptInfo * indexinfo
Definition: pathnodes.h:1177
double tuples
Definition: pathnodes.h:681
Oid reltablespace
Definition: pathnodes.h:790
List * indexclauses
Definition: pathnodes.h:1178
double Selectivity
Definition: nodes.h:658
double tuples
Definition: pathnodes.h:795
#define lsecond(l)
Definition: pg_list.h:200
Definition: type.h:89
BlockNumber pages
Definition: pathnodes.h:794
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:1890
RelOptInfo * rel
Definition: pathnodes.h:791
double num_sa_scans
Definition: selfuncs.h:113
double cpu_operator_cost
Definition: costsize.c:114
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:5440
Cost indexTotalCost
Definition: selfuncs.h:105
double rint(double x)
Definition: rint.c:21
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
Index relid
Definition: pathnodes.h:669
Expr * clause
Definition: pathnodes.h:1943
double indexCorrelation
Definition: selfuncs.h:107
List * indexorderbys
Definition: pathnodes.h:1179
double spc_random_page_cost
Definition: selfuncs.h:112
double numIndexTuples
Definition: selfuncs.h:111
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:5470
#define lfirst(lc)
Definition: pg_list.h:190
static int list_length(const List *l)
Definition: pg_list.h:169
Cost indexStartupCost
Definition: selfuncs.h:104
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:70
Definition: pg_list.h:50
double cpu_index_tuple_cost
Definition: costsize.c:113
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:825
double Cost
Definition: nodes.h:659
double numIndexPages
Definition: selfuncs.h:110
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:5742

◆ get_join_variables()

void get_join_variables ( PlannerInfo root,
List args,
SpecialJoinInfo sjinfo,
VariableStatData vardata1,
VariableStatData vardata2,
bool join_is_reversed 
)

Definition at line 4346 of file selfuncs.c.

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().

4349 {
4350  Node *left,
4351  *right;
4352 
4353  if (list_length(args) != 2)
4354  elog(ERROR, "join operator should take two arguments");
4355 
4356  left = (Node *) linitial(args);
4357  right = (Node *) lsecond(args);
4358 
4359  examine_variable(root, left, 0, vardata1);
4360  examine_variable(root, right, 0, vardata2);
4361 
4362  if (vardata1->rel &&
4363  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4364  *join_is_reversed = true; /* var1 is on RHS */
4365  else if (vardata2->rel &&
4366  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4367  *join_is_reversed = true; /* var2 is on LHS */
4368  else
4369  *join_is_reversed = false;
4370 }
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: nodes.h:525
#define lsecond(l)
Definition: pg_list.h:200
Relids syn_lefthand
Definition: pathnodes.h:2135
Relids syn_righthand
Definition: pathnodes.h:2136
#define linitial(l)
Definition: pg_list.h:195
#define ERROR
Definition: elog.h:43
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
Relids relids
Definition: pathnodes.h:641
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
static int list_length(const List *l)
Definition: pg_list.h:169
#define elog(elevel,...)
Definition: elog.h:226

◆ get_quals_from_indexclauses()

List* get_quals_from_indexclauses ( List indexclauses)

Definition at line 5440 of file selfuncs.c.

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5441 {
5442  List *result = NIL;
5443  ListCell *lc;
5444 
5445  foreach(lc, indexclauses)
5446  {
5447  IndexClause *iclause = lfirst_node(IndexClause, lc);
5448  ListCell *lc2;
5449 
5450  foreach(lc2, iclause->indexquals)
5451  {
5452  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5453 
5454  result = lappend(result, rinfo);
5455  }
5456  }
5457  return result;
5458 }
#define NIL
Definition: pg_list.h:65
#define lfirst_node(type, lc)
Definition: pg_list.h:193
List * indexquals
Definition: pathnodes.h:1224
List * lappend(List *list, void *datum)
Definition: list.c:322
Definition: pg_list.h:50

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo root,
List args,
int  varRelid,
VariableStatData vardata,
Node **  other,
bool varonleft 
)

Definition at line 4286 of file selfuncs.c.

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), ltreeparentsel(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().

4289 {
4290  Node *left,
4291  *right;
4292  VariableStatData rdata;
4293 
4294  /* Fail if not a binary opclause (probably shouldn't happen) */
4295  if (list_length(args) != 2)
4296  return false;
4297 
4298  left = (Node *) linitial(args);
4299  right = (Node *) lsecond(args);
4300 
4301  /*
4302  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4303  * relations will be treated as pseudoconstants.
4304  */
4305  examine_variable(root, left, varRelid, vardata);
4306  examine_variable(root, right, varRelid, &rdata);
4307 
4308  /*
4309  * If one side is a variable and the other not, we win.
4310  */
4311  if (vardata->rel && rdata.rel == NULL)
4312  {
4313  *varonleft = true;
4314  *other = estimate_expression_value(root, rdata.var);
4315  /* Assume we need no ReleaseVariableStats(rdata) here */
4316  return true;
4317  }
4318 
4319  if (vardata->rel == NULL && rdata.rel)
4320  {
4321  *varonleft = false;
4322  *other = estimate_expression_value(root, vardata->var);
4323  /* Assume we need no ReleaseVariableStats(*vardata) here */
4324  *vardata = rdata;
4325  return true;
4326  }
4327 
4328  /* Oops, clause has wrong structure (probably var op var) */
4329  ReleaseVariableStats(*vardata);
4330  ReleaseVariableStats(rdata);
4331 
4332  return false;
4333 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2286
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: nodes.h:525
#define lsecond(l)
Definition: pg_list.h:200
#define linitial(l)
Definition: pg_list.h:195
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
static int list_length(const List *l)
Definition: pg_list.h:169
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData vardata,
bool isdefault 
)

Definition at line 4841 of file selfuncs.c.

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().

4842 {
4843  double stadistinct;
4844  double stanullfrac = 0.0;
4845  double ntuples;
4846 
4847  *isdefault = false;
4848 
4849  /*
4850  * Determine the stadistinct value to use. There are cases where we can
4851  * get an estimate even without a pg_statistic entry, or can get a better
4852  * value than is in pg_statistic. Grab stanullfrac too if we can find it
4853  * (otherwise, assume no nulls, for lack of any better idea).
4854  */
4855  if (HeapTupleIsValid(vardata->statsTuple))
4856  {
4857  /* Use the pg_statistic entry */
4858  Form_pg_statistic stats;
4859 
4860  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4861  stadistinct = stats->stadistinct;
4862  stanullfrac = stats->stanullfrac;
4863  }
4864  else if (vardata->vartype == BOOLOID)
4865  {
4866  /*
4867  * Special-case boolean columns: presumably, two distinct values.
4868  *
4869  * Are there any other datatypes we should wire in special estimates
4870  * for?
4871  */
4872  stadistinct = 2.0;
4873  }
4874  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
4875  {
4876  /*
4877  * If the Var represents a column of a VALUES RTE, assume it's unique.
4878  * This could of course be very wrong, but it should tend to be true
4879  * in well-written queries. We could consider examining the VALUES'
4880  * contents to get some real statistics; but that only works if the
4881  * entries are all constants, and it would be pretty expensive anyway.
4882  */
4883  stadistinct = -1.0; /* unique (and all non null) */
4884  }
4885  else
4886  {
4887  /*
4888  * We don't keep statistics for system columns, but in some cases we
4889  * can infer distinctness anyway.
4890  */
4891  if (vardata->var && IsA(vardata->var, Var))
4892  {
4893  switch (((Var *) vardata->var)->varattno)
4894  {
4896  stadistinct = -1.0; /* unique (and all non null) */
4897  break;
4899  stadistinct = 1.0; /* only 1 value */
4900  break;
4901  default:
4902  stadistinct = 0.0; /* means "unknown" */
4903  break;
4904  }
4905  }
4906  else
4907  stadistinct = 0.0; /* means "unknown" */
4908 
4909  /*
4910  * XXX consider using estimate_num_groups on expressions?
4911  */
4912  }
4913 
4914  /*
4915  * If there is a unique index or DISTINCT clause for the variable, assume
4916  * it is unique no matter what pg_statistic says; the statistics could be
4917  * out of date, or we might have found a partial unique index that proves
4918  * the var is unique for this query. However, we'd better still believe
4919  * the null-fraction statistic.
4920  */
4921  if (vardata->isunique)
4922  stadistinct = -1.0 * (1.0 - stanullfrac);
4923 
4924  /*
4925  * If we had an absolute estimate, use that.
4926  */
4927  if (stadistinct > 0.0)
4928  return clamp_row_est(stadistinct);
4929 
4930  /*
4931  * Otherwise we need to get the relation size; punt if not available.
4932  */
4933  if (vardata->rel == NULL)
4934  {
4935  *isdefault = true;
4936  return DEFAULT_NUM_DISTINCT;
4937  }
4938  ntuples = vardata->rel->tuples;
4939  if (ntuples <= 0.0)
4940  {
4941  *isdefault = true;
4942  return DEFAULT_NUM_DISTINCT;
4943  }
4944 
4945  /*
4946  * If we had a relative estimate, use that.
4947  */
4948  if (stadistinct < 0.0)
4949  return clamp_row_est(-stadistinct * ntuples);
4950 
4951  /*
4952  * With no data, estimate ndistinct = ntuples if the table is small, else
4953  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
4954  * that the behavior isn't discontinuous.
4955  */
4956  if (ntuples < DEFAULT_NUM_DISTINCT)
4957  return clamp_row_est(ntuples);
4958 
4959  *isdefault = true;
4960  return DEFAULT_NUM_DISTINCT;
4961 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: primnodes.h:167
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define TableOidAttributeNumber
Definition: sysattr.h:26
RTEKind rtekind
Definition: pathnodes.h:671
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:46
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21
double clamp_row_est(double nrows)
Definition: costsize.c:187

◆ histogram_selectivity()

double histogram_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Datum  constval,
bool  varonleft,
int  min_hist_size,
int  n_skip,
int *  hist_size 
)

Definition at line 778 of file selfuncs.c.

References Assert, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::nvalues, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by ltreeparentsel(), and patternsel_common().

782 {
783  double result;
784  AttStatsSlot sslot;
785 
786  /* check sanity of parameters */
787  Assert(n_skip >= 0);
788  Assert(min_hist_size > 2 * n_skip);
789 
790  if (HeapTupleIsValid(vardata->statsTuple) &&
791  statistic_proc_security_check(vardata, opproc->fn_oid) &&
792  get_attstatsslot(&sslot, vardata->statsTuple,
793  STATISTIC_KIND_HISTOGRAM, InvalidOid,
795  {
796  *hist_size = sslot.nvalues;
797  if (sslot.nvalues >= min_hist_size)
798  {
799  int nmatch = 0;
800  int i;
801 
802  for (i = n_skip; i < sslot.nvalues - n_skip; i++)
803  {
804  if (varonleft ?
806  sslot.stacoll,
807  sslot.values[i],
808  constval)) :
810  sslot.stacoll,
811  constval,
812  sslot.values[i])))
813  nmatch++;
814  }
815  result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
816  }
817  else
818  result = -1;
819  free_attstatsslot(&sslot);
820  }
821  else
822  {
823  *hist_size = 0;
824  result = -1;
825  }
826 
827  return result;
828 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
#define Assert(condition)
Definition: c.h:732
Datum * values
Definition: lsyscache.h:50
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ index_other_operands_eval_cost()

Cost index_other_operands_eval_cost ( PlannerInfo root,
List indexquals 
)

Definition at line 5470 of file selfuncs.c.

References OpExpr::args, ScalarArrayOpExpr::args, cost_qual_eval_node(), elog, ERROR, IsA, lfirst, lsecond, nodeTag, QualCost::per_tuple, RowCompareExpr::rargs, and QualCost::startup.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

5471 {
5472  Cost qual_arg_cost = 0;
5473  ListCell *lc;
5474 
5475  foreach(lc, indexquals)
5476  {
5477  Expr *clause = (Expr *) lfirst(lc);
5478  Node *other_operand;
5479  QualCost index_qual_cost;
5480 
5481  /*
5482  * Index quals will have RestrictInfos, indexorderbys won't. Look
5483  * through RestrictInfo if present.
5484  */
5485  if (IsA(clause, RestrictInfo))
5486  clause = ((RestrictInfo *) clause)->clause;
5487 
5488  if (IsA(clause, OpExpr))
5489  {
5490  OpExpr *op = (OpExpr *) clause;
5491 
5492  other_operand = (Node *) lsecond(op->args);
5493  }
5494  else if (IsA(clause, RowCompareExpr))
5495  {
5496  RowCompareExpr *rc = (RowCompareExpr *) clause;
5497 
5498  other_operand = (Node *) rc->rargs;
5499  }
5500  else if (IsA(clause, ScalarArrayOpExpr))
5501  {
5502  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5503 
5504  other_operand = (Node *) lsecond(saop->args);
5505  }
5506  else if (IsA(clause, NullTest))
5507  {
5508  other_operand = NULL;
5509  }
5510  else
5511  {
5512  elog(ERROR, "unsupported indexqual type: %d",
5513  (int) nodeTag(clause));
5514  other_operand = NULL; /* keep compiler quiet */
5515  }
5516 
5517  cost_qual_eval_node(&index_qual_cost, other_operand, root);
5518  qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5519  }
5520  return qual_arg_cost;
5521 }
void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
Definition: costsize.c:3845
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
Definition: nodes.h:525
#define lsecond(l)
Definition: pg_list.h:200
Cost startup
Definition: pathnodes.h:45
Cost per_tuple
Definition: pathnodes.h:46
#define ERROR
Definition: elog.h:43
#define lfirst(lc)
Definition: pg_list.h:190
#define nodeTag(nodeptr)
Definition: nodes.h:530
#define elog(elevel,...)
Definition: elog.h:226
List * args
Definition: primnodes.h:508
double Cost
Definition: nodes.h:659

◆ ineq_histogram_selectivity()

double ineq_histogram_selectivity ( PlannerInfo root,
VariableStatData vardata,
FmgrInfo opproc,
bool  isgt,
bool  iseq,
Datum  constval,
Oid  consttype 
)

Definition at line 846 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, convert_to_scalar(), DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_actual_variable_range(), get_attstatsslot(), get_variable_numdistinct(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::nnumbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, AttStatsSlot::staop, statistic_proc_security_check(), VariableStatData::statsTuple, val, AttStatsSlot::values, and VariableStatData::vartype.

Referenced by prefix_selectivity(), and scalarineqsel().

850 {
851  double hist_selec;
852  AttStatsSlot sslot;
853 
854  hist_selec = -1.0;
855 
856  /*
857  * Someday, ANALYZE might store more than one histogram per rel/att,
858  * corresponding to more than one possible sort ordering defined for the
859  * column type. However, to make that work we will need to figure out
860  * which staop to search for --- it's not necessarily the one we have at
861  * hand! (For example, we might have a '<=' operator rather than the '<'
862  * operator that will appear in staop.) For now, assume that whatever
863  * appears in pg_statistic is sorted the same way our operator sorts, or
864  * the reverse way if isgt is true.
865  */
866  if (HeapTupleIsValid(vardata->statsTuple) &&
867  statistic_proc_security_check(vardata, opproc->fn_oid) &&
868  get_attstatsslot(&sslot, vardata->statsTuple,
869  STATISTIC_KIND_HISTOGRAM, InvalidOid,
871  {
872  if (sslot.nvalues > 1)
873  {
874  /*
875  * Use binary search to find the desired location, namely the
876  * right end of the histogram bin containing the comparison value,
877  * which is the leftmost entry for which the comparison operator
878  * succeeds (if isgt) or fails (if !isgt). (If the given operator
879  * isn't actually sort-compatible with the histogram, you'll get
880  * garbage results ... but probably not any more garbage-y than
881  * you would have from the old linear search.)
882  *
883  * In this loop, we pay no attention to whether the operator iseq
884  * or not; that detail will be mopped up below. (We cannot tell,
885  * anyway, whether the operator thinks the values are equal.)
886  *
887  * If the binary search accesses the first or last histogram
888  * entry, we try to replace that endpoint with the true column min
889  * or max as found by get_actual_variable_range(). This
890  * ameliorates misestimates when the min or max is moving as a
891  * result of changes since the last ANALYZE. Note that this could
892  * result in effectively including MCVs into the histogram that
893  * weren't there before, but we don't try to correct for that.
894  */
895  double histfrac;
896  int lobound = 0; /* first possible slot to search */
897  int hibound = sslot.nvalues; /* last+1 slot to search */
898  bool have_end = false;
899 
900  /*
901  * If there are only two histogram entries, we'll want up-to-date
902  * values for both. (If there are more than two, we need at most
903  * one of them to be updated, so we deal with that within the
904  * loop.)
905  */
906  if (sslot.nvalues == 2)
907  have_end = get_actual_variable_range(root,
908  vardata,
909  sslot.staop,
910  &sslot.values[0],
911  &sslot.values[1]);
912 
913  while (lobound < hibound)
914  {
915  int probe = (lobound + hibound) / 2;
916  bool ltcmp;
917 
918  /*
919  * If we find ourselves about to compare to the first or last
920  * histogram entry, first try to replace it with the actual
921  * current min or max (unless we already did so above).
922  */
923  if (probe == 0 && sslot.nvalues > 2)
924  have_end = get_actual_variable_range(root,
925  vardata,
926  sslot.staop,
927  &sslot.values[0],
928  NULL);
929  else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
930  have_end = get_actual_variable_range(root,
931  vardata,
932  sslot.staop,
933  NULL,
934  &sslot.values[probe]);
935 
936  ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
937  sslot.stacoll,
938  sslot.values[probe],
939  constval));
940  if (isgt)
941  ltcmp = !ltcmp;
942  if (ltcmp)
943  lobound = probe + 1;
944  else
945  hibound = probe;
946  }
947 
948  if (lobound <= 0)
949  {
950  /*
951  * Constant is below lower histogram boundary. More
952  * precisely, we have found that no entry in the histogram
953  * satisfies the inequality clause (if !isgt) or they all do
954  * (if isgt). We estimate that that's true of the entire
955  * table, so set histfrac to 0.0 (which we'll flip to 1.0
956  * below, if isgt).
957  */
958  histfrac = 0.0;
959  }
960  else if (lobound >= sslot.nvalues)
961  {
962  /*
963  * Inverse case: constant is above upper histogram boundary.
964  */
965  histfrac = 1.0;
966  }
967  else
968  {
969  /* We have values[i-1] <= constant <= values[i]. */
970  int i = lobound;
971  double eq_selec = 0;
972  double val,
973  high,
974  low;
975  double binfrac;
976 
977  /*
978  * In the cases where we'll need it below, obtain an estimate
979  * of the selectivity of "x = constval". We use a calculation
980  * similar to what var_eq_const() does for a non-MCV constant,
981  * ie, estimate that all distinct non-MCV values occur equally
982  * often. But multiplication by "1.0 - sumcommon - nullfrac"
983  * will be done by our caller, so we shouldn't do that here.
984  * Therefore we can't try to clamp the estimate by reference
985  * to the least common MCV; the result would be too small.
986  *
987  * Note: since this is effectively assuming that constval
988  * isn't an MCV, it's logically dubious if constval in fact is
989  * one. But we have to apply *some* correction for equality,
990  * and anyway we cannot tell if constval is an MCV, since we
991  * don't have a suitable equality operator at hand.
992  */
993  if (i == 1 || isgt == iseq)
994  {
995  double otherdistinct;
996  bool isdefault;
997  AttStatsSlot mcvslot;
998 
999  /* Get estimated number of distinct values */
1000  otherdistinct = get_variable_numdistinct(vardata,
1001  &isdefault);
1002 
1003  /* Subtract off the number of known MCVs */
1004  if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1005  STATISTIC_KIND_MCV, InvalidOid,
1007  {
1008  otherdistinct -= mcvslot.nnumbers;
1009  free_attstatsslot(&mcvslot);
1010  }
1011 
1012  /* If result doesn't seem sane, leave eq_selec at 0 */
1013  if (otherdistinct > 1)
1014  eq_selec = 1.0 / otherdistinct;
1015  }
1016 
1017  /*
1018  * Convert the constant and the two nearest bin boundary
1019  * values to a uniform comparison scale, and do a linear
1020  * interpolation within this bin.
1021  */
1022  if (convert_to_scalar(constval, consttype, sslot.stacoll,
1023  &val,
1024  sslot.values[i - 1], sslot.values[i],
1025  vardata->vartype,
1026  &low, &high))
1027  {
1028  if (high <= low)
1029  {
1030  /* cope if bin boundaries appear identical */
1031  binfrac = 0.5;
1032  }
1033  else if (val <= low)
1034  binfrac = 0.0;
1035  else if (val >= high)
1036  binfrac = 1.0;
1037  else
1038  {
1039  binfrac = (val - low) / (high - low);
1040 
1041  /*
1042  * Watch out for the possibility that we got a NaN or
1043  * Infinity from the division. This can happen
1044  * despite the previous checks, if for example "low"
1045  * is -Infinity.
1046  */
1047  if (isnan(binfrac) ||
1048  binfrac < 0.0 || binfrac > 1.0)
1049  binfrac = 0.5;
1050  }
1051  }
1052  else
1053  {
1054  /*
1055  * Ideally we'd produce an error here, on the grounds that
1056  * the given operator shouldn't have scalarXXsel
1057  * registered as its selectivity func unless we can deal
1058  * with its operand types. But currently, all manner of
1059  * stuff is invoking scalarXXsel, so give a default
1060  * estimate until that can be fixed.
1061  */
1062  binfrac = 0.5;
1063  }
1064 
1065  /*
1066  * Now, compute the overall selectivity across the values
1067  * represented by the histogram. We have i-1 full bins and
1068  * binfrac partial bin below the constant.
1069  */
1070  histfrac = (double) (i - 1) + binfrac;
1071  histfrac /= (double) (sslot.nvalues - 1);
1072 
1073  /*
1074  * At this point, histfrac is an estimate of the fraction of
1075  * the population represented by the histogram that satisfies
1076  * "x <= constval". Somewhat remarkably, this statement is
1077  * true regardless of which operator we were doing the probes
1078  * with, so long as convert_to_scalar() delivers reasonable
1079  * results. If the probe constant is equal to some histogram
1080  * entry, we would have considered the bin to the left of that
1081  * entry if probing with "<" or ">=", or the bin to the right
1082  * if probing with "<=" or ">"; but binfrac would have come
1083  * out as 1.0 in the first case and 0.0 in the second, leading
1084  * to the same histfrac in either case. For probe constants
1085  * between histogram entries, we find the same bin and get the
1086  * same estimate with any operator.
1087  *
1088  * The fact that the estimate corresponds to "x <= constval"
1089  * and not "x < constval" is because of the way that ANALYZE
1090  * constructs the histogram: each entry is, effectively, the
1091  * rightmost value in its sample bucket. So selectivity
1092  * values that are exact multiples of 1/(histogram_size-1)
1093  * should be understood as estimates including a histogram
1094  * entry plus everything to its left.
1095  *
1096  * However, that breaks down for the first histogram entry,
1097  * which necessarily is the leftmost value in its sample
1098  * bucket. That means the first histogram bin is slightly
1099  * narrower than the rest, by an amount equal to eq_selec.
1100  * Another way to say that is that we want "x <= leftmost" to
1101  * be estimated as eq_selec not zero. So, if we're dealing
1102  * with the first bin (i==1), rescale to make that true while
1103  * adjusting the rest of that bin linearly.
1104  */
1105  if (i == 1)
1106  histfrac += eq_selec * (1.0 - binfrac);
1107 
1108  /*
1109  * "x <= constval" is good if we want an estimate for "<=" or
1110  * ">", but if we are estimating for "<" or ">=", we now need
1111  * to decrease the estimate by eq_selec.
1112  */
1113  if (isgt == iseq)
1114  histfrac -= eq_selec;
1115  }
1116 
1117  /*
1118  * Now the estimate is finished for "<" and "<=" cases. If we are
1119  * estimating for ">" or ">=", flip it.
1120  */
1121  hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1122 
1123  /*
1124  * The histogram boundaries are only approximate to begin with,
1125  * and may well be out of date anyway. Therefore, don't believe
1126  * extremely small or large selectivity estimates --- unless we
1127  * got actual current endpoint values from the table, in which
1128  * case just do the usual sanity clamp. Somewhat arbitrarily, we
1129  * set the cutoff for other cases at a hundredth of the histogram
1130  * resolution.
1131  */
1132  if (have_end)
1133  CLAMP_PROBABILITY(hist_selec);
1134  else
1135  {
1136  double cutoff = 0.01 / (double) (sslot.nvalues - 1);
1137 
1138  if (hist_selec < cutoff)
1139  hist_selec = cutoff;
1140  else if (hist_selec > 1.0 - cutoff)
1141  hist_selec = 1.0 - cutoff;
1142  }
1143  }
1144 
1145  free_attstatsslot(&sslot);
1146  }
1147 
1148  return hist_selec;
1149 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
Definition: selfuncs.c:3714
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4841
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
Definition: selfuncs.c:5107
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
int i
long val
Definition: informix.c:684
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ mcv_selectivity()

double mcv_selectivity ( VariableStatData vardata,
FmgrInfo opproc,
Datum  constval,
bool  varonleft,
double *  sumcommonp 
)

Definition at line 706 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, DatumGetBool, FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::numbers, AttStatsSlot::nvalues, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by ltreeparentsel(), networksel(), patternsel_common(), and scalarineqsel().

709 {
710  double mcv_selec,
711  sumcommon;
712  AttStatsSlot sslot;
713  int i;
714 
715  mcv_selec = 0.0;
716  sumcommon = 0.0;
717 
718  if (HeapTupleIsValid(vardata->statsTuple) &&
719  statistic_proc_security_check(vardata, opproc->fn_oid) &&
720  get_attstatsslot(&sslot, vardata->statsTuple,
721  STATISTIC_KIND_MCV, InvalidOid,
723  {
724  for (i = 0; i < sslot.nvalues; i++)
725  {
726  if (varonleft ?
728  sslot.stacoll,
729  sslot.values[i],
730  constval)) :
732  sslot.stacoll,
733  constval,
734  sslot.values[i])))
735  mcv_selec += sslot.numbers[i];
736  sumcommon += sslot.numbers[i];
737  }
738  free_attstatsslot(&sslot);
739  }
740 
741  *sumcommonp = sumcommon;
742  return mcv_selec;
743 }
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ mergejoinscansel()

void mergejoinscansel ( PlannerInfo root,
Node clause,
Oid  opfamily,
int  strategy,
bool  nulls_first,
Selectivity leftstart,
Selectivity leftend,
Selectivity rightstart,
Selectivity rightend 
)

Definition at line 2624 of file selfuncs.c.

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, CLAMP_PROBABILITY, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_rightop(), get_variable_range(), GETSTRUCT, HeapTupleIsValid, is_opclause(), OidIsValid, ReleaseVariableStats, scalarineqsel(), and VariableStatData::statsTuple.

Referenced by cached_scansel().

2628 {
2629  Node *left,
2630  *right;
2631  VariableStatData leftvar,
2632  rightvar;
2633  int op_strategy;
2634  Oid op_lefttype;
2635  Oid op_righttype;
2636  Oid opno,
2637  lsortop,
2638  rsortop,
2639  lstatop,
2640  rstatop,
2641  ltop,
2642  leop,
2643  revltop,
2644  revleop;
2645  bool isgt;
2646  Datum leftmin,
2647  leftmax,
2648  rightmin,
2649  rightmax;
2650  double selec;
2651 
2652  /* Set default results if we can't figure anything out. */
2653  /* XXX should default "start" fraction be a bit more than 0? */
2654  *leftstart = *rightstart = 0.0;
2655  *leftend = *rightend = 1.0;
2656 
2657  /* Deconstruct the merge clause */
2658  if (!is_opclause(clause))
2659  return; /* shouldn't happen */
2660  opno = ((OpExpr *) clause)->opno;
2661  left = get_leftop((Expr *) clause);
2662  right = get_rightop((Expr *) clause);
2663  if (!right)
2664  return; /* shouldn't happen */
2665 
2666  /* Look for stats for the inputs */
2667  examine_variable(root, left, 0, &leftvar);
2668  examine_variable(root, right, 0, &rightvar);
2669 
2670  /* Extract the operator's declared left/right datatypes */
2671  get_op_opfamily_properties(opno, opfamily, false,
2672  &op_strategy,
2673  &op_lefttype,
2674  &op_righttype);
2675  Assert(op_strategy == BTEqualStrategyNumber);
2676 
2677  /*
2678  * Look up the various operators we need. If we don't find them all, it
2679  * probably means the opfamily is broken, but we just fail silently.
2680  *
2681  * Note: we expect that pg_statistic histograms will be sorted by the '<'
2682  * operator, regardless of which sort direction we are considering.
2683  */
2684  switch (strategy)
2685  {
2686  case BTLessStrategyNumber:
2687  isgt = false;
2688  if (op_lefttype == op_righttype)
2689  {
2690  /* easy case */
2691  ltop = get_opfamily_member(opfamily,
2692  op_lefttype, op_righttype,
2694  leop = get_opfamily_member(opfamily,
2695  op_lefttype, op_righttype,
2697  lsortop = ltop;
2698  rsortop = ltop;
2699  lstatop = lsortop;
2700  rstatop = rsortop;
2701  revltop = ltop;
2702  revleop = leop;
2703  }
2704  else
2705  {
2706  ltop = get_opfamily_member(opfamily,
2707  op_lefttype, op_righttype,
2709  leop = get_opfamily_member(opfamily,
2710  op_lefttype, op_righttype,
2712  lsortop = get_opfamily_member(opfamily,
2713  op_lefttype, op_lefttype,
2715  rsortop = get_opfamily_member(opfamily,
2716  op_righttype, op_righttype,
2718  lstatop = lsortop;
2719  rstatop = rsortop;
2720  revltop = get_opfamily_member(opfamily,
2721  op_righttype, op_lefttype,
2723  revleop = get_opfamily_member(opfamily,
2724  op_righttype, op_lefttype,
2726  }
2727  break;
2729  /* descending-order case */
2730  isgt = true;
2731  if (op_lefttype == op_righttype)
2732  {
2733  /* easy case */
2734  ltop = get_opfamily_member(opfamily,
2735  op_lefttype, op_righttype,
2737  leop = get_opfamily_member(opfamily,
2738  op_lefttype, op_righttype,
2740  lsortop = ltop;
2741  rsortop = ltop;
2742  lstatop = get_opfamily_member(opfamily,
2743  op_lefttype, op_lefttype,
2745  rstatop = lstatop;
2746  revltop = ltop;
2747  revleop = leop;
2748  }
2749  else
2750  {
2751  ltop = get_opfamily_member(opfamily,
2752  op_lefttype, op_righttype,
2754  leop = get_opfamily_member(opfamily,
2755  op_lefttype, op_righttype,
2757  lsortop = get_opfamily_member(opfamily,
2758  op_lefttype, op_lefttype,
2760  rsortop = get_opfamily_member(opfamily,
2761  op_righttype, op_righttype,
2763  lstatop = get_opfamily_member(opfamily,
2764  op_lefttype, op_lefttype,
2766  rstatop = get_opfamily_member(opfamily,
2767  op_righttype, op_righttype,
2769  revltop = get_opfamily_member(opfamily,
2770  op_righttype, op_lefttype,
2772  revleop = get_opfamily_member(opfamily,
2773  op_righttype, op_lefttype,
2775  }
2776  break;
2777  default:
2778  goto fail; /* shouldn't get here */
2779  }
2780 
2781  if (!OidIsValid(lsortop) ||
2782  !OidIsValid(rsortop) ||
2783  !OidIsValid(lstatop) ||
2784  !OidIsValid(rstatop) ||
2785  !OidIsValid(ltop) ||
2786  !OidIsValid(leop) ||
2787  !OidIsValid(revltop) ||
2788  !OidIsValid(revleop))
2789  goto fail; /* insufficient info in catalogs */
2790 
2791  /* Try to get ranges of both inputs */
2792  if (!isgt)
2793  {
2794  if (!get_variable_range(root, &leftvar, lstatop,
2795  &leftmin, &leftmax))
2796  goto fail; /* no range available from stats */
2797  if (!get_variable_range(root, &rightvar, rstatop,
2798  &rightmin, &rightmax))
2799  goto fail; /* no range available from stats */
2800  }
2801  else
2802  {
2803  /* need to swap the max and min */
2804  if (!get_variable_range(root, &leftvar, lstatop,
2805  &leftmax, &leftmin))
2806  goto fail; /* no range available from stats */
2807  if (!get_variable_range(root, &rightvar, rstatop,
2808  &rightmax, &rightmin))
2809  goto fail; /* no range available from stats */
2810  }
2811 
2812  /*
2813  * Now, the fraction of the left variable that will be scanned is the
2814  * fraction that's <= the right-side maximum value. But only believe
2815  * non-default estimates, else stick with our 1.0.
2816  */
2817  selec = scalarineqsel(root, leop, isgt, true, &leftvar,
2818  rightmax, op_righttype);
2819  if (selec != DEFAULT_INEQ_SEL)
2820  *leftend = selec;
2821 
2822  /* And similarly for the right variable. */
2823  selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
2824  leftmax, op_lefttype);
2825  if (selec != DEFAULT_INEQ_SEL)
2826  *rightend = selec;
2827 
2828  /*
2829  * Only one of the two "end" fractions can really be less than 1.0;
2830  * believe the smaller estimate and reset the other one to exactly 1.0. If
2831  * we get exactly equal estimates (as can easily happen with self-joins),
2832  * believe neither.
2833  */
2834  if (*leftend > *rightend)
2835  *leftend = 1.0;
2836  else if (*leftend < *rightend)
2837  *rightend = 1.0;
2838  else
2839  *leftend = *rightend = 1.0;
2840 
2841  /*
2842  * Also, the fraction of the left variable that will be scanned before the
2843  * first join pair is found is the fraction that's < the right-side
2844  * minimum value. But only believe non-default estimates, else stick with
2845  * our own default.
2846  */
2847  selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
2848  rightmin, op_righttype);
2849  if (selec != DEFAULT_INEQ_SEL)
2850  *leftstart = selec;
2851 
2852  /* And similarly for the right variable. */
2853  selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
2854  leftmin, op_lefttype);
2855  if (selec != DEFAULT_INEQ_SEL)
2856  *rightstart = selec;
2857 
2858  /*
2859  * Only one of the two "start" fractions can really be more than zero;
2860  * believe the larger estimate and reset the other one to exactly 0.0. If
2861  * we get exactly equal estimates (as can easily happen with self-joins),
2862  * believe neither.
2863  */
2864  if (*leftstart < *rightstart)
2865  *leftstart = 0.0;
2866  else if (*leftstart > *rightstart)
2867  *rightstart = 0.0;
2868  else
2869  *leftstart = *rightstart = 0.0;
2870 
2871  /*
2872  * If the sort order is nulls-first, we're going to have to skip over any
2873  * nulls too. These would not have been counted by scalarineqsel, and we
2874  * can safely add in this fraction regardless of whether we believe
2875  * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
2876  */
2877  if (nulls_first)
2878  {
2879  Form_pg_statistic stats;
2880 
2881  if (HeapTupleIsValid(leftvar.statsTuple))
2882  {
2883  stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
2884  *leftstart += stats->stanullfrac;
2885  CLAMP_PROBABILITY(*leftstart);
2886  *leftend += stats->stanullfrac;
2887  CLAMP_PROBABILITY(*leftend);
2888  }
2889  if (HeapTupleIsValid(rightvar.statsTuple))
2890  {
2891  stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
2892  *rightstart += stats->stanullfrac;
2893  CLAMP_PROBABILITY(*rightstart);
2894  *rightend += stats->stanullfrac;
2895  CLAMP_PROBABILITY(*rightend);
2896  }
2897  }
2898 
2899  /* Disbelieve start >= end, just in case that can happen */
2900  if (*leftstart >= *leftend)
2901  {
2902  *leftstart = 0.0;
2903  *leftend = 1.0;
2904  }
2905  if (*rightstart >= *rightend)
2906  {
2907  *rightstart = 0.0;
2908  *rightend = 1.0;
2909  }
2910 
2911 fail:
2912  ReleaseVariableStats(leftvar);
2913  ReleaseVariableStats(rightvar);
2914 }
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37
Definition: nodes.h:525
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:638
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:163
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:70
uintptr_t Datum
Definition: postgres.h:367
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:82
static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, VariableStatData *vardata, Datum constval, Oid consttype)
Definition: selfuncs.c:556
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
#define Assert(condition)
Definition: c.h:732
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:133
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
Definition: selfuncs.c:4973
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:63
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32

◆ nulltestsel()

Selectivity nulltestsel ( PlannerInfo root,
NullTestType  nulltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1453 of file selfuncs.c.

References CLAMP_PROBABILITY, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), GETSTRUCT, HeapTupleIsValid, IS_NOT_NULL, IS_NULL, IsA, ReleaseVariableStats, VariableStatData::statsTuple, and VariableStatData::var.

Referenced by clause_selectivity(), and clauselist_selectivity_simple().

1455 {
1456  VariableStatData vardata;
1457  double selec;
1458 
1459  examine_variable(root, arg, varRelid, &vardata);
1460 
1461  if (HeapTupleIsValid(vardata.statsTuple))
1462  {
1463  Form_pg_statistic stats;
1464  double freq_null;
1465 
1466  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1467  freq_null = stats->stanullfrac;
1468 
1469  switch (nulltesttype)
1470  {
1471  case IS_NULL:
1472 
1473  /*
1474  * Use freq_null directly.
1475  */
1476  selec = freq_null;
1477  break;
1478  case IS_NOT_NULL:
1479 
1480  /*
1481  * Select not unknown (not null) values. Calculate from
1482  * freq_null.
1483  */
1484  selec = 1.0 - freq_null;
1485  break;
1486  default:
1487  elog(ERROR, "unrecognized nulltesttype: %d",
1488  (int) nulltesttype);
1489  return (Selectivity) 0; /* keep compiler quiet */
1490  }
1491  }
1492  else if (vardata.var && IsA(vardata.var, Var) &&
1493  ((Var *) vardata.var)->varattno < 0)
1494  {
1495  /*
1496  * There are no stats for system columns, but we know they are never
1497  * NULL.
1498  */
1499  selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1500  }
1501  else
1502  {
1503  /*
1504  * No ANALYZE stats available, so make a guess
1505  */
1506  switch (nulltesttype)
1507  {
1508  case IS_NULL:
1509  selec = DEFAULT_UNK_SEL;
1510  break;
1511  case IS_NOT_NULL:
1512  selec = DEFAULT_NOT_UNK_SEL;
1513  break;
1514  default:
1515  elog(ERROR, "unrecognized nulltesttype: %d",
1516  (int) nulltesttype);
1517  return (Selectivity) 0; /* keep compiler quiet */
1518  }
1519  }
1520 
1521  ReleaseVariableStats(vardata);
1522 
1523  /* result should be in range, but make sure... */
1524  CLAMP_PROBABILITY(selec);
1525 
1526  return (Selectivity) selec;
1527 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
double Selectivity
Definition: nodes.h:658
Definition: primnodes.h:167
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:50
#define ERROR
Definition: elog.h:43
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:49
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define elog(elevel,...)
Definition: elog.h:226

◆ rowcomparesel()

Selectivity rowcomparesel ( PlannerInfo root,
RowCompareExpr clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1928 of file selfuncs.c.

References RowCompareExpr::inputcollids, join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NumRelids(), RowCompareExpr::opnos, RowCompareExpr::rargs, restriction_selectivity(), and s1.

Referenced by clause_selectivity().

1931 {
1932  Selectivity s1;
1933  Oid opno = linitial_oid(clause->opnos);
1934  Oid inputcollid = linitial_oid(clause->inputcollids);
1935  List *opargs;
1936  bool is_join_clause;
1937 
1938  /* Build equivalent arg list for single operator */
1939  opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
1940 
1941  /*
1942  * Decide if it's a join clause. This should match clausesel.c's
1943  * treat_as_join_clause(), except that we intentionally consider only the
1944  * leading columns and not the rest of the clause.
1945  */
1946  if (varRelid != 0)
1947  {
1948  /*
1949  * Caller is forcing restriction mode (eg, because we are examining an
1950  * inner indexscan qual).
1951  */
1952  is_join_clause = false;
1953  }
1954  else if (sjinfo == NULL)
1955  {
1956  /*
1957  * It must be a restriction clause, since it's being evaluated at a
1958  * scan node.
1959  */
1960  is_join_clause = false;
1961  }
1962  else
1963  {
1964  /*
1965  * Otherwise, it's a join if there's more than one relation used.
1966  */
1967  is_join_clause = (NumRelids((Node *) opargs) > 1);
1968  }
1969 
1970  if (is_join_clause)
1971  {
1972  /* Estimate selectivity for a join clause. */
1973  s1 = join_selectivity(root, opno,
1974  opargs,
1975  inputcollid,
1976  jointype,
1977  sjinfo);
1978  }
1979  else
1980  {
1981  /* Estimate selectivity for a restriction clause. */
1982  s1 = restriction_selectivity(root, opno,
1983  opargs,
1984  inputcollid,
1985  varRelid);
1986  }
1987 
1988  return s1;
1989 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
Selectivity restriction_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, int varRelid)
Definition: plancat.c:1764
Definition: nodes.h:525
double Selectivity
Definition: nodes.h:658
unsigned int Oid
Definition: postgres_ext.h:31
#define linitial(l)
Definition: pg_list.h:195
char * s1
#define linitial_oid(l)
Definition: pg_list.h:197
Selectivity join_selectivity(PlannerInfo *root, Oid operatorid, List *args, Oid inputcollid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: plancat.c:1803
List * inputcollids
Definition: primnodes.h:1059
Definition: pg_list.h:50
int NumRelids(Node *clause)
Definition: clauses.c:2131

◆ scalararraysel()

Selectivity scalararraysel ( PlannerInfo root,
ScalarArrayOpExpr clause,
bool  is_join_clause,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1571 of file selfuncs.c.

References generate_unaccent_rules::args, ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert, CLAMP_PROBABILITY, CaseTestExpr::collation, DatumGetArrayTypeP, DatumGetFloat8, deconstruct_array(), ArrayExpr::element_typeid, ArrayExpr::elements, TypeCacheEntry::eq_opr, estimate_expression_value(), exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, ScalarArrayOpExpr::inputcollid, Int16GetDatum, Int32GetDatum, IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum, OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, CaseTestExpr::typeMod, and ScalarArrayOpExpr::useOr.

Referenced by clause_selectivity().

1577 {
1578  Oid operator = clause->opno;
1579  bool useOr = clause->useOr;
1580  bool isEquality = false;
1581  bool isInequality = false;
1582  Node *leftop;
1583  Node *rightop;
1584  Oid nominal_element_type;
1585  Oid nominal_element_collation;
1586  TypeCacheEntry *typentry;
1587  RegProcedure oprsel;
1588  FmgrInfo oprselproc;
1589  Selectivity s1;
1590  Selectivity s1disjoint;
1591 
1592  /* First, deconstruct the expression */
1593  Assert(list_length(clause->args) == 2);
1594  leftop = (Node *) linitial(clause->args);
1595  rightop = (Node *) lsecond(clause->args);
1596 
1597  /* aggressively reduce both sides to constants */
1598  leftop = estimate_expression_value(root, leftop);
1599  rightop = estimate_expression_value(root, rightop);
1600 
1601  /* get nominal (after relabeling) element type of rightop */
1602  nominal_element_type = get_base_element_type(exprType(rightop));
1603  if (!OidIsValid(nominal_element_type))
1604  return (Selectivity) 0.5; /* probably shouldn't happen */
1605  /* get nominal collation, too, for generating constants */
1606  nominal_element_collation = exprCollation(rightop);
1607 
1608  /* look through any binary-compatible relabeling of rightop */
1609  rightop = strip_array_coercion(rightop);
1610 
1611  /*
1612  * Detect whether the operator is the default equality or inequality
1613  * operator of the array element type.
1614  */
1615  typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1616  if (OidIsValid(typentry->eq_opr))
1617  {
1618  if (operator == typentry->eq_opr)
1619  isEquality = true;
1620  else if (get_negator(operator) == typentry->eq_opr)
1621  isInequality = true;
1622  }
1623 
1624  /*
1625  * If it is equality or inequality, we might be able to estimate this as a
1626  * form of array containment; for instance "const = ANY(column)" can be
1627  * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1628  * that, and returns the selectivity estimate if successful, or -1 if not.
1629  */
1630  if ((isEquality || isInequality) && !is_join_clause)
1631  {
1632  s1 = scalararraysel_containment(root, leftop, rightop,
1633  nominal_element_type,
1634  isEquality, useOr, varRelid);
1635  if (s1 >= 0.0)
1636  return s1;
1637  }
1638 
1639  /*
1640  * Look up the underlying operator's selectivity estimator. Punt if it
1641  * hasn't got one.
1642  */
1643  if (is_join_clause)
1644  oprsel = get_oprjoin(operator);
1645  else
1646  oprsel = get_oprrest(operator);
1647  if (!oprsel)
1648  return (Selectivity) 0.5;
1649  fmgr_info(oprsel, &oprselproc);
1650 
1651  /*
1652  * In the array-containment check above, we must only believe that an
1653  * operator is equality or inequality if it is the default btree equality
1654  * operator (or its negator) for the element type, since those are the
1655  * operators that array containment will use. But in what follows, we can
1656  * be a little laxer, and also believe that any operators using eqsel() or
1657  * neqsel() as selectivity estimator act like equality or inequality.
1658  */
1659  if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1660  isEquality = true;
1661  else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1662  isInequality = true;
1663 
1664  /*
1665  * We consider three cases:
1666  *
1667  * 1. rightop is an Array constant: deconstruct the array, apply the
1668  * operator's selectivity function for each array element, and merge the
1669  * results in the same way that clausesel.c does for AND/OR combinations.
1670  *
1671  * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1672  * function for each element of the ARRAY[] construct, and merge.
1673  *
1674  * 3. otherwise, make a guess ...
1675  */
1676  if (rightop && IsA(rightop, Const))
1677  {
1678  Datum arraydatum = ((Const *) rightop)->constvalue;
1679  bool arrayisnull = ((Const *) rightop)->constisnull;
1680  ArrayType *arrayval;
1681  int16 elmlen;
1682  bool elmbyval;
1683  char elmalign;
1684  int num_elems;
1685  Datum *elem_values;
1686  bool *elem_nulls;
1687  int i;
1688 
1689  if (arrayisnull) /* qual can't succeed if null array */
1690  return (Selectivity) 0.0;
1691  arrayval = DatumGetArrayTypeP(arraydatum);
1693  &elmlen, &elmbyval, &elmalign);
1694  deconstruct_array(arrayval,
1695  ARR_ELEMTYPE(arrayval),
1696  elmlen, elmbyval, elmalign,
1697  &elem_values, &elem_nulls, &num_elems);
1698 
1699  /*
1700  * For generic operators, we assume the probability of success is
1701  * independent for each array element. But for "= ANY" or "<> ALL",
1702  * if the array elements are distinct (which'd typically be the case)
1703  * then the probabilities are disjoint, and we should just sum them.
1704  *
1705  * If we were being really tense we would try to confirm that the
1706  * elements are all distinct, but that would be expensive and it
1707  * doesn't seem to be worth the cycles; it would amount to penalizing
1708  * well-written queries in favor of poorly-written ones. However, we
1709  * do protect ourselves a little bit by checking whether the
1710  * disjointness assumption leads to an impossible (out of range)
1711  * probability; if so, we fall back to the normal calculation.
1712  */
1713  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1714 
1715  for (i = 0; i < num_elems; i++)
1716  {
1717  List *args;
1718  Selectivity s2;
1719 
1720  args = list_make2(leftop,
1721  makeConst(nominal_element_type,
1722  -1,
1723  nominal_element_collation,
1724  elmlen,
1725  elem_values[i],
1726  elem_nulls[i],
1727  elmbyval));
1728  if (is_join_clause)
1729  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1730  clause->inputcollid,
1731  PointerGetDatum(root),
1732  ObjectIdGetDatum(operator),
1733  PointerGetDatum(args),
1734  Int16GetDatum(jointype),
1735  PointerGetDatum(sjinfo)));
1736  else
1737  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1738  clause->inputcollid,
1739  PointerGetDatum(root),
1740  ObjectIdGetDatum(operator),
1741  PointerGetDatum(args),
1742  Int32GetDatum(varRelid)));
1743 
1744  if (useOr)
1745  {
1746  s1 = s1 + s2 - s1 * s2;
1747  if (isEquality)
1748  s1disjoint += s2;
1749  }
1750  else
1751  {
1752  s1 = s1 * s2;
1753  if (isInequality)
1754  s1disjoint += s2 - 1.0;
1755  }
1756  }
1757 
1758  /* accept disjoint-probability estimate if in range */
1759  if ((useOr ? isEquality : isInequality) &&
1760  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1761  s1 = s1disjoint;
1762  }
1763  else if (rightop && IsA(rightop, ArrayExpr) &&
1764  !((ArrayExpr *) rightop)->multidims)
1765  {
1766  ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
1767  int16 elmlen;
1768  bool elmbyval;
1769  ListCell *l;
1770 
1771  get_typlenbyval(arrayexpr->element_typeid,
1772  &elmlen, &elmbyval);
1773 
1774  /*
1775  * We use the assumption of disjoint probabilities here too, although
1776  * the odds of equal array elements are rather higher if the elements
1777  * are not all constants (which they won't be, else constant folding
1778  * would have reduced the ArrayExpr to a Const). In this path it's
1779  * critical to have the sanity check on the s1disjoint estimate.
1780  */
1781  s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1782 
1783  foreach(l, arrayexpr->elements)
1784  {
1785  Node *elem = (Node *) lfirst(l);
1786  List *args;
1787  Selectivity s2;
1788 
1789  /*
1790  * Theoretically, if elem isn't of nominal_element_type we should
1791  * insert a RelabelType, but it seems unlikely that any operator
1792  * estimation function would really care ...
1793  */
1794  args = list_make2(leftop, elem);
1795  if (is_join_clause)
1796  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1797  clause->inputcollid,
1798  PointerGetDatum(root),
1799  ObjectIdGetDatum(operator),
1800  PointerGetDatum(args),
1801  Int16GetDatum(jointype),
1802  PointerGetDatum(sjinfo)));
1803  else
1804  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1805  clause->inputcollid,
1806  PointerGetDatum(root),
1807  ObjectIdGetDatum(operator),
1808  PointerGetDatum(args),
1809  Int32GetDatum(varRelid)));
1810 
1811  if (useOr)
1812  {
1813  s1 = s1 + s2 - s1 * s2;
1814  if (isEquality)
1815  s1disjoint += s2;
1816  }
1817  else
1818  {
1819  s1 = s1 * s2;
1820  if (isInequality)
1821  s1disjoint += s2 - 1.0;
1822  }
1823  }
1824 
1825  /* accept disjoint-probability estimate if in range */
1826  if ((useOr ? isEquality : isInequality) &&
1827  s1disjoint >= 0.0 && s1disjoint <= 1.0)
1828  s1 = s1disjoint;
1829  }
1830  else
1831  {
1832  CaseTestExpr *dummyexpr;
1833  List *args;
1834  Selectivity s2;
1835  int i;
1836 
1837  /*
1838  * We need a dummy rightop to pass to the operator selectivity
1839  * routine. It can be pretty much anything that doesn't look like a
1840  * constant; CaseTestExpr is a convenient choice.
1841  */
1842  dummyexpr = makeNode(CaseTestExpr);
1843  dummyexpr->typeId = nominal_element_type;
1844  dummyexpr->typeMod = -1;
1845  dummyexpr->collation = clause->inputcollid;
1846  args = list_make2(leftop, dummyexpr);
1847  if (is_join_clause)
1848  s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1849  clause->inputcollid,
1850  PointerGetDatum(root),
1851  ObjectIdGetDatum(operator),
1852  PointerGetDatum(args),
1853  Int16GetDatum(jointype),
1854  PointerGetDatum(sjinfo)));
1855  else
1856  s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1857  clause->inputcollid,
1858  PointerGetDatum(root),
1859  ObjectIdGetDatum(operator),
1860  PointerGetDatum(args),
1861  Int32GetDatum(varRelid)));
1862  s1 = useOr ? 0.0 : 1.0;
1863 
1864  /*
1865  * Arbitrarily assume 10 elements in the eventual array value (see
1866  * also estimate_array_length). We don't risk an assumption of
1867  * disjoint probabilities here.
1868  */
1869  for (i = 0; i < 10; i++)
1870  {
1871  if (useOr)
1872  s1 = s1 + s2 - s1 * s2;
1873  else
1874  s1 = s1 * s2;
1875  }
1876  }
1877 
1878  /* result should be in range, but make sure... */
1879  CLAMP_PROBABILITY(s1);
1880 
1881  return s1;
1882 }
#define list_make2(x1, x2)
Definition: pg_list.h:229
signed short int16
Definition: c.h:345
Definition: fmgr.h:56
RegProcedure get_oprjoin(Oid opno)
Definition: lsyscache.c:1383
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2286
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2049
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum FunctionCall5Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5)
Definition: fmgr.c:1224
regproc RegProcedure
Definition: c.h:505
#define Int16GetDatum(X)
Definition: postgres.h:451
Definition: nodes.h:525
#define TYPECACHE_EQ_OPR
Definition: typcache.h:128
Datum FunctionCall4Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4)
Definition: fmgr.c:1197
double Selectivity
Definition: nodes.h:658
unsigned int Oid
Definition: postgres_ext.h:31
Const * makeConst(Oid consttype, int32 consttypmod, Oid constcollid, int constlen, Datum constvalue, bool constisnull, bool constbyval)
Definition: makefuncs.c:297
#define OidIsValid(objectId)
Definition: c.h:638
#define lsecond(l)
Definition: pg_list.h:200
int32 typeMod
Definition: primnodes.h:959
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define linitial(l)
Definition: pg_list.h:195
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
char * s1
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:124
List * elements
Definition: primnodes.h:977
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1359
char * s2
#define DatumGetFloat8(X)
Definition: postgres.h:728
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define makeNode(_type_)
Definition: nodes.h:573
#define Assert(condition)
Definition: c.h:732
#define lfirst(lc)
Definition: pg_list.h:190
Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
static int list_length(const List *l)
Definition: pg_list.h:169
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:720
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2029
Oid element_typeid
Definition: primnodes.h:976
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3461
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2599
#define Int32GetDatum(X)
Definition: postgres.h:479
int i
Oid get_negator(Oid opno)
Definition: lsyscache.c:1335
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:280
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1538
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 82 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

86 {
87  Selectivity selec;
88  VariableStatData vardata;
89  Datum constval;
90  TypeCacheEntry *typentry;
91  FmgrInfo *cmpfunc;
92 
93  /*
94  * rightop must be a variable, else punt.
95  */
96  examine_variable(root, rightop, varRelid, &vardata);
97  if (!vardata.rel)
98  {
99  ReleaseVariableStats(vardata);
100  return -1.0;
101  }
102 
103  /*
104  * leftop must be a constant, else punt.
105  */
106  if (!IsA(leftop, Const))
107  {
108  ReleaseVariableStats(vardata);
109  return -1.0;
110  }
111  if (((Const *) leftop)->constisnull)
112  {
113  /* qual can't succeed if null on left */
114  ReleaseVariableStats(vardata);
115  return (Selectivity) 0.0;
116  }
117  constval = ((Const *) leftop)->constvalue;
118 
119  /* Get element type's default comparison function */
120  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
121  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
122  {
123  ReleaseVariableStats(vardata);
124  return -1.0;
125  }
126  cmpfunc = &typentry->cmp_proc_finfo;
127 
128  /*
129  * If the operator is <>, swap ANY/ALL, then invert the result later.
130  */
131  if (!isEquality)
132  useOr = !useOr;
133 
134  /* Get array element stats for var, if available */
135  if (HeapTupleIsValid(vardata.statsTuple) &&
136  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
137  {
138  Form_pg_statistic stats;
139  AttStatsSlot sslot;
140  AttStatsSlot hslot;
141 
142  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
143 
144  /* MCELEM will be an array of same type as element */
145  if (get_attstatsslot(&sslot, vardata.statsTuple,
146  STATISTIC_KIND_MCELEM, InvalidOid,
148  {
149  /* For ALL case, also get histogram of distinct-element counts */
150  if (useOr ||
151  !get_attstatsslot(&hslot, vardata.statsTuple,
152  STATISTIC_KIND_DECHIST, InvalidOid,
154  memset(&hslot, 0, sizeof(hslot));
155 
156  /*
157  * For = ANY, estimate as var @> ARRAY[const].
158  *
159  * For = ALL, estimate as var <@ ARRAY[const].
160  */
161  if (useOr)
163  sslot.nvalues,
164  sslot.numbers,
165  sslot.nnumbers,
166  &constval, 1,
167  OID_ARRAY_CONTAINS_OP,
168  typentry);
169  else
170  selec = mcelem_array_contained_selec(sslot.values,
171  sslot.nvalues,
172  sslot.numbers,
173  sslot.nnumbers,
174  &constval, 1,
175  hslot.numbers,
176  hslot.nnumbers,
177  OID_ARRAY_CONTAINED_OP,
178  typentry);
179 
180  free_attstatsslot(&hslot);
181  free_attstatsslot(&sslot);
182  }
183  else
184  {
185  /* No most-common-elements info, so do without */
186  if (useOr)
187  selec = mcelem_array_contain_overlap_selec(NULL, 0,
188  NULL, 0,
189  &constval, 1,
190  OID_ARRAY_CONTAINS_OP,
191  typentry);
192  else
193  selec = mcelem_array_contained_selec(NULL, 0,
194  NULL, 0,
195  &constval, 1,
196  NULL, 0,
197  OID_ARRAY_CONTAINED_OP,
198  typentry);
199  }
200 
201  /*
202  * MCE stats count only non-null rows, so adjust for null rows.
203  */
204  selec *= (1.0 - stats->stanullfrac);
205  }
206  else
207  {
208  /* No stats at all, so do without */
209  if (useOr)
210  selec = mcelem_array_contain_overlap_selec(NULL, 0,
211  NULL, 0,
212  &constval, 1,
213  OID_ARRAY_CONTAINS_OP,
214  typentry);
215  else
216  selec = mcelem_array_contained_selec(NULL, 0,
217  NULL, 0,
218  &constval, 1,
219  NULL, 0,
220  OID_ARRAY_CONTAINED_OP,
221  typentry);
222  /* we assume no nulls here, so no stanullfrac correction */
223  }
224 
225  ReleaseVariableStats(vardata);
226 
227  /*
228  * If the operator is <>, invert the results.
229  */
230  if (!isEquality)
231  selec = 1.0 - selec;
232 
233  CLAMP_PROBABILITY(selec);
234 
235  return selec;
236 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:638
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
FmgrInfo cmp_proc_finfo
Definition: typcache.h:73
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:134
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ statistic_proc_security_check()

bool statistic_proc_security_check ( VariableStatData vardata,
Oid  func_oid 
)

Definition at line 4812 of file selfuncs.c.

References VariableStatData::acl_ok, DEBUG2, ereport, errmsg_internal(), get_func_leakproof(), get_func_name(), and OidIsValid.

Referenced by calc_arraycontsel(), calc_hist_selectivity(), eqjoinsel(), get_variable_range(), histogram_selectivity(), ineq_histogram_selectivity(), mcv_selectivity(), scalararraysel_containment(), and var_eq_const().

4813 {
4814  if (vardata->acl_ok)
4815  return true;
4816 
4817  if (!OidIsValid(func_oid))
4818  return false;
4819 
4820  if (get_func_leakproof(func_oid))
4821  return true;
4822 
4823  ereport(DEBUG2,
4824  (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
4825  get_func_name(func_oid))));
4826  return false;
4827 }
bool get_func_leakproof(Oid funcid)
Definition: lsyscache.c:1639
#define OidIsValid(objectId)
Definition: c.h:638
char * get_func_name(Oid funcid)
Definition: lsyscache.c:1410
#define DEBUG2
Definition: elog.h:24
#define ereport(elevel, rest)
Definition: elog.h:141
int errmsg_internal(const char *fmt,...)
Definition: elog.c:814

◆ var_eq_const()

double var_eq_const ( VariableStatData vardata,
Oid  oproid,
Datum  constval,
bool  constisnull,
bool  varonleft,
bool  negate 
)

Definition at line 288 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, DatumGetBool, fmgr_info(), free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, VariableStatData::rel, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, RelOptInfo::tuples, and AttStatsSlot::values.

Referenced by boolvarsel(), eqsel_internal(), patternsel_common(), and prefix_selectivity().

291 {
292  double selec;
293  double nullfrac = 0.0;
294  bool isdefault;
295  Oid opfuncoid;
296 
297  /*
298  * If the constant is NULL, assume operator is strict and return zero, ie,
299  * operator will never return TRUE. (It's zero even for a negator op.)
300  */
301  if (constisnull)
302  return 0.0;
303 
304  /*
305  * Grab the nullfrac for use below. Note we allow use of nullfrac
306  * regardless of security check.
307  */
308  if (HeapTupleIsValid(vardata->statsTuple))
309  {
310  Form_pg_statistic stats;
311 
312  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
313  nullfrac = stats->stanullfrac;
314  }
315 
316  /*
317  * If we matched the var to a unique index or DISTINCT clause, assume
318  * there is exactly one match regardless of anything else. (This is
319  * slightly bogus, since the index or clause's equality operator might be
320  * different from ours, but it's much more likely to be right than
321  * ignoring the information.)
322  */
323  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
324  {
325  selec = 1.0 / vardata->rel->tuples;
326  }
327  else if (HeapTupleIsValid(vardata->statsTuple) &&
329  (opfuncoid = get_opcode(operator))))
330  {
331  AttStatsSlot sslot;
332  bool match = false;
333  int i;
334 
335  /*
336  * Is the constant "=" to any of the column's most common values?
337  * (Although the given operator may not really be "=", we will assume
338  * that seeing whether it returns TRUE is an appropriate test. If you
339  * don't like this, maybe you shouldn't be using eqsel for your
340  * operator...)
341  */
342  if (get_attstatsslot(&sslot, vardata->statsTuple,
343  STATISTIC_KIND_MCV, InvalidOid,
345  {
346  FmgrInfo eqproc;
347 
348  fmgr_info(opfuncoid, &eqproc);
349 
350  for (i = 0; i < sslot.nvalues; i++)
351  {
352  /* be careful to apply operator right way 'round */
353  if (varonleft)
354  match = DatumGetBool(FunctionCall2Coll(&eqproc,
355  sslot.stacoll,
356  sslot.values[i],
357  constval));
358  else
359  match = DatumGetBool(FunctionCall2Coll(&eqproc,
360  sslot.stacoll,
361  constval,
362  sslot.values[i]));
363  if (match)
364  break;
365  }
366  }
367  else
368  {
369  /* no most-common-value info available */
370  i = 0; /* keep compiler quiet */
371  }
372 
373  if (match)
374  {
375  /*
376  * Constant is "=" to this common value. We know selectivity
377  * exactly (or as exactly as ANALYZE could calculate it, anyway).
378  */
379  selec = sslot.numbers[i];
380  }
381  else
382  {
383  /*
384  * Comparison is against a constant that is neither NULL nor any
385  * of the common values. Its selectivity cannot be more than
386  * this:
387  */
388  double sumcommon = 0.0;
389  double otherdistinct;
390 
391  for (i = 0; i < sslot.nnumbers; i++)
392  sumcommon += sslot.numbers[i];
393  selec = 1.0 - sumcommon - nullfrac;
394  CLAMP_PROBABILITY(selec);
395 
396  /*
397  * and in fact it's probably a good deal less. We approximate that
398  * all the not-common values share this remaining fraction
399  * equally, so we divide by the number of other distinct values.
400  */
401  otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
402  sslot.nnumbers;
403  if (otherdistinct > 1)
404  selec /= otherdistinct;
405 
406  /*
407  * Another cross-check: selectivity shouldn't be estimated as more
408  * than the least common "most common value".
409  */
410  if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
411  selec = sslot.numbers[sslot.nnumbers - 1];
412  }
413 
414  free_attstatsslot(&sslot);
415  }
416  else
417  {
418  /*
419  * No ANALYZE stats available, so make a guess using estimated number
420  * of distinct values and assuming they are equally common. (The guess
421  * is unlikely to be very good, but we do know a few special cases.)
422  */
423  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
424  }
425 
426  /* now adjust if we wanted <> rather than = */
427  if (negate)
428  selec = 1.0 - selec - nullfrac;
429 
430  /* result should be in range, but make sure... */
431  CLAMP_PROBABILITY(selec);
432 
433  return selec;
434 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
unsigned int Oid
Definition: postgres_ext.h:31
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4841
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:124
float4 * numbers
Definition: lsyscache.h:53
#define DatumGetBool(X)
Definition: postgres.h:393
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1092
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
int i
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ var_eq_non_const()

double var_eq_non_const ( VariableStatData vardata,
Oid  oproid,
Node other,
bool  varonleft,
bool  negate 
)

Definition at line 442 of file selfuncs.c.

References ATTSTATSSLOT_NUMBERS, CLAMP_PROBABILITY, free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by eqsel_internal().

445 {
446  double selec;
447  double nullfrac = 0.0;
448  bool isdefault;
449 
450  /*
451  * Grab the nullfrac for use below.
452  */
453  if (HeapTupleIsValid(vardata->statsTuple))
454  {
455  Form_pg_statistic stats;
456 
457  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
458  nullfrac = stats->stanullfrac;
459  }
460 
461  /*
462  * If we matched the var to a unique index or DISTINCT clause, assume
463  * there is exactly one match regardless of anything else. (This is
464  * slightly bogus, since the index or clause's equality operator might be
465  * different from ours, but it's much more likely to be right than
466  * ignoring the information.)
467  */
468  if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
469  {
470  selec = 1.0 / vardata->rel->tuples;
471  }
472  else if (HeapTupleIsValid(vardata->statsTuple))
473  {
474  double ndistinct;
475  AttStatsSlot sslot;
476 
477  /*
478  * Search is for a value that we do not know a priori, but we will
479  * assume it is not NULL. Estimate the selectivity as non-null
480  * fraction divided by number of distinct values, so that we get a
481  * result averaged over all possible values whether common or
482  * uncommon. (Essentially, we are assuming that the not-yet-known
483  * comparison value is equally likely to be any of the possible
484  * values, regardless of their frequency in the table. Is that a good
485  * idea?)
486  */
487  selec = 1.0 - nullfrac;
488  ndistinct = get_variable_numdistinct(vardata, &isdefault);
489  if (ndistinct > 1)
490  selec /= ndistinct;
491 
492  /*
493  * Cross-check: selectivity should never be estimated as more than the
494  * most common value's.
495  */
496  if (get_attstatsslot(&sslot, vardata->statsTuple,
497  STATISTIC_KIND_MCV, InvalidOid,
499  {
500  if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
501  selec = sslot.numbers[0];
502  free_attstatsslot(&sslot);
503  }
504  }
505  else
506  {
507  /*
508  * No ANALYZE stats available, so make a guess using estimated number
509  * of distinct values and assuming they are equally common. (The guess
510  * is unlikely to be very good, but we do know a few special cases.)
511  */
512  selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
513  }
514 
515  /* now adjust if we wanted <> rather than = */
516  if (negate)
517  selec = 1.0 - selec - nullfrac;
518 
519  /* result should be in range, but make sure... */
520  CLAMP_PROBABILITY(selec);
521 
522  return selec;
523 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
double tuples
Definition: pathnodes.h:681
RelOptInfo * rel
Definition: selfuncs.h:70
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4841
float4 * numbers
Definition: lsyscache.h:53
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

Variable Documentation

◆ get_index_stats_hook

PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook

Definition at line 147 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_variable().

◆ get_relation_stats_hook

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook

Definition at line 146 of file selfuncs.c.

Referenced by brincostestimate(), btcostestimate(), and examine_simple_variable().