PostgreSQL Source Code  git master
selfuncs.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include "access/brin.h"
#include "access/brin_page.h"
#include "access/gin.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/visibilitymap.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "utils/acl.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/typcache.h"
Include dependency graph for selfuncs.c:

Go to the source code of this file.

Data Structures

struct  GroupVarInfo
 
struct  GinQualCounts
 

Macros

#define DEFAULT_PAGE_CPU_MULTIPLIER   50.0
 
#define VISITED_PAGES_LIMIT   100
 

Functions

static double eqsel_internal (PG_FUNCTION_ARGS, bool negate)
 
static double eqjoinsel_inner (Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2)
 
static double eqjoinsel_semi (Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, RelOptInfo *inner_rel)
 
static bool estimate_multivariate_ndistinct (PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
 
static bool convert_to_scalar (Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
 
static double convert_numeric_to_scalar (Datum value, Oid typid, bool *failure)
 
static void convert_string_to_scalar (char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
 
static void convert_bytea_to_scalar (Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
 
static double convert_one_string_to_scalar (char *value, int rangelo, int rangehi)
 
static double convert_one_bytea_to_scalar (unsigned char *value, int valuelen, int rangelo, int rangehi)
 
static char * convert_string_datum (Datum value, Oid typid, Oid collid, bool *failure)
 
static double convert_timevalue_to_scalar (Datum value, Oid typid, bool *failure)
 
static void examine_simple_variable (PlannerInfo *root, Var *var, VariableStatData *vardata)
 
static bool get_variable_range (PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
 
static void get_stats_slot_range (AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc, Oid collation, int16 typLen, bool typByVal, Datum *min, Datum *max, bool *p_have_data)
 
static bool get_actual_variable_range (PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
 
static bool get_actual_variable_endpoint (Relation heapRel, Relation indexRel, ScanDirection indexscandir, ScanKey scankeys, int16 typLen, bool typByVal, TupleTableSlot *tableslot, MemoryContext outercontext, Datum *endpointDatum)
 
static RelOptInfo * find_join_input_rel (PlannerInfo *root, Relids relids)
 
Datum eqsel (PG_FUNCTION_ARGS)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
 
Datum neqsel (PG_FUNCTION_ARGS)
 
static double scalarineqsel (PlannerInfo *root, Oid operator, bool isgt, bool iseq, Oid collation, VariableStatData *vardata, Datum constval, Oid consttype)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double generic_restriction_selectivity (PlannerInfo *root, Oid oproid, Oid collation, List *args, int varRelid, double default_selectivity)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
 
static Datum scalarineqsel_wrapper (PG_FUNCTION_ARGS, bool isgt, bool iseq)
 
Datum scalarltsel (PG_FUNCTION_ARGS)
 
Datum scalarlesel (PG_FUNCTION_ARGS)
 
Datum scalargtsel (PG_FUNCTION_ARGS)
 
Datum scalargesel (PG_FUNCTION_ARGS)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
static Node * strip_array_coercion (Node *node)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Datum eqjoinsel (PG_FUNCTION_ARGS)
 
Datum neqjoinsel (PG_FUNCTION_ARGS)
 
Datum scalarltjoinsel (PG_FUNCTION_ARGS)
 
Datum scalarlejoinsel (PG_FUNCTION_ARGS)
 
Datum scalargtjoinsel (PG_FUNCTION_ARGS)
 
Datum scalargejoinsel (PG_FUNCTION_ARGS)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
Datum matchingsel (PG_FUNCTION_ARGS)
 
Datum matchingjoinsel (PG_FUNCTION_ARGS)
 
static List * add_unique_group_var (PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (PlannerInfo *root, Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
static void ReleaseDummy (HeapTuple tuple)
 
void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
List * get_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
List * add_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void btcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void hashcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gistcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void spgcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
static bool gincost_pattern (IndexOptInfo *index, int indexcol, Oid clause_op, Datum query, GinQualCounts *counts)
 
static bool gincost_opexpr (PlannerInfo *root, IndexOptInfo *index, int indexcol, OpExpr *clause, GinQualCounts *counts)
 
static bool gincost_scalararrayopexpr (PlannerInfo *root, IndexOptInfo *index, int indexcol, ScalarArrayOpExpr *clause, double numIndexEntries, GinQualCounts *counts)
 
void gincostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void brincostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 

Variables

get_relation_stats_hook_type get_relation_stats_hook = NULL
 
get_index_stats_hook_type get_index_stats_hook = NULL
 

Macro Definition Documentation

◆ DEFAULT_PAGE_CPU_MULTIPLIER

#define DEFAULT_PAGE_CPU_MULTIPLIER   50.0

Definition at line 143 of file selfuncs.c.

◆ VISITED_PAGES_LIMIT

#define VISITED_PAGES_LIMIT   100

Function Documentation

◆ add_predicate_to_index_quals()

List* add_predicate_to_index_quals ( IndexOptInfo *  index,
List *  indexQuals 
)

Definition at line 6649 of file selfuncs.c.

6650 {
6651  List *predExtraQuals = NIL;
6652  ListCell *lc;
6653 
6654  if (index->indpred == NIL)
6655  return indexQuals;
6656 
6657  foreach(lc, index->indpred)
6658  {
6659  Node *predQual = (Node *) lfirst(lc);
6660  List *oneQual = list_make1(predQual);
6661 
6662  if (!predicate_implied_by(oneQual, indexQuals, false))
6663  predExtraQuals = list_concat(predExtraQuals, oneQual);
6664  }
6665  return list_concat(predExtraQuals, indexQuals);
6666 }
List * list_concat(List *list1, const List *list2)
Definition: list.c:560
#define lfirst(lc)
Definition: pg_list.h:172
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:152
Definition: pg_list.h:54
Definition: nodes.h:129
Definition: type.h:95

References lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().
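
The effect of this helper is easiest to see outside the planner: partial-index predicate quals that are not already implied by the index quals are prepended, so the index's WHERE clause participates in the selectivity estimate. A minimal standalone sketch of the same filtering idea (not PostgreSQL code; the string quals and the trivial implied_by() check are illustrative stand-ins for Node trees and predicate_implied_by()):

    #include <stdio.h>
    #include <string.h>

    /* Illustrative stand-in for predicate_implied_by(): here a predicate
     * qual counts as "implied" only if an identical qual already appears
     * among the index quals.  The real function does a logical implication
     * proof, not a string comparison. */
    static int implied_by(const char *pred, const char *quals[], int nquals)
    {
        for (int i = 0; i < nquals; i++)
            if (strcmp(pred, quals[i]) == 0)
                return 1;
        return 0;
    }

    int main(void)
    {
        const char *indexQuals[] = {"a > 10"};
        const char *indpred[] = {"a > 10", "b IS NOT NULL"};
        const char *result[8];
        int n = 0;

        /* Keep only predicate quals not implied by the index quals ... */
        for (int i = 0; i < 2; i++)
            if (!implied_by(indpred[i], indexQuals, 1))
                result[n++] = indpred[i];

        /* ... then append the original index quals, as the function does. */
        for (int i = 0; i < 1; i++)
            result[n++] = indexQuals[i];

        for (int i = 0; i < n; i++)
            printf("%s\n", result[i]);  /* prints: b IS NOT NULL, a > 10 */
        return 0;
    }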

◆ add_unique_group_var()

static List* add_unique_group_var ( PlannerInfo *  root,
List *  varinfos,
Node *  var,
VariableStatData *  vardata 
)
static

Definition at line 3266 of file selfuncs.c.

3268 {
3269  GroupVarInfo *varinfo;
3270  double ndistinct;
3271  bool isdefault;
3272  ListCell *lc;
3273 
3274  ndistinct = get_variable_numdistinct(vardata, &isdefault);
3275 
3276  foreach(lc, varinfos)
3277  {
3278  varinfo = (GroupVarInfo *) lfirst(lc);
3279 
3280  /* Drop exact duplicates */
3281  if (equal(var, varinfo->var))
3282  return varinfos;
3283 
3284  /*
3285  * Drop known-equal vars, but only if they belong to different
3286  * relations (see comments for estimate_num_groups)
3287  */
3288  if (vardata->rel != varinfo->rel &&
3289  exprs_known_equal(root, var, varinfo->var))
3290  {
3291  if (varinfo->ndistinct <= ndistinct)
3292  {
3293  /* Keep older item, forget new one */
3294  return varinfos;
3295  }
3296  else
3297  {
3298  /* Delete the older item */
3299  varinfos = foreach_delete_current(varinfos, lc);
3300  }
3301  }
3302  }
3303 
3304  varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3305 
3306  varinfo->var = var;
3307  varinfo->rel = vardata->rel;
3308  varinfo->ndistinct = ndistinct;
3309  varinfo->isdefault = isdefault;
3310  varinfos = lappend(varinfos, varinfo);
3311  return varinfos;
3312 }
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
Definition: equivclass.c:2439
List * lappend(List *list, void *datum)
Definition: list.c:338
void * palloc(Size size)
Definition: mcxt.c:1226
#define foreach_delete_current(lst, cell)
Definition: pg_list.h:390
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5648
RelOptInfo * rel
Definition: selfuncs.c:3260
double ndistinct
Definition: selfuncs.c:3261
bool isdefault
Definition: selfuncs.c:3262
Node * var
Definition: selfuncs.c:3259
RelOptInfo * rel
Definition: selfuncs.h:88

References equal(), exprs_known_equal(), foreach_delete_current, get_variable_numdistinct(), GroupVarInfo::isdefault, lappend(), lfirst, GroupVarInfo::ndistinct, palloc(), GroupVarInfo::rel, VariableStatData::rel, and GroupVarInfo::var.

Referenced by estimate_num_groups().
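
The dedup rule above can be illustrated in isolation: among known-equal grouping expressions from different relations, only the one with the smaller ndistinct estimate is kept. A standalone sketch of that decision (the struct and values are illustrative, not PostgreSQL API):

    #include <stdio.h>

    /* Simplified stand-in for GroupVarInfo: just a name and an ndistinct guess. */
    struct varinfo { const char *name; double ndistinct; };

    int main(void)
    {
        /* Suppose t1.x and t2.y are known equal (e.g. joined on t1.x = t2.y). */
        struct varinfo existing = {"t1.x", 10000.0};
        struct varinfo incoming = {"t2.y", 500.0};

        /* Same decision as add_unique_group_var(): keep the smaller estimate,
         * since grouping on equal columns cannot produce more groups than the
         * less distinct side provides. */
        struct varinfo kept =
            (existing.ndistinct <= incoming.ndistinct) ? existing : incoming;

        printf("kept %s with ndistinct = %.0f\n", kept.name, kept.ndistinct);
        return 0;
    }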

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo *  root,
BoolTestType  booltesttype,
Node *  arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo *  sjinfo 
)

Definition at line 1539 of file selfuncs.c.

1541 {
1542  VariableStatData vardata;
1543  double selec;
1544 
1545  examine_variable(root, arg, varRelid, &vardata);
1546 
1547  if (HeapTupleIsValid(vardata.statsTuple))
1548  {
1549  Form_pg_statistic stats;
1550  double freq_null;
1551  AttStatsSlot sslot;
1552 
1553  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1554  freq_null = stats->stanullfrac;
1555 
1556  if (get_attstatsslot(&sslot, vardata.statsTuple,
1557  STATISTIC_KIND_MCV, InvalidOid,
1558  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
1559  && sslot.nnumbers > 0)
1560  {
1561  double freq_true;
1562  double freq_false;
1563 
1564  /*
1565  * Get first MCV frequency and derive frequency for true.
1566  */
1567  if (DatumGetBool(sslot.values[0]))
1568  freq_true = sslot.numbers[0];
1569  else
1570  freq_true = 1.0 - sslot.numbers[0] - freq_null;
1571 
1572  /*
1573  * Next derive frequency for false. Then use these as appropriate
1574  * to derive frequency for each case.
1575  */
1576  freq_false = 1.0 - freq_true - freq_null;
1577 
1578  switch (booltesttype)
1579  {
1580  case IS_UNKNOWN:
1581  /* select only NULL values */
1582  selec = freq_null;
1583  break;
1584  case IS_NOT_UNKNOWN:
1585  /* select non-NULL values */
1586  selec = 1.0 - freq_null;
1587  break;
1588  case IS_TRUE:
1589  /* select only TRUE values */
1590  selec = freq_true;
1591  break;
1592  case IS_NOT_TRUE:
1593  /* select non-TRUE values */
1594  selec = 1.0 - freq_true;
1595  break;
1596  case IS_FALSE:
1597  /* select only FALSE values */
1598  selec = freq_false;
1599  break;
1600  case IS_NOT_FALSE:
1601  /* select non-FALSE values */
1602  selec = 1.0 - freq_false;
1603  break;
1604  default:
1605  elog(ERROR, "unrecognized booltesttype: %d",
1606  (int) booltesttype);
1607  selec = 0.0; /* Keep compiler quiet */
1608  break;
1609  }
1610 
1611  free_attstatsslot(&sslot);
1612  }
1613  else
1614  {
1615  /*
1616  * No most-common-value info available. Still have null fraction
1617  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1618  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1619  */
1620  switch (booltesttype)
1621  {
1622  case IS_UNKNOWN:
1623  /* select only NULL values */
1624  selec = freq_null;
1625  break;
1626  case IS_NOT_UNKNOWN:
1627  /* select non-NULL values */
1628  selec = 1.0 - freq_null;
1629  break;
1630  case IS_TRUE:
1631  case IS_FALSE:
1632  /* Assume we select half of the non-NULL values */
1633  selec = (1.0 - freq_null) / 2.0;
1634  break;
1635  case IS_NOT_TRUE:
1636  case IS_NOT_FALSE:
1637  /* Assume we select NULLs plus half of the non-NULLs */
1638  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1639  selec = (freq_null + 1.0) / 2.0;
1640  break;
1641  default:
1642  elog(ERROR, "unrecognized booltesttype: %d",
1643  (int) booltesttype);
1644  selec = 0.0; /* Keep compiler quiet */
1645  break;
1646  }
1647  }
1648  }
1649  else
1650  {
1651  /*
1652  * If we can't get variable statistics for the argument, perhaps
1653  * clause_selectivity can do something with it. We ignore the
1654  * possibility of a NULL value when using clause_selectivity, and just
1655  * assume the value is either TRUE or FALSE.
1656  */
1657  switch (booltesttype)
1658  {
1659  case IS_UNKNOWN:
1660  selec = DEFAULT_UNK_SEL;
1661  break;
1662  case IS_NOT_UNKNOWN:
1663  selec = DEFAULT_NOT_UNK_SEL;
1664  break;
1665  case IS_TRUE:
1666  case IS_NOT_FALSE:
1667  selec = (double) clause_selectivity(root, arg,
1668  varRelid,
1669  jointype, sjinfo);
1670  break;
1671  case IS_FALSE:
1672  case IS_NOT_TRUE:
1673  selec = 1.0 - (double) clause_selectivity(root, arg,
1674  varRelid,
1675  jointype, sjinfo);
1676  break;
1677  default:
1678  elog(ERROR, "unrecognized booltesttype: %d",
1679  (int) booltesttype);
1680  selec = 0.0; /* Keep compiler quiet */
1681  break;
1682  }
1683  }
1684 
1685  ReleaseVariableStats(vardata);
1686 
1687  /* result should be in range, but make sure... */
1688  CLAMP_PROBABILITY(selec);
1689 
1690  return (Selectivity) selec;
1691 }
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:669
#define ERROR
Definition: elog.h:39
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3302
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3192
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
double Selectivity
Definition: nodes.h:261
void * arg
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
#define InvalidOid
Definition: postgres_ext.h:36
@ IS_NOT_TRUE
Definition: primnodes.h:1701
@ IS_NOT_FALSE
Definition: primnodes.h:1701
@ IS_NOT_UNKNOWN
Definition: primnodes.h:1701
@ IS_TRUE
Definition: primnodes.h:1701
@ IS_UNKNOWN
Definition: primnodes.h:1701
@ IS_FALSE
Definition: primnodes.h:1701
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4978
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:56
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:55
Datum * values
Definition: lsyscache.h:53
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57
HeapTuple statsTuple
Definition: selfuncs.h:89

References arg, ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool(), DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog(), ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity_ext().
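
The arithmetic in the MCV branch is worth a worked example. Suppose pg_statistic reports stanullfrac = 0.1 and the first MCV is true with frequency 0.6; then freq_true = 0.6, freq_false = 1 - 0.6 - 0.1 = 0.3, and IS NOT TRUE selects 1 - 0.6 = 0.4 (the falses plus the nulls). A small standalone check of those formulas (illustrative statistics only):

    #include <stdio.h>
    #include <stdbool.h>

    int main(void)
    {
        /* Illustrative statistics: 10% NULLs, most common value is true at 60%. */
        double freq_null = 0.1;
        bool   mcv_is_true = true;
        double mcv_freq = 0.6;

        /* Same derivation as booltestsel()'s MCV branch. */
        double freq_true = mcv_is_true ? mcv_freq : 1.0 - mcv_freq - freq_null;
        double freq_false = 1.0 - freq_true - freq_null;

        printf("IS TRUE      -> %.2f\n", freq_true);        /* 0.60 */
        printf("IS NOT TRUE  -> %.2f\n", 1.0 - freq_true);  /* 0.40 */
        printf("IS FALSE     -> %.2f\n", freq_false);       /* 0.30 */
        printf("IS UNKNOWN   -> %.2f\n", freq_null);        /* 0.10 */
        return 0;
    }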

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo *  root,
Node *  arg,
int  varRelid 
)

Definition at line 1511 of file selfuncs.c.

1512 {
1513  VariableStatData vardata;
1514  double selec;
1515 
1516  examine_variable(root, arg, varRelid, &vardata);
1517  if (HeapTupleIsValid(vardata.statsTuple))
1518  {
1519  /*
1520  * A boolean variable V is equivalent to the clause V = 't', so we
1521  * compute the selectivity as if that is what we have.
1522  */
1523  selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
1524  BoolGetDatum(true), false, true, false);
1525  }
1526  else
1527  {
1528  /* Otherwise, the default estimate is 0.5 */
1529  selec = 0.5;
1530  }
1531  ReleaseVariableStats(vardata);
1532  return selec;
1533 }
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:294

References arg, BoolGetDatum(), examine_variable(), HeapTupleIsValid, InvalidOid, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity_ext().

◆ brincostestimate()

void brincostestimate ( PlannerInfo *  root,
IndexPath *  path,
double  loop_count,
Cost *  indexStartupCost,
Cost *  indexTotalCost,
Selectivity *  indexSelectivity,
double *  indexCorrelation,
double *  indexPages 
)

Definition at line 7815 of file selfuncs.c.

7819 {
7820  IndexOptInfo *index = path->indexinfo;
7821  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
7822  double numPages = index->pages;
7823  RelOptInfo *baserel = index->rel;
7824  RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
7825  Cost spc_seq_page_cost;
7826  Cost spc_random_page_cost;
7827  double qual_arg_cost;
7828  double qualSelectivity;
7829  BrinStatsData statsData;
7830  double indexRanges;
7831  double minimalRanges;
7832  double estimatedRanges;
7833  double selec;
7834  Relation indexRel;
7835  ListCell *l;
7836  VariableStatData vardata;
7837 
7838  Assert(rte->rtekind == RTE_RELATION);
7839 
7840  /* fetch estimated page cost for the tablespace containing the index */
7841  get_tablespace_page_costs(index->reltablespace,
7842  &spc_random_page_cost,
7843  &spc_seq_page_cost);
7844 
7845  /*
7846  * Obtain some data from the index itself, if possible. Otherwise invent
7847  * some plausible internal statistics based on the relation page count.
7848  */
7849  if (!index->hypothetical)
7850  {
7851  /*
7852  * A lock should have already been obtained on the index in plancat.c.
7853  */
7854  indexRel = index_open(index->indexoid, NoLock);
7855  brinGetStats(indexRel, &statsData);
7856  index_close(indexRel, NoLock);
7857 
7858  /* work out the actual number of ranges in the index */
7859  indexRanges = Max(ceil((double) baserel->pages /
7860  statsData.pagesPerRange), 1.0);
7861  }
7862  else
7863  {
7864  /*
7865  * Assume default number of pages per range, and estimate the number
7866  * of ranges based on that.
7867  */
7868  indexRanges = Max(ceil((double) baserel->pages /
7869  BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);
7870 
7871  statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
7872  statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
7873  }
7874 
7875  /*
7876  * Compute index correlation
7877  *
7878  * Because we can use all index quals equally when scanning, we can use
7879  * the largest correlation (in absolute value) among columns used by the
7880  * query. Start at zero, the worst possible case. If we cannot find any
7881  * correlation statistics, we will keep it as 0.
7882  */
7883  *indexCorrelation = 0;
7884 
7885  foreach(l, path->indexclauses)
7886  {
7887  IndexClause *iclause = lfirst_node(IndexClause, l);
7888  AttrNumber attnum = index->indexkeys[iclause->indexcol];
7889 
7890  /* attempt to lookup stats in relation for this index column */
7891  if (attnum != 0)
7892  {
7893  /* Simple variable -- look to stats for the underlying table */
7894  if (get_relation_stats_hook &&
7895  (*get_relation_stats_hook) (root, rte, attnum, &vardata))
7896  {
7897  /*
7898  * The hook took control of acquiring a stats tuple. If it
7899  * did supply a tuple, it'd better have supplied a freefunc.
7900  */
7901  if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
7902  elog(ERROR,
7903  "no function provided to release variable stats with");
7904  }
7905  else
7906  {
7907  vardata.statsTuple =
7908  SearchSysCache3(STATRELATTINH,
7909  ObjectIdGetDatum(rte->relid),
7910  Int16GetDatum(attnum),
7911  BoolGetDatum(false));
7912  vardata.freefunc = ReleaseSysCache;
7913  }
7914  }
7915  else
7916  {
7917  /*
7918  * Looks like we've found an expression column in the index. Let's
7919  * see if there's any stats for it.
7920  */
7921 
7922  /* get the attnum from the 0-based index. */
7923  attnum = iclause->indexcol + 1;
7924 
7925  if (get_index_stats_hook &&
7926  (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
7927  {
7928  /*
7929  * The hook took control of acquiring a stats tuple. If it
7930  * did supply a tuple, it'd better have supplied a freefunc.
7931  */
7932  if (HeapTupleIsValid(vardata.statsTuple) &&
7933  !vardata.freefunc)
7934  elog(ERROR, "no function provided to release variable stats with");
7935  }
7936  else
7937  {
7938  vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7939  ObjectIdGetDatum(index->indexoid),
7940  Int16GetDatum(attnum),
7941  BoolGetDatum(false));
7942  vardata.freefunc = ReleaseSysCache;
7943  }
7944  }
7945 
7946  if (HeapTupleIsValid(vardata.statsTuple))
7947  {
7948  AttStatsSlot sslot;
7949 
7950  if (get_attstatsslot(&sslot, vardata.statsTuple,
7951  STATISTIC_KIND_CORRELATION, InvalidOid,
7952  ATTSTATSSLOT_NUMBERS))
7953  {
7954  double varCorrelation = 0.0;
7955 
7956  if (sslot.nnumbers > 0)
7957  varCorrelation = fabs(sslot.numbers[0]);
7958 
7959  if (varCorrelation > *indexCorrelation)
7960  *indexCorrelation = varCorrelation;
7961 
7962  free_attstatsslot(&sslot);
7963  }
7964  }
7965 
7966  ReleaseVariableStats(vardata);
7967  }
7968 
7969  qualSelectivity = clauselist_selectivity(root, indexQuals,
7970  baserel->relid,
7971  JOIN_INNER, NULL);
7972 
7973  /*
7974  * Now calculate the minimum possible ranges we could match with if all of
7975  * the rows were in the perfect order in the table's heap.
7976  */
7977  minimalRanges = ceil(indexRanges * qualSelectivity);
7978 
7979  /*
7980  * Now estimate the number of ranges that we'll touch by using the
7981  * indexCorrelation from the stats. Careful not to divide by zero (note
7982  * we're using the absolute value of the correlation).
7983  */
7984  if (*indexCorrelation < 1.0e-10)
7985  estimatedRanges = indexRanges;
7986  else
7987  estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
7988 
7989  /* we expect to visit this portion of the table */
7990  selec = estimatedRanges / indexRanges;
7991 
7992  CLAMP_PROBABILITY(selec);
7993 
7994  *indexSelectivity = selec;
7995 
7996  /*
7997  * Compute the index qual costs, much as in genericcostestimate, to add to
7998  * the index costs. We can disregard indexorderbys, since BRIN doesn't
7999  * support those.
8000  */
8001  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8002 
8003  /*
8004  * Compute the startup cost as the cost to read the whole revmap
8005  * sequentially, including the cost to execute the index quals.
8006  */
8007  *indexStartupCost =
8008  spc_seq_page_cost * statsData.revmapNumPages * loop_count;
8009  *indexStartupCost += qual_arg_cost;
8010 
8011  /*
8012  * To read a BRIN index there might be a bit of back and forth over
8013  * regular pages, as revmap might point to them out of sequential order;
8014  * calculate the total cost as reading the whole index in random order.
8015  */
8016  *indexTotalCost = *indexStartupCost +
8017  spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
8018 
8019  /*
8020  * Charge a small amount per range tuple which we expect to match to. This
8021  * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
8022  * will set a bit for each page in the range when we find a matching
8023  * range, so we must multiply the charge by the number of pages in the
8024  * range.
8025  */
8026  *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
8027  statsData.pagesPerRange;
8028 
8029  *indexPages = index->pages;
8030 }
int16 AttrNumber
Definition: attnum.h:21
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1272
#define BRIN_DEFAULT_PAGES_PER_RANGE
Definition: brin.h:38
#define REVMAP_PAGE_MAXITEMS
Definition: brin_page.h:93
#define Min(x, y)
Definition: c.h:988
#define Max(x, y)
Definition: c.h:982
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:102
double cpu_operator_cost
Definition: costsize.c:124
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:158
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:132
Assert(fmt[strlen(fmt) - 1] !='\n')
#define NoLock
Definition: lockdefs.h:34
double Cost
Definition: nodes.h:262
@ JOIN_INNER
Definition: nodes.h:304
@ RTE_RELATION
Definition: parsenodes.h:1014
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:555
int16 attnum
Definition: pg_attribute.h:74
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:172
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:6347
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:147
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:6377
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:146
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
BlockNumber revmapNumPages
Definition: brin.h:34
BlockNumber pagesPerRange
Definition: brin.h:33
AttrNumber indexcol
Definition: pathnodes.h:1729
List * indexclauses
Definition: pathnodes.h:1679
IndexOptInfo * indexinfo
Definition: pathnodes.h:1678
RTEKind rtekind
Definition: parsenodes.h:1033
Index relid
Definition: pathnodes.h:903
BlockNumber pages
Definition: pathnodes.h:927
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:91
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:866
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:840
@ STATRELATTINH
Definition: syscache.h:97

References Assert(), attnum, ATTSTATSSLOT_NUMBERS, BoolGetDatum(), BRIN_DEFAULT_PAGES_PER_RANGE, brinGetStats(), CLAMP_PROBABILITY, clauselist_selectivity(), cpu_operator_cost, elog(), ERROR, free_attstatsslot(), VariableStatData::freefunc, get_attstatsslot(), get_index_stats_hook, get_quals_from_indexclauses(), get_relation_stats_hook, get_tablespace_page_costs(), HeapTupleIsValid, index_close(), index_open(), index_other_operands_eval_cost(), IndexPath::indexclauses, IndexClause::indexcol, IndexPath::indexinfo, Int16GetDatum(), InvalidOid, JOIN_INNER, lfirst_node, Max, Min, AttStatsSlot::nnumbers, NoLock, AttStatsSlot::numbers, ObjectIdGetDatum(), RelOptInfo::pages, BrinStatsData::pagesPerRange, planner_rt_fetch, ReleaseSysCache(), ReleaseVariableStats, RangeTblEntry::relid, RelOptInfo::relid, REVMAP_PAGE_MAXITEMS, BrinStatsData::revmapNumPages, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), STATRELATTINH, and VariableStatData::statsTuple.

Referenced by brinhandler().
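
The range arithmetic above can be followed with concrete numbers. Assume (illustratively) a 100,000-page heap, 128 pages per range, a qual selectivity of 0.01 and an index correlation of 0.5: there are ceil(100000/128) = 782 ranges, at least ceil(782 * 0.01) = 8 would match under perfect heap ordering, and dividing by the correlation gives an estimate of 16 ranges, i.e. a selectivity of about 0.02. A standalone sketch of that calculation (values are made up; the variable names are not PostgreSQL's):

    #include <stdio.h>
    #include <math.h>

    int main(void)
    {
        /* Illustrative inputs, not taken from any real index. */
        double heap_pages      = 100000.0;
        double pages_per_range = 128.0;
        double qual_selec      = 0.01;  /* clauselist_selectivity() result */
        double correlation     = 0.5;   /* best |correlation| among columns */

        double index_ranges   = fmax(ceil(heap_pages / pages_per_range), 1.0);
        double minimal_ranges = ceil(index_ranges * qual_selec);

        /* Low correlation means matching rows are spread over more ranges. */
        double estimated_ranges = (correlation < 1.0e-10)
            ? index_ranges
            : fmin(minimal_ranges / correlation, index_ranges);

        printf("ranges=%.0f minimal=%.0f estimated=%.0f selectivity=%.4f\n",
               index_ranges, minimal_ranges, estimated_ranges,
               estimated_ranges / index_ranges);
        return 0;
    }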

◆ btcostestimate()

void btcostestimate ( PlannerInfo *  root,
IndexPath *  path,
double  loop_count,
Cost *  indexStartupCost,
Cost *  indexTotalCost,
Selectivity *  indexSelectivity,
double *  indexCorrelation,
double *  indexPages 
)

Definition at line 6670 of file selfuncs.c.

6674 {
6675  IndexOptInfo *index = path->indexinfo;
6676  GenericCosts costs = {0};
6677  Oid relid;
6678  AttrNumber colnum;
6679  VariableStatData vardata = {0};
6680  double numIndexTuples;
6681  Cost descentCost;
6682  List *indexBoundQuals;
6683  int indexcol;
6684  bool eqQualHere;
6685  bool found_saop;
6686  bool found_is_null_op;
6687  double num_sa_scans;
6688  ListCell *lc;
6689 
6690  /*
6691  * For a btree scan, only leading '=' quals plus inequality quals for the
6692  * immediately next attribute contribute to index selectivity (these are
6693  * the "boundary quals" that determine the starting and stopping points of
6694  * the index scan). Additional quals can suppress visits to the heap, so
6695  * it's OK to count them in indexSelectivity, but they should not count
6696  * for estimating numIndexTuples. So we must examine the given indexquals
6697  * to find out which ones count as boundary quals. We rely on the
6698  * knowledge that they are given in index column order.
6699  *
6700  * For a RowCompareExpr, we consider only the first column, just as
6701  * rowcomparesel() does.
6702  *
6703  * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
6704  * index scans not one, but the ScalarArrayOpExpr's operator can be
6705  * considered to act the same as it normally does.
6706  */
6707  indexBoundQuals = NIL;
6708  indexcol = 0;
6709  eqQualHere = false;
6710  found_saop = false;
6711  found_is_null_op = false;
6712  num_sa_scans = 1;
6713  foreach(lc, path->indexclauses)
6714  {
6715  IndexClause *iclause = lfirst_node(IndexClause, lc);
6716  ListCell *lc2;
6717 
6718  if (indexcol != iclause->indexcol)
6719  {
6720  /* Beginning of a new column's quals */
6721  if (!eqQualHere)
6722  break; /* done if no '=' qual for indexcol */
6723  eqQualHere = false;
6724  indexcol++;
6725  if (indexcol != iclause->indexcol)
6726  break; /* no quals at all for indexcol */
6727  }
6728 
6729  /* Examine each indexqual associated with this index clause */
6730  foreach(lc2, iclause->indexquals)
6731  {
6732  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6733  Expr *clause = rinfo->clause;
6734  Oid clause_op = InvalidOid;
6735  int op_strategy;
6736 
6737  if (IsA(clause, OpExpr))
6738  {
6739  OpExpr *op = (OpExpr *) clause;
6740 
6741  clause_op = op->opno;
6742  }
6743  else if (IsA(clause, RowCompareExpr))
6744  {
6745  RowCompareExpr *rc = (RowCompareExpr *) clause;
6746 
6747  clause_op = linitial_oid(rc->opnos);
6748  }
6749  else if (IsA(clause, ScalarArrayOpExpr))
6750  {
6751  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6752  Node *other_operand = (Node *) lsecond(saop->args);
6753  int alength = estimate_array_length(other_operand);
6754 
6755  clause_op = saop->opno;
6756  found_saop = true;
6757  /* count number of SA scans induced by indexBoundQuals only */
6758  if (alength > 1)
6759  num_sa_scans *= alength;
6760  }
6761  else if (IsA(clause, NullTest))
6762  {
6763  NullTest *nt = (NullTest *) clause;
6764 
6765  if (nt->nulltesttype == IS_NULL)
6766  {
6767  found_is_null_op = true;
6768  /* IS NULL is like = for selectivity purposes */
6769  eqQualHere = true;
6770  }
6771  }
6772  else
6773  elog(ERROR, "unsupported indexqual type: %d",
6774  (int) nodeTag(clause));
6775 
6776  /* check for equality operator */
6777  if (OidIsValid(clause_op))
6778  {
6779  op_strategy = get_op_opfamily_strategy(clause_op,
6780  index->opfamily[indexcol]);
6781  Assert(op_strategy != 0); /* not a member of opfamily?? */
6782  if (op_strategy == BTEqualStrategyNumber)
6783  eqQualHere = true;
6784  }
6785 
6786  indexBoundQuals = lappend(indexBoundQuals, rinfo);
6787  }
6788  }
6789 
6790  /*
6791  * If index is unique and we found an '=' clause for each column, we can
6792  * just assume numIndexTuples = 1 and skip the expensive
6793  * clauselist_selectivity calculations. However, a ScalarArrayOp or
6794  * NullTest invalidates that theory, even though it sets eqQualHere.
6795  */
6796  if (index->unique &&
6797  indexcol == index->nkeycolumns - 1 &&
6798  eqQualHere &&
6799  !found_saop &&
6800  !found_is_null_op)
6801  numIndexTuples = 1.0;
6802  else
6803  {
6804  List *selectivityQuals;
6805  Selectivity btreeSelectivity;
6806 
6807  /*
6808  * If the index is partial, AND the index predicate with the
6809  * index-bound quals to produce a more accurate idea of the number of
6810  * rows covered by the bound conditions.
6811  */
6812  selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
6813 
6814  btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6815  index->rel->relid,
6816  JOIN_INNER,
6817  NULL);
6818  numIndexTuples = btreeSelectivity * index->rel->tuples;
6819 
6820  /*
6821  * As in genericcostestimate(), we have to adjust for any
6822  * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6823  * to integer.
6824  */
6825  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6826  }
6827 
6828  /*
6829  * Now do generic index cost estimation.
6830  */
6831  costs.numIndexTuples = numIndexTuples;
6832 
6833  genericcostestimate(root, path, loop_count, &costs);
6834 
6835  /*
6836  * Add a CPU-cost component to represent the costs of initial btree
6837  * descent. We don't charge any I/O cost for touching upper btree levels,
6838  * since they tend to stay in cache, but we still have to do about log2(N)
6839  * comparisons to descend a btree of N leaf tuples. We charge one
6840  * cpu_operator_cost per comparison.
6841  *
6842  * If there are ScalarArrayOpExprs, charge this once per SA scan. The
6843  * ones after the first one are not startup cost so far as the overall
6844  * plan is concerned, so add them only to "total" cost.
6845  */
6846  if (index->tuples > 1) /* avoid computing log(0) */
6847  {
6848  descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
6849  costs.indexStartupCost += descentCost;
6850  costs.indexTotalCost += costs.num_sa_scans * descentCost;
6851  }
6852 
6853  /*
6854  * Even though we're not charging I/O cost for touching upper btree pages,
6855  * it's still reasonable to charge some CPU cost per page descended
6856  * through. Moreover, if we had no such charge at all, bloated indexes
6857  * would appear to have the same search cost as unbloated ones, at least
6858  * in cases where only a single leaf page is expected to be visited. This
6859  * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
6860  * touched. The number of such pages is btree tree height plus one (ie,
6861  * we charge for the leaf page too). As above, charge once per SA scan.
6862  */
6863  descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
6864  costs.indexStartupCost += descentCost;
6865  costs.indexTotalCost += costs.num_sa_scans * descentCost;
6866 
6867  /*
6868  * If we can get an estimate of the first column's ordering correlation C
6869  * from pg_statistic, estimate the index correlation as C for a
6870  * single-column index, or C * 0.75 for multiple columns. (The idea here
6871  * is that multiple columns dilute the importance of the first column's
6872  * ordering, but don't negate it entirely. Before 8.0 we divided the
6873  * correlation by the number of columns, but that seems too strong.)
6874  */
6875  if (index->indexkeys[0] != 0)
6876  {
6877  /* Simple variable --- look to stats for the underlying table */
6878  RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6879 
6880  Assert(rte->rtekind == RTE_RELATION);
6881  relid = rte->relid;
6882  Assert(relid != InvalidOid);
6883  colnum = index->indexkeys[0];
6884 
6885  if (get_relation_stats_hook &&
6886  (*get_relation_stats_hook) (root, rte, colnum, &vardata))
6887  {
6888  /*
6889  * The hook took control of acquiring a stats tuple. If it did
6890  * supply a tuple, it'd better have supplied a freefunc.
6891  */
6892  if (HeapTupleIsValid(vardata.statsTuple) &&
6893  !vardata.freefunc)
6894  elog(ERROR, "no function provided to release variable stats with");
6895  }
6896  else
6897  {
6898  vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6899  ObjectIdGetDatum(relid),
6900  Int16GetDatum(colnum),
6901  BoolGetDatum(rte->inh));
6902  vardata.freefunc = ReleaseSysCache;
6903  }
6904  }
6905  else
6906  {
6907  /* Expression --- maybe there are stats for the index itself */
6908  relid = index->indexoid;
6909  colnum = 1;
6910 
6911  if (get_index_stats_hook &&
6912  (*get_index_stats_hook) (root, relid, colnum, &vardata))
6913  {
6914  /*
6915  * The hook took control of acquiring a stats tuple. If it did
6916  * supply a tuple, it'd better have supplied a freefunc.
6917  */
6918  if (HeapTupleIsValid(vardata.statsTuple) &&
6919  !vardata.freefunc)
6920  elog(ERROR, "no function provided to release variable stats with");
6921  }
6922  else
6923  {
6924  vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6925  ObjectIdGetDatum(relid),
6926  Int16GetDatum(colnum),
6927  BoolGetDatum(false));
6928  vardata.freefunc = ReleaseSysCache;
6929  }
6930  }
6931 
6932  if (HeapTupleIsValid(vardata.statsTuple))
6933  {
6934  Oid sortop;
6935  AttStatsSlot sslot;
6936 
6937  sortop = get_opfamily_member(index->opfamily[0],
6938  index->opcintype[0],
6939  index->opcintype[0],
6940  BTLessStrategyNumber);
6941  if (OidIsValid(sortop) &&
6942  get_attstatsslot(&sslot, vardata.statsTuple,
6943  STATISTIC_KIND_CORRELATION, sortop,
6944  ATTSTATSSLOT_NUMBERS))
6945  {
6946  double varCorrelation;
6947 
6948  Assert(sslot.nnumbers == 1);
6949  varCorrelation = sslot.numbers[0];
6950 
6951  if (index->reverse_sort[0])
6952  varCorrelation = -varCorrelation;
6953 
6954  if (index->nkeycolumns > 1)
6955  costs.indexCorrelation = varCorrelation * 0.75;
6956  else
6957  costs.indexCorrelation = varCorrelation;
6958 
6959  free_attstatsslot(&sslot);
6960  }
6961  }
6962 
6963  ReleaseVariableStats(vardata);
6964 
6965  *indexStartupCost = costs.indexStartupCost;
6966  *indexTotalCost = costs.indexTotalCost;
6967  *indexSelectivity = costs.indexSelectivity;
6968  *indexCorrelation = costs.indexCorrelation;
6969  *indexPages = costs.numIndexPages;
6970 }
#define OidIsValid(objectId)
Definition: c.h:759
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:82
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:165
#define IsA(nodeptr, _type_)
Definition: nodes.h:179
#define nodeTag(nodeptr)
Definition: nodes.h:133
#define lsecond(l)
Definition: pg_list.h:183
#define linitial_oid(l)
Definition: pg_list.h:180
unsigned int Oid
Definition: postgres_ext.h:31
@ IS_NULL
Definition: primnodes.h:1677
#define DEFAULT_PAGE_CPU_MULTIPLIER
Definition: selfuncs.c:143
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:2134
void genericcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
Definition: selfuncs.c:6431
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6649
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Selectivity indexSelectivity
Definition: selfuncs.h:124
Cost indexStartupCost
Definition: selfuncs.h:122
double indexCorrelation
Definition: selfuncs.h:125
double num_sa_scans
Definition: selfuncs.h:131
Cost indexTotalCost
Definition: selfuncs.h:123
double numIndexPages
Definition: selfuncs.h:128
double numIndexTuples
Definition: selfuncs.h:129
List * indexquals
Definition: pathnodes.h:1727
NullTestType nulltesttype
Definition: primnodes.h:1684
Oid opno
Definition: primnodes.h:745
Expr * clause
Definition: pathnodes.h:2516

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, Assert(), ATTSTATSSLOT_NUMBERS, BoolGetDatum(), BTEqualStrategyNumber, BTLessStrategyNumber, RestrictInfo::clause, clauselist_selectivity(), cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, elog(), ERROR, estimate_array_length(), free_attstatsslot(), VariableStatData::freefunc, genericcostestimate(), get_attstatsslot(), get_index_stats_hook, get_op_opfamily_strategy(), get_opfamily_member(), get_relation_stats_hook, HeapTupleIsValid, IndexPath::indexclauses, IndexClause::indexcol, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexClause::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, RangeTblEntry::inh, Int16GetDatum(), InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), lfirst_node, linitial_oid, lsecond, NIL, AttStatsSlot::nnumbers, nodeTag, NullTest::nulltesttype, GenericCosts::num_sa_scans, AttStatsSlot::numbers, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, ObjectIdGetDatum(), OidIsValid, OpExpr::opno, ScalarArrayOpExpr::opno, planner_rt_fetch, ReleaseSysCache(), ReleaseVariableStats, RangeTblEntry::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), STATRELATTINH, and VariableStatData::statsTuple.

Referenced by bthandler().
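
The two descent-cost charges can be reproduced in isolation. For an index of, say, 1,000,000 tuples with tree height 2, the comparison charge is ceil(log2(1e6)) = 20 cpu_operator_cost units and the per-page CPU charge is (2 + 1) * 50 cpu_operator_cost units, each added once per ScalarArrayOp scan. A standalone sketch with illustrative numbers (cpu_operator_cost is assumed at PostgreSQL's default of 0.0025):

    #include <stdio.h>
    #include <math.h>

    #define DEFAULT_PAGE_CPU_MULTIPLIER 50.0   /* same constant as selfuncs.c */

    int main(void)
    {
        double cpu_operator_cost = 0.0025;     /* default GUC value */
        double index_tuples = 1000000.0;       /* illustrative index size */
        double tree_height  = 2.0;             /* illustrative btree height */
        double num_sa_scans = 1.0;             /* no ScalarArrayOpExpr quals */

        /* ceil(log2(N)) comparisons to descend to the leaf level. */
        double comparison_cost =
            ceil(log(index_tuples) / log(2.0)) * cpu_operator_cost;

        /* A per-page CPU charge for height+1 pages (including the leaf). */
        double page_cost =
            (tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;

        double startup = comparison_cost + page_cost;
        printf("descent startup cost ~= %.4f (total adds %.4f per SA scan)\n",
               startup, num_sa_scans * startup);
        return 0;
    }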

◆ convert_bytea_to_scalar()

static void convert_bytea_to_scalar ( Datum  value,
double *  scaledvalue,
Datum  lobound,
double *  scaledlobound,
Datum  hibound,
double *  scaledhibound 
)
static

Definition at line 4696 of file selfuncs.c.

4702 {
4703  bytea *valuep = DatumGetByteaPP(value);
4704  bytea *loboundp = DatumGetByteaPP(lobound);
4705  bytea *hiboundp = DatumGetByteaPP(hibound);
4706  int rangelo,
4707  rangehi,
4708  valuelen = VARSIZE_ANY_EXHDR(valuep),
4709  loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4710  hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
4711  i,
4712  minlen;
4713  unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
4714  unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
4715  unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
4716 
4717  /*
4718  * Assume bytea data is uniformly distributed across all byte values.
4719  */
4720  rangelo = 0;
4721  rangehi = 255;
4722 
4723  /*
4724  * Now strip any common prefix of the three strings.
4725  */
4726  minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4727  for (i = 0; i < minlen; i++)
4728  {
4729  if (*lostr != *histr || *lostr != *valstr)
4730  break;
4731  lostr++, histr++, valstr++;
4732  loboundlen--, hiboundlen--, valuelen--;
4733  }
4734 
4735  /*
4736  * Now we can do the conversions.
4737  */
4738  *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4739  *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4740  *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4741 }
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
static struct @147 value
int i
Definition: isn.c:73
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi)
Definition: selfuncs.c:4744
Definition: c.h:671
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References convert_one_bytea_to_scalar(), DatumGetByteaPP, i, Min, value, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by convert_to_scalar().

◆ convert_numeric_to_scalar()

static double convert_numeric_to_scalar ( Datum  value,
Oid  typid,
bool *  failure 
)
static

Definition at line 4422 of file selfuncs.c.

4423 {
4424  switch (typid)
4425  {
4426  case BOOLOID:
4427  return (double) DatumGetBool(value);
4428  case INT2OID:
4429  return (double) DatumGetInt16(value);
4430  case INT4OID:
4431  return (double) DatumGetInt32(value);
4432  case INT8OID:
4433  return (double) DatumGetInt64(value);
4434  case FLOAT4OID:
4435  return (double) DatumGetFloat4(value);
4436  case FLOAT8OID:
4437  return (double) DatumGetFloat8(value);
4438  case NUMERICOID:
4439  /* Note: out-of-range values will be clamped to +-HUGE_VAL */
4440  return (double)
4441  DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
4442  value));
4443  case OIDOID:
4444  case REGPROCOID:
4445  case REGPROCEDUREOID:
4446  case REGOPEROID:
4447  case REGOPERATOROID:
4448  case REGCLASSOID:
4449  case REGTYPEOID:
4450  case REGCOLLATIONOID:
4451  case REGCONFIGOID:
4452  case REGDICTIONARYOID:
4453  case REGROLEOID:
4454  case REGNAMESPACEOID:
4455  /* we can treat OIDs as integers... */
4456  return (double) DatumGetObjectId(value);
4457  }
4458 
4459  *failure = true;
4460  return 0;
4461 }
Datum numeric_float8_no_overflow(PG_FUNCTION_ARGS)
Definition: numeric.c:4583
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:385
static float4 DatumGetFloat4(Datum X)
Definition: postgres.h:458
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:242
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:494
static int16 DatumGetInt16(Datum X)
Definition: postgres.h:162
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202

References DatumGetBool(), DatumGetFloat4(), DatumGetFloat8(), DatumGetInt16(), DatumGetInt32(), DatumGetInt64(), DatumGetObjectId(), DirectFunctionCall1, numeric_float8_no_overflow(), and value.

Referenced by convert_to_scalar().

◆ convert_one_bytea_to_scalar()

static double convert_one_bytea_to_scalar ( unsigned char *  value,
int  valuelen,
int  rangelo,
int  rangehi 
)
static

Definition at line 4744 of file selfuncs.c.

4746 {
4747  double num,
4748  denom,
4749  base;
4750 
4751  if (valuelen <= 0)
4752  return 0.0; /* empty string has scalar value 0 */
4753 
4754  /*
4755  * Since base is 256, need not consider more than about 10 chars (even
4756  * this many seems like overkill)
4757  */
4758  if (valuelen > 10)
4759  valuelen = 10;
4760 
4761  /* Convert initial characters to fraction */
4762  base = rangehi - rangelo + 1;
4763  num = 0.0;
4764  denom = base;
4765  while (valuelen-- > 0)
4766  {
4767  int ch = *value++;
4768 
4769  if (ch < rangelo)
4770  ch = rangelo - 1;
4771  else if (ch > rangehi)
4772  ch = rangehi + 1;
4773  num += ((double) (ch - rangelo)) / denom;
4774  denom *= base;
4775  }
4776 
4777  return num;
4778 }

References value.

Referenced by convert_bytea_to_scalar().
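
In other words, the bytes are read as digits of a base-256 fraction in [0, 1): 0x80 0x00 maps to 128/256 = 0.5, and each further byte refines the value by another factor of 256. A standalone re-implementation of the same loop for the rangelo = 0, rangehi = 255 case (illustrative, outside PostgreSQL):

    #include <stdio.h>

    /* Same arithmetic as convert_one_bytea_to_scalar() with rangelo=0,
     * rangehi=255: treat the bytes as base-256 fractional digits. */
    static double bytes_to_fraction(const unsigned char *p, int len)
    {
        double num = 0.0, denom = 256.0;

        if (len > 10)              /* further bytes change almost nothing */
            len = 10;
        while (len-- > 0)
        {
            num += (double) *p++ / denom;
            denom *= 256.0;
        }
        return num;
    }

    int main(void)
    {
        unsigned char a[] = {0x80, 0x00};   /* -> 0.5     */
        unsigned char b[] = {0xff};         /* -> 255/256 */

        printf("%.6f %.6f\n", bytes_to_fraction(a, 2), bytes_to_fraction(b, 1));
        return 0;
    }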

◆ convert_one_string_to_scalar()

static double convert_one_string_to_scalar ( char *  value,
int  rangelo,
int  rangehi 
)
static

Definition at line 4564 of file selfuncs.c.

4565 {
4566  int slen = strlen(value);
4567  double num,
4568  denom,
4569  base;
4570 
4571  if (slen <= 0)
4572  return 0.0; /* empty string has scalar value 0 */
4573 
4574  /*
4575  * There seems little point in considering more than a dozen bytes from
4576  * the string. Since base is at least 10, that will give us nominal
4577  * resolution of at least 12 decimal digits, which is surely far more
4578  * precision than this estimation technique has got anyway (especially in
4579  * non-C locales). Also, even with the maximum possible base of 256, this
4580  * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4581  * overflow on any known machine.
4582  */
4583  if (slen > 12)
4584  slen = 12;
4585 
4586  /* Convert initial characters to fraction */
4587  base = rangehi - rangelo + 1;
4588  num = 0.0;
4589  denom = base;
4590  while (slen-- > 0)
4591  {
4592  int ch = (unsigned char) *value++;
4593 
4594  if (ch < rangelo)
4595  ch = rangelo - 1;
4596  else if (ch > rangehi)
4597  ch = rangehi + 1;
4598  num += ((double) (ch - rangelo)) / denom;
4599  denom *= base;
4600  }
4601 
4602  return num;
4603 }

References value.

Referenced by convert_string_to_scalar().

◆ convert_string_datum()

static char * convert_string_datum ( Datum  value,
Oid  typid,
Oid  collid,
bool *  failure 
)
static

Definition at line 4615 of file selfuncs.c.

4616 {
4617  char *val;
4618 
4619  switch (typid)
4620  {
4621  case CHAROID:
4622  val = (char *) palloc(2);
4623  val[0] = DatumGetChar(value);
4624  val[1] = '\0';
4625  break;
4626  case BPCHAROID:
4627  case VARCHAROID:
4628  case TEXTOID:
4629  val = TextDatumGetCString(value);
4630  break;
4631  case NAMEOID:
4632  {
4633  NameData *nm = (NameData *) DatumGetPointer(value);
4634 
4635  val = pstrdup(NameStr(*nm));
4636  break;
4637  }
4638  default:
4639  *failure = true;
4640  return NULL;
4641  }
4642 
4643  if (!lc_collate_is_c(collid))
4644  {
4645  char *xfrmstr;
4646  size_t xfrmlen;
4647  size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4648 
4649  /*
4650  * XXX: We could guess at a suitable output buffer size and only call
4651  * strxfrm twice if our guess is too small.
4652  *
4653  * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4654  * bogus data or set an error. This is not really a problem unless it
4655  * crashes since it will only give an estimation error and nothing
4656  * fatal.
4657  */
4658  xfrmlen = strxfrm(NULL, val, 0);
4659 #ifdef WIN32
4660 
4661  /*
4662  * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4663  * of trying to allocate this much memory (and fail), just return the
4664  * original string unmodified as if we were in the C locale.
4665  */
4666  if (xfrmlen == INT_MAX)
4667  return val;
4668 #endif
4669  xfrmstr = (char *) palloc(xfrmlen + 1);
4670  xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4671 
4672  /*
4673  * Some systems (e.g., glibc) can return a smaller value from the
4674  * second call than the first; thus the Assert must be <= not ==.
4675  */
4676  Assert(xfrmlen2 <= xfrmlen);
4677  pfree(val);
4678  val = xfrmstr;
4679  }
4680 
4681  return val;
4682 }
#define TextDatumGetCString(d)
Definition: builtins.h:95
#define NameStr(name)
Definition: c.h:730
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:166
Oid collid
long val
Definition: informix.c:664
char * pstrdup(const char *in)
Definition: mcxt.c:1644
void pfree(void *pointer)
Definition: mcxt.c:1456
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1266
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static char DatumGetChar(Datum X)
Definition: postgres.h:112
Definition: c.h:725

References Assert(), collid, DatumGetChar(), DatumGetPointer(), lc_collate_is_c(), NameStr, palloc(), pfree(), PG_USED_FOR_ASSERTS_ONLY, pstrdup(), TextDatumGetCString, val, and value.

Referenced by convert_to_scalar().
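
When the collation is not C, the string is replaced by its strxfrm() key so that later byte-wise comparisons and scaling respect collation order. A minimal standalone illustration of that transform (not PostgreSQL code; the helper xfrm_dup() and the example strings are illustrative, and error handling is omitted):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <locale.h>

    /* Transform a string with strxfrm() so that plain byte comparison of
     * the results matches strcoll() ordering, as convert_string_datum()
     * does for non-C collations. */
    static char *xfrm_dup(const char *s)
    {
        size_t n = strxfrm(NULL, s, 0);
        char  *buf = malloc(n + 1);

        strxfrm(buf, s, n + 1);
        return buf;
    }

    int main(void)
    {
        setlocale(LC_COLLATE, "");   /* use the environment's collation */

        char *a = xfrm_dup("apple");
        char *b = xfrm_dup("Banana");

        printf("strcmp of transformed keys: %d\n", strcmp(a, b));
        free(a);
        free(b);
        return 0;
    }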

◆ convert_string_to_scalar()

static void convert_string_to_scalar ( char *  value,
double *  scaledvalue,
char *  lobound,
double *  scaledlobound,
char *  hibound,
double *  scaledhibound 
)
static

Definition at line 4484 of file selfuncs.c.

4490 {
4491  int rangelo,
4492  rangehi;
4493  char *sptr;
4494 
4495  rangelo = rangehi = (unsigned char) hibound[0];
4496  for (sptr = lobound; *sptr; sptr++)
4497  {
4498  if (rangelo > (unsigned char) *sptr)
4499  rangelo = (unsigned char) *sptr;
4500  if (rangehi < (unsigned char) *sptr)
4501  rangehi = (unsigned char) *sptr;
4502  }
4503  for (sptr = hibound; *sptr; sptr++)
4504  {
4505  if (rangelo > (unsigned char) *sptr)
4506  rangelo = (unsigned char) *sptr;
4507  if (rangehi < (unsigned char) *sptr)
4508  rangehi = (unsigned char) *sptr;
4509  }
4510  /* If range includes any upper-case ASCII chars, make it include all */
4511  if (rangelo <= 'Z' && rangehi >= 'A')
4512  {
4513  if (rangelo > 'A')
4514  rangelo = 'A';
4515  if (rangehi < 'Z')
4516  rangehi = 'Z';
4517  }
4518  /* Ditto lower-case */
4519  if (rangelo <= 'z' && rangehi >= 'a')
4520  {
4521  if (rangelo > 'a')
4522  rangelo = 'a';
4523  if (rangehi < 'z')
4524  rangehi = 'z';
4525  }
4526  /* Ditto digits */
4527  if (rangelo <= '9' && rangehi >= '0')
4528  {
4529  if (rangelo > '0')
4530  rangelo = '0';
4531  if (rangehi < '9')
4532  rangehi = '9';
4533  }
4534 
4535  /*
4536  * If range includes less than 10 chars, assume we have not got enough
4537  * data, and make it include regular ASCII set.
4538  */
4539  if (rangehi - rangelo < 9)
4540  {
4541  rangelo = ' ';
4542  rangehi = 127;
4543  }
4544 
4545  /*
4546  * Now strip any common prefix of the three strings.
4547  */
4548  while (*lobound)
4549  {
4550  if (*lobound != *hibound || *lobound != *value)
4551  break;
4552  lobound++, hibound++, value++;
4553  }
4554 
4555  /*
4556  * Now we can do the conversions.
4557  */
4558  *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4559  *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4560  *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4561 }
static double convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
Definition: selfuncs.c:4564

References convert_one_string_to_scalar(), and value.

Referenced by convert_to_scalar().
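
Concretely: for value "middle" with histogram bounds "alpha" and "omega", the observed byte range is widened to the full lower-case run 'a'..'z' (base 26), no common prefix is stripped, and each string is then mapped to a base-26 fraction by convert_one_string_to_scalar(). A standalone sketch of the prefix stripping plus digit mapping (illustrative only; it hard-codes the 'a'..'z' range rather than deriving it from the bounds):

    #include <stdio.h>
    #include <string.h>

    /* Map a lower-case string to a fraction in [0,1) using base 26, as
     * convert_one_string_to_scalar() would with rangelo='a', rangehi='z'. */
    static double str_to_fraction(const char *s)
    {
        double num = 0.0, denom = 26.0;
        int    len = (int) strlen(s);

        if (len > 12)
            len = 12;
        for (int i = 0; i < len; i++)
        {
            int ch = (unsigned char) s[i];

            if (ch < 'a') ch = 'a' - 1;
            else if (ch > 'z') ch = 'z' + 1;
            num += (double) (ch - 'a') / denom;
            denom *= 26.0;
        }
        return num;
    }

    int main(void)
    {
        const char *value = "middle", *lo = "alpha", *hi = "omega";

        /* Strip any common prefix of the three strings first (none here). */
        while (*lo && *lo == *hi && *lo == *value)
            lo++, hi++, value++;

        printf("lo=%.4f value=%.4f hi=%.4f\n",
               str_to_fraction(lo), str_to_fraction(value), str_to_fraction(hi));
        return 0;
    }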

◆ convert_timevalue_to_scalar()

static double convert_timevalue_to_scalar ( Datum  value,
Oid  typid,
bool *  failure 
)
static

Definition at line 4787 of file selfuncs.c.

4788 {
4789  switch (typid)
4790  {
4791  case TIMESTAMPOID:
4792  return DatumGetTimestamp(value);
4793  case TIMESTAMPTZOID:
4794  return DatumGetTimestampTz(value);
4795  case DATEOID:
4796  return date2timestamp_no_overflow(DatumGetDateADT(value));
4797  case INTERVALOID:
4798  {
4799  Interval *interval = DatumGetIntervalP(value);
4800 
4801  /*
4802  * Convert the month part of Interval to days using assumed
4803  * average month length of 365.25/12.0 days. Not too
4804  * accurate, but plenty good enough for our purposes.
4805  */
4806  return interval->time + interval->day * (double) USECS_PER_DAY +
4807  interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4808  }
4809  case TIMEOID:
4810  return DatumGetTimeADT(value);
4811  case TIMETZOID:
4812  {
4813  TimeTzADT *timetz = DatumGetTimeTzADTP(value);
4814 
4815  /* use GMT-equivalent time */
4816  return (double) (timetz->time + (timetz->zone * 1000000.0));
4817  }
4818  }
4819 
4820  *failure = true;
4821  return 0;
4822 }
#define MONTHS_PER_YEAR
Definition: timestamp.h:108
#define USECS_PER_DAY
Definition: timestamp.h:130
#define DAYS_PER_YEAR
Definition: timestamp.h:107
double date2timestamp_no_overflow(DateADT dateVal)
Definition: date.c:719
static DateADT DatumGetDateADT(Datum X)
Definition: date.h:54
static TimeADT DatumGetTimeADT(Datum X)
Definition: date.h:60
static TimeTzADT * DatumGetTimeTzADTP(Datum X)
Definition: date.h:66
Definition: date.h:28
TimeADT time
Definition: date.h:29
int32 zone
Definition: date.h:30
static Interval * DatumGetIntervalP(Datum X)
Definition: timestamp.h:40
static Timestamp DatumGetTimestamp(Datum X)
Definition: timestamp.h:28
static TimestampTz DatumGetTimestampTz(Datum X)
Definition: timestamp.h:34

References date2timestamp_no_overflow(), DatumGetDateADT(), DatumGetIntervalP(), DatumGetTimeADT(), DatumGetTimestamp(), DatumGetTimestampTz(), DatumGetTimeTzADTP(), DAYS_PER_YEAR, interval::month, MONTHS_PER_YEAR, TimeTzADT::time, interval::time, USECS_PER_DAY, value, and TimeTzADT::zone.

Referenced by convert_to_scalar().
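
For the INTERVAL case the month component is folded in using an assumed average month of 365.25/12 days, as the comment in the listing notes. The standalone example below redoes that arithmetic for a hypothetical interval of 1 month 2 days 03:00:00; the macro values are copied here only for self-containment (the real definitions live in timestamp.h).

    #include <stdio.h>

    #define USECS_PER_DAY   86400000000.0   /* microseconds per day */
    #define DAYS_PER_YEAR   365.25
    #define MONTHS_PER_YEAR 12

    int
    main(void)
    {
        /* hypothetical interval: 1 month 2 days 03:00:00 */
        double month = 1;
        double day = 2;
        double time = 3.0 * 3600.0 * 1000000.0;     /* 3 hours in microseconds */

        double scaled = time + day * USECS_PER_DAY +
            month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);

        printf("scaled interval = %.0f usec (~%.2f days)\n",
               scaled, scaled / USECS_PER_DAY);
        return 0;
    }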

◆ convert_to_scalar()

static bool convert_to_scalar ( Datum  value,
Oid  valuetypid,
Oid  collid,
double *  scaledvalue,
Datum  lobound,
Datum  hibound,
Oid  boundstypid,
double *  scaledlobound,
double *  scaledhibound 
)
static

Definition at line 4275 of file selfuncs.c.

4278 {
4279  bool failure = false;
4280 
4281  /*
4282  * Both the valuetypid and the boundstypid should exactly match the
4283  * declared input type(s) of the operator we are invoked for. However,
4284  * extensions might try to use scalarineqsel as estimator for operators
4285  * with input type(s) we don't handle here; in such cases, we want to
4286  * return false, not fail. In any case, we mustn't assume that valuetypid
4287  * and boundstypid are identical.
4288  *
4289  * XXX The histogram we are interpolating between points of could belong
4290  * to a column that's only binary-compatible with the declared type. In
4291  * essence we are assuming that the semantics of binary-compatible types
4292  * are enough alike that we can use a histogram generated with one type's
4293  * operators to estimate selectivity for the other's. This is outright
4294  * wrong in some cases --- in particular signed versus unsigned
4295  * interpretation could trip us up. But it's useful enough in the
4296  * majority of cases that we do it anyway. Should think about more
4297  * rigorous ways to do it.
4298  */
4299  switch (valuetypid)
4300  {
4301  /*
4302  * Built-in numeric types
4303  */
4304  case BOOLOID:
4305  case INT2OID:
4306  case INT4OID:
4307  case INT8OID:
4308  case FLOAT4OID:
4309  case FLOAT8OID:
4310  case NUMERICOID:
4311  case OIDOID:
4312  case REGPROCOID:
4313  case REGPROCEDUREOID:
4314  case REGOPEROID:
4315  case REGOPERATOROID:
4316  case REGCLASSOID:
4317  case REGTYPEOID:
4318  case REGCOLLATIONOID:
4319  case REGCONFIGOID:
4320  case REGDICTIONARYOID:
4321  case REGROLEOID:
4322  case REGNAMESPACEOID:
4323  *scaledvalue = convert_numeric_to_scalar(value, valuetypid,
4324  &failure);
4325  *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
4326  &failure);
4327  *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
4328  &failure);
4329  return !failure;
4330 
4331  /*
4332  * Built-in string types
4333  */
4334  case CHAROID:
4335  case BPCHAROID:
4336  case VARCHAROID:
4337  case TEXTOID:
4338  case NAMEOID:
4339  {
4340  char *valstr = convert_string_datum(value, valuetypid,
4341  collid, &failure);
4342  char *lostr = convert_string_datum(lobound, boundstypid,
4343  collid, &failure);
4344  char *histr = convert_string_datum(hibound, boundstypid,
4345  collid, &failure);
4346 
4347  /*
4348  * Bail out if any of the values is not of string type. We
4349  * might leak converted strings for the other value(s), but
4350  * that's not worth troubling over.
4351  */
4352  if (failure)
4353  return false;
4354 
4355  convert_string_to_scalar(valstr, scaledvalue,
4356  lostr, scaledlobound,
4357  histr, scaledhibound);
4358  pfree(valstr);
4359  pfree(lostr);
4360  pfree(histr);
4361  return true;
4362  }
4363 
4364  /*
4365  * Built-in bytea type
4366  */
4367  case BYTEAOID:
4368  {
4369  /* We only support bytea vs bytea comparison */
4370  if (boundstypid != BYTEAOID)
4371  return false;
4372  convert_bytea_to_scalar(value, scaledvalue,
4373  lobound, scaledlobound,
4374  hibound, scaledhibound);
4375  return true;
4376  }
4377 
4378  /*
4379  * Built-in time types
4380  */
4381  case TIMESTAMPOID:
4382  case TIMESTAMPTZOID:
4383  case DATEOID:
4384  case INTERVALOID:
4385  case TIMEOID:
4386  case TIMETZOID:
4387  *scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
4388  &failure);
4389  *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
4390  &failure);
4391  *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
4392  &failure);
4393  return !failure;
4394 
4395  /*
4396  * Built-in network types
4397  */
4398  case INETOID:
4399  case CIDROID:
4400  case MACADDROID:
4401  case MACADDR8OID:
4402  *scaledvalue = convert_network_to_scalar(value, valuetypid,
4403  &failure);
4404  *scaledlobound = convert_network_to_scalar(lobound, boundstypid,
4405  &failure);
4406  *scaledhibound = convert_network_to_scalar(hibound, boundstypid,
4407  &failure);
4408  return !failure;
4409  }
4410  /* Don't know how to convert */
4411  *scaledvalue = *scaledlobound = *scaledhibound = 0;
4412  return false;
4413 }
double convert_network_to_scalar(Datum value, Oid typid, bool *failure)
Definition: network.c:1502
static void convert_string_to_scalar(char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
Definition: selfuncs.c:4484
static double convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
Definition: selfuncs.c:4787
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
Definition: selfuncs.c:4422
static void convert_bytea_to_scalar(Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
Definition: selfuncs.c:4696
static char * convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
Definition: selfuncs.c:4615

References collid, convert_bytea_to_scalar(), convert_network_to_scalar(), convert_numeric_to_scalar(), convert_string_datum(), convert_string_to_scalar(), convert_timevalue_to_scalar(), pfree(), and value.

Referenced by ineq_histogram_selectivity().
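
The scaled doubles produced here exist only so that ineq_histogram_selectivity() can interpolate between two histogram bin boundaries. The sketch below illustrates that downstream use under the assumption of simple linear interpolation; the real interpolation logic lives in ineq_histogram_selectivity() and is not shown in this section.

    #include <stdio.h>

    /* Illustration: given a probe value and the two histogram bounds that
     * bracket it, all three are first mapped onto a common double scale and
     * the position inside the bin is then taken as a linear fraction. */
    static double
    bin_fraction(double scaledvalue, double scaledlobound, double scaledhibound)
    {
        if (scaledhibound <= scaledlobound)
            return 0.5;             /* degenerate bin: fall back to the midpoint */
        if (scaledvalue <= scaledlobound)
            return 0.0;
        if (scaledvalue >= scaledhibound)
            return 1.0;
        return (scaledvalue - scaledlobound) / (scaledhibound - scaledlobound);
    }

    int
    main(void)
    {
        /* hypothetical scaled outputs of convert_to_scalar() */
        printf("%.3f\n", bin_fraction(42.0, 10.0, 90.0));   /* 0.400 */
        return 0;
    }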

◆ eqjoinsel()

Datum eqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 2239 of file selfuncs.c.

2240 {
2241  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2242  Oid operator = PG_GETARG_OID(1);
2243  List *args = (List *) PG_GETARG_POINTER(2);
2244 
2245 #ifdef NOT_USED
2246  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2247 #endif
2248  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2249  Oid collation = PG_GET_COLLATION();
2250  double selec;
2251  double selec_inner;
2252  VariableStatData vardata1;
2253  VariableStatData vardata2;
2254  double nd1;
2255  double nd2;
2256  bool isdefault1;
2257  bool isdefault2;
2258  Oid opfuncoid;
2259  AttStatsSlot sslot1;
2260  AttStatsSlot sslot2;
2261  Form_pg_statistic stats1 = NULL;
2262  Form_pg_statistic stats2 = NULL;
2263  bool have_mcvs1 = false;
2264  bool have_mcvs2 = false;
2265  bool get_mcv_stats;
2266  bool join_is_reversed;
2267  RelOptInfo *inner_rel;
2268 
2269  get_join_variables(root, args, sjinfo,
2270  &vardata1, &vardata2, &join_is_reversed);
2271 
2272  nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2273  nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2274 
2275  opfuncoid = get_opcode(operator);
2276 
2277  memset(&sslot1, 0, sizeof(sslot1));
2278  memset(&sslot2, 0, sizeof(sslot2));
2279 
2280  /*
2281  * There is no use in fetching one side's MCVs if we lack MCVs for the
2282  * other side, so do a quick check to verify that both stats exist.
2283  */
2284  get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) &&
2285  HeapTupleIsValid(vardata2.statsTuple) &&
2286  get_attstatsslot(&sslot1, vardata1.statsTuple,
2287  STATISTIC_KIND_MCV, InvalidOid,
2288  0) &&
2289  get_attstatsslot(&sslot2, vardata2.statsTuple,
2290  STATISTIC_KIND_MCV, InvalidOid,
2291  0));
2292 
2293  if (HeapTupleIsValid(vardata1.statsTuple))
2294  {
2295  /* note we allow use of nullfrac regardless of security check */
2296  stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2297  if (get_mcv_stats &&
2298  statistic_proc_security_check(&vardata1, opfuncoid))
2299  have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2300  STATISTIC_KIND_MCV, InvalidOid,
2301  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2302  }
2303 
2304  if (HeapTupleIsValid(vardata2.statsTuple))
2305  {
2306  /* note we allow use of nullfrac regardless of security check */
2307  stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2308  if (get_mcv_stats &&
2309  statistic_proc_security_check(&vardata2, opfuncoid))
2310  have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2311  STATISTIC_KIND_MCV, InvalidOid,
2312  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2313  }
2314 
2315  /* We need to compute the inner-join selectivity in all cases */
2316  selec_inner = eqjoinsel_inner(opfuncoid, collation,
2317  &vardata1, &vardata2,
2318  nd1, nd2,
2319  isdefault1, isdefault2,
2320  &sslot1, &sslot2,
2321  stats1, stats2,
2322  have_mcvs1, have_mcvs2);
2323 
2324  switch (sjinfo->jointype)
2325  {
2326  case JOIN_INNER:
2327  case JOIN_LEFT:
2328  case JOIN_FULL:
2329  selec = selec_inner;
2330  break;
2331  case JOIN_SEMI:
2332  case JOIN_ANTI:
2333 
2334  /*
2335  * Look up the join's inner relation. min_righthand is sufficient
2336  * information because neither SEMI nor ANTI joins permit any
2337  * reassociation into or out of their RHS, so the righthand will
2338  * always be exactly that set of rels.
2339  */
2340  inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2341 
2342  if (!join_is_reversed)
2343  selec = eqjoinsel_semi(opfuncoid, collation,
2344  &vardata1, &vardata2,
2345  nd1, nd2,
2346  isdefault1, isdefault2,
2347  &sslot1, &sslot2,
2348  stats1, stats2,
2349  have_mcvs1, have_mcvs2,
2350  inner_rel);
2351  else
2352  {
2353  Oid commop = get_commutator(operator);
2354  Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2355 
2356  selec = eqjoinsel_semi(commopfuncoid, collation,
2357  &vardata2, &vardata1,
2358  nd2, nd1,
2359  isdefault2, isdefault1,
2360  &sslot2, &sslot1,
2361  stats2, stats1,
2362  have_mcvs2, have_mcvs1,
2363  inner_rel);
2364  }
2365 
2366  /*
2367  * We should never estimate the output of a semijoin to be more
2368  * rows than we estimate for an inner join with the same input
2369  * rels and join condition; it's obviously impossible for that to
2370  * happen. The former estimate is N1 * Ssemi while the latter is
2371  * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
2372  * this is worthwhile because of the shakier estimation rules we
2373  * use in eqjoinsel_semi, particularly in cases where it has to
2374  * punt entirely.
2375  */
2376  selec = Min(selec, inner_rel->rows * selec_inner);
2377  break;
2378  default:
2379  /* other values not expected here */
2380  elog(ERROR, "unrecognized join type: %d",
2381  (int) sjinfo->jointype);
2382  selec = 0; /* keep compiler quiet */
2383  break;
2384  }
2385 
2386  free_attstatsslot(&sslot1);
2387  free_attstatsslot(&sslot2);
2388 
2389  ReleaseVariableStats(vardata1);
2390  ReleaseVariableStats(vardata2);
2391 
2392  CLAMP_PROBABILITY(selec);
2393 
2394  PG_RETURN_FLOAT8((float8) selec);
2395 }
double float8
Definition: c.h:614
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1267
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1491
JoinType
Definition: nodes.h:299
@ JOIN_SEMI
Definition: nodes.h:318
@ JOIN_FULL
Definition: nodes.h:306
@ JOIN_LEFT
Definition: nodes.h:305
@ JOIN_ANTI
Definition: nodes.h:319
static RelOptInfo * find_join_input_rel(PlannerInfo *root, Relids relids)
Definition: selfuncs.c:6312
static double eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2)
Definition: selfuncs.c:2404
static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, RelOptInfo *inner_rel)
Definition: selfuncs.c:2601
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5619
void get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
Definition: selfuncs.c:4909
Cardinality rows
Definition: pathnodes.h:862
Relids min_righthand
Definition: pathnodes.h:2841
JoinType jointype
Definition: pathnodes.h:2844

References generate_unaccent_rules::args, ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, elog(), eqjoinsel_inner(), eqjoinsel_semi(), ERROR, find_join_input_rel(), free_attstatsslot(), get_attstatsslot(), get_commutator(), get_join_variables(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_SEMI, SpecialJoinInfo::jointype, Min, SpecialJoinInfo::min_righthand, OidIsValid, PG_GET_COLLATION, PG_GETARG_INT16, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, RelOptInfo::rows, statistic_proc_security_check(), and VariableStatData::statsTuple.

Referenced by neqjoinsel().
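
For SEMI and ANTI joins the result is clamped so the estimated output can never exceed the matching inner-join estimate: with N1 outer rows and N2 inner rows, N1 * Ssemi must not exceed N1 * N2 * Sinner, hence Ssemi <= N2 * Sinner. A minimal numeric sketch of that clamp, using assumed figures rather than anything from the source:

    #include <stdio.h>

    static double
    min_d(double a, double b)
    {
        return (a < b) ? a : b;
    }

    int
    main(void)
    {
        /* assumed estimates */
        double inner_rows = 1000.0;     /* inner_rel->rows */
        double selec_inner = 0.0002;    /* inner-join selectivity */
        double selec_semi = 0.5;        /* raw semijoin selectivity */

        /* same clamp as eqjoinsel(): Ssemi <= N2 * Sinner */
        double selec = min_d(selec_semi, inner_rows * selec_inner);

        printf("clamped semijoin selectivity = %.3f\n", selec);   /* 0.200 */
        return 0;
    }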

◆ eqjoinsel_inner()

static double eqjoinsel_inner ( Oid  opfuncoid,
Oid  collation,
VariableStatData vardata1,
VariableStatData vardata2,
double  nd1,
double  nd2,
bool  isdefault1,
bool  isdefault2,
AttStatsSlot sslot1,
AttStatsSlot sslot2,
Form_pg_statistic  stats1,
Form_pg_statistic  stats2,
bool  have_mcvs1,
bool  have_mcvs2 
)
static

Definition at line 2404 of file selfuncs.c.

2411 {
2412  double selec;
2413 
2414  if (have_mcvs1 && have_mcvs2)
2415  {
2416  /*
2417  * We have most-common-value lists for both relations. Run through
2418  * the lists to see which MCVs actually join to each other with the
2419  * given operator. This allows us to determine the exact join
2420  * selectivity for the portion of the relations represented by the MCV
2421  * lists. We still have to estimate for the remaining population, but
2422  * in a skewed distribution this gives us a big leg up in accuracy.
2423  * For motivation see the analysis in Y. Ioannidis and S.
2424  * Christodoulakis, "On the propagation of errors in the size of join
2425  * results", Technical Report 1018, Computer Science Dept., University
2426  * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2427  */
2428  LOCAL_FCINFO(fcinfo, 2);
2429  FmgrInfo eqproc;
2430  bool *hasmatch1;
2431  bool *hasmatch2;
2432  double nullfrac1 = stats1->stanullfrac;
2433  double nullfrac2 = stats2->stanullfrac;
2434  double matchprodfreq,
2435  matchfreq1,
2436  matchfreq2,
2437  unmatchfreq1,
2438  unmatchfreq2,
2439  otherfreq1,
2440  otherfreq2,
2441  totalsel1,
2442  totalsel2;
2443  int i,
2444  nmatches;
2445 
2446  fmgr_info(opfuncoid, &eqproc);
2447 
2448  /*
2449  * Save a few cycles by setting up the fcinfo struct just once. Using
2450  * FunctionCallInvoke directly also avoids failure if the eqproc
2451  * returns NULL, though really equality functions should never do
2452  * that.
2453  */
2454  InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2455  NULL, NULL);
2456  fcinfo->args[0].isnull = false;
2457  fcinfo->args[1].isnull = false;
2458 
2459  hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2460  hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
2461 
2462  /*
2463  * Note we assume that each MCV will match at most one member of the
2464  * other MCV list. If the operator isn't really equality, there could
2465  * be multiple matches --- but we don't look for them, both for speed
2466  * and because the math wouldn't add up...
2467  */
2468  matchprodfreq = 0.0;
2469  nmatches = 0;
2470  for (i = 0; i < sslot1->nvalues; i++)
2471  {
2472  int j;
2473 
2474  fcinfo->args[0].value = sslot1->values[i];
2475 
2476  for (j = 0; j < sslot2->nvalues; j++)
2477  {
2478  Datum fresult;
2479 
2480  if (hasmatch2[j])
2481  continue;
2482  fcinfo->args[1].value = sslot2->values[j];
2483  fcinfo->isnull = false;
2484  fresult = FunctionCallInvoke(fcinfo);
2485  if (!fcinfo->isnull && DatumGetBool(fresult))
2486  {
2487  hasmatch1[i] = hasmatch2[j] = true;
2488  matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2489  nmatches++;
2490  break;
2491  }
2492  }
2493  }
2494  CLAMP_PROBABILITY(matchprodfreq);
2495  /* Sum up frequencies of matched and unmatched MCVs */
2496  matchfreq1 = unmatchfreq1 = 0.0;
2497  for (i = 0; i < sslot1->nvalues; i++)
2498  {
2499  if (hasmatch1[i])
2500  matchfreq1 += sslot1->numbers[i];
2501  else
2502  unmatchfreq1 += sslot1->numbers[i];
2503  }
2504  CLAMP_PROBABILITY(matchfreq1);
2505  CLAMP_PROBABILITY(unmatchfreq1);
2506  matchfreq2 = unmatchfreq2 = 0.0;
2507  for (i = 0; i < sslot2->nvalues; i++)
2508  {
2509  if (hasmatch2[i])
2510  matchfreq2 += sslot2->numbers[i];
2511  else
2512  unmatchfreq2 += sslot2->numbers[i];
2513  }
2514  CLAMP_PROBABILITY(matchfreq2);
2515  CLAMP_PROBABILITY(unmatchfreq2);
2516  pfree(hasmatch1);
2517  pfree(hasmatch2);
2518 
2519  /*
2520  * Compute total frequency of non-null values that are not in the MCV
2521  * lists.
2522  */
2523  otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2524  otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2525  CLAMP_PROBABILITY(otherfreq1);
2526  CLAMP_PROBABILITY(otherfreq2);
2527 
2528  /*
2529  * We can estimate the total selectivity from the point of view of
2530  * relation 1 as: the known selectivity for matched MCVs, plus
2531  * unmatched MCVs that are assumed to match against random members of
2532  * relation 2's non-MCV population, plus non-MCV values that are
2533  * assumed to match against random members of relation 2's unmatched
2534  * MCVs plus non-MCV values.
2535  */
2536  totalsel1 = matchprodfreq;
2537  if (nd2 > sslot2->nvalues)
2538  totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2539  if (nd2 > nmatches)
2540  totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2541  (nd2 - nmatches);
2542  /* Same estimate from the point of view of relation 2. */
2543  totalsel2 = matchprodfreq;
2544  if (nd1 > sslot1->nvalues)
2545  totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2546  if (nd1 > nmatches)
2547  totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2548  (nd1 - nmatches);
2549 
2550  /*
2551  * Use the smaller of the two estimates. This can be justified in
2552  * essentially the same terms as given below for the no-stats case: to
2553  * a first approximation, we are estimating from the point of view of
2554  * the relation with smaller nd.
2555  */
2556  selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2557  }
2558  else
2559  {
2560  /*
2561  * We do not have MCV lists for both sides. Estimate the join
2562  * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2563  * is plausible if we assume that the join operator is strict and the
2564  * non-null values are about equally distributed: a given non-null
2565  * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2566  * of rel2, so total join rows are at most
2567  * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2568  * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2569  * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2570  * with MIN() is an upper bound. Using the MIN() means we estimate
2571  * from the point of view of the relation with smaller nd (since the
2572  * larger nd is determining the MIN). It is reasonable to assume that
2573  * most tuples in this rel will have join partners, so the bound is
2574  * probably reasonably tight and should be taken as-is.
2575  *
2576  * XXX Can we be smarter if we have an MCV list for just one side? It
2577  * seems that if we assume equal distribution for the other side, we
2578  * end up with the same answer anyway.
2579  */
2580  double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2581  double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2582 
2583  selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2584  if (nd1 > nd2)
2585  selec /= nd1;
2586  else
2587  selec /= nd2;
2588  }
2589 
2590  return selec;
2591 }
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
int j
Definition: isn.c:74
void * palloc0(Size size)
Definition: mcxt.c:1257
uintptr_t Datum
Definition: postgres.h:64
Definition: fmgr.h:57

References CLAMP_PROBABILITY, DatumGetBool(), fmgr_info(), FunctionCallInvoke, i, InitFunctionCallInfoData, j, LOCAL_FCINFO, AttStatsSlot::numbers, AttStatsSlot::nvalues, palloc0(), pfree(), and AttStatsSlot::values.

Referenced by eqjoinsel().
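
When MCV lists are not available for both sides, the fallback estimate is MIN(1/nd1, 1/nd2) * (1 - nullfrac1) * (1 - nullfrac2), i.e. divide by the larger ndistinct. A short worked example with made-up statistics:

    #include <stdio.h>

    int
    main(void)
    {
        /* assumed per-column statistics */
        double nd1 = 100.0, nd2 = 2500.0;
        double nullfrac1 = 0.10, nullfrac2 = 0.00;

        double selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);

        /* dividing by the larger nd is the same as MIN(1/nd1, 1/nd2) */
        selec /= (nd1 > nd2) ? nd1 : nd2;

        printf("fallback join selectivity = %.6f\n", selec);   /* 0.9 / 2500 */
        return 0;
    }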

◆ eqjoinsel_semi()

static double eqjoinsel_semi ( Oid  opfuncoid,
Oid  collation,
VariableStatData vardata1,
VariableStatData vardata2,
double  nd1,
double  nd2,
bool  isdefault1,
bool  isdefault2,
AttStatsSlot sslot1,
AttStatsSlot sslot2,
Form_pg_statistic  stats1,
Form_pg_statistic  stats2,
bool  have_mcvs1,
bool  have_mcvs2,
RelOptInfo inner_rel 
)
static

Definition at line 2601 of file selfuncs.c.

2609 {
2610  double selec;
2611 
2612  /*
2613  * We clamp nd2 to be not more than what we estimate the inner relation's
2614  * size to be. This is intuitively somewhat reasonable since obviously
2615  * there can't be more than that many distinct values coming from the
2616  * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2617  * likewise) is that this is the only pathway by which restriction clauses
2618  * applied to the inner rel will affect the join result size estimate,
2619  * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2620  * only the outer rel's size. If we clamped nd1 we'd be double-counting
2621  * the selectivity of outer-rel restrictions.
2622  *
2623  * We can apply this clamping both with respect to the base relation from
2624  * which the join variable comes (if there is just one), and to the
2625  * immediate inner input relation of the current join.
2626  *
2627  * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2628  * great, maybe, but it didn't come out of nowhere either. This is most
2629  * helpful when the inner relation is empty and consequently has no stats.
2630  */
2631  if (vardata2->rel)
2632  {
2633  if (nd2 >= vardata2->rel->rows)
2634  {
2635  nd2 = vardata2->rel->rows;
2636  isdefault2 = false;
2637  }
2638  }
2639  if (nd2 >= inner_rel->rows)
2640  {
2641  nd2 = inner_rel->rows;
2642  isdefault2 = false;
2643  }
2644 
2645  if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2646  {
2647  /*
2648  * We have most-common-value lists for both relations. Run through
2649  * the lists to see which MCVs actually join to each other with the
2650  * given operator. This allows us to determine the exact join
2651  * selectivity for the portion of the relations represented by the MCV
2652  * lists. We still have to estimate for the remaining population, but
2653  * in a skewed distribution this gives us a big leg up in accuracy.
2654  */
2655  LOCAL_FCINFO(fcinfo, 2);
2656  FmgrInfo eqproc;
2657  bool *hasmatch1;
2658  bool *hasmatch2;
2659  double nullfrac1 = stats1->stanullfrac;
2660  double matchfreq1,
2661  uncertainfrac,
2662  uncertain;
2663  int i,
2664  nmatches,
2665  clamped_nvalues2;
2666 
2667  /*
2668  * The clamping above could have resulted in nd2 being less than
2669  * sslot2->nvalues; in which case, we assume that precisely the nd2
2670  * most common values in the relation will appear in the join input,
2671  * and so compare to only the first nd2 members of the MCV list. Of
2672  * course this is frequently wrong, but it's the best bet we can make.
2673  */
2674  clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2675 
2676  fmgr_info(opfuncoid, &eqproc);
2677 
2678  /*
2679  * Save a few cycles by setting up the fcinfo struct just once. Using
2680  * FunctionCallInvoke directly also avoids failure if the eqproc
2681  * returns NULL, though really equality functions should never do
2682  * that.
2683  */
2684  InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2685  NULL, NULL);
2686  fcinfo->args[0].isnull = false;
2687  fcinfo->args[1].isnull = false;
2688 
2689  hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2690  hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2691 
2692  /*
2693  * Note we assume that each MCV will match at most one member of the
2694  * other MCV list. If the operator isn't really equality, there could
2695  * be multiple matches --- but we don't look for them, both for speed
2696  * and because the math wouldn't add up...
2697  */
2698  nmatches = 0;
2699  for (i = 0; i < sslot1->nvalues; i++)
2700  {
2701  int j;
2702 
2703  fcinfo->args[0].value = sslot1->values[i];
2704 
2705  for (j = 0; j < clamped_nvalues2; j++)
2706  {
2707  Datum fresult;
2708 
2709  if (hasmatch2[j])
2710  continue;
2711  fcinfo->args[1].value = sslot2->values[j];
2712  fcinfo->isnull = false;
2713  fresult = FunctionCallInvoke(fcinfo);
2714  if (!fcinfo->isnull && DatumGetBool(fresult))
2715  {
2716  hasmatch1[i] = hasmatch2[j] = true;
2717  nmatches++;
2718  break;
2719  }
2720  }
2721  }
2722  /* Sum up frequencies of matched MCVs */
2723  matchfreq1 = 0.0;
2724  for (i = 0; i < sslot1->nvalues; i++)
2725  {
2726  if (hasmatch1[i])
2727  matchfreq1 += sslot1->numbers[i];
2728  }
2729  CLAMP_PROBABILITY(matchfreq1);
2730  pfree(hasmatch1);
2731  pfree(hasmatch2);
2732 
2733  /*
2734  * Now we need to estimate the fraction of relation 1 that has at
2735  * least one join partner. We know for certain that the matched MCVs
2736  * do, so that gives us a lower bound, but we're really in the dark
2737  * about everything else. Our crude approach is: if nd1 <= nd2 then
2738  * assume all non-null rel1 rows have join partners, else assume for
2739  * the uncertain rows that a fraction nd2/nd1 have join partners. We
2740  * can discount the known-matched MCVs from the distinct-values counts
2741  * before doing the division.
2742  *
2743  * Crude as the above is, it's completely useless if we don't have
2744  * reliable ndistinct values for both sides. Hence, if either nd1 or
2745  * nd2 is default, punt and assume half of the uncertain rows have
2746  * join partners.
2747  */
2748  if (!isdefault1 && !isdefault2)
2749  {
2750  nd1 -= nmatches;
2751  nd2 -= nmatches;
2752  if (nd1 <= nd2 || nd2 < 0)
2753  uncertainfrac = 1.0;
2754  else
2755  uncertainfrac = nd2 / nd1;
2756  }
2757  else
2758  uncertainfrac = 0.5;
2759  uncertain = 1.0 - matchfreq1 - nullfrac1;
2760  CLAMP_PROBABILITY(uncertain);
2761  selec = matchfreq1 + uncertainfrac * uncertain;
2762  }
2763  else
2764  {
2765  /*
2766  * Without MCV lists for both sides, we can only use the heuristic
2767  * about nd1 vs nd2.
2768  */
2769  double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2770 
2771  if (!isdefault1 && !isdefault2)
2772  {
2773  if (nd1 <= nd2 || nd2 < 0)
2774  selec = 1.0 - nullfrac1;
2775  else
2776  selec = (nd2 / nd1) * (1.0 - nullfrac1);
2777  }
2778  else
2779  selec = 0.5 * (1.0 - nullfrac1);
2780  }
2781 
2782  return selec;
2783 }

References CLAMP_PROBABILITY, DatumGetBool(), fmgr_info(), FunctionCallInvoke, i, InitFunctionCallInfoData, j, LOCAL_FCINFO, Min, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, palloc0(), pfree(), VariableStatData::rel, RelOptInfo::rows, and AttStatsSlot::values.

Referenced by eqjoinsel().
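
Once the matched-MCV frequency is known, the remaining "uncertain" fraction of relation 1 is assumed to find join partners with probability nd2/nd1 (capped at 1.0), or 0.5 when either ndistinct is a default guess. The sketch below walks through that calculation with assumed inputs; it omits the nd2 < 0 guard present in the real code.

    #include <stdio.h>

    int
    main(void)
    {
        /* assumed inputs */
        double matchfreq1 = 0.30;   /* frequency of rel1 MCVs with a partner */
        double nullfrac1 = 0.05;
        double nd1 = 1000.0, nd2 = 400.0;
        double nmatches = 20.0;     /* MCV pairs that actually joined */

        /* discount known matches, then apply nd2/nd1 to the uncertain part */
        double uncertainfrac = ((nd2 - nmatches) >= (nd1 - nmatches))
            ? 1.0
            : (nd2 - nmatches) / (nd1 - nmatches);
        double uncertain = 1.0 - matchfreq1 - nullfrac1;
        double selec = matchfreq1 + uncertainfrac * uncertain;

        printf("semijoin selectivity = %.4f\n", selec);
        return 0;
    }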

◆ eqsel()

Datum eqsel ( PG_FUNCTION_ARGS  )

Definition at line 226 of file selfuncs.c.

227 {
228  PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
229 }
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate)
Definition: selfuncs.c:235

References eqsel_internal(), and PG_RETURN_FLOAT8.

◆ eqsel_internal()

static double eqsel_internal ( PG_FUNCTION_ARGS  ,
bool  negate 
)
static

Definition at line 235 of file selfuncs.c.

236 {
237  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
238  Oid operator = PG_GETARG_OID(1);
239  List *args = (List *) PG_GETARG_POINTER(2);
240  int varRelid = PG_GETARG_INT32(3);
241  Oid collation = PG_GET_COLLATION();
242  VariableStatData vardata;
243  Node *other;
244  bool varonleft;
245  double selec;
246 
247  /*
248  * When asked about <>, we do the estimation using the corresponding =
249  * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
250  */
251  if (negate)
252  {
253  operator = get_negator(operator);
254  if (!OidIsValid(operator))
255  {
256  /* Use default selectivity (should we raise an error instead?) */
257  return 1.0 - DEFAULT_EQ_SEL;
258  }
259  }
260 
261  /*
262  * If expression is not variable = something or something = variable, then
263  * punt and return a default estimate.
264  */
265  if (!get_restriction_variable(root, args, varRelid,
266  &vardata, &other, &varonleft))
267  return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
268 
269  /*
270  * We can do a lot better if the something is a constant. (Note: the
271  * Const might result from estimation rather than being a simple constant
272  * in the query.)
273  */
274  if (IsA(other, Const))
275  selec = var_eq_const(&vardata, operator, collation,
276  ((Const *) other)->constvalue,
277  ((Const *) other)->constisnull,
278  varonleft, negate);
279  else
280  selec = var_eq_non_const(&vardata, operator, collation, other,
281  varonleft, negate);
282 
283  ReleaseVariableStats(vardata);
284 
285  return selec;
286 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
Oid get_negator(Oid opno)
Definition: lsyscache.c:1515
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4849
double var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
Definition: selfuncs.c:465
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34

References generate_unaccent_rules::args, DEFAULT_EQ_SEL, get_negator(), get_restriction_variable(), IsA, OidIsValid, PG_GET_COLLATION, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, ReleaseVariableStats, var_eq_const(), and var_eq_non_const().

Referenced by eqsel(), and neqsel().
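
As the comment notes, <> is estimated by negating the corresponding = selectivity via "1.0 - eq_selectivity - nullfrac"; the subtraction itself happens inside var_eq_const()/var_eq_non_const(), which are not shown in this section. A hedged numeric illustration of the rule:

    #include <stdio.h>

    int
    main(void)
    {
        /* assumed statistics for a column */
        double eq_selec = 0.02;     /* estimated fraction equal to the constant */
        double nullfrac = 0.10;     /* NULLs match neither = nor <> */

        double ne_selec = 1.0 - eq_selec - nullfrac;

        printf("estimated <> selectivity = %.2f\n", ne_selec);   /* 0.88 */
        return 0;
    }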

◆ estimate_array_length()

int estimate_array_length ( Node arrayexpr)

Definition at line 2134 of file selfuncs.c.

2135 {
2136  /* look through any binary-compatible relabeling of arrayexpr */
2137  arrayexpr = strip_array_coercion(arrayexpr);
2138 
2139  if (arrayexpr && IsA(arrayexpr, Const))
2140  {
2141  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2142  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2143  ArrayType *arrayval;
2144 
2145  if (arrayisnull)
2146  return 0;
2147  arrayval = DatumGetArrayTypeP(arraydatum);
2148  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2149  }
2150  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2151  !((ArrayExpr *) arrayexpr)->multidims)
2152  {
2153  return list_length(((ArrayExpr *) arrayexpr)->elements);
2154  }
2155  else
2156  {
2157  /* default guess --- see also scalararraysel */
2158  return 10;
2159  }
2160 }
#define ARR_NDIM(a)
Definition: array.h:283
#define DatumGetArrayTypeP(X)
Definition: array.h:254
#define ARR_DIMS(a)
Definition: array.h:287
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:76
static int list_length(const List *l)
Definition: pg_list.h:152
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1782

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().
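
The estimate is the element count when the expression is an array constant or a one-dimensional ArrayExpr, and a default guess of 10 otherwise. The sketch below only illustrates those three outcomes; it does not use the planner's Node representation.

    #include <stdio.h>

    /* Illustration only: the three outcomes of estimate_array_length() for a
     * hypothetical scalar-array qual, mirroring the branches in the listing. */
    static int
    estimated_length(int is_const_array, int const_nitems,
                     int is_flat_arrayexpr, int arrayexpr_nitems)
    {
        if (is_const_array)
            return const_nitems;        /* exact count from the array datum */
        if (is_flat_arrayexpr)
            return arrayexpr_nitems;    /* count of ARRAY[...] elements */
        return 10;                      /* default guess, see scalararraysel */
    }

    int
    main(void)
    {
        printf("%d %d %d\n",
               estimated_length(1, 3, 0, 0),    /* '{1,2,3}'::int[]  -> 3  */
               estimated_length(0, 0, 1, 4),    /* ARRAY[a,b,c,d]    -> 4  */
               estimated_length(0, 0, 0, 0));   /* anything else     -> 10 */
        return 0;
    }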

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3768 of file selfuncs.c.

3771 {
3772  VariableStatData vardata;
3773  double estfract,
3774  ndistinct,
3775  stanullfrac,
3776  avgfreq;
3777  bool isdefault;
3778  AttStatsSlot sslot;
3779 
3780  examine_variable(root, hashkey, 0, &vardata);
3781 
3782  /* Look up the frequency of the most common value, if available */
3783  *mcv_freq = 0.0;
3784 
3785  if (HeapTupleIsValid(vardata.statsTuple))
3786  {
3787  if (get_attstatsslot(&sslot, vardata.statsTuple,
3788  STATISTIC_KIND_MCV, InvalidOid,
3789  ATTSTATSSLOT_NUMBERS))
3790  {
3791  /*
3792  * The first MCV stat is for the most common value.
3793  */
3794  if (sslot.nnumbers > 0)
3795  *mcv_freq = sslot.numbers[0];
3796  free_attstatsslot(&sslot);
3797  }
3798  }
3799 
3800  /* Get number of distinct values */
3801  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3802 
3803  /*
3804  * If ndistinct isn't real, punt. We normally return 0.1, but if the
3805  * mcv_freq is known to be even higher than that, use it instead.
3806  */
3807  if (isdefault)
3808  {
3809  *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3810  ReleaseVariableStats(vardata);
3811  return;
3812  }
3813 
3814  /* Get fraction that are null */
3815  if (HeapTupleIsValid(vardata.statsTuple))
3816  {
3817  Form_pg_statistic stats;
3818 
3819  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3820  stanullfrac = stats->stanullfrac;
3821  }
3822  else
3823  stanullfrac = 0.0;
3824 
3825  /* Compute avg freq of all distinct data values in raw relation */
3826  avgfreq = (1.0 - stanullfrac) / ndistinct;
3827 
3828  /*
3829  * Adjust ndistinct to account for restriction clauses. Observe we are
3830  * assuming that the data distribution is affected uniformly by the
3831  * restriction clauses!
3832  *
3833  * XXX Possibly better way, but much more expensive: multiply by
3834  * selectivity of rel's restriction clauses that mention the target Var.
3835  */
3836  if (vardata.rel && vardata.rel->tuples > 0)
3837  {
3838  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3839  ndistinct = clamp_row_est(ndistinct);
3840  }
3841 
3842  /*
3843  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3844  * number of buckets is less than the expected number of distinct values;
3845  * otherwise it is 1/ndistinct.
3846  */
3847  if (ndistinct > nbuckets)
3848  estfract = 1.0 / nbuckets;
3849  else
3850  estfract = 1.0 / ndistinct;
3851 
3852  /*
3853  * Adjust estimated bucketsize upward to account for skewed distribution.
3854  */
3855  if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3856  estfract *= *mcv_freq / avgfreq;
3857 
3858  /*
3859  * Clamp bucketsize to sane range (the above adjustment could easily
3860  * produce an out-of-range result). We set the lower bound a little above
3861  * zero, since zero isn't a very sane result.
3862  */
3863  if (estfract < 1.0e-6)
3864  estfract = 1.0e-6;
3865  else if (estfract > 1.0)
3866  estfract = 1.0;
3867 
3868  *bucketsize_frac = (Selectivity) estfract;
3869 
3870  ReleaseVariableStats(vardata);
3871 }
double clamp_row_est(double nrows)
Definition: costsize.c:203
Cardinality tuples
Definition: pathnodes.h:928

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().
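
Putting the steps together: the bucketsize fraction starts at 1/nbuckets (or 1/ndistinct when there are no more distinct values than buckets), is inflated by mcv_freq/avgfreq to account for skew, and is finally clamped to [1.0e-6, 1.0]. A worked example with assumed statistics:

    #include <stdio.h>

    int
    main(void)
    {
        /* assumed inputs */
        double ndistinct = 5000.0;      /* after scaling by restriction selectivity */
        double nbuckets = 1024.0;
        double stanullfrac = 0.0;
        double mcv_freq = 0.05;         /* most common value holds 5% of the rows */

        double avgfreq = (1.0 - stanullfrac) / ndistinct;            /* 0.0002 */
        double estfract = (ndistinct > nbuckets) ? 1.0 / nbuckets
                                                 : 1.0 / ndistinct;  /* ~0.000977 */

        if (avgfreq > 0.0 && mcv_freq > avgfreq)
            estfract *= mcv_freq / avgfreq;                          /* skew: x250 */

        if (estfract < 1.0e-6)
            estfract = 1.0e-6;
        else if (estfract > 1.0)
            estfract = 1.0;

        printf("bucketsize_frac = %.4f, mcv_freq = %.2f\n", estfract, mcv_freq);
        return 0;
    }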

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( PlannerInfo root,
Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3887 of file selfuncs.c.

3889 {
3890  Size hashentrysize;
3891 
3892  hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
3893  path->pathtarget->width,
3894  agg_costs->transitionSpace);
3895 
3896  /*
3897  * Note that this disregards the effect of fill-factor and growth policy
3898  * of the hash table. That's probably ok, given that the default
3899  * fill-factor is relatively high. It'd be hard to meaningfully factor in
3900  * "double-in-size" growth policies here.
3901  */
3902  return hashentrysize * dNumGroups;
3903 }
size_t Size
Definition: c.h:589
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1695
Size transitionSpace
Definition: pathnodes.h:62
List * aggtransinfos
Definition: pathnodes.h:509

References PlannerInfo::aggtransinfos, hash_agg_entry_size(), list_length(), and AggClauseCosts::transitionSpace.

Referenced by consider_groupingsets_paths().
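
The returned size is simply the per-entry footprint reported by hash_agg_entry_size() multiplied by the expected number of groups. A back-of-the-envelope illustration with assumed numbers (the real entry size depends on the aggregate transition states and tuple width):

    #include <stdio.h>

    int
    main(void)
    {
        /* assumed values; hash_agg_entry_size() computes the real figure */
        double hashentrysize = 64.0;        /* bytes per hash table entry */
        double dNumGroups = 2.0e6;          /* expected number of groups */

        double tablesize = hashentrysize * dNumGroups;

        printf("estimated hashagg table: %.0f bytes (~%.0f MB)\n",
               tablesize, tablesize / (1024.0 * 1024.0));
        return 0;
    }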

◆ estimate_multivariate_ndistinct()

static bool estimate_multivariate_ndistinct ( PlannerInfo root,
RelOptInfo rel,
List **  varinfos,
double *  ndistinct 
)
static

Definition at line 3924 of file selfuncs.c.

3926 {
3927  ListCell *lc;
3928  int nmatches_vars;
3929  int nmatches_exprs;
3930  Oid statOid = InvalidOid;
3931  MVNDistinct *stats;
3932  StatisticExtInfo *matched_info = NULL;
3933  RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3934 
3935  /* bail out immediately if the table has no extended statistics */
3936  if (!rel->statlist)
3937  return false;
3938 
3939  /* look for the ndistinct statistics object matching the most vars */
3940  nmatches_vars = 0; /* we require at least two matches */
3941  nmatches_exprs = 0;
3942  foreach(lc, rel->statlist)
3943  {
3944  ListCell *lc2;
3945  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3946  int nshared_vars = 0;
3947  int nshared_exprs = 0;
3948 
3949  /* skip statistics of other kinds */
3950  if (info->kind != STATS_EXT_NDISTINCT)
3951  continue;
3952 
3953  /* skip statistics with mismatching stxdinherit value */
3954  if (info->inherit != rte->inh)
3955  continue;
3956 
3957  /*
3958  * Determine how many expressions (and variables in non-matched
3959  * expressions) match. We'll then use these numbers to pick the
3960  * statistics object that best matches the clauses.
3961  */
3962  foreach(lc2, *varinfos)
3963  {
3964  ListCell *lc3;
3965  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
3966  AttrNumber attnum;
3967 
3968  Assert(varinfo->rel == rel);
3969 
3970  /* simple Var, search in statistics keys directly */
3971  if (IsA(varinfo->var, Var))
3972  {
3973  attnum = ((Var *) varinfo->var)->varattno;
3974 
3975  /*
3976  * Ignore system attributes - we don't support statistics on
3977  * them, so can't match them (and it'd fail as the values are
3978  * negative).
3979  */
3980  if (!AttrNumberIsForUserDefinedAttr(attnum))
3981  continue;
3982 
3983  if (bms_is_member(attnum, info->keys))
3984  nshared_vars++;
3985 
3986  continue;
3987  }
3988 
3989  /* expression - see if it's in the statistics object */
3990  foreach(lc3, info->exprs)
3991  {
3992  Node *expr = (Node *) lfirst(lc3);
3993 
3994  if (equal(varinfo->var, expr))
3995  {
3996  nshared_exprs++;
3997  break;
3998  }
3999  }
4000  }
4001 
4002  if (nshared_vars + nshared_exprs < 2)
4003  continue;
4004 
4005  /*
4006  * Does this statistics object match more columns than the currently
4007  * best object? If so, use this one instead.
4008  *
4009  * XXX This should break ties using name of the object, or something
4010  * like that, to make the outcome stable.
4011  */
4012  if ((nshared_exprs > nmatches_exprs) ||
4013  (((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars)))
4014  {
4015  statOid = info->statOid;
4016  nmatches_vars = nshared_vars;
4017  nmatches_exprs = nshared_exprs;
4018  matched_info = info;
4019  }
4020  }
4021 
4022  /* No match? */
4023  if (statOid == InvalidOid)
4024  return false;
4025 
4026  Assert(nmatches_vars + nmatches_exprs > 1);
4027 
4028  stats = statext_ndistinct_load(statOid, rte->inh);
4029 
4030  /*
4031  * If we have a match, search it for the specific item that matches (there
4032  * must be one), and construct the output values.
4033  */
4034  if (stats)
4035  {
4036  int i;
4037  List *newlist = NIL;
4038  MVNDistinctItem *item = NULL;
4039  ListCell *lc2;
4040  Bitmapset *matched = NULL;
4041  AttrNumber attnum_offset;
4042 
4043  /*
4044  * How much we need to offset the attnums? If there are no
4045  * expressions, no offset is needed. Otherwise offset enough to move
4046  * the lowest one (which is equal to number of expressions) to 1.
4047  */
4048  if (matched_info->exprs)
4049  attnum_offset = (list_length(matched_info->exprs) + 1);
4050  else
4051  attnum_offset = 0;
4052 
4053  /* see what actually matched */
4054  foreach(lc2, *varinfos)
4055  {
4056  ListCell *lc3;
4057  int idx;
4058  bool found = false;
4059 
4060  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
4061 
4062  /*
4063  * Process a simple Var expression, by matching it to keys
4064  * directly. If there's a matching expression, we'll try matching
4065  * it later.
4066  */
4067  if (IsA(varinfo->var, Var))
4068  {
4069  AttrNumber attnum = ((Var *) varinfo->var)->varattno;
4070 
4071  /*
4072  * Ignore expressions on system attributes. Can't rely on the
4073  * bms check for negative values.
4074  */
4075  if (!AttrNumberIsForUserDefinedAttr(attnum))
4076  continue;
4077 
4078  /* Is the variable covered by the statistics object? */
4079  if (!bms_is_member(attnum, matched_info->keys))
4080  continue;
4081 
4082  attnum = attnum + attnum_offset;
4083 
4084  /* ensure sufficient offset */
4085  Assert(AttrNumberIsForUserDefinedAttr(attnum));
4086 
4087  matched = bms_add_member(matched, attnum);
4088 
4089  found = true;
4090  }
4091 
4092  /*
4093  * XXX Maybe we should allow searching the expressions even if we
4094  * found an attribute matching the expression? That would handle
4095  * trivial expressions like "(a)" but it seems fairly useless.
4096  */
4097  if (found)
4098  continue;
4099 
4100  /* expression - see if it's in the statistics object */
4101  idx = 0;
4102  foreach(lc3, matched_info->exprs)
4103  {
4104  Node *expr = (Node *) lfirst(lc3);
4105 
4106  if (equal(varinfo->var, expr))
4107  {
4108  AttrNumber attnum = -(idx + 1);
4109 
4110  attnum = attnum + attnum_offset;
4111 
4112  /* ensure sufficient offset */
4113  Assert(AttrNumberIsForUserDefinedAttr(attnum));
4114 
4115  matched = bms_add_member(matched, attnum);
4116 
4117  /* there should be just one matching expression */
4118  break;
4119  }
4120 
4121  idx++;
4122  }
4123  }
4124 
4125  /* Find the specific item that exactly matches the combination */
4126  for (i = 0; i < stats->nitems; i++)
4127  {
4128  int j;
4129  MVNDistinctItem *tmpitem = &stats->items[i];
4130 
4131  if (tmpitem->nattributes != bms_num_members(matched))
4132  continue;
4133 
4134  /* assume it's the right item */
4135  item = tmpitem;
4136 
4137  /* check that all item attributes/expressions fit the match */
4138  for (j = 0; j < tmpitem->nattributes; j++)
4139  {
4140  AttrNumber attnum = tmpitem->attributes[j];
4141 
4142  /*
4143  * Thanks to how we constructed the matched bitmap above, we
4144  * can just offset all attnums the same way.
4145  */
4146  attnum = attnum + attnum_offset;
4147 
4148  if (!bms_is_member(attnum, matched))
4149  {
4150  /* nah, it's not this item */
4151  item = NULL;
4152  break;
4153  }
4154  }
4155 
4156  /*
4157  * If the item has all the matched attributes, we know it's the
4158  * right one - there can't be a better one matching more attributes.
4159  */
4160  if (item)
4161  break;
4162  }
4163 
4164  /*
4165  * Make sure we found an item. There has to be one, because ndistinct
4166  * statistics includes all combinations of attributes.
4167  */
4168  if (!item)
4169  elog(ERROR, "corrupt MVNDistinct entry");
4170 
4171  /* Form the output varinfo list, keeping only unmatched ones */
4172  foreach(lc, *varinfos)
4173  {
4174  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
4175  ListCell *lc3;
4176  bool found = false;
4177 
4178  /*
4179  * Let's look at plain variables first, because it's the most
4180  * common case and the check is quite cheap. We can simply get the
4181  * attnum and check (with an offset) matched bitmap.
4182  */
4183  if (IsA(varinfo->var, Var))
4184  {
4185  AttrNumber attnum = ((Var *) varinfo->var)->varattno;
4186 
4187  /*
4188  * If it's a system attribute, we're done. We don't support
4189  * extended statistics on system attributes, so it's clearly
4190  * not matched. Just keep the expression and continue.
4191  */
4192  if (!AttrNumberIsForUserDefinedAttr(attnum))
4193  {
4194  newlist = lappend(newlist, varinfo);
4195  continue;
4196  }
4197 
4198  /* apply the same offset as above */
4199  attnum += attnum_offset;
4200 
4201  /* if it's not matched, keep the varinfo */
4202  if (!bms_is_member(attnum, matched))
4203  newlist = lappend(newlist, varinfo);
4204 
4205  /* The rest of the loop deals with complex expressions. */
4206  continue;
4207  }
4208 
4209  /*
4210  * Process complex expressions, not just simple Vars.
4211  *
4212  * First, we search for an exact match of an expression. If we
4213  * find one, we can just discard the whole GroupVarInfo, with all
4214  * the variables we extracted from it.
4215  *
4216  * Otherwise we inspect the individual vars, and try matching it
4217  * to variables in the item.
4218  */
4219  foreach(lc3, matched_info->exprs)
4220  {
4221  Node *expr = (Node *) lfirst(lc3);
4222 
4223  if (equal(varinfo->var, expr))
4224  {
4225  found = true;
4226  break;
4227  }
4228  }
4229 
4230  /* found exact match, skip */
4231  if (found)
4232  continue;
4233 
4234  newlist = lappend(newlist, varinfo);
4235  }
4236 
4237  *varinfos = newlist;
4238  *ndistinct = item->ndistinct;
4239  return true;
4240  }
4241 
4242  return false;
4243 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:665
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:444
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:755
MVNDistinct * statext_ndistinct_load(Oid mvoid, bool inh)
Definition: mvdistinct.c:149
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39
List * statlist
Definition: pathnodes.h:925
Bitmapset * keys
Definition: pathnodes.h:1268
Definition: primnodes.h:226

References Assert(), attnum, MVNDistinctItem::attributes, AttrNumberIsForUserDefinedAttr, bms_add_member(), bms_is_member(), bms_num_members(), elog(), equal(), ERROR, StatisticExtInfo::exprs, i, idx(), RangeTblEntry::inh, StatisticExtInfo::inherit, InvalidOid, IsA, MVNDistinct::items, j, StatisticExtInfo::keys, StatisticExtInfo::kind, lappend(), lfirst, list_length(), MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, NIL, MVNDistinct::nitems, planner_rt_fetch, GroupVarInfo::rel, RelOptInfo::relid, statext_ndistinct_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, and GroupVarInfo::var.

Referenced by estimate_num_groups().
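
The least obvious step in the listing is the attnum offsetting used to fold both plain columns and expressions into a single Bitmapset: expressions are numbered -1, -2, ... in order, and every attnum is then shifted by list_length(exprs) + 1 so that all members become positive. The standalone sketch below demonstrates that mapping; it does not use the real Bitmapset API, and the statistics object named in the comment is hypothetical.

    #include <stdio.h>

    /* Illustration: with nexprs expressions in the statistics object, shift
     * attnums by (nexprs + 1) so expression "attnums" -1..-nexprs and real
     * column attnums 1..N all land on distinct positive bitmap positions. */
    static int
    offset_attnum(int attnum, int nexprs)
    {
        int attnum_offset = (nexprs > 0) ? nexprs + 1 : 0;

        return attnum + attnum_offset;
    }

    int
    main(void)
    {
        int nexprs = 2;     /* hypothetical: stats on (a, b, (a+b), lower(c)) */

        printf("expr #1  -> bit %d\n", offset_attnum(-1, nexprs));   /* 2 */
        printf("expr #2  -> bit %d\n", offset_attnum(-2, nexprs));   /* 1 */
        printf("column 1 -> bit %d\n", offset_attnum(1, nexprs));    /* 4 */
        printf("column 2 -> bit %d\n", offset_attnum(2, nexprs));    /* 5 */
        return 0;
    }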

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset,
EstimationInfo estinfo 
)

Definition at line 3386 of file selfuncs.c.

3388 {
3389  List *varinfos = NIL;
3390  double srf_multiplier = 1.0;
3391  double numdistinct;
3392  ListCell *l;
3393  int i;
3394 
3395  /* Zero the estinfo output parameter, if non-NULL */
3396  if (estinfo != NULL)
3397  memset(estinfo, 0, sizeof(EstimationInfo));
3398 
3399  /*
3400  * We don't ever want to return an estimate of zero groups, as that tends
3401  * to lead to division-by-zero and other unpleasantness. The input_rows
3402  * estimate is usually already at least 1, but clamp it just in case it
3403  * isn't.
3404  */
3405  input_rows = clamp_row_est(input_rows);
3406 
3407  /*
3408  * If no grouping columns, there's exactly one group. (This can't happen
3409  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3410  * corner cases with set operations.)
3411  */
3412  if (groupExprs == NIL || (pgset && *pgset == NIL))
3413  return 1.0;
3414 
3415  /*
3416  * Count groups derived from boolean grouping expressions. For other
3417  * expressions, find the unique Vars used, treating an expression as a Var
3418  * if we can find stats for it. For each one, record the statistical
3419  * estimate of number of distinct values (total in its table, without
3420  * regard for filtering).
3421  */
3422  numdistinct = 1.0;
3423 
3424  i = 0;
3425  foreach(l, groupExprs)
3426  {
3427  Node *groupexpr = (Node *) lfirst(l);
3428  double this_srf_multiplier;
3429  VariableStatData vardata;
3430  List *varshere;
3431  ListCell *l2;
3432 
3433  /* is expression in this grouping set? */
3434  if (pgset && !list_member_int(*pgset, i++))
3435  continue;
3436 
3437  /*
3438  * Set-returning functions in grouping columns are a bit problematic.
3439  * The code below will effectively ignore their SRF nature and come up
3440  * with a numdistinct estimate as though they were scalar functions.
3441  * We compensate by scaling up the end result by the largest SRF
3442  * rowcount estimate. (This will be an overestimate if the SRF
3443  * produces multiple copies of any output value, but it seems best to
3444  * assume the SRF's outputs are distinct. In any case, it's probably
3445  * pointless to worry too much about this without much better
3446  * estimates for SRF output rowcounts than we have today.)
3447  */
3448  this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3449  if (srf_multiplier < this_srf_multiplier)
3450  srf_multiplier = this_srf_multiplier;
3451 
3452  /* Short-circuit for expressions returning boolean */
3453  if (exprType(groupexpr) == BOOLOID)
3454  {
3455  numdistinct *= 2.0;
3456  continue;
3457  }
3458 
3459  /*
3460  * If examine_variable is able to deduce anything about the GROUP BY
3461  * expression, treat it as a single variable even if it's really more
3462  * complicated.
3463  *
3464  * XXX This has the consequence that if there's a statistics object on
3465  * the expression, we don't split it into individual Vars. This
3466  * affects our selection of statistics in
3467  * estimate_multivariate_ndistinct, because it's probably better to
3468  * use more accurate estimate for each expression and treat them as
3469  * independent, than to combine estimates for the extracted variables
3470  * when we don't know how that relates to the expressions.
3471  */
3472  examine_variable(root, groupexpr, 0, &vardata);
3473  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3474  {
3475  varinfos = add_unique_group_var(root, varinfos,
3476  groupexpr, &vardata);
3477  ReleaseVariableStats(vardata);
3478  continue;
3479  }
3480  ReleaseVariableStats(vardata);
3481 
3482  /*
3483  * Else pull out the component Vars. Handle PlaceHolderVars by
3484  * recursing into their arguments (effectively assuming that the
3485  * PlaceHolderVar doesn't change the number of groups, which boils
3486  * down to ignoring the possible addition of nulls to the result set).
3487  */
3488  varshere = pull_var_clause(groupexpr,
3489  PVC_RECURSE_AGGREGATES |
3490  PVC_RECURSE_WINDOWFUNCS |
3491  PVC_RECURSE_PLACEHOLDERS);
3492 
3493  /*
3494  * If we find any variable-free GROUP BY item, then either it is a
3495  * constant (and we can ignore it) or it contains a volatile function;
3496  * in the latter case we punt and assume that each input row will
3497  * yield a distinct group.
3498  */
3499  if (varshere == NIL)
3500  {
3501  if (contain_volatile_functions(groupexpr))
3502  return input_rows;
3503  continue;
3504  }
3505 
3506  /*
3507  * Else add variables to varinfos list
3508  */
3509  foreach(l2, varshere)
3510  {
3511  Node *var = (Node *) lfirst(l2);
3512 
3513  examine_variable(root, var, 0, &vardata);
3514  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3515  ReleaseVariableStats(vardata);
3516  }
3517  }
3518 
3519  /*
3520  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3521  * list.
3522  */
3523  if (varinfos == NIL)
3524  {
3525  /* Apply SRF multiplier as we would do in the long path */
3526  numdistinct *= srf_multiplier;
3527  /* Round off */
3528  numdistinct = ceil(numdistinct);
3529  /* Guard against out-of-range answers */
3530  if (numdistinct > input_rows)
3531  numdistinct = input_rows;
3532  if (numdistinct < 1.0)
3533  numdistinct = 1.0;
3534  return numdistinct;
3535  }
3536 
3537  /*
3538  * Group Vars by relation and estimate total numdistinct.
3539  *
3540  * For each iteration of the outer loop, we process the frontmost Var in
3541  * varinfos, plus all other Vars in the same relation. We remove these
3542  * Vars from the newvarinfos list for the next iteration. This is the
3543  * easiest way to group Vars of same rel together.
3544  */
3545  do
3546  {
3547  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3548  RelOptInfo *rel = varinfo1->rel;
3549  double reldistinct = 1;
3550  double relmaxndistinct = reldistinct;
3551  int relvarcount = 0;
3552  List *newvarinfos = NIL;
3553  List *relvarinfos = NIL;
3554 
3555  /*
3556  * Split the list of varinfos in two - one for the current rel, one
3557  * for remaining Vars on other rels.
3558  */
3559  relvarinfos = lappend(relvarinfos, varinfo1);
3560  for_each_from(l, varinfos, 1)
3561  {
3562  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3563 
3564  if (varinfo2->rel == varinfo1->rel)
3565  {
3566  /* varinfos on current rel */
3567  relvarinfos = lappend(relvarinfos, varinfo2);
3568  }
3569  else
3570  {
3571  /* not time to process varinfo2 yet */
3572  newvarinfos = lappend(newvarinfos, varinfo2);
3573  }
3574  }
3575 
3576  /*
3577  * Get the numdistinct estimate for the Vars of this rel. We
3578  * iteratively search for multivariate n-distinct with maximum number
3579  * of vars; assuming that each var group is independent of the others,
3580  * we multiply them together. Any remaining relvarinfos after no more
3581  * multivariate matches are found are assumed independent too, so
3582  * their individual ndistinct estimates are multiplied also.
3583  *
3584  * While iterating, count how many separate numdistinct values we
3585  * apply. We apply a fudge factor below, but only if we multiplied
3586  * more than one such values.
3587  */
3588  while (relvarinfos)
3589  {
3590  double mvndistinct;
3591 
3592  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3593  &mvndistinct))
3594  {
3595  reldistinct *= mvndistinct;
3596  if (relmaxndistinct < mvndistinct)
3597  relmaxndistinct = mvndistinct;
3598  relvarcount++;
3599  }
3600  else
3601  {
3602  foreach(l, relvarinfos)
3603  {
3604  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3605 
3606  reldistinct *= varinfo2->ndistinct;
3607  if (relmaxndistinct < varinfo2->ndistinct)
3608  relmaxndistinct = varinfo2->ndistinct;
3609  relvarcount++;
3610 
3611  /*
3612  * When varinfo2's isdefault is set then we'd better set
3613  * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
3614  */
3615  if (estinfo != NULL && varinfo2->isdefault)
3616  estinfo->flags |= SELFLAG_USED_DEFAULT;
3617  }
3618 
3619  /* we're done with this relation */
3620  relvarinfos = NIL;
3621  }
3622  }
3623 
3624  /*
3625  * Sanity check --- don't divide by zero if empty relation.
3626  */
3627  Assert(IS_SIMPLE_REL(rel));
3628  if (rel->tuples > 0)
3629  {
3630  /*
3631  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3632  * fudge factor is because the Vars are probably correlated but we
3633  * don't know by how much. We should never clamp to less than the
3634  * largest ndistinct value for any of the Vars, though, since
3635  * there will surely be at least that many groups.
3636  */
3637  double clamp = rel->tuples;
3638 
3639  if (relvarcount > 1)
3640  {
3641  clamp *= 0.1;
3642  if (clamp < relmaxndistinct)
3643  {
3644  clamp = relmaxndistinct;
3645  /* for sanity in case some ndistinct is too large: */
3646  if (clamp > rel->tuples)
3647  clamp = rel->tuples;
3648  }
3649  }
3650  if (reldistinct > clamp)
3651  reldistinct = clamp;
3652 
3653  /*
3654  * Update the estimate based on the restriction selectivity,
3655  * guarding against division by zero when reldistinct is zero.
3656  * Also skip this if we know that we are returning all rows.
3657  */
3658  if (reldistinct > 0 && rel->rows < rel->tuples)
3659  {
3660  /*
3661  * Given a table containing N rows with n distinct values in a
3662  * uniform distribution, if we select p rows at random then
3663  * the expected number of distinct values selected is
3664  *
3665  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3666  *
3667  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3668  *
3669  * See "Approximating block accesses in database
3670  * organizations", S. B. Yao, Communications of the ACM,
3671  * Volume 20 Issue 4, April 1977 Pages 260-261.
3672  *
3673  * Alternatively, re-arranging the terms from the factorials,
3674  * this may be written as
3675  *
3676  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3677  *
3678  * This form of the formula is more efficient to compute in
3679  * the common case where p is larger than N/n. Additionally,
3680  * as pointed out by Dell'Era, if i << N for all terms in the
3681  * product, it can be approximated by
3682  *
3683  * n * (1 - ((N-p)/N)^(N/n))
3684  *
3685  * See "Expected distinct values when selecting from a bag
3686  * without replacement", Alberto Dell'Era,
3687  * http://www.adellera.it/investigations/distinct_balls/.
3688  *
3689  * The condition i << N is equivalent to n >> 1, so this is a
3690  * good approximation when the number of distinct values in
3691  * the table is large. It turns out that this formula also
3692  * works well even when n is small.
3693  */
3694  reldistinct *=
3695  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3696  rel->tuples / reldistinct));
3697  }
3698  reldistinct = clamp_row_est(reldistinct);
3699 
3700  /*
3701  * Update estimate of total distinct groups.
3702  */
3703  numdistinct *= reldistinct;
3704  }
3705 
3706  varinfos = newvarinfos;
3707  } while (varinfos != NIL);
3708 
3709  /* Now we can account for the effects of any SRFs */
3710  numdistinct *= srf_multiplier;
3711 
3712  /* Round off */
3713  numdistinct = ceil(numdistinct);
3714 
3715  /* Guard against out-of-range answers */
3716  if (numdistinct > input_rows)
3717  numdistinct = input_rows;
3718  if (numdistinct < 1.0)
3719  numdistinct = 1.0;
3720 
3721  return numdistinct;
3722 }
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:483
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:291
bool list_member_int(const List *list, int datum)
Definition: list.c:701
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:43
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:184
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:188
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:186
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:824
#define for_each_from(cell, lst, N)
Definition: pg_list.h:414
#define linitial(l)
Definition: pg_list.h:178
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3924
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3266
#define SELFLAG_USED_DEFAULT
Definition: selfuncs.h:76
uint32 flags
Definition: selfuncs.h:80
List * pull_var_clause(Node *node, int flags)
Definition: var.c:607

References add_unique_group_var(), Assert(), clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), EstimationInfo::flags, for_each_from, HeapTupleIsValid, i, IS_SIMPLE_REL, GroupVarInfo::isdefault, VariableStatData::isunique, lappend(), lfirst, linitial, list_member_int(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, SELFLAG_USED_DEFAULT, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), cost_incremental_sort(), cost_memoize_rescan(), create_final_distinct_paths(), create_partial_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().
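
As a rough illustration of the Dell'Era approximation applied to reldistinct above, the following standalone sketch (not part of selfuncs.c; the table sizes are hypothetical) evaluates n * (1 - ((N-p)/N)^(N/n)):

#include <math.h>
#include <stdio.h>

/* Expected number of distinct values among p rows drawn from a table of
 * N rows holding n distinct values, per the approximation cited above. */
static double
expected_distinct(double N, double n, double p)
{
    return n * (1.0 - pow((N - p) / N, N / n));
}

int
main(void)
{
    /* hypothetical table: N = 1,000,000 rows, n = 50,000 distinct values,
     * and the restriction keeps p = 10,000 rows */
    printf("%.0f\n", expected_distinct(1000000.0, 50000.0, 10000.0));
    return 0;
}

With these made-up numbers the estimate comes out near 9,100 distinct values, well below n, which is the reduction the code above applies to reldistinct before clamping with clamp_row_est().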

◆ examine_simple_variable()

static void examine_simple_variable ( PlannerInfo root,
Var var,
VariableStatData vardata 
)
static

Definition at line 5367 of file selfuncs.c.

5369 {
5370  RangeTblEntry *rte = root->simple_rte_array[var->varno];
5371 
5372  Assert(IsA(rte, RangeTblEntry));
5373 
5374  if (get_relation_stats_hook &&
5375  (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
5376  {
5377  /*
5378  * The hook took control of acquiring a stats tuple. If it did supply
5379  * a tuple, it'd better have supplied a freefunc.
5380  */
5381  if (HeapTupleIsValid(vardata->statsTuple) &&
5382  !vardata->freefunc)
5383  elog(ERROR, "no function provided to release variable stats with");
5384  }
5385  else if (rte->rtekind == RTE_RELATION)
5386  {
5387  /*
5388  * Plain table or parent of an inheritance appendrel, so look up the
5389  * column in pg_statistic
5390  */
5391  vardata->statsTuple = SearchSysCache3(STATRELATTINH,
5392  ObjectIdGetDatum(rte->relid),
5393  Int16GetDatum(var->varattno),
5394  BoolGetDatum(rte->inh));
5395  vardata->freefunc = ReleaseSysCache;
5396 
5397  if (HeapTupleIsValid(vardata->statsTuple))
5398  {
5399  RelOptInfo *onerel = find_base_rel(root, var->varno);
5400  Oid userid;
5401 
5402  /*
5403  * Check if user has permission to read this column. We require
5404  * all rows to be accessible, so there must be no securityQuals
5405  * from security barrier views or RLS policies. Use
5406  * onerel->userid if it's set, in case we're accessing the table
5407  * via a view.
5408  */
5409  userid = OidIsValid(onerel->userid) ? onerel->userid : GetUserId();
5410 
5411  vardata->acl_ok =
5412  rte->securityQuals == NIL &&
5413  ((pg_class_aclcheck(rte->relid, userid,
5414  ACL_SELECT) == ACLCHECK_OK) ||
5415  (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
5416  ACL_SELECT) == ACLCHECK_OK));
5417 
5418  /*
5419  * If the user doesn't have permissions to access an inheritance
5420  * child relation or specifically this attribute, check the
5421  * permissions of the table/column actually mentioned in the
5422  * query, since most likely the user does have that permission
5423  * (else the query will fail at runtime), and if the user can read
5424  * the column there then he can get the values of the child table
5425  * too. To do that, we must find out which of the root parent's
5426  * attributes the child relation's attribute corresponds to.
5427  */
5428  if (!vardata->acl_ok && var->varattno > 0 &&
5429  root->append_rel_array != NULL)
5430  {
5431  AppendRelInfo *appinfo;
5432  Index varno = var->varno;
5433  int varattno = var->varattno;
5434  bool found = false;
5435 
5436  appinfo = root->append_rel_array[varno];
5437 
5438  /*
5439  * Partitions are mapped to their immediate parent, not the
5440  * root parent, so must be ready to walk up multiple
5441  * AppendRelInfos. But stop if we hit a parent that is not
5442  * RTE_RELATION --- that's a flattened UNION ALL subquery, not
5443  * an inheritance parent.
5444  */
5445  while (appinfo &&
5446  planner_rt_fetch(appinfo->parent_relid,
5447  root)->rtekind == RTE_RELATION)
5448  {
5449  int parent_varattno;
5450 
5451  found = false;
5452  if (varattno <= 0 || varattno > appinfo->num_child_cols)
5453  break; /* safety check */
5454  parent_varattno = appinfo->parent_colnos[varattno - 1];
5455  if (parent_varattno == 0)
5456  break; /* Var is local to child */
5457 
5458  varno = appinfo->parent_relid;
5459  varattno = parent_varattno;
5460  found = true;
5461 
5462  /* If the parent is itself a child, continue up. */
5463  appinfo = root->append_rel_array[varno];
5464  }
5465 
5466  /*
5467  * In rare cases, the Var may be local to the child table, in
5468  * which case, we've got to live with having no access to this
5469  * column's stats.
5470  */
5471  if (!found)
5472  return;
5473 
5474  /* Repeat the access check on this parent rel & column */
5475  rte = planner_rt_fetch(varno, root);
5476  Assert(rte->rtekind == RTE_RELATION);
5477 
5478  /*
5479  * Fine to use the same userid as it's the same in all
5480  * relations of a given inheritance tree.
5481  */
5482  vardata->acl_ok =
5483  rte->securityQuals == NIL &&
5484  ((pg_class_aclcheck(rte->relid, userid,
5485  ACL_SELECT) == ACLCHECK_OK) ||
5486  (pg_attribute_aclcheck(rte->relid, varattno, userid,
5487  ACL_SELECT) == ACLCHECK_OK));
5488  }
5489  }
5490  else
5491  {
5492  /* suppress any possible leakproofness checks later */
5493  vardata->acl_ok = true;
5494  }
5495  }
5496  else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
5497  {
5498  /*
5499  * Plain subquery (not one that was converted to an appendrel).
5500  */
5501  Query *subquery = rte->subquery;
5502  RelOptInfo *rel;
5503  TargetEntry *ste;
5504 
5505  /*
5506  * Punt if it's a whole-row var rather than a plain column reference.
5507  */
5508  if (var->varattno == InvalidAttrNumber)
5509  return;
5510 
5511  /*
5512  * Punt if subquery uses set operations or GROUP BY, as these will
5513  * mash underlying columns' stats beyond recognition. (Set ops are
5514  * particularly nasty; if we forged ahead, we would return stats
5515  * relevant to only the leftmost subselect...) DISTINCT is also
5516  * problematic, but we check that later because there is a possibility
5517  * of learning something even with it.
5518  */
5519  if (subquery->setOperations ||
5520  subquery->groupClause ||
5521  subquery->groupingSets)
5522  return;
5523 
5524  /*
5525  * OK, fetch RelOptInfo for subquery. Note that we don't change the
5526  * rel returned in vardata, since caller expects it to be a rel of the
5527  * caller's query level. Because we might already be recursing, we
5528  * can't use that rel pointer either, but have to look up the Var's
5529  * rel afresh.
5530  */
5531  rel = find_base_rel(root, var->varno);
5532 
5533  /* If the subquery hasn't been planned yet, we have to punt */
5534  if (rel->subroot == NULL)
5535  return;
5536  Assert(IsA(rel->subroot, PlannerInfo));
5537 
5538  /*
5539  * Switch our attention to the subquery as mangled by the planner. It
5540  * was okay to look at the pre-planning version for the tests above,
5541  * but now we need a Var that will refer to the subroot's live
5542  * RelOptInfos. For instance, if any subquery pullup happened during
5543  * planning, Vars in the targetlist might have gotten replaced, and we
5544  * need to see the replacement expressions.
5545  */
5546  subquery = rel->subroot->parse;
5547  Assert(IsA(subquery, Query));
5548 
5549  /* Get the subquery output expression referenced by the upper Var */
5550  ste = get_tle_by_resno(subquery->targetList, var->varattno);
5551  if (ste == NULL || ste->resjunk)
5552  elog(ERROR, "subquery %s does not have attribute %d",
5553  rte->eref->aliasname, var->varattno);
5554  var = (Var *) ste->expr;
5555 
5556  /*
5557  * If subquery uses DISTINCT, we can't make use of any stats for the
5558  * variable ... but, if it's the only DISTINCT column, we are entitled
5559  * to consider it unique. We do the test this way so that it works
5560  * for cases involving DISTINCT ON.
5561  */
5562  if (subquery->distinctClause)
5563  {
5564  if (list_length(subquery->distinctClause) == 1 &&
5565  targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
5566  vardata->isunique = true;
5567  /* cannot go further */
5568  return;
5569  }
5570 
5571  /*
5572  * If the sub-query originated from a view with the security_barrier
5573  * attribute, we must not look at the variable's statistics, though it
5574  * seems all right to notice the existence of a DISTINCT clause. So
5575  * stop here.
5576  *
5577  * This is probably a harsher restriction than necessary; it's
5578  * certainly OK for the selectivity estimator (which is a C function,
5579  * and therefore omnipotent anyway) to look at the statistics. But
5580  * many selectivity estimators will happily *invoke the operator
5581  * function* to try to work out a good estimate - and that's not OK.
5582  * So for now, don't dig down for stats.
5583  */
5584  if (rte->security_barrier)
5585  return;
5586 
5587  /* Can only handle a simple Var of subquery's query level */
5588  if (var && IsA(var, Var) &&
5589  var->varlevelsup == 0)
5590  {
5591  /*
5592  * OK, recurse into the subquery. Note that the original setting
5593  * of vardata->isunique (which will surely be false) is left
5594  * unchanged in this situation. That's what we want, since even
5595  * if the underlying column is unique, the subquery may have
5596  * joined to other tables in a way that creates duplicates.
5597  */
5598  examine_simple_variable(rel->subroot, var, vardata);
5599  }
5600  }
5601  else
5602  {
5603  /*
5604  * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
5605  * won't see RTE_JOIN here because join alias Vars have already been
5606  * flattened.) There's not much we can do with function outputs, but
5607  * maybe someday try to be smarter about VALUES and/or CTEs.
5608  */
5609  }
5610 }
@ ACLCHECK_OK
Definition: acl.h:183
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:3794
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:3923
#define InvalidAttrNumber
Definition: attnum.h:23
unsigned int Index
Definition: c.h:598
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Oid GetUserId(void)
Definition: miscinit.c:510
bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
TargetEntry * get_tle_by_resno(List *tlist, AttrNumber resno)
@ RTE_SUBQUERY
Definition: parsenodes.h:1015
#define ACL_SELECT
Definition: parsenodes.h:84
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:405
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:5367
char * aliasname
Definition: primnodes.h:42
Index parent_relid
Definition: pathnodes.h:2915
int num_child_cols
Definition: pathnodes.h:2951
Query * parse
Definition: pathnodes.h:199
Node * setOperations
Definition: parsenodes.h:217
List * groupClause
Definition: parsenodes.h:198
List * targetList
Definition: parsenodes.h:189
List * groupingSets
Definition: parsenodes.h:201
List * distinctClause
Definition: parsenodes.h:207
bool security_barrier
Definition: parsenodes.h:1082
List * securityQuals
Definition: parsenodes.h:1204
Query * subquery
Definition: parsenodes.h:1081
Alias * eref
Definition: parsenodes.h:1200
Oid userid
Definition: pathnodes.h:945
PlannerInfo * subroot
Definition: pathnodes.h:932
Expr * expr
Definition: primnodes.h:1886
AttrNumber varattno
Definition: primnodes.h:238
int varno
Definition: primnodes.h:233
Index varlevelsup
Definition: primnodes.h:258

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, Alias::aliasname, Assert(), BoolGetDatum(), Query::distinctClause, elog(), RangeTblEntry::eref, ERROR, TargetEntry::expr, find_base_rel(), VariableStatData::freefunc, get_relation_stats_hook, get_tle_by_resno(), GetUserId(), Query::groupClause, Query::groupingSets, HeapTupleIsValid, if(), RangeTblEntry::inh, Int16GetDatum(), InvalidAttrNumber, InvalidOid, IsA, VariableStatData::isunique, list_length(), NIL, AppendRelInfo::num_child_cols, ObjectIdGetDatum(), OidIsValid, AppendRelInfo::parent_relid, PlannerInfo::parse, pg_attribute_aclcheck(), pg_class_aclcheck(), planner_rt_fetch, ReleaseSysCache(), RangeTblEntry::relid, RTE_RELATION, RTE_SUBQUERY, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::security_barrier, RangeTblEntry::securityQuals, Query::setOperations, STATRELATTINH, VariableStatData::statsTuple, RangeTblEntry::subquery, RelOptInfo::subroot, targetIsInSortList(), Query::targetList, RelOptInfo::userid, Var::varattno, Var::varlevelsup, and Var::varno.

Referenced by examine_variable().
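
The hook branch above defines the contract for get_relation_stats_hook: the hook returns true to take over the statistics lookup, and if it supplies a statsTuple it must also supply a freefunc, or examine_simple_variable() raises the error shown. A minimal extension-side sketch follows; lookup_my_stats() is hypothetical, the acl_ok handling is an assumption, and the typedef name is assumed to match the hook declaration in utils/selfuncs.h:

#include "postgres.h"
#include "access/attnum.h"
#include "access/htup_details.h"
#include "fmgr.h"
#include "utils/selfuncs.h"

PG_MODULE_MAGIC;

void _PG_init(void);

static get_relation_stats_hook_type prev_relation_stats_hook = NULL;

/* Hypothetical lookup into extension-managed statistics; stubbed out here.
 * A real extension would return a pg_statistic-shaped heap tuple. */
static HeapTuple
lookup_my_stats(Oid relid, AttrNumber attnum)
{
    return NULL;
}

static bool
my_relation_stats_hook(PlannerInfo *root, RangeTblEntry *rte,
                       AttrNumber attnum, VariableStatData *vardata)
{
    HeapTuple   tup = lookup_my_stats(rte->relid, attnum);

    if (!HeapTupleIsValid(tup))
    {
        /* decline; fall back to any previously installed hook */
        if (prev_relation_stats_hook)
            return prev_relation_stats_hook(root, rte, attnum, vardata);
        return false;           /* lets examine_simple_variable() use pg_statistic */
    }

    vardata->statsTuple = tup;
    vardata->freefunc = heap_freetuple; /* required whenever a tuple is supplied */
    vardata->acl_ok = true;             /* assumes the extension verified access */
    return true;
}

void
_PG_init(void)
{
    prev_relation_stats_hook = get_relation_stats_hook;
    get_relation_stats_hook = my_relation_stats_hook;
}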

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4978 of file selfuncs.c.

4980 {
4981  Node *basenode;
4982  Relids varnos;
4983  RelOptInfo *onerel;
4984 
4985  /* Make sure we don't return dangling pointers in vardata */
4986  MemSet(vardata, 0, sizeof(VariableStatData));
4987 
4988  /* Save the exposed type of the expression */
4989  vardata->vartype = exprType(node);
4990 
4991  /* Look inside any binary-compatible relabeling */
4992 
4993  if (IsA(node, RelabelType))
4994  basenode = (Node *) ((RelabelType *) node)->arg;
4995  else
4996  basenode = node;
4997 
4998  /* Fast path for a simple Var */
4999 
5000  if (IsA(basenode, Var) &&
5001  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
5002  {
5003  Var *var = (Var *) basenode;
5004 
5005  /* Set up result fields other than the stats tuple */
5006  vardata->var = basenode; /* return Var without relabeling */
5007  vardata->rel = find_base_rel(root, var->varno);
5008  vardata->atttype = var->vartype;
5009  vardata->atttypmod = var->vartypmod;
5010  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
5011 
5012  /* Try to locate some stats */
5013  examine_simple_variable(root, var, vardata);
5014 
5015  return;
5016  }
5017 
5018  /*
5019  * Okay, it's a more complicated expression. Determine variable
5020  * membership. Note that when varRelid isn't zero, only vars of that
5021  * relation are considered "real" vars.
5022  */
5023  varnos = pull_varnos(root, basenode);
5024 
5025  onerel = NULL;
5026 
5027  switch (bms_membership(varnos))
5028  {
5029  case BMS_EMPTY_SET:
5030  /* No Vars at all ... must be pseudo-constant clause */
5031  break;
5032  case BMS_SINGLETON:
5033  if (varRelid == 0 || bms_is_member(varRelid, varnos))
5034  {
5035  onerel = find_base_rel(root,
5036  (varRelid ? varRelid : bms_singleton_member(varnos)));
5037  vardata->rel = onerel;
5038  node = basenode; /* strip any relabeling */
5039  }
5040  /* else treat it as a constant */
5041  break;
5042  case BMS_MULTIPLE:
5043  if (varRelid == 0)
5044  {
5045  /* treat it as a variable of a join relation */
5046  vardata->rel = find_join_rel(root, varnos);
5047  node = basenode; /* strip any relabeling */
5048  }
5049  else if (bms_is_member(varRelid, varnos))
5050  {
5051  /* ignore the vars belonging to other relations */
5052  vardata->rel = find_base_rel(root, varRelid);
5053  node = basenode; /* strip any relabeling */
5054  /* note: no point in expressional-index search here */
5055  }
5056  /* else treat it as a constant */
5057  break;
5058  }
5059 
5060  bms_free(varnos);
5061 
5062  vardata->var = node;
5063  vardata->atttype = exprType(node);
5064  vardata->atttypmod = exprTypmod(node);
5065 
5066  if (onerel)
5067  {
5068  /*
5069  * We have an expression in vars of a single relation. Try to match
5070  * it to expressional index columns, in hopes of finding some
5071  * statistics.
5072  *
5073  * Note that we consider all index columns including INCLUDE columns,
5074  * since there could be stats for such columns. But the test for
5075  * uniqueness needs to be warier.
5076  *
5077  * XXX it's conceivable that there are multiple matches with different
5078  * index opfamilies; if so, we need to pick one that matches the
5079  * operator we are estimating for. FIXME later.
5080  */
5081  ListCell *ilist;
5082  ListCell *slist;
5083  Oid userid;
5084 
5085  /*
5086  * Determine the user ID to use for privilege checks: either
5087  * onerel->userid if it's set (e.g., in case we're accessing the table
5088  * via a view), or the current user otherwise.
5089  *
5090  * If we drill down to child relations, we keep using the same userid:
5091  * it's going to be the same anyway, due to how we set up the relation
5092  * tree (q.v. build_simple_rel).
5093  */
5094  userid = OidIsValid(onerel->userid) ? onerel->userid : GetUserId();
5095 
5096  foreach(ilist, onerel->indexlist)
5097  {
5098  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
5099  ListCell *indexpr_item;
5100  int pos;
5101 
5102  indexpr_item = list_head(index->indexprs);
5103  if (indexpr_item == NULL)
5104  continue; /* no expressions here... */
5105 
5106  for (pos = 0; pos < index->ncolumns; pos++)
5107  {
5108  if (index->indexkeys[pos] == 0)
5109  {
5110  Node *indexkey;
5111 
5112  if (indexpr_item == NULL)
5113  elog(ERROR, "too few entries in indexprs list");
5114  indexkey = (Node *) lfirst(indexpr_item);
5115  if (indexkey && IsA(indexkey, RelabelType))
5116  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
5117  if (equal(node, indexkey))
5118  {
5119  /*
5120  * Found a match ... is it a unique index? Tests here
5121  * should match has_unique_index().
5122  */
5123  if (index->unique &&
5124  index->nkeycolumns == 1 &&
5125  pos == 0 &&
5126  (index->indpred == NIL || index->predOK))
5127  vardata->isunique = true;
5128 
5129  /*
5130  * Has it got stats? We only consider stats for
5131  * non-partial indexes, since partial indexes probably
5132  * don't reflect whole-relation statistics; the above
5133  * check for uniqueness is the only info we take from
5134  * a partial index.
5135  *
5136  * An index stats hook, however, must make its own
5137  * decisions about what to do with partial indexes.
5138  */
5139  if (get_index_stats_hook &&
5140  (*get_index_stats_hook) (root, index->indexoid,
5141  pos + 1, vardata))
5142  {
5143  /*
5144  * The hook took control of acquiring a stats
5145  * tuple. If it did supply a tuple, it'd better
5146  * have supplied a freefunc.
5147  */
5148  if (HeapTupleIsValid(vardata->statsTuple) &&
5149  !vardata->freefunc)
5150  elog(ERROR, "no function provided to release variable stats with");
5151  }
5152  else if (index->indpred == NIL)
5153  {
5154  vardata->statsTuple =
5155  SearchSysCache3(STATRELATTINH,
5156  ObjectIdGetDatum(index->indexoid),
5157  Int16GetDatum(pos + 1),
5158  BoolGetDatum(false));
5159  vardata->freefunc = ReleaseSysCache;
5160 
5161  if (HeapTupleIsValid(vardata->statsTuple))
5162  {
5163  /* Get index's table for permission check */
5164  RangeTblEntry *rte;
5165 
5166  rte = planner_rt_fetch(index->rel->relid, root);
5167  Assert(rte->rtekind == RTE_RELATION);
5168 
5169  /*
5170  * For simplicity, we insist on the whole
5171  * table being selectable, rather than trying
5172  * to identify which column(s) the index
5173  * depends on. Also require all rows to be
5174  * selectable --- there must be no
5175  * securityQuals from security barrier views
5176  * or RLS policies.
5177  */
5178  vardata->acl_ok =
5179  rte->securityQuals == NIL &&
5180  (pg_class_aclcheck(rte->relid, userid,
5181  ACL_SELECT) == ACLCHECK_OK);
5182 
5183  /*
5184  * If the user doesn't have permissions to
5185  * access an inheritance child relation, check
5186  * the permissions of the table actually
5187  * mentioned in the query, since most likely
5188  * the user does have that permission. Note
5189  * that whole-table select privilege on the
5190  * parent doesn't quite guarantee that the
5191  * user could read all columns of the child.
5192  * But in practice it's unlikely that any
5193  * interesting security violation could result
5194  * from allowing access to the expression
5195  * index's stats, so we allow it anyway. See
5196  * similar code in examine_simple_variable()
5197  * for additional comments.
5198  */
5199  if (!vardata->acl_ok &&
5200  root->append_rel_array != NULL)
5201  {
5202  AppendRelInfo *appinfo;
5203  Index varno = index->rel->relid;
5204 
5205  appinfo = root->append_rel_array[varno];
5206  while (appinfo &&
5207  planner_rt_fetch(appinfo->parent_relid,
5208  root)->rtekind == RTE_RELATION)
5209  {
5210  varno = appinfo->parent_relid;
5211  appinfo = root->append_rel_array[varno];
5212  }
5213  if (varno != index->rel->relid)
5214  {
5215  /* Repeat access check on this rel */
5216  rte = planner_rt_fetch(varno, root);
5217  Assert(rte->rtekind == RTE_RELATION);
5218 
5219  vardata->acl_ok =
5220  rte->securityQuals == NIL &&
5221  (pg_class_aclcheck(rte->relid,
5222  userid,
5223  ACL_SELECT) == ACLCHECK_OK);
5224  }
5225  }
5226  }
5227  else
5228  {
5229  /* suppress leakproofness checks later */
5230  vardata->acl_ok = true;
5231  }
5232  }
5233  if (vardata->statsTuple)
5234  break;
5235  }
5236  indexpr_item = lnext(index->indexprs, indexpr_item);
5237  }
5238  }
5239  if (vardata->statsTuple)
5240  break;
5241  }
5242 
5243  /*
5244  * Search extended statistics for one with a matching expression.
5245  * There might be multiple ones, so just grab the first one. In the
5246  * future, we might consider the statistics target (and pick the most
5247  * accurate statistics) and maybe some other parameters.
5248  */
5249  foreach(slist, onerel->statlist)
5250  {
5251  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
5252  RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
5253  ListCell *expr_item;
5254  int pos;
5255 
5256  /*
5257  * Stop once we've found statistics for the expression (either
5258  * from extended stats, or for an index in the preceding loop).
5259  */
5260  if (vardata->statsTuple)
5261  break;
5262 
5263  /* skip stats without per-expression stats */
5264  if (info->kind != STATS_EXT_EXPRESSIONS)
5265  continue;
5266 
5267  /* skip stats with mismatching stxdinherit value */
5268  if (info->inherit != rte->inh)
5269  continue;
5270 
5271  pos = 0;
5272  foreach(expr_item, info->exprs)
5273  {
5274  Node *expr = (Node *) lfirst(expr_item);
5275 
5276  Assert(expr);
5277 
5278  /* strip RelabelType before comparing it */
5279  if (expr && IsA(expr, RelabelType))
5280  expr = (Node *) ((RelabelType *) expr)->arg;
5281 
5282  /* found a match, see if we can extract pg_statistic row */
5283  if (equal(node, expr))
5284  {
5285  /*
5286  * XXX Not sure if we should cache the tuple somewhere.
5287  * Now we just create a new copy every time.
5288  */
5289  vardata->statsTuple =
5290  statext_expressions_load(info->statOid, rte->inh, pos);
5291 
5292  vardata->freefunc = ReleaseDummy;
5293 
5294  /*
5295  * For simplicity, we insist on the whole table being
5296  * selectable, rather than trying to identify which
5297  * column(s) the statistics object depends on. Also
5298  * require all rows to be selectable --- there must be no
5299  * securityQuals from security barrier views or RLS
5300  * policies.
5301  */
5302  vardata->acl_ok =
5303  rte->securityQuals == NIL &&
5304  (pg_class_aclcheck(rte->relid, userid,
5305  ACL_SELECT) == ACLCHECK_OK);
5306 
5307  /*
5308  * If the user doesn't have permissions to access an
5309  * inheritance child relation, check the permissions of
5310  * the table actually mentioned in the query, since most
5311  * likely the user does have that permission. Note that
5312  * whole-table select privilege on the parent doesn't
5313  * quite guarantee that the user could read all columns of
5314  * the child. But in practice it's unlikely that any
5315  * interesting security violation could result from
5316  * allowing access to the expression stats, so we allow it
5317  * anyway. See similar code in examine_simple_variable()
5318  * for additional comments.
5319  */
5320  if (!vardata->acl_ok &&
5321  root->append_rel_array != NULL)
5322  {
5323  AppendRelInfo *appinfo;
5324  Index varno = onerel->relid;
5325 
5326  appinfo = root->append_rel_array[varno];
5327  while (appinfo &&
5328  planner_rt_fetch(appinfo->parent_relid,
5329  root)->rtekind == RTE_RELATION)
5330  {
5331  varno = appinfo->parent_relid;
5332  appinfo = root->append_rel_array[varno];
5333  }
5334  if (varno != onerel->relid)
5335  {
5336  /* Repeat access check on this rel */
5337  rte = planner_rt_fetch(varno, root);
5338  Assert(rte->rtekind == RTE_RELATION);
5339 
5340  vardata->acl_ok =
5341  rte->securityQuals == NIL &&
5342  (pg_class_aclcheck(rte->relid,
5343  userid,
5344  ACL_SELECT) == ACLCHECK_OK);
5345  }
5346  }
5347 
5348  break;
5349  }
5350 
5351  pos++;
5352  }
5353  }
5354  }
5355 }
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:596
void bms_free(Bitmapset *a)
Definition: bitmapset.c:209
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:691
@ BMS_SINGLETON
Definition: bitmapset.h:72
@ BMS_EMPTY_SET
Definition: bitmapset.h:71
@ BMS_MULTIPLE
Definition: bitmapset.h:73
#define MemSet(start, val, len)
Definition: c.h:1004
HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx)
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:284
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2160
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:507
static void ReleaseDummy(HeapTuple tuple)
Definition: selfuncs.c:4937
List * indexlist
Definition: pathnodes.h:923
int32 atttypmod
Definition: selfuncs.h:94
Relids pull_varnos(PlannerInfo *root, Node *node)
Definition: var.c:108

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, arg, Assert(), VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum(), elog(), equal(), ERROR, examine_simple_variable(), StatisticExtInfo::exprs, exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, if(), RelOptInfo::indexlist, RangeTblEntry::inh, StatisticExtInfo::inherit, Int16GetDatum(), IsA, VariableStatData::isunique, StatisticExtInfo::kind, lfirst, list_head(), lnext(), MemSet, NIL, ObjectIdGetDatum(), OidIsValid, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, pull_varnos(), VariableStatData::rel, ReleaseDummy(), ReleaseSysCache(), RangeTblEntry::relid, RelOptInfo::relid, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), RangeTblEntry::securityQuals, statext_expressions_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, STATRELATTINH, VariableStatData::statsTuple, RelOptInfo::userid, VariableStatData::var, Var::varattno, Var::varno, and VariableStatData::vartype.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().
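
The call protocol implied above (and used by generic_restriction_selectivity() later in this file) is that every examine_variable() call is paired with ReleaseVariableStats(), so that any freefunc attached to statsTuple gets run. A hypothetical helper illustrating that pattern, not part of selfuncs.c:

#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
#include "utils/selfuncs.h"

/* Hypothetical helper: fetch stanullfrac for an expression, defaulting to 0
 * when no statistics are available.  Mirrors the statsTuple handling used by
 * the estimators in this file. */
static double
expr_null_fraction(PlannerInfo *root, Node *expr, int varRelid)
{
    VariableStatData vardata;
    double      nullfrac = 0.0;

    examine_variable(root, expr, varRelid, &vardata);

    if (HeapTupleIsValid(vardata.statsTuple))
        nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;

    ReleaseVariableStats(vardata);

    return nullfrac;
}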

◆ find_join_input_rel()

static RelOptInfo * find_join_input_rel ( PlannerInfo root,
Relids  relids 
)
static

Definition at line 6312 of file selfuncs.c.

6313 {
6314  RelOptInfo *rel = NULL;
6315 
6316  switch (bms_membership(relids))
6317  {
6318  case BMS_EMPTY_SET:
6319  /* should not happen */
6320  break;
6321  case BMS_SINGLETON:
6322  rel = find_base_rel(root, bms_singleton_member(relids));
6323  break;
6324  case BMS_MULTIPLE:
6325  rel = find_join_rel(root, relids);
6326  break;
6327  }
6328 
6329  if (rel == NULL)
6330  elog(ERROR, "could not find RelOptInfo for given relids");
6331 
6332  return rel;
6333 }

References BMS_EMPTY_SET, bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), elog(), ERROR, find_base_rel(), and find_join_rel().

Referenced by eqjoinsel().

◆ generic_restriction_selectivity()

double generic_restriction_selectivity ( PlannerInfo root,
Oid  oproid,
Oid  collation,
List args,
int  varRelid,
double  default_selectivity 
)

Definition at line 913 of file selfuncs.c.

916 {
917  double selec;
918  VariableStatData vardata;
919  Node *other;
920  bool varonleft;
921 
922  /*
923  * If expression is not variable OP something or something OP variable,
924  * then punt and return the default estimate.
925  */
926  if (!get_restriction_variable(root, args, varRelid,
927  &vardata, &other, &varonleft))
928  return default_selectivity;
929 
930  /*
931  * If the something is a NULL constant, assume operator is strict and
932  * return zero, ie, operator will never return TRUE.
933  */
934  if (IsA(other, Const) &&
935  ((Const *) other)->constisnull)
936  {
937  ReleaseVariableStats(vardata);
938  return 0.0;
939  }
940 
941  if (IsA(other, Const))
942  {
943  /* Variable is being compared to a known non-null constant */
944  Datum constval = ((Const *) other)->constvalue;
945  FmgrInfo opproc;
946  double mcvsum;
947  double mcvsel;
948  double nullfrac;
949  int hist_size;
950 
951  fmgr_info(get_opcode(oproid), &opproc);
952 
953  /*
954  * Calculate the selectivity for the column's most common values.
955  */
956  mcvsel = mcv_selectivity(&vardata, &opproc, collation,
957  constval, varonleft,
958  &mcvsum);
959 
960  /*
961  * If the histogram is large enough, see what fraction of it matches
962  * the query, and assume that's representative of the non-MCV
963  * population. Otherwise use the default selectivity for the non-MCV
964  * population.
965  */
966  selec = histogram_selectivity(&vardata, &opproc, collation,
967  constval, varonleft,
968  10, 1, &hist_size);
969  if (selec < 0)
970  {
971  /* Nope, fall back on default */
972  selec = default_selectivity;
973  }
974  else if (hist_size < 100)
975  {
976  /*
977  * For histogram sizes from 10 to 100, we combine the histogram
978  * and default selectivities, putting increasingly more trust in
979  * the histogram for larger sizes.
980  */
981  double hist_weight = hist_size / 100.0;
982 
983  selec = selec * hist_weight +
984  default_selectivity * (1.0 - hist_weight);
985  }
986 
987  /* In any case, don't believe extremely small or large estimates. */
988  if (selec < 0.0001)
989  selec = 0.0001;
990  else if (selec > 0.9999)
991  selec = 0.9999;
992 
993  /* Don't forget to account for nulls. */
994  if (HeapTupleIsValid(vardata.statsTuple))
995  nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
996  else
997  nullfrac = 0.0;
998 
999  /*
1000  * Now merge the results from the MCV and histogram calculations,
1001  * realizing that the histogram covers only the non-null values that
1002  * are not listed in MCV.
1003  */
1004  selec *= 1.0 - nullfrac - mcvsum;
1005  selec += mcvsel;
1006  }
1007  else
1008  {
1009  /* Comparison value is not constant, so we can't do anything */
1010  selec = default_selectivity;
1011  }
1012 
1013  ReleaseVariableStats(vardata);
1014 
1015  /* result should be in range, but make sure... */
1016  CLAMP_PROBABILITY(selec);
1017 
1018  return selec;
1019 }
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:731
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:822

References generate_unaccent_rules::args, CLAMP_PROBABILITY, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT, HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), ReleaseVariableStats, and VariableStatData::statsTuple.

Referenced by ltreeparentsel(), and matchingsel().
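
To make the blending above concrete with made-up numbers: suppose hist_size = 40, the histogram fraction comes back as 0.02, and default_selectivity = 0.005. Then hist_weight = 40/100 = 0.4 and selec = 0.02 * 0.4 + 0.005 * 0.6 = 0.011, already within the [0.0001, 0.9999] bounds applied at that point. If the column further has nullfrac = 0.1, mcvsum = 0.3, and mcvsel = 0.25, the final merge gives 0.011 * (1 - 0.1 - 0.3) + 0.25 = 0.2566, which CLAMP_PROBABILITY leaves unchanged.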

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 6431 of file selfuncs.c.

6435 {
6436  IndexOptInfo *index = path->indexinfo;
6437  List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6438  List *indexOrderBys = path->indexorderbys;
6439  Cost indexStartupCost;
6440  Cost indexTotalCost;
6441  Selectivity indexSelectivity;
6442  double indexCorrelation;
6443  double numIndexPages;
6444  double numIndexTuples;
6445  double spc_random_page_cost;
6446  double num_sa_scans;
6447  double num_outer_scans;
6448  double num_scans;
6449  double qual_op_cost;
6450  double qual_arg_cost;
6451  List *selectivityQuals;
6452  ListCell *l;
6453 
6454  /*
6455  * If the index is partial, AND the index predicate with the explicitly
6456  * given indexquals to produce a more accurate idea of the index
6457  * selectivity.
6458  */
6459  selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
6460 
6461  /*
6462  * Check for ScalarArrayOpExpr index quals, and estimate the number of
6463  * index scans that will be performed.
6464  */
6465  num_sa_scans = 1;
6466  foreach(l, indexQuals)
6467  {
6468  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6469 
6470  if (IsA(rinfo->clause, ScalarArrayOpExpr))
6471  {
6472  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6473  int alength = estimate_array_length(lsecond(saop->args));
6474 
6475  if (alength > 1)
6476  num_sa_scans *= alength;
6477  }
6478  }
6479 
6480  /* Estimate the fraction of main-table tuples that will be visited */
6481  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6482  index->rel->relid,
6483  JOIN_INNER,
6484  NULL);
6485 
6486  /*
6487  * If caller didn't give us an estimate, estimate the number of index
6488  * tuples that will be visited. We do it in this rather peculiar-looking
6489  * way in order to get the right answer for partial indexes.
6490  */
6491  numIndexTuples = costs->numIndexTuples;
6492  if (numIndexTuples <= 0.0)
6493  {
6494  numIndexTuples = indexSelectivity * index->rel->tuples;
6495 
6496  /*
6497  * The above calculation counts all the tuples visited across all
6498  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
6499  * average per-indexscan number, so adjust. This is a handy place to
6500  * round to integer, too. (If caller supplied tuple estimate, it's
6501  * responsible for handling these considerations.)
6502  */
6503  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6504  }
6505 
6506  /*
6507  * We can bound the number of tuples by the index size in any case. Also,
6508  * always estimate at least one tuple is touched, even when
6509  * indexSelectivity estimate is tiny.
6510  */
6511  if (numIndexTuples > index->tuples)
6512  numIndexTuples = index->tuples;
6513  if (numIndexTuples < 1.0)
6514  numIndexTuples = 1.0;
6515 
6516  /*
6517  * Estimate the number of index pages that will be retrieved.
6518  *
6519  * We use the simplistic method of taking a pro-rata fraction of the total
6520  * number of index pages. In effect, this counts only leaf pages and not
6521  * any overhead such as index metapage or upper tree levels.
6522  *
6523  * In practice access to upper index levels is often nearly free because
6524  * those tend to stay in cache under load; moreover, the cost involved is
6525  * highly dependent on index type. We therefore ignore such costs here
6526  * and leave it to the caller to add a suitable charge if needed.
6527  */
6528  if (index->pages > 1 && index->tuples > 1)
6529  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6530  else
6531  numIndexPages = 1.0;
6532 
6533  /* fetch estimated page cost for tablespace containing index */
6534  get_tablespace_page_costs(index->reltablespace,
6535  &spc_random_page_cost,
6536  NULL);
6537 
6538  /*
6539  * Now compute the disk access costs.
6540  *
6541  * The above calculations are all per-index-scan. However, if we are in a
6542  * nestloop inner scan, we can expect the scan to be repeated (with
6543  * different search keys) for each row of the outer relation. Likewise,
6544  * ScalarArrayOpExpr quals result in multiple index scans. This creates
6545  * the potential for cache effects to reduce the number of disk page
6546  * fetches needed. We want to estimate the average per-scan I/O cost in
6547  * the presence of caching.
6548  *
6549  * We use the Mackert-Lohman formula (see costsize.c for details) to
6550  * estimate the total number of page fetches that occur. While this
6551  * wasn't what it was designed for, it seems a reasonable model anyway.
6552  * Note that we are counting pages not tuples anymore, so we take N = T =
6553  * index size, as if there were one "tuple" per page.
6554  */
6555  num_outer_scans = loop_count;
6556  num_scans = num_sa_scans * num_outer_scans;
6557 
6558  if (num_scans > 1)
6559  {
6560  double pages_fetched;
6561 
6562  /* total page fetches ignoring cache effects */
6563  pages_fetched = numIndexPages * num_scans;
6564 
6565  /* use Mackert and Lohman formula to adjust for cache effects */
6566  pages_fetched = index_pages_fetched(pages_fetched,
6567  index->pages,
6568  (double) index->pages,
6569  root);
6570 
6571  /*
6572  * Now compute the total disk access cost, and then report a pro-rated
6573  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
6574  * since that's internal to the indexscan.)
6575  */
6576  indexTotalCost = (pages_fetched * spc_random_page_cost)
6577  / num_outer_scans;
6578  }
6579  else
6580  {
6581  /*
6582  * For a single index scan, we just charge spc_random_page_cost per
6583  * page touched.
6584  */
6585  indexTotalCost = numIndexPages * spc_random_page_cost;
6586  }
6587 
6588  /*
6589  * CPU cost: any complex expressions in the indexquals will need to be
6590  * evaluated once at the start of the scan to reduce them to runtime keys
6591  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
6592  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6593  * indexqual operator. Because we have numIndexTuples as a per-scan
6594  * number, we have to multiply by num_sa_scans to get the correct result
6595  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
6596  * ORDER BY expressions.
6597  *
6598  * Note: this neglects the possible costs of rechecking lossy operators.
6599  * Detecting that that might be needed seems more expensive than it's
6600  * worth, though, considering all the other inaccuracies here ...
6601  */
6602  qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
6603  index_other_operands_eval_cost(root, indexOrderBys);
6604  qual_op_cost = cpu_operator_cost *
6605  (list_length(indexQuals) + list_length(indexOrderBys));
6606 
6607  indexStartupCost = qual_arg_cost;
6608  indexTotalCost += qual_arg_cost;
6609  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6610 
6611  /*
6612  * Generic assumption about index correlation: there isn't any.
6613  */
6614  indexCorrelation = 0.0;
6615 
6616  /*
6617  * Return everything to caller.
6618  */
6619  costs->indexStartupCost = indexStartupCost;
6620  costs->indexTotalCost = indexTotalCost;
6621  costs->indexSelectivity = indexSelectivity;
6622  costs->indexCorrelation = indexCorrelation;
6623  costs->numIndexPages = numIndexPages;
6624  costs->numIndexTuples = numIndexTuples;
6625  costs->spc_random_page_cost = spc_random_page_cost;
6626  costs->num_sa_scans = num_sa_scans;
6627 }
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:870
double cpu_index_tuple_cost
Definition: costsize.c:123
double spc_random_page_cost
Definition: selfuncs.h:130
List * indexorderbys
Definition: pathnodes.h:1680

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, and GenericCosts::spc_random_page_cost.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().
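
Callers such as blcostestimate() wrap this routine inside an index AM's amcostestimate function: zero a GenericCosts struct, let genericcostestimate() fill it, then add any AM-specific charges. A hedged sketch of that pattern for a hypothetical AM that adds no extra charges:

#include "postgres.h"
#include "utils/selfuncs.h"

/* Hypothetical amcostestimate implementation that simply delegates to
 * genericcostestimate(); a real AM would add descent or metapage charges. */
static void
mycostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
               Cost *indexStartupCost, Cost *indexTotalCost,
               Selectivity *indexSelectivity, double *indexCorrelation,
               double *indexPages)
{
    GenericCosts costs;

    MemSet(&costs, 0, sizeof(costs));

    /* leaving costs.numIndexTuples at zero lets genericcostestimate derive it */
    genericcostestimate(root, path, loop_count, &costs);

    *indexStartupCost = costs.indexStartupCost;
    *indexTotalCost = costs.indexTotalCost;
    *indexSelectivity = costs.indexSelectivity;
    *indexCorrelation = costs.indexCorrelation;
    *indexPages = costs.numIndexPages;
}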

◆ get_actual_variable_endpoint()

static bool get_actual_variable_endpoint ( Relation  heapRel,
Relation  indexRel,
ScanDirection  indexscandir,
ScanKey  scankeys,
int16  typLen,
bool  typByVal,
TupleTableSlot tableslot,
MemoryContext  outercontext,
Datum endpointDatum 
)
static

Definition at line 6151 of file selfuncs.c.

6160 {
6161  bool have_data = false;
6162  SnapshotData SnapshotNonVacuumable;
6163  IndexScanDesc index_scan;
6164  Buffer vmbuffer = InvalidBuffer;
6165  BlockNumber last_heap_block = InvalidBlockNumber;
6166  int n_visited_heap_pages = 0;
6167  ItemPointer tid;
6168  Datum values[INDEX_MAX_KEYS];
6169  bool isnull[INDEX_MAX_KEYS];
6170  MemoryContext oldcontext;
6171 
6172  /*
6173  * We use the index-only-scan machinery for this. With mostly-static
6174  * tables that's a win because it avoids a heap visit. It's also a win
6175  * for dynamic data, but the reason is less obvious; read on for details.
6176  *
6177  * In principle, we should scan the index with our current active
6178  * snapshot, which is the best approximation we've got to what the query
6179  * will see when executed. But that won't be exact if a new snap is taken
6180  * before running the query, and it can be very expensive if a lot of
6181  * recently-dead or uncommitted rows exist at the beginning or end of the
6182  * index (because we'll laboriously fetch each one and reject it).
6183  * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
6184  * and uncommitted rows as well as normal visible rows. On the other
6185  * hand, it will reject known-dead rows, and thus not give a bogus answer
6186  * when the extreme value has been deleted (unless the deletion was quite
6187  * recent); that case motivates not using SnapshotAny here.
6188  *
6189  * A crucial point here is that SnapshotNonVacuumable, with
6190  * GlobalVisTestFor(heapRel) as horizon, yields the inverse of the
6191  * condition that the indexscan will use to decide that index entries are
6192  * killable (see heap_hot_search_buffer()). Therefore, if the snapshot
6193  * rejects a tuple (or more precisely, all tuples of a HOT chain) and we
6194  * have to continue scanning past it, we know that the indexscan will mark
6195  * that index entry killed. That means that the next
6196  * get_actual_variable_endpoint() call will not have to re-consider that
6197  * index entry. In this way we avoid repetitive work when this function
6198  * is used a lot during planning.
6199  *
6200  * But using SnapshotNonVacuumable creates a hazard of its own. In a
6201  * recently-created index, some index entries may point at "broken" HOT
6202  * chains in which not all the tuple versions contain data matching the
6203  * index entry. The live tuple version(s) certainly do match the index,
6204  * but SnapshotNonVacuumable can accept recently-dead tuple versions that
6205  * don't match. Hence, if we took data from the selected heap tuple, we
6206  * might get a bogus answer that's not close to the index extremal value,
6207  * or could even be NULL. We avoid this hazard because we take the data
6208  * from the index entry not the heap.
6209  *
6210  * Despite all this care, there are situations where we might find many
6211  * non-visible tuples near the end of the index. We don't want to expend
6212  * a huge amount of time here, so we give up once we've read too many heap
6213  * pages. When we fail for that reason, the caller will end up using
6214  * whatever extremal value is recorded in pg_statistic.
6215  */
6216  InitNonVacuumableSnapshot(SnapshotNonVacuumable,
6217  GlobalVisTestFor(heapRel));
6218 
6219  index_scan = index_beginscan(heapRel, indexRel,
6220  &SnapshotNonVacuumable,
6221  1, 0);
6222  /* Set it up for index-only scan */
6223  index_scan->xs_want_itup = true;
6224  index_rescan(index_scan, scankeys, 1, NULL, 0);
6225 
6226  /* Fetch first/next tuple in specified direction */
6227  while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
6228  {
6229  BlockNumber block = ItemPointerGetBlockNumber(tid);
6230 
6231  if (!VM_ALL_VISIBLE(heapRel,
6232  block,
6233  &vmbuffer))
6234  {
6235  /* Rats, we have to visit the heap to check visibility */
6236  if (!index_fetch_heap(index_scan, tableslot))
6237  {
6238  /*
6239  * No visible tuple for this index entry, so we need to
6240  * advance to the next entry. Before doing so, count heap
6241  * page fetches and give up if we've done too many.
6242  *
6243  * We don't charge a page fetch if this is the same heap page
6244  * as the previous tuple. This is on the conservative side,
6245  * since other recently-accessed pages are probably still in
6246  * buffers too; but it's good enough for this heuristic.
6247  */
6248 #define VISITED_PAGES_LIMIT 100
6249 
6250  if (block != last_heap_block)
6251  {
6252  last_heap_block = block;
6253  n_visited_heap_pages++;
6254  if (n_visited_heap_pages > VISITED_PAGES_LIMIT)
6255  break;
6256  }
6257 
6258  continue; /* no visible tuple, try next index entry */
6259  }
6260 
6261  /* We don't actually need the heap tuple for anything */
6262  ExecClearTuple(tableslot);
6263 
6264  /*
6265  * We don't care whether there's more than one visible tuple in
6266  * the HOT chain; if any are visible, that's good enough.
6267  */
6268  }
6269 
6270  /*
6271  * We expect that btree will return data in IndexTuple not HeapTuple
6272  * format. It's not lossy either.
6273  */
6274  if (!index_scan->xs_itup)
6275  elog(ERROR, "no data returned for index-only scan");
6276  if (index_scan->xs_recheck)
6277  elog(ERROR, "unexpected recheck indication from btree");
6278 
6279  /* OK to deconstruct the index tuple */
6280  index_deform_tuple(index_scan->xs_itup,
6281  index_scan->xs_itupdesc,
6282  values, isnull);
6283 
6284  /* Shouldn't have got a null, but be careful */
6285  if (isnull[0])
6286  elog(ERROR, "found unexpected null value in index \"%s\"",
6287  RelationGetRelationName(indexRel));
6288 
6289  /* Copy the index column value out to caller's context */
6290  oldcontext = MemoryContextSwitchTo(outercontext);
6291  *endpointDatum = datumCopy(values[0], typByVal, typLen);
6292  MemoryContextSwitchTo(oldcontext);
6293  have_data = true;
6294  break;
6295  }
6296 
6297  if (vmbuffer != InvalidBuffer)
6298  ReleaseBuffer(vmbuffer);
6299  index_endscan(index_scan);
6300 
6301  return have_data;
6302 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:156
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4480
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
Definition: indexam.c:525
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys)
Definition: indexam.c:205
bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
Definition: indexam.c:583
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:327
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:301
void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:456
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
#define INDEX_MAX_KEYS
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4040
#define RelationGetRelationName(relation)
Definition: rel.h:538
#define VISITED_PAGES_LIMIT
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:82
IndexTuple xs_itup
Definition: relscan.h:142
struct TupleDescData * xs_itupdesc
Definition: relscan.h:143
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24

References datumCopy(), elog(), ERROR, ExecClearTuple(), GlobalVisTestFor(), index_beginscan(), index_deform_tuple(), index_endscan(), index_fetch_heap(), index_getnext_tid(), INDEX_MAX_KEYS, index_rescan(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, ItemPointerGetBlockNumber(), MemoryContextSwitchTo(), RelationGetRelationName, ReleaseBuffer(), values, VISITED_PAGES_LIMIT, VM_ALL_VISIBLE, IndexScanDescData::xs_itup, IndexScanDescData::xs_itupdesc, IndexScanDescData::xs_recheck, and IndexScanDescData::xs_want_itup.

Referenced by get_actual_variable_range().

◆ get_actual_variable_range()

static bool get_actual_variable_range ( PlannerInfo root,
VariableStatData vardata,
Oid  sortop,
Oid  collation,
Datum min,
Datum max 
)
static

Definition at line 5971 of file selfuncs.c.

5974 {
5975  bool have_data = false;
5976  RelOptInfo *rel = vardata->rel;
5977  RangeTblEntry *rte;
5978  ListCell *lc;
5979 
5980  /* No hope if no relation or it doesn't have indexes */
5981  if (rel == NULL || rel->indexlist == NIL)
5982  return false;
5983  /* If it has indexes it must be a plain relation */
5984  rte = root->simple_rte_array[rel->relid];
5985  Assert(rte->rtekind == RTE_RELATION);
5986 
5987  /* ignore partitioned tables. Any indexes here are not real indexes */
5988  if (rte->relkind == RELKIND_PARTITIONED_TABLE)
5989  return false;
5990 
5991  /* Search through the indexes to see if any match our problem */
5992  foreach(lc, rel->indexlist)
5993  {
5994  IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
5995  ScanDirection indexscandir;
5996 
5997  /* Ignore non-btree indexes */
5998  if (index->relam != BTREE_AM_OID)
5999  continue;
6000 
6001  /*
6002  * Ignore partial indexes --- we only want stats that cover the entire
6003  * relation.
6004  */
6005  if (index->indpred != NIL)
6006  continue;
6007 
6008  /*
6009  * The index list might include hypothetical indexes inserted by a
6010  * get_relation_info hook --- don't try to access them.
6011  */
6012  if (index->hypothetical)
6013  continue;
6014 
6015  /*
6016  * The first index column must match the desired variable, sortop, and
6017  * collation --- but we can use a descending-order index.
6018  */
6019  if (collation != index->indexcollations[0])
6020  continue; /* test first 'cause it's cheapest */
6021  if (!match_index_to_operand(vardata->var, 0, index))
6022  continue;
6023  switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
6024  {
6025  case BTLessStrategyNumber:
6026  if (index->reverse_sort[0])
6027  indexscandir = BackwardScanDirection;
6028  else
6029  indexscandir = ForwardScanDirection;
6030  break;
6031  case BTGreaterStrategyNumber:
6032  if (index->reverse_sort[0])
6033  indexscandir = ForwardScanDirection;
6034  else
6035  indexscandir = BackwardScanDirection;
6036  break;
6037  default:
6038  /* index doesn't match the sortop */
6039  continue;
6040  }
6041 
6042  /*
6043  * Found a suitable index to extract data from. Set up some data that
6044  * can be used by both invocations of get_actual_variable_endpoint.
6045  */
6046  {
6047  MemoryContext tmpcontext;
6048  MemoryContext oldcontext;
6049  Relation heapRel;
6050  Relation indexRel;
6051  TupleTableSlot *slot;
6052  int16 typLen;
6053  bool typByVal;
6054  ScanKeyData scankeys[1];
6055 
6056  /* Make sure any cruft gets recycled when we're done */
6058  "get_actual_variable_range workspace",
6060  oldcontext = MemoryContextSwitchTo(tmpcontext);
6061 
6062  /*
6063  * Open the table and index so we can read from them. We should
6064  * already have some type of lock on each.
6065  */
6066  heapRel = table_open(rte->relid, NoLock);
6067  indexRel = index_open(index->indexoid, NoLock);
6068 
6069  /* build some stuff needed for indexscan execution */
6070  slot = table_slot_create(heapRel, NULL);
6071  get_typlenbyval(vardata->atttype, &typLen, &typByVal);
6072 
6073  /* set up an IS NOT NULL scan key so that we ignore nulls */
6074  ScanKeyEntryInitialize(&scankeys[0],
6075  SK_ISNULL | SK_SEARCHNOTNULL,
6076  1, /* index col to scan */
6077  InvalidStrategy, /* no strategy */
6078  InvalidOid, /* no strategy subtype */
6079  InvalidOid, /* no collation */
6080  InvalidOid, /* no reg proc for this */
6081  (Datum) 0); /* constant */
6082 
6083  /* If min is requested ... */
6084  if (min)
6085  {
6086  have_data = get_actual_variable_endpoint(heapRel,
6087  indexRel,
6088  indexscandir,
6089  scankeys,
6090  typLen,
6091  typByVal,
6092  slot,
6093  oldcontext,
6094  min);
6095  }
6096  else
6097  {
6098  /* If min not requested, still want to fetch max */
6099  have_data = true;
6100  }
6101 
6102  /* If max is requested, and we didn't already fail ... */
6103  if (max && have_data)
6104  {
6105  /* scan in the opposite direction; all else is the same */
6106  have_data = get_actual_variable_endpoint(heapRel,
6107  indexRel,
6108  -indexscandir,
6109  scankeys,
6110  typLen,
6111  typByVal,
6112  slot,
6113  oldcontext,
6114  max);
6115  }
6116 
6117  /* Clean everything up */
 6118  ExecDropSingleTupleTableSlot(slot);
 6119 
6120  index_close(indexRel, NoLock);
6121  table_close(heapRel, NoLock);
6122 
6123  MemoryContextSwitchTo(oldcontext);
6124  MemoryContextDelete(tmpcontext);
6125 
6126  /* And we're done */
6127  break;
6128  }
6129  }
6130 
6131  return have_data;
6132 }
signed short int16
Definition: c.h:477
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1255
bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index)
Definition: indxpath.c:3713
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2209
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:403
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
void ScanKeyEntryInitialize(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, RegProcedure procedure, Datum argument)
Definition: scankey.c:32
ScanDirection
Definition: sdir.h:25
@ BackwardScanDirection
Definition: sdir.h:26
@ ForwardScanDirection
Definition: sdir.h:28
static bool get_actual_variable_endpoint(Relation heapRel, Relation indexRel, ScanDirection indexscandir, ScanKey scankeys, int16 typLen, bool typByVal, TupleTableSlot *tableslot, MemoryContext outercontext, Datum *endpointDatum)
Definition: selfuncs.c:6151
#define SK_SEARCHNOTNULL
Definition: skey.h:122
#define SK_ISNULL
Definition: skey.h:115
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define InvalidStrategy
Definition: stratnum.h:24
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), VariableStatData::atttype, BackwardScanDirection, BTGreaterStrategyNumber, BTLessStrategyNumber, CurrentMemoryContext, ExecDropSingleTupleTableSlot(), ForwardScanDirection, get_actual_variable_endpoint(), get_op_opfamily_strategy(), get_typlenbyval(), index_close(), index_open(), RelOptInfo::indexlist, InvalidOid, InvalidStrategy, lfirst, match_index_to_operand(), MemoryContextDelete(), MemoryContextSwitchTo(), NIL, NoLock, VariableStatData::rel, RangeTblEntry::relid, RelOptInfo::relid, RangeTblEntry::relkind, RTE_RELATION, RangeTblEntry::rtekind, ScanKeyEntryInitialize(), SK_ISNULL, SK_SEARCHNOTNULL, table_close(), table_open(), table_slot_create(), and VariableStatData::var.

Referenced by get_variable_range(), and ineq_histogram_selectivity().
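
The sketch below is hypothetical (the helper name sketch_refresh_upper_bound is illustrative, and get_actual_variable_range() is static, so such a caller could only live inside selfuncs.c). It mirrors the way ineq_histogram_selectivity() consults this function: when a probe constant sorts at or beyond the last histogram boundary, the stored endpoint may be stale, so the live extreme is fetched from a suitable btree index, requesting only the endpoint that is actually needed so that a single index probe suffices.

#include "postgres.h"
#include "utils/selfuncs.h"

static void
sketch_refresh_upper_bound(PlannerInfo *root, VariableStatData *vardata,
                           Oid sortop, Oid collation)
{
    Datum       actual_max;

    /* Ask only for the maximum; passing NULL for "min" skips that probe */
    if (get_actual_variable_range(root, vardata, sortop, collation,
                                  NULL, &actual_max))
    {
        /* substitute actual_max for the stale histogram endpoint here */
    }
}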

◆ get_join_variables()

void get_join_variables ( PlannerInfo *  root,
List *  args,
SpecialJoinInfo *  sjinfo,
VariableStatData *  vardata1,
VariableStatData *  vardata2,
bool *  join_is_reversed 
)

Definition at line 4909 of file selfuncs.c.

4912 {
4913  Node *left,
4914  *right;
4915 
4916  if (list_length(args) != 2)
4917  elog(ERROR, "join operator should take two arguments");
4918 
4919  left = (Node *) linitial(args);
4920  right = (Node *) lsecond(args);
4921 
4922  examine_variable(root, left, 0, vardata1);
4923  examine_variable(root, right, 0, vardata2);
4924 
4925  if (vardata1->rel &&
4926  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4927  *join_is_reversed = true; /* var1 is on RHS */
4928  else if (vardata2->rel &&
4929  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4930  *join_is_reversed = true; /* var2 is on LHS */
4931  else
4932  *join_is_reversed = false;
4933 }
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:332
Relids relids
Definition: pathnodes.h:856
Relids syn_lefthand
Definition: pathnodes.h:2842
Relids syn_righthand
Definition: pathnodes.h:2843

References args, bms_is_subset(), elog(), ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().
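
Usage illustration (hypothetical wrapper, not part of selfuncs.c): callers such as eqjoinsel() examine both join operands with this function and then interpret the per-side statistics according to join_is_reversed, releasing the statistics tuples when done.

#include "postgres.h"
#include "nodes/pathnodes.h"
#include "utils/selfuncs.h"

static double
sketch_joinsel(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo)
{
    VariableStatData vardata1;
    VariableStatData vardata2;
    bool        join_is_reversed;
    double      selec = DEFAULT_EQ_SEL;     /* fallback if stats are unhelpful */

    get_join_variables(root, args, sjinfo,
                       &vardata1, &vardata2, &join_is_reversed);

    /* ... consult vardata1/vardata2, swapping roles if join_is_reversed ... */

    ReleaseVariableStats(vardata1);
    ReleaseVariableStats(vardata2);
    return selec;
}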

◆ get_quals_from_indexclauses()

List * get_quals_from_indexclauses ( List *  indexclauses )

Definition at line 6347 of file selfuncs.c.

6348 {
6349  List *result = NIL;
6350  ListCell *lc;
6351 
6352  foreach(lc, indexclauses)
6353  {
6354  IndexClause *iclause = lfirst_node(IndexClause, lc);
6355  ListCell *lc2;
6356 
6357  foreach(lc2, iclause->indexquals)
6358  {
6359  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6360 
6361  result = lappend(result, rinfo);
6362  }
6363  }
6364  return result;
6365 }

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().
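
Usage sketch (the function name sketch_index_selectivity is illustrative, not part of PostgreSQL): an index cost estimator flattens the path's IndexClauses into their derived RestrictInfos and hands them to clauselist_selectivity(), which is essentially what genericcostestimate() does.

#include "postgres.h"
#include "nodes/pathnodes.h"
#include "optimizer/cost.h"
#include "utils/selfuncs.h"

static Selectivity
sketch_index_selectivity(PlannerInfo *root, IndexPath *path)
{
    /* Flatten IndexClauses into a flat list of RestrictInfos */
    List       *indexQuals = get_quals_from_indexclauses(path->indexclauses);

    return clauselist_selectivity(root, indexQuals,
                                  path->indexinfo->rel->relid,
                                  JOIN_INNER, NULL);
}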

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo *  root,
List *  args,
int  varRelid,
VariableStatData *  vardata,
Node **  other,
bool *  varonleft 
)

Definition at line 4849 of file selfuncs.c.

4852 {
4853  Node *left,
4854  *right;
4855  VariableStatData rdata;
4856 
4857  /* Fail if not a binary opclause (probably shouldn't happen) */
4858  if (list_length(args) != 2)
4859  return false;
4860 
4861  left = (Node *) linitial(args);
4862  right = (Node *) lsecond(args);
4863 
4864  /*
4865  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4866  * relations will be treated as pseudoconstants.
4867  */
4868  examine_variable(root, left, varRelid, vardata);
4869  examine_variable(root, right, varRelid, &rdata);
4870 
4871  /*
4872  * If one side is a variable and the other not, we win.
4873  */
4874  if (vardata->rel && rdata.rel == NULL)
4875  {
4876  *varonleft = true;
4877  *other = estimate_expression_value(root, rdata.var);
4878  /* Assume we need no ReleaseVariableStats(rdata) here */
4879  return true;
4880  }
4881 
4882  if (vardata->rel == NULL && rdata.rel)
4883  {
4884  *varonleft = false;
4885  *other = estimate_expression_value(root, vardata->var);
4886  /* Assume we need no ReleaseVariableStats(*vardata) here */
4887  *vardata = rdata;
4888  return true;
4889  }
4890 
4891  /* Oops, clause has wrong structure (probably var op var) */
4892  ReleaseVariableStats(*vardata);
4893  ReleaseVariableStats(rdata);
4894 
4895  return false;
4896 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2312

References args, estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), generic_restriction_selectivity(), multirangesel(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().
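
Usage sketch (hypothetical function, not taken from the source): operator restriction estimators such as eqsel_internal() use this to split a "var OP something" clause into the variable side and the pseudoconstant side, falling back to a default selectivity when the clause has some other shape.

#include "postgres.h"
#include "nodes/pathnodes.h"
#include "utils/selfuncs.h"

static double
sketch_restriction_sel(PlannerInfo *root, List *args, int varRelid)
{
    VariableStatData vardata;
    Node       *other;
    bool        varonleft;
    double      selec = DEFAULT_EQ_SEL;

    /* If the clause isn't "var OP pseudoconstant", just use the default */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        return DEFAULT_EQ_SEL;

    /* ... combine vardata's statistics with "other" here ... */

    ReleaseVariableStats(vardata);
    return selec;
}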

◆ get_stats_slot_range()

static void get_stats_slot_range ( AttStatsSlot *  sslot,
Oid  opfuncoid,
FmgrInfo *  opproc,
Oid  collation,
int16  typLen,
bool  typByVal,
Datum *  min,
Datum *  max,
bool *  p_have_data 
)
static

Definition at line 5908 of file selfuncs.c.

5911 {
5912  Datum tmin = *min;
5913  Datum tmax = *max;
5914  bool have_data = *p_have_data;
5915  bool found_tmin = false;
5916  bool found_tmax = false;
5917 
5918  /* Look up the comparison function, if we didn't already do so */
5919  if (opproc->fn_oid != opfuncoid)
5920  fmgr_info(opfuncoid, opproc);
5921 
5922  /* Scan all the slot's values */
5923  for (int i = 0; i < sslot->nvalues; i++)
5924  {
5925  if (!have_data)
5926  {
5927  tmin = tmax = sslot->values[i];
5928  found_tmin = found_tmax = true;
5929  *p_have_data = have_data = true;
5930  continue;
5931  }
5932  if (DatumGetBool(FunctionCall2Coll(opproc,
5933  collation,
5934  sslot->values[i], tmin)))
5935  {
5936  tmin = sslot->values[i];
5937  found_tmin = true;
5938  }
5939  if (DatumGetBool(FunctionCall2Coll(opproc,
5940  collation,
5941  tmax, sslot->values[i])))
5942  {
5943  tmax = sslot->values[i];
5944  found_tmax = true;
5945  }
5946  }
5947 
5948  /*
5949  * Copy the slot's values, if we found new extreme values.
5950  */
5951  if (found_tmin)
5952  *min = datumCopy(tmin, typByVal, typLen);
5953  if (found_tmax)
5954  *max = datumCopy(tmax, typByVal, typLen);
5955 }
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1120
Oid fn_oid
Definition: fmgr.h:59

References datumCopy(), DatumGetBool(), fmgr_info(), FmgrInfo::fn_oid, FunctionCall2Coll(), i, AttStatsSlot::nvalues, and AttStatsSlot::values.

Referenced by get_variable_range().
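
Since get_stats_slot_range() is static, it is reachable only from within selfuncs.c; the abbreviated sketch below (the helper name fold_slot_minmax is hypothetical) mirrors how get_variable_range() feeds a statistics slot through it, folding the slot's values into a running min/max and freeing the slot afterwards.

#include "postgres.h"
#include "fmgr.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"

static void
fold_slot_minmax(VariableStatData *vardata, int reqkind, Oid sortop,
                 Oid opfuncoid, FmgrInfo *opproc, Oid collation,
                 int16 typLen, bool typByVal,
                 Datum *min, Datum *max, bool *have_data)
{
    AttStatsSlot sslot;

    /* Fetch the requested slot's values array, if the column has one */
    if (get_attstatsslot(&sslot, vardata->statsTuple,
                         reqkind, sortop, ATTSTATSSLOT_VALUES))
    {
        get_stats_slot_range(&sslot, opfuncoid, opproc, collation,
                             typLen, typByVal, min, max, have_data);
        free_attstatsslot(&sslot);
    }
}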

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData *  vardata,
bool *  isdefault 
)

Definition at line 5648 of file selfuncs.c.

5649 {
5650  double stadistinct;
5651  double stanullfrac = 0.0;
5652  double ntuples;
5653 
5654  *isdefault = false;
5655 
5656  /*
5657  * Determine the stadistinct value to use. There are cases where we can
5658  * get an estimate even without a pg_statistic entry, or can get a better
5659  * value than is in pg_statistic. Grab stanullfrac too if we can find it
5660  * (otherwise, assume no nulls, for lack of any better idea).
5661  */
5662  if (HeapTupleIsValid(vardata->statsTuple))
5663  {
5664  /* Use the pg_statistic entry */
5665  Form_pg_statistic stats;
5666 
5667  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5668  stadistinct = stats->stadistinct;
5669  stanullfrac = stats->stanullfrac;
5670  }
5671  else if (vardata->vartype == BOOLOID)
5672  {
5673  /*
5674  * Special-case boolean columns: presumably, two distinct values.
5675  *
5676  * Are there any other datatypes we should wire in special estimates
5677  * for?
5678  */
5679  stadistinct = 2.0;
5680  }
5681  else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5682  {
5683  /*
5684  * If the Var represents a column of a VALUES RTE, assume it's unique.
5685  * This could of course be very wrong, but it should tend to be true
5686  * in well-written queries. We could consider examining the VALUES'
5687  * contents to get some real statistics; but that only works if the
5688  * entries are all constants, and it would be pretty expensive anyway.
5689  */
5690  stadistinct = -1.0; /* unique (and all non null) */
5691  }
5692  else
5693  {
5694  /*
5695  * We don't keep statistics for system columns, but in some cases we
5696  * can infer distinctness anyway.
5697  */
5698  if (vardata->var && IsA(vardata->var, Var))
5699  {
5700  switch (((Var *) vardata->var)->varattno)
5701  {
 5702  case SelfItemPointerAttributeNumber:
 5703  stadistinct = -1.0; /* unique (and all non null) */
5704  break;
 5705  case TableOidAttributeNumber:
 5706  stadistinct = 1.0; /* only 1 value */
5707  break;
5708  default:
5709  stadistinct = 0.0; /* means "unknown" */
5710  break;
5711  }
5712  }
5713  else
5714  stadistinct = 0.0; /* means "unknown" */
5715 
5716  /*
5717  * XXX consider using estimate_num_groups on expressions?
5718  */
5719  }
5720 
5721  /*
5722  * If there is a unique index or DISTINCT clause for the variable, assume
5723  * it is unique no matter what pg_statistic says; the statistics could be
5724  * out of date, or we might have found a partial unique index that proves
5725  * the var is unique for this query. However, we'd better still believe
5726  * the null-fraction statistic.
5727  */
5728  if (vardata->isunique)
5729  stadistinct = -1.0 * (1.0 - stanullfrac);
5730 
5731  /*
5732  * If we had an absolute estimate, use that.
5733  */
5734  if (stadistinct > 0.0)
5735  return clamp_row_est(stadistinct);
5736 
5737  /*
5738  * Otherwise we need to get the relation size; punt if not available.
5739  */
5740  if (vardata->rel == NULL)
5741  {
5742  *isdefault = true;
5743  return DEFAULT_NUM_DISTINCT;
5744  }
5745  ntuples = vardata->rel->tuples;
5746  if (ntuples <= 0.0)
5747  {
5748  *isdefault = true;
5749  return DEFAULT_NUM_DISTINCT;
5750  }
5751 
5752  /*
5753  * If we had a relative estimate, use that.
5754  */
5755  if (stadistinct < 0.0)
5756  return clamp_row_est(-stadistinct * ntuples);
5757 
5758  /*
5759  * With no data, estimate ndistinct = ntuples if the table is small, else
5760  * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5761  * that the behavior isn't discontinuous.
5762  */
5763  if (ntuples < DEFAULT_NUM_DISTINCT)
5764  return clamp_row_est(ntuples);
5765 
5766  *isdefault = true;
5767  return DEFAULT_NUM_DISTINCT;
5768 }
@ RTE_VALUES
Definition: parsenodes.h:1019
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:52
RTEKind rtekind
Definition: pathnodes.h:907
#define TableOidAttributeNumber
Definition: sysattr.h:26
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().
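
Minimal usage sketch (hypothetical caller, not from the source): consumers such as eqjoinsel() always fetch the isdefault flag alongside the estimate so they can avoid treating DEFAULT_NUM_DISTINCT as real information.

#include "postgres.h"
#include "utils/selfuncs.h"

static double
sketch_ndistinct(VariableStatData *vardata)
{
    bool        isdefault;
    double      ndistinct = get_variable_numdistinct(vardata, &isdefault);

    if (isdefault)
    {
        /* no usable statistics; the value is just DEFAULT_NUM_DISTINCT */
    }
    return ndistinct;
}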

◆ get_variable_range()

static bool get_variable_range ( PlannerInfo *  root,
VariableStatData *  vardata,
Oid  sortop,
Oid  collation,
Datum *  min,
Datum *  max 
)
static

Definition at line 5781 of file selfuncs.c.

5784 {
5785  Datum tmin = 0;
5786  Datum tmax = 0;
5787  bool have_data = false;
5788  int16 typLen;
5789  bool typByVal;
5790  Oid opfuncoid;
5791  FmgrInfo opproc;
5792  AttStatsSlot sslot;
5793 
5794  /*
5795  * XXX It's very tempting to try to use the actual column min and max, if
5796  * we can get them relatively-cheaply with an index probe. However, since
5797  * this function is called many times during join planning, that could
5798  * have unpleasant effects on planning speed. Need more investigation
5799  * before enabling this.
5800  */
5801 #ifdef NOT_USED
5802  if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
5803  return true;
5804 #endif
5805 
5806  if (!HeapTupleIsValid(vardata->statsTuple))
5807  {
5808  /* no stats available, so default result */
5809  return false;
5810  }
5811 
5812  /*
5813  * If we can't apply the sortop to the stats data, just fail. In
5814  * principle, if there's a histogram and no MCVs, we could return the
5815  * histogram endpoints without ever applying the sortop ... but it's
5816  * probably not worth trying, because whatever the caller wants to do with
5817  * the endpoints would likely fail the security check too.
5818  */
5819  if (!statistic_proc_security_check(vardata,
5820  (opfuncoid = get_opcode(sortop))))
5821  return false;
5822 
5823  opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
5824 
5825