PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
selfuncs.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <float.h>
#include <math.h>
#include "access/brin.h"
#include "access/gin.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/predtest.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
#include "utils/typcache.h"
#include "utils/varlena.h"
Include dependency graph for selfuncs.c:

Go to the source code of this file.

Data Structures

struct  GroupVarInfo
 
struct  GinQualCounts
 

Macros

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */
 
#define CHAR_RANGE_SEL   0.25
 
#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */
 
#define FULL_WILDCARD_SEL   5.0
 
#define PARTIAL_WILDCARD_SEL   2.0
 

Functions

static double var_eq_const (VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft)
 
static double var_eq_non_const (VariableStatData *vardata, Oid operator, Node *other, bool varonleft)
 
static double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype)
 
static double eqjoinsel_inner (Oid operator, VariableStatData *vardata1, VariableStatData *vardata2)
 
static double eqjoinsel_semi (Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel)
 
static bool estimate_multivariate_ndistinct (PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
 
static bool convert_to_scalar (Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
 
static double convert_numeric_to_scalar (Datum value, Oid typid)
 
static void convert_string_to_scalar (char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
 
static void convert_bytea_to_scalar (Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
 
static double convert_one_string_to_scalar (char *value, int rangelo, int rangehi)
 
static double convert_one_bytea_to_scalar (unsigned char *value, int valuelen, int rangelo, int rangehi)
 
static char * convert_string_datum (Datum value, Oid typid)
 
static double convert_timevalue_to_scalar (Datum value, Oid typid)
 
static void examine_simple_variable (PlannerInfo *root, Var *var, VariableStatData *vardata)
 
static bool get_variable_range (PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
 
static bool get_actual_variable_range (PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max)
 
static RelOptInfofind_join_input_rel (PlannerInfo *root, Relids relids)
 
static Selectivity prefix_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon)
 
static Selectivity like_selectivity (const char *patt, int pattlen, bool case_insensitive)
 
static Selectivity regex_selectivity (const char *patt, int pattlen, bool case_insensitive, int fixed_prefix_len)
 
static Datum string_to_datum (const char *str, Oid datatype)
 
static Conststring_to_const (const char *str, Oid datatype)
 
static Conststring_to_bytea_const (const char *str, size_t str_len)
 
static Listadd_predicate_to_quals (IndexOptInfo *index, List *indexQuals)
 
Datum eqsel (PG_FUNCTION_ARGS)
 
Datum neqsel (PG_FUNCTION_ARGS)
 
static double scalarineqsel (PlannerInfo *root, Oid operator, bool isgt, VariableStatData *vardata, Datum constval, Oid consttype)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
Datum scalarltsel (PG_FUNCTION_ARGS)
 
Datum scalargtsel (PG_FUNCTION_ARGS)
 
static double patternsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqsel (PG_FUNCTION_ARGS)
 
Datum icregexeqsel (PG_FUNCTION_ARGS)
 
Datum likesel (PG_FUNCTION_ARGS)
 
Datum iclikesel (PG_FUNCTION_ARGS)
 
Datum regexnesel (PG_FUNCTION_ARGS)
 
Datum icregexnesel (PG_FUNCTION_ARGS)
 
Datum nlikesel (PG_FUNCTION_ARGS)
 
Datum icnlikesel (PG_FUNCTION_ARGS)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
static Nodestrip_array_coercion (Node *node)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
int estimate_array_length (Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Datum eqjoinsel (PG_FUNCTION_ARGS)
 
Datum neqjoinsel (PG_FUNCTION_ARGS)
 
Datum scalarltjoinsel (PG_FUNCTION_ARGS)
 
Datum scalargtjoinsel (PG_FUNCTION_ARGS)
 
static double patternjoinsel (PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
 
Datum regexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum icregexeqjoinsel (PG_FUNCTION_ARGS)
 
Datum likejoinsel (PG_FUNCTION_ARGS)
 
Datum iclikejoinsel (PG_FUNCTION_ARGS)
 
Datum regexnejoinsel (PG_FUNCTION_ARGS)
 
Datum icregexnejoinsel (PG_FUNCTION_ARGS)
 
Datum nlikejoinsel (PG_FUNCTION_ARGS)
 
Datum icnlikejoinsel (PG_FUNCTION_ARGS)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
static Listadd_unique_group_var (PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset)
 
Selectivity estimate_hash_bucketsize (PlannerInfo *root, Node *hashkey, double nbuckets)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
static int pattern_char_isalpha (char c, bool is_multibyte, pg_locale_t locale, bool locale_is_c)
 
static Pattern_Prefix_Status like_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
static Pattern_Prefix_Status regex_fixed_prefix (Const *patt_const, bool case_insensitive, Oid collation, Const **prefix_const, Selectivity *rest_selec)
 
Pattern_Prefix_Status pattern_fixed_prefix (Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, Selectivity *rest_selec)
 
static Selectivity regex_selectivity_sub (const char *patt, int pattlen, bool case_insensitive)
 
static bool byte_increment (unsigned char *ptr, int len)
 
Constmake_greater_string (const Const *str_const, FmgrInfo *ltproc, Oid collation)
 
Listdeconstruct_indexquals (IndexPath *path)
 
static Cost other_operands_eval_cost (PlannerInfo *root, List *qinfos)
 
static Cost orderby_operands_eval_cost (PlannerInfo *root, IndexPath *path)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, List *qinfos, GenericCosts *costs)
 
void btcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void hashcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gistcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void spgcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
static bool gincost_pattern (IndexOptInfo *index, int indexcol, Oid clause_op, Datum query, GinQualCounts *counts)
 
static bool gincost_opexpr (PlannerInfo *root, IndexOptInfo *index, IndexQualInfo *qinfo, GinQualCounts *counts)
 
static bool gincost_scalararrayopexpr (PlannerInfo *root, IndexOptInfo *index, IndexQualInfo *qinfo, double numIndexEntries, GinQualCounts *counts)
 
void gincostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void brincostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 

Variables

get_relation_stats_hook_type get_relation_stats_hook = NULL
 
get_index_stats_hook_type get_index_stats_hook = NULL
 

Macro Definition Documentation

#define ANY_CHAR_SEL   0.9 /* not 1, since it won't match end-of-string */

Definition at line 5792 of file selfuncs.c.

Referenced by like_selectivity(), and regex_selectivity_sub().

#define CHAR_RANGE_SEL   0.25

Definition at line 5791 of file selfuncs.c.

Referenced by regex_selectivity_sub().

#define FIXED_CHAR_SEL   0.20 /* about 1/5 */

Definition at line 5790 of file selfuncs.c.

Referenced by like_selectivity(), regex_selectivity(), and regex_selectivity_sub().

#define FULL_WILDCARD_SEL   5.0

Definition at line 5793 of file selfuncs.c.

Referenced by like_selectivity(), and regex_selectivity().

#define PARTIAL_WILDCARD_SEL   2.0

Definition at line 5794 of file selfuncs.c.

Referenced by regex_selectivity_sub().

Function Documentation

static List * add_predicate_to_quals ( IndexOptInfo index,
List indexQuals 
)
static

Definition at line 6605 of file selfuncs.c.

References IndexOptInfo::indpred, lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), and genericcostestimate().

6606 {
6607  List *predExtraQuals = NIL;
6608  ListCell *lc;
6609 
6610  if (index->indpred == NIL)
6611  return indexQuals;
6612 
6613  foreach(lc, index->indpred)
6614  {
6615  Node *predQual = (Node *) lfirst(lc);
6616  List *oneQual = list_make1(predQual);
6617 
6618  if (!predicate_implied_by(oneQual, indexQuals))
6619  predExtraQuals = list_concat(predExtraQuals, oneQual);
6620  }
6621  /* list_concat avoids modifying the passed-in indexQuals list */
6622  return list_concat(predExtraQuals, indexQuals);
6623 }
#define NIL
Definition: pg_list.h:69
bool predicate_implied_by(List *predicate_list, List *restrictinfo_list)
Definition: predtest.c:128
Definition: nodes.h:509
List * list_concat(List *list1, List *list2)
Definition: list.c:321
#define list_make1(x1)
Definition: pg_list.h:139
#define lfirst(lc)
Definition: pg_list.h:106
List * indpred
Definition: relation.h:653
Definition: pg_list.h:45
static List* add_unique_group_var ( PlannerInfo root,
List varinfos,
Node var,
VariableStatData vardata 
)
static

Definition at line 3160 of file selfuncs.c.

References equal(), exprs_known_equal(), get_variable_numdistinct(), lappend(), lfirst, list_delete_ptr(), list_head(), lnext, GroupVarInfo::ndistinct, palloc(), VariableStatData::rel, GroupVarInfo::rel, and GroupVarInfo::var.

Referenced by estimate_num_groups().

3162 {
3163  GroupVarInfo *varinfo;
3164  double ndistinct;
3165  bool isdefault;
3166  ListCell *lc;
3167 
3168  ndistinct = get_variable_numdistinct(vardata, &isdefault);
3169 
3170  /* cannot use foreach here because of possible list_delete */
3171  lc = list_head(varinfos);
3172  while (lc)
3173  {
3174  varinfo = (GroupVarInfo *) lfirst(lc);
3175 
3176  /* must advance lc before list_delete possibly pfree's it */
3177  lc = lnext(lc);
3178 
3179  /* Drop exact duplicates */
3180  if (equal(var, varinfo->var))
3181  return varinfos;
3182 
3183  /*
3184  * Drop known-equal vars, but only if they belong to different
3185  * relations (see comments for estimate_num_groups)
3186  */
3187  if (vardata->rel != varinfo->rel &&
3188  exprs_known_equal(root, var, varinfo->var))
3189  {
3190  if (varinfo->ndistinct <= ndistinct)
3191  {
3192  /* Keep older item, forget new one */
3193  return varinfos;
3194  }
3195  else
3196  {
3197  /* Delete the older item */
3198  varinfos = list_delete_ptr(varinfos, varinfo);
3199  }
3200  }
3201  }
3202 
3203  varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3204 
3205  varinfo->var = var;
3206  varinfo->rel = vardata->rel;
3207  varinfo->ndistinct = ndistinct;
3208  varinfos = lappend(varinfos, varinfo);
3209  return varinfos;
3210 }
bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
Definition: equivclass.c:1944
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:2961
RelOptInfo * rel
Definition: selfuncs.h:70
List * list_delete_ptr(List *list, void *datum)
Definition: list.c:590
double ndistinct
Definition: selfuncs.c:3156
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4909
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
Node * var
Definition: selfuncs.c:3154
#define lnext(lc)
Definition: pg_list.h:105
List * lappend(List *list, void *datum)
Definition: list.c:128
#define lfirst(lc)
Definition: pg_list.h:106
void * palloc(Size size)
Definition: mcxt.c:849
RelOptInfo * rel
Definition: selfuncs.c:3155
Selectivity booltestsel ( PlannerInfo root,
BoolTestType  booltesttype,
Node arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo 
)

Definition at line 1500 of file selfuncs.c.

References VariableStatData::atttype, VariableStatData::atttypmod, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, NULL, ReleaseVariableStats, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and values.

Referenced by clause_selectivity().

1502 {
1503  VariableStatData vardata;
1504  double selec;
1505 
1506  examine_variable(root, arg, varRelid, &vardata);
1507 
1508  if (HeapTupleIsValid(vardata.statsTuple))
1509  {
1510  Form_pg_statistic stats;
1511  double freq_null;
1512  Datum *values;
1513  int nvalues;
1514  float4 *numbers;
1515  int nnumbers;
1516 
1517  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1518  freq_null = stats->stanullfrac;
1519 
1520  if (get_attstatsslot(vardata.statsTuple,
1521  vardata.atttype, vardata.atttypmod,
1523  NULL,
1524  &values, &nvalues,
1525  &numbers, &nnumbers)
1526  && nnumbers > 0)
1527  {
1528  double freq_true;
1529  double freq_false;
1530 
1531  /*
1532  * Get first MCV frequency and derive frequency for true.
1533  */
1534  if (DatumGetBool(values[0]))
1535  freq_true = numbers[0];
1536  else
1537  freq_true = 1.0 - numbers[0] - freq_null;
1538 
1539  /*
1540  * Next derive frequency for false. Then use these as appropriate
1541  * to derive frequency for each case.
1542  */
1543  freq_false = 1.0 - freq_true - freq_null;
1544 
1545  switch (booltesttype)
1546  {
1547  case IS_UNKNOWN:
1548  /* select only NULL values */
1549  selec = freq_null;
1550  break;
1551  case IS_NOT_UNKNOWN:
1552  /* select non-NULL values */
1553  selec = 1.0 - freq_null;
1554  break;
1555  case IS_TRUE:
1556  /* select only TRUE values */
1557  selec = freq_true;
1558  break;
1559  case IS_NOT_TRUE:
1560  /* select non-TRUE values */
1561  selec = 1.0 - freq_true;
1562  break;
1563  case IS_FALSE:
1564  /* select only FALSE values */
1565  selec = freq_false;
1566  break;
1567  case IS_NOT_FALSE:
1568  /* select non-FALSE values */
1569  selec = 1.0 - freq_false;
1570  break;
1571  default:
1572  elog(ERROR, "unrecognized booltesttype: %d",
1573  (int) booltesttype);
1574  selec = 0.0; /* Keep compiler quiet */
1575  break;
1576  }
1577 
1578  free_attstatsslot(vardata.atttype, values, nvalues,
1579  numbers, nnumbers);
1580  }
1581  else
1582  {
1583  /*
1584  * No most-common-value info available. Still have null fraction
1585  * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1586  * for null fraction and assume a 50-50 split of TRUE and FALSE.
1587  */
1588  switch (booltesttype)
1589  {
1590  case IS_UNKNOWN:
1591  /* select only NULL values */
1592  selec = freq_null;
1593  break;
1594  case IS_NOT_UNKNOWN:
1595  /* select non-NULL values */
1596  selec = 1.0 - freq_null;
1597  break;
1598  case IS_TRUE:
1599  case IS_FALSE:
1600  /* Assume we select half of the non-NULL values */
1601  selec = (1.0 - freq_null) / 2.0;
1602  break;
1603  case IS_NOT_TRUE:
1604  case IS_NOT_FALSE:
1605  /* Assume we select NULLs plus half of the non-NULLs */
1606  /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1607  selec = (freq_null + 1.0) / 2.0;
1608  break;
1609  default:
1610  elog(ERROR, "unrecognized booltesttype: %d",
1611  (int) booltesttype);
1612  selec = 0.0; /* Keep compiler quiet */
1613  break;
1614  }
1615  }
1616  }
1617  else
1618  {
1619  /*
1620  * If we can't get variable statistics for the argument, perhaps
1621  * clause_selectivity can do something with it. We ignore the
1622  * possibility of a NULL value when using clause_selectivity, and just
1623  * assume the value is either TRUE or FALSE.
1624  */
1625  switch (booltesttype)
1626  {
1627  case IS_UNKNOWN:
1628  selec = DEFAULT_UNK_SEL;
1629  break;
1630  case IS_NOT_UNKNOWN:
1631  selec = DEFAULT_NOT_UNK_SEL;
1632  break;
1633  case IS_TRUE:
1634  case IS_NOT_FALSE:
1635  selec = (double) clause_selectivity(root, arg,
1636  varRelid,
1637  jointype, sjinfo);
1638  break;
1639  case IS_FALSE:
1640  case IS_NOT_TRUE:
1641  selec = 1.0 - (double) clause_selectivity(root, arg,
1642  varRelid,
1643  jointype, sjinfo);
1644  break;
1645  default:
1646  elog(ERROR, "unrecognized booltesttype: %d",
1647  (int) booltesttype);
1648  selec = 0.0; /* Keep compiler quiet */
1649  break;
1650  }
1651  }
1652 
1653  ReleaseVariableStats(vardata);
1654 
1655  /* result should be in range, but make sure... */
1656  CLAMP_PROBABILITY(selec);
1657 
1658  return (Selectivity) selec;
1659 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
HeapTuple statsTuple
Definition: selfuncs.h:71
double Selectivity
Definition: nodes.h:638
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2886
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
int32 atttypmod
Definition: selfuncs.h:76
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:50
#define ERROR
Definition: elog.h:43
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:572
#define DatumGetBool(X)
Definition: postgres.h:399
#define STATISTIC_KIND_MCV
Definition: pg_statistic.h:204
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:49
float float4
Definition: c.h:380
uintptr_t Datum
Definition: postgres.h:372
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4565
#define NULL
Definition: c.h:229
static Datum values[MAXATTR]
Definition: bootstrap.c:163
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
#define elog
Definition: elog.h:219
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:3010
Selectivity boolvarsel ( PlannerInfo root,
Node arg,
int  varRelid 
)

Definition at line 1461 of file selfuncs.c.

References BooleanEqualOperator, BoolGetDatum, examine_variable(), HeapTupleIsValid, is_funcclause, ReleaseVariableStats, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity().

1462 {
1463  VariableStatData vardata;
1464  double selec;
1465 
1466  examine_variable(root, arg, varRelid, &vardata);
1467  if (HeapTupleIsValid(vardata.statsTuple))
1468  {
1469  /*
1470  * A boolean variable V is equivalent to the clause V = 't', so we
1471  * compute the selectivity as if that is what we have.
1472  */
1473  selec = var_eq_const(&vardata, BooleanEqualOperator,
1474  BoolGetDatum(true), false, true);
1475  }
1476  else if (is_funcclause(arg))
1477  {
1478  /*
1479  * If we have no stats and it's a function call, estimate 0.3333333.
1480  * This seems a pretty unprincipled choice, but Postgres has been
1481  * using that estimate for function calls since 1992. The hoariness
1482  * of this behavior suggests that we should not be in too much hurry
1483  * to use another value.
1484  */
1485  selec = 0.3333333;
1486  }
1487  else
1488  {
1489  /* Otherwise, the default estimate is 0.5 */
1490  selec = 0.5;
1491  }
1492  ReleaseVariableStats(vardata);
1493  return selec;
1494 }
HeapTuple statsTuple
Definition: selfuncs.h:71
#define is_funcclause(clause)
Definition: clauses.h:21
#define BooleanEqualOperator
Definition: pg_operator.h:114
static double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft)
Definition: selfuncs.c:270
#define BoolGetDatum(X)
Definition: postgres.h:408
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4565
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
void brincostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
Cost indexStartupCost,
Cost indexTotalCost,
Selectivity indexSelectivity,
double *  indexCorrelation,
double *  indexPages 
)

Definition at line 7711 of file selfuncs.c.

References Abs, AccessShareLock, Assert, BoolGetDatum, brinGetStats(), CLAMP_PROBABILITY, clauselist_selectivity(), cpu_operator_cost, deconstruct_indexquals(), elog, ERROR, free_attstatsslot(), VariableStatData::freefunc, get_attstatsslot(), get_index_stats_hook, get_relation_stats_hook, get_tablespace_page_costs(), HeapTupleIsValid, index_close(), index_open(), IndexQualInfo::indexcol, IndexPath::indexinfo, IndexOptInfo::indexkeys, IndexOptInfo::indexoid, IndexPath::indexquals, Int16GetDatum, InvalidOid, JOIN_INNER, lfirst, Max, Min, NULL, ObjectIdGetDatum, orderby_operands_eval_cost(), other_operands_eval_cost(), RelOptInfo::pages, IndexOptInfo::pages, BrinStatsData::pagesPerRange, planner_rt_fetch, IndexOptInfo::rel, ReleaseSysCache(), ReleaseVariableStats, RelOptInfo::relid, IndexOptInfo::reltablespace, BrinStatsData::revmapNumPages, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3, STATISTIC_KIND_CORRELATION, STATRELATTINH, and VariableStatData::statsTuple.

Referenced by brinhandler().

7715 {
7716  IndexOptInfo *index = path->indexinfo;
7717  List *indexQuals = path->indexquals;
7718  double numPages = index->pages;
7719  RelOptInfo *baserel = index->rel;
7720  RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
7721  List *qinfos;
7722  Cost spc_seq_page_cost;
7723  Cost spc_random_page_cost;
7724  double qual_arg_cost;
7725  double qualSelectivity;
7726  BrinStatsData statsData;
7727  double indexRanges;
7728  double minimalRanges;
7729  double estimatedRanges;
7730  double selec;
7731  Relation indexRel;
7732  ListCell *l;
7733  VariableStatData vardata;
7734 
7735  Assert(rte->rtekind == RTE_RELATION);
7736 
7737  /* fetch estimated page cost for the tablespace containing the index */
7739  &spc_random_page_cost,
7740  &spc_seq_page_cost);
7741 
7742  /*
7743  * Obtain some data from the index itself.
7744  */
7745  indexRel = index_open(index->indexoid, AccessShareLock);
7746  brinGetStats(indexRel, &statsData);
7747  index_close(indexRel, AccessShareLock);
7748 
7749  /*
7750  * Compute index correlation
7751  *
7752  * Because we can use all index quals equally when scanning, we can use
7753  * the largest correlation (in absolute value) among columns used by the
7754  * query. Start at zero, the worst possible case. If we cannot find
7755  * any correlation statistics, we will keep it as 0.
7756  */
7757  *indexCorrelation = 0;
7758 
7759  qinfos = deconstruct_indexquals(path);
7760  foreach(l, qinfos)
7761  {
7762  IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
7763  AttrNumber attnum = index->indexkeys[qinfo->indexcol];
7764 
7765  /* attempt to lookup stats in relation for this index column */
7766  if (attnum != 0)
7767  {
7768  /* Simple variable -- look to stats for the underlying table */
7770  (*get_relation_stats_hook) (root, rte, attnum, &vardata))
7771  {
7772  /*
7773  * The hook took control of acquiring a stats tuple. If it
7774  * did supply a tuple, it'd better have supplied a freefunc.
7775  */
7776  if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
7777  elog(ERROR,
7778  "no function provided to release variable stats with");
7779  }
7780  else
7781  {
7782  vardata.statsTuple =
7784  ObjectIdGetDatum(rte->relid),
7785  Int16GetDatum(attnum),
7786  BoolGetDatum(false));
7787  vardata.freefunc = ReleaseSysCache;
7788  }
7789  }
7790  else
7791  {
7792  /*
7793  * Looks like we've found an expression column in the index. Let's
7794  * see if there's any stats for it.
7795  */
7796 
7797  /* get the attnum from the 0-based index. */
7798  attnum = qinfo->indexcol + 1;
7799 
7800  if (get_index_stats_hook &&
7801  (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
7802  {
7803  /*
7804  * The hook took control of acquiring a stats tuple. If it did
7805  * supply a tuple, it'd better have supplied a freefunc.
7806  */
7807  if (HeapTupleIsValid(vardata.statsTuple) &&
7808  !vardata.freefunc)
7809  elog(ERROR, "no function provided to release variable stats with");
7810  }
7811  else
7812  {
7814  ObjectIdGetDatum(index->indexoid),
7815  Int16GetDatum(attnum),
7816  BoolGetDatum(false));
7817  vardata.freefunc = ReleaseSysCache;
7818  }
7819  }
7820 
7821  if (HeapTupleIsValid(vardata.statsTuple))
7822  {
7823  float4 *numbers;
7824  int nnumbers;
7825 
7826  if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
7828  InvalidOid,
7829  NULL,
7830  NULL, NULL,
7831  &numbers, &nnumbers))
7832  {
7833  double varCorrelation = 0.0;
7834 
7835  if (nnumbers > 0)
7836  varCorrelation = Abs(numbers[0]);
7837 
7838  if (varCorrelation > *indexCorrelation)
7839  *indexCorrelation = varCorrelation;
7840 
7841  free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
7842  }
7843  }
7844 
7845  ReleaseVariableStats(vardata);
7846  }
7847 
7848  qualSelectivity = clauselist_selectivity(root, indexQuals,
7849  baserel->relid,
7850  JOIN_INNER, NULL);
7851 
7852  /* work out the actual number of ranges in the index */
7853  indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
7854  1.0);
7855 
7856  /*
7857  * Now calculate the minimum possible ranges we could match with if all of
7858  * the rows were in the perfect order in the table's heap.
7859  */
7860  minimalRanges = ceil(indexRanges * qualSelectivity);
7861 
7862  /*
7863  * Now estimate the number of ranges that we'll touch by using the
7864  * indexCorrelation from the stats. Careful not to divide by zero
7865  * (note we're using the absolute value of the correlation).
7866  */
7867  if (*indexCorrelation < 1.0e-10)
7868  estimatedRanges = indexRanges;
7869  else
7870  estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
7871 
7872  /* we expect to visit this portion of the table */
7873  selec = estimatedRanges / indexRanges;
7874 
7875  CLAMP_PROBABILITY(selec);
7876 
7877  *indexSelectivity = selec;
7878 
7879  /*
7880  * Compute the index qual costs, much as in genericcostestimate, to add
7881  * to the index costs.
7882  */
7883  qual_arg_cost = other_operands_eval_cost(root, qinfos) +
7884  orderby_operands_eval_cost(root, path);
7885 
7886  /*
7887  * Compute the startup cost as the cost to read the whole revmap
7888  * sequentially, including the cost to execute the index quals.
7889  */
7890  *indexStartupCost =
7891  spc_seq_page_cost * statsData.revmapNumPages * loop_count;
7892  *indexStartupCost += qual_arg_cost;
7893 
7894  /*
7895  * To read a BRIN index there might be a bit of back and forth over
7896  * regular pages, as revmap might point to them out of sequential order;
7897  * calculate the total cost as reading the whole index in random order.
7898  */
7899  *indexTotalCost = *indexStartupCost +
7900  spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
7901 
7902  /*
7903  * Charge a small amount per range tuple which we expect to match to. This
7904  * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
7905  * will set a bit for each page in the range when we find a matching
7906  * range, so we must multiply the charge by the number of pages in the
7907  * range.
7908  */
7909  *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
7910  statsData.pagesPerRange;
7911 
7912  *indexPages = index->pages;
7913 }
IndexOptInfo * indexinfo
Definition: relation.h:1030
HeapTuple statsTuple
Definition: selfuncs.h:71
#define Min(x, y)
Definition: c.h:806
#define Int16GetDatum(X)
Definition: postgres.h:457
#define AccessShareLock
Definition: lockdefs.h:36
Oid reltablespace
Definition: relation.h:631
static Cost other_operands_eval_cost(PlannerInfo *root, List *qinfos)
Definition: selfuncs.c:6332
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2886
List * deconstruct_indexquals(IndexPath *path)
Definition: selfuncs.c:6237
static Cost orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
Definition: selfuncs.c:6357
#define Abs(x)
Definition: c.h:812
Definition: type.h:90
BlockNumber pages
Definition: relation.h:635
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
List * indexquals
Definition: relation.h:1032
RelOptInfo * rel
Definition: relation.h:632
#define planner_rt_fetch(rti, root)
Definition: relation.h:324
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
#define STATISTIC_KIND_CORRELATION
Definition: pg_statistic.h:233
double cpu_operator_cost
Definition: costsize.c:108
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:152
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
Index relid
Definition: relation.h:552
float float4
Definition: c.h:380
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1116
#define BoolGetDatum(X)
Definition: postgres.h:408
#define InvalidOid
Definition: postgres_ext.h:36
BlockNumber pagesPerRange
Definition: brin.h:34
#define Max(x, y)
Definition: c.h:800
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
BlockNumber pages
Definition: relation.h:563
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:153
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:176
RTEKind rtekind
Definition: parsenodes.h:928
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
e
Definition: preproc-init.c:82
#define SearchSysCache3(cacheId, key1, key2, key3)
Definition: syscache.h:156
int * indexkeys
Definition: relation.h:641
#define elog
Definition: elog.h:219
Oid indexoid
Definition: relation.h:630
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:99
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:73
Definition: pg_list.h:45
int16 AttrNumber
Definition: attnum.h:21
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:151
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:3010
double Cost
Definition: nodes.h:639
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1062
BlockNumber revmapNumPages
Definition: brin.h:35
void btcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
Cost indexStartupCost,
Cost indexTotalCost,
Selectivity indexSelectivity,
double *  indexCorrelation,
double *  indexPages 
)

Definition at line 6627 of file selfuncs.c.

References add_predicate_to_quals(), Assert, BoolGetDatum, BTEqualStrategyNumber, BTLessStrategyNumber, RestrictInfo::clause, IndexQualInfo::clause_op, clauselist_selectivity(), cpu_operator_cost, deconstruct_indexquals(), elog, ERROR, estimate_array_length(), free_attstatsslot(), VariableStatData::freefunc, genericcostestimate(), get_attstatsslot(), get_index_stats_hook, get_op_opfamily_strategy(), get_opfamily_member(), get_relation_stats_hook, HeapTupleIsValid, IndexQualInfo::indexcol, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexOptInfo::indexkeys, IndexOptInfo::indexoid, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, RangeTblEntry::inh, Int16GetDatum, InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), lfirst, MemSet, IndexOptInfo::ncolumns, NIL, NULL, NullTest::nulltesttype, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, ObjectIdGetDatum, OidIsValid, IndexOptInfo::opcintype, IndexOptInfo::opfamily, IndexQualInfo::other_operand, planner_rt_fetch, IndexOptInfo::rel, ReleaseSysCache(), ReleaseVariableStats, RelOptInfo::relid, RangeTblEntry::relid, IndexOptInfo::reverse_sort, IndexQualInfo::rinfo, rint(), RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3, STATISTIC_KIND_CORRELATION, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::tree_height, RelOptInfo::tuples, IndexOptInfo::tuples, and IndexOptInfo::unique.

Referenced by bthandler().

6631 {
6632  IndexOptInfo *index = path->indexinfo;
6633  List *qinfos;
6634  GenericCosts costs;
6635  Oid relid;
6636  AttrNumber colnum;
6637  VariableStatData vardata;
6638  double numIndexTuples;
6639  Cost descentCost;
6640  List *indexBoundQuals;
6641  int indexcol;
6642  bool eqQualHere;
6643  bool found_saop;
6644  bool found_is_null_op;
6645  double num_sa_scans;
6646  ListCell *lc;
6647 
6648  /* Do preliminary analysis of indexquals */
6649  qinfos = deconstruct_indexquals(path);
6650 
6651  /*
6652  * For a btree scan, only leading '=' quals plus inequality quals for the
6653  * immediately next attribute contribute to index selectivity (these are
6654  * the "boundary quals" that determine the starting and stopping points of
6655  * the index scan). Additional quals can suppress visits to the heap, so
6656  * it's OK to count them in indexSelectivity, but they should not count
6657  * for estimating numIndexTuples. So we must examine the given indexquals
6658  * to find out which ones count as boundary quals. We rely on the
6659  * knowledge that they are given in index column order.
6660  *
6661  * For a RowCompareExpr, we consider only the first column, just as
6662  * rowcomparesel() does.
6663  *
6664  * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
6665  * index scans not one, but the ScalarArrayOpExpr's operator can be
6666  * considered to act the same as it normally does.
6667  */
6668  indexBoundQuals = NIL;
6669  indexcol = 0;
6670  eqQualHere = false;
6671  found_saop = false;
6672  found_is_null_op = false;
6673  num_sa_scans = 1;
6674  foreach(lc, qinfos)
6675  {
6676  IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6677  RestrictInfo *rinfo = qinfo->rinfo;
6678  Expr *clause = rinfo->clause;
6679  Oid clause_op;
6680  int op_strategy;
6681 
6682  if (indexcol != qinfo->indexcol)
6683  {
6684  /* Beginning of a new column's quals */
6685  if (!eqQualHere)
6686  break; /* done if no '=' qual for indexcol */
6687  eqQualHere = false;
6688  indexcol++;
6689  if (indexcol != qinfo->indexcol)
6690  break; /* no quals at all for indexcol */
6691  }
6692 
6693  if (IsA(clause, ScalarArrayOpExpr))
6694  {
6695  int alength = estimate_array_length(qinfo->other_operand);
6696 
6697  found_saop = true;
6698  /* count up number of SA scans induced by indexBoundQuals only */
6699  if (alength > 1)
6700  num_sa_scans *= alength;
6701  }
6702  else if (IsA(clause, NullTest))
6703  {
6704  NullTest *nt = (NullTest *) clause;
6705 
6706  if (nt->nulltesttype == IS_NULL)
6707  {
6708  found_is_null_op = true;
6709  /* IS NULL is like = for selectivity determination purposes */
6710  eqQualHere = true;
6711  }
6712  }
6713 
6714  /*
6715  * We would need to commute the clause_op if not varonleft, except
6716  * that we only care if it's equality or not, so that refinement is
6717  * unnecessary.
6718  */
6719  clause_op = qinfo->clause_op;
6720 
6721  /* check for equality operator */
6722  if (OidIsValid(clause_op))
6723  {
6724  op_strategy = get_op_opfamily_strategy(clause_op,
6725  index->opfamily[indexcol]);
6726  Assert(op_strategy != 0); /* not a member of opfamily?? */
6727  if (op_strategy == BTEqualStrategyNumber)
6728  eqQualHere = true;
6729  }
6730 
6731  indexBoundQuals = lappend(indexBoundQuals, rinfo);
6732  }
6733 
6734  /*
6735  * If index is unique and we found an '=' clause for each column, we can
6736  * just assume numIndexTuples = 1 and skip the expensive
6737  * clauselist_selectivity calculations. However, a ScalarArrayOp or
6738  * NullTest invalidates that theory, even though it sets eqQualHere.
6739  */
6740  if (index->unique &&
6741  indexcol == index->ncolumns - 1 &&
6742  eqQualHere &&
6743  !found_saop &&
6744  !found_is_null_op)
6745  numIndexTuples = 1.0;
6746  else
6747  {
6748  List *selectivityQuals;
6749  Selectivity btreeSelectivity;
6750 
6751  /*
6752  * If the index is partial, AND the index predicate with the
6753  * index-bound quals to produce a more accurate idea of the number of
6754  * rows covered by the bound conditions.
6755  */
6756  selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
6757 
6758  btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6759  index->rel->relid,
6760  JOIN_INNER,
6761  NULL);
6762  numIndexTuples = btreeSelectivity * index->rel->tuples;
6763 
6764  /*
6765  * As in genericcostestimate(), we have to adjust for any
6766  * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6767  * to integer.
6768  */
6769  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6770  }
6771 
6772  /*
6773  * Now do generic index cost estimation.
6774  */
6775  MemSet(&costs, 0, sizeof(costs));
6776  costs.numIndexTuples = numIndexTuples;
6777 
6778  genericcostestimate(root, path, loop_count, qinfos, &costs);
6779 
6780  /*
6781  * Add a CPU-cost component to represent the costs of initial btree
6782  * descent. We don't charge any I/O cost for touching upper btree levels,
6783  * since they tend to stay in cache, but we still have to do about log2(N)
6784  * comparisons to descend a btree of N leaf tuples. We charge one
6785  * cpu_operator_cost per comparison.
6786  *
6787  * If there are ScalarArrayOpExprs, charge this once per SA scan. The
6788  * ones after the first one are not startup cost so far as the overall
6789  * plan is concerned, so add them only to "total" cost.
6790  */
6791  if (index->tuples > 1) /* avoid computing log(0) */
6792  {
6793  descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
6794  costs.indexStartupCost += descentCost;
6795  costs.indexTotalCost += costs.num_sa_scans * descentCost;
6796  }
6797 
6798  /*
6799  * Even though we're not charging I/O cost for touching upper btree pages,
6800  * it's still reasonable to charge some CPU cost per page descended
6801  * through. Moreover, if we had no such charge at all, bloated indexes
6802  * would appear to have the same search cost as unbloated ones, at least
6803  * in cases where only a single leaf page is expected to be visited. This
6804  * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
6805  * touched. The number of such pages is btree tree height plus one (ie,
6806  * we charge for the leaf page too). As above, charge once per SA scan.
6807  */
6808  descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
6809  costs.indexStartupCost += descentCost;
6810  costs.indexTotalCost += costs.num_sa_scans * descentCost;
6811 
6812  /*
6813  * If we can get an estimate of the first column's ordering correlation C
6814  * from pg_statistic, estimate the index correlation as C for a
6815  * single-column index, or C * 0.75 for multiple columns. (The idea here
6816  * is that multiple columns dilute the importance of the first column's
6817  * ordering, but don't negate it entirely. Before 8.0 we divided the
6818  * correlation by the number of columns, but that seems too strong.)
6819  */
6820  MemSet(&vardata, 0, sizeof(vardata));
6821 
6822  if (index->indexkeys[0] != 0)
6823  {
6824  /* Simple variable --- look to stats for the underlying table */
6825  RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
6826 
6827  Assert(rte->rtekind == RTE_RELATION);
6828  relid = rte->relid;
6829  Assert(relid != InvalidOid);
6830  colnum = index->indexkeys[0];
6831 
6833  (*get_relation_stats_hook) (root, rte, colnum, &vardata))
6834  {
6835  /*
6836  * The hook took control of acquiring a stats tuple. If it did
6837  * supply a tuple, it'd better have supplied a freefunc.
6838  */
6839  if (HeapTupleIsValid(vardata.statsTuple) &&
6840  !vardata.freefunc)
6841  elog(ERROR, "no function provided to release variable stats with");
6842  }
6843  else
6844  {
6846  ObjectIdGetDatum(relid),
6847  Int16GetDatum(colnum),
6848  BoolGetDatum(rte->inh));
6849  vardata.freefunc = ReleaseSysCache;
6850  }
6851  }
6852  else
6853  {
6854  /* Expression --- maybe there are stats for the index itself */
6855  relid = index->indexoid;
6856  colnum = 1;
6857 
6858  if (get_index_stats_hook &&
6859  (*get_index_stats_hook) (root, relid, colnum, &vardata))
6860  {
6861  /*
6862  * The hook took control of acquiring a stats tuple. If it did
6863  * supply a tuple, it'd better have supplied a freefunc.
6864  */
6865  if (HeapTupleIsValid(vardata.statsTuple) &&
6866  !vardata.freefunc)
6867  elog(ERROR, "no function provided to release variable stats with");
6868  }
6869  else
6870  {
6872  ObjectIdGetDatum(relid),
6873  Int16GetDatum(colnum),
6874  BoolGetDatum(false));
6875  vardata.freefunc = ReleaseSysCache;
6876  }
6877  }
6878 
6879  if (HeapTupleIsValid(vardata.statsTuple))
6880  {
6881  Oid sortop;
6882  float4 *numbers;
6883  int nnumbers;
6884 
6885  sortop = get_opfamily_member(index->opfamily[0],
6886  index->opcintype[0],
6887  index->opcintype[0],
6889  if (OidIsValid(sortop) &&
6892  sortop,
6893  NULL,
6894  NULL, NULL,
6895  &numbers, &nnumbers))
6896  {
6897  double varCorrelation;
6898 
6899  Assert(nnumbers == 1);
6900  varCorrelation = numbers[0];
6901 
6902  if (index->reverse_sort[0])
6903  varCorrelation = -varCorrelation;
6904 
6905  if (index->ncolumns > 1)
6906  costs.indexCorrelation = varCorrelation * 0.75;
6907  else
6908  costs.indexCorrelation = varCorrelation;
6909 
6910  free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
6911  }
6912  }
6913 
6914  ReleaseVariableStats(vardata);
6915 
6916  *indexStartupCost = costs.indexStartupCost;
6917  *indexTotalCost = costs.indexTotalCost;
6918  *indexSelectivity = costs.indexSelectivity;
6919  *indexCorrelation = costs.indexCorrelation;
6920  *indexPages = costs.numIndexPages;
6921 }
Selectivity indexSelectivity
Definition: selfuncs.h:130
#define NIL
Definition: pg_list.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
IndexOptInfo * indexinfo
Definition: relation.h:1030
HeapTuple statsTuple
Definition: selfuncs.h:71
double tuples
Definition: relation.h:564
static List * add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6605
#define Int16GetDatum(X)
Definition: postgres.h:457
#define MemSet(start, val, len)
Definition: c.h:857
double Selectivity
Definition: nodes.h:638
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2886
double tuples
Definition: relation.h:636
unsigned int Oid
Definition: postgres_ext.h:31
int tree_height
Definition: relation.h:637
#define OidIsValid(objectId)
Definition: c.h:538
RestrictInfo * rinfo
Definition: selfuncs.h:105
List * deconstruct_indexquals(IndexPath *path)
Definition: selfuncs.c:6237
bool unique
Definition: relation.h:663
Definition: type.h:90
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:2084
RelOptInfo * rel
Definition: relation.h:632
#define planner_rt_fetch(rti, root)
Definition: relation.h:324
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
double num_sa_scans
Definition: selfuncs.h:137
#define STATISTIC_KIND_CORRELATION
Definition: pg_statistic.h:233
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:163
double cpu_operator_cost
Definition: costsize.c:108
Cost indexTotalCost
Definition: selfuncs.h:129
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:152
double rint(double x)
Definition: rint.c:22
int ncolumns
Definition: relation.h:640
Index relid
Definition: relation.h:552
List * lappend(List *list, void *datum)
Definition: list.c:128
Expr * clause
Definition: relation.h:1746
float float4
Definition: c.h:380
double indexCorrelation
Definition: selfuncs.h:131
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1116
NullTestType nulltesttype
Definition: primnodes.h:1180
#define BoolGetDatum(X)
Definition: postgres.h:408
#define InvalidOid
Definition: postgres_ext.h:36
double numIndexTuples
Definition: selfuncs.h:135
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:153
Oid * opcintype
Definition: relation.h:644
Cost indexStartupCost
Definition: selfuncs.h:128
Oid * opfamily
Definition: relation.h:643
RTEKind rtekind
Definition: parsenodes.h:928
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:80
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
Node * other_operand
Definition: selfuncs.h:109
#define SearchSysCache3(cacheId, key1, key2, key3)
Definition: syscache.h:156
int * indexkeys
Definition: relation.h:641
#define elog
Definition: elog.h:219
Oid indexoid
Definition: relation.h:630
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:99
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:73
bool * reverse_sort
Definition: relation.h:646
#define BTLessStrategyNumber
Definition: stratnum.h:29
Definition: pg_list.h:45
int16 AttrNumber
Definition: attnum.h:21
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:3010
#define BTEqualStrategyNumber
Definition: stratnum.h:31
double Cost
Definition: nodes.h:639
void genericcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, List *qinfos, GenericCosts *costs)
Definition: selfuncs.c:6386
double numIndexPages
Definition: selfuncs.h:134
static bool byte_increment ( unsigned char *  ptr,
int  len 
)
static

Definition at line 5961 of file selfuncs.c.

Referenced by make_greater_string().

5962 {
5963  if (*ptr >= 255)
5964  return false;
5965  (*ptr)++;
5966  return true;
5967 }
static void convert_bytea_to_scalar ( Datum  value,
double *  scaledvalue,
Datum  lobound,
double *  scaledlobound,
Datum  hibound,
double *  scaledhibound 
)
static

Definition at line 4282 of file selfuncs.c.

References convert_one_bytea_to_scalar(), DatumGetPointer, i, Min, VARDATA, VARHDRSZ, and VARSIZE.

Referenced by convert_to_scalar().

4288 {
4289  int rangelo,
4290  rangehi,
4291  valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
4292  loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
4293  hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
4294  i,
4295  minlen;
4296  unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
4297  *lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
4298  *histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
4299 
4300  /*
4301  * Assume bytea data is uniformly distributed across all byte values.
4302  */
4303  rangelo = 0;
4304  rangehi = 255;
4305 
4306  /*
4307  * Now strip any common prefix of the three strings.
4308  */
4309  minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4310  for (i = 0; i < minlen; i++)
4311  {
4312  if (*lostr != *histr || *lostr != *valstr)
4313  break;
4314  lostr++, histr++, valstr++;
4315  loboundlen--, hiboundlen--, valuelen--;
4316  }
4317 
4318  /*
4319  * Now we can do the conversions.
4320  */
4321  *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4322  *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4323  *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4324 }
#define VARDATA(PTR)
Definition: postgres.h:303
#define VARSIZE(PTR)
Definition: postgres.h:304
#define VARHDRSZ
Definition: c.h:445
#define Min(x, y)
Definition: c.h:806
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi)
Definition: selfuncs.c:4327
#define DatumGetPointer(X)
Definition: postgres.h:555
int i
static struct @121 value
static double convert_numeric_to_scalar ( Datum  value,
Oid  typid 
)
static

Definition at line 3990 of file selfuncs.c.

References BOOLOID, DatumGetBool, DatumGetFloat4, DatumGetFloat8, DatumGetInt16, DatumGetInt32, DatumGetInt64, DatumGetObjectId, DirectFunctionCall1, elog, ERROR, FLOAT4OID, FLOAT8OID, INT2OID, INT4OID, INT8OID, numeric_float8_no_overflow(), NUMERICOID, OIDOID, REGCLASSOID, REGCONFIGOID, REGDICTIONARYOID, REGNAMESPACEOID, REGOPERATOROID, REGOPEROID, REGPROCEDUREOID, REGPROCOID, REGROLEOID, and REGTYPEOID.

Referenced by convert_to_scalar().

3991 {
3992  switch (typid)
3993  {
3994  case BOOLOID:
3995  return (double) DatumGetBool(value);
3996  case INT2OID:
3997  return (double) DatumGetInt16(value);
3998  case INT4OID:
3999  return (double) DatumGetInt32(value);
4000  case INT8OID:
4001  return (double) DatumGetInt64(value);
4002  case FLOAT4OID:
4003  return (double) DatumGetFloat4(value);
4004  case FLOAT8OID:
4005  return (double) DatumGetFloat8(value);
4006  case NUMERICOID:
4007  /* Note: out-of-range values will be clamped to +-HUGE_VAL */
4008  return (double)
4010  value));
4011  case OIDOID:
4012  case REGPROCOID:
4013  case REGPROCEDUREOID:
4014  case REGOPEROID:
4015  case REGOPERATOROID:
4016  case REGCLASSOID:
4017  case REGTYPEOID:
4018  case REGCONFIGOID:
4019  case REGDICTIONARYOID:
4020  case REGROLEOID:
4021  case REGNAMESPACEOID:
4022  /* we can treat OIDs as integers... */
4023  return (double) DatumGetObjectId(value);
4024  }
4025 
4026  /*
4027  * Can't get here unless someone tries to use scalarltsel/scalargtsel on
4028  * an operator with one numeric and one non-numeric operand.
4029  */
4030  elog(ERROR, "unsupported type: %u", typid);
4031  return 0;
4032 }
#define REGCLASSOID
Definition: pg_type.h:577
#define DatumGetInt32(X)
Definition: postgres.h:478
#define REGROLEOID
Definition: pg_type.h:585
#define OIDOID
Definition: pg_type.h:328
#define NUMERICOID
Definition: pg_type.h:554
#define DatumGetObjectId(X)
Definition: postgres.h:506
#define INT4OID
Definition: pg_type.h:316
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:584
#define REGTYPEOID
Definition: pg_type.h:581
#define REGOPEROID
Definition: pg_type.h:569
#define ERROR
Definition: elog.h:43
#define DatumGetInt64(X)
Definition: postgres.h:613
#define INT2OID
Definition: pg_type.h:308
#define DatumGetInt16(X)
Definition: postgres.h:450
#define DatumGetBool(X)
Definition: postgres.h:399
#define REGDICTIONARYOID
Definition: pg_type.h:627
#define FLOAT4OID
Definition: pg_type.h:416
#define DatumGetFloat8(X)
Definition: postgres.h:734
#define INT8OID
Definition: pg_type.h:304
#define DatumGetFloat4(X)
Definition: postgres.h:686
Datum numeric_float8_no_overflow(PG_FUNCTION_ARGS)
Definition: numeric.c:3118
#define FLOAT8OID
Definition: pg_type.h:419
#define BOOLOID
Definition: pg_type.h:288
#define REGCONFIGOID
Definition: pg_type.h:624
#define elog
Definition: elog.h:219
static struct @121 value
#define REGPROCEDUREOID
Definition: pg_type.h:565
#define REGNAMESPACEOID
Definition: pg_type.h:589
#define REGOPERATOROID
Definition: pg_type.h:573
#define REGPROCOID
Definition: pg_type.h:320
static double convert_one_bytea_to_scalar ( unsigned char *  value,
int  valuelen,
int  rangelo,
int  rangehi 
)
static

Definition at line 4327 of file selfuncs.c.

Referenced by convert_bytea_to_scalar().

4329 {
4330  double num,
4331  denom,
4332  base;
4333 
4334  if (valuelen <= 0)
4335  return 0.0; /* empty string has scalar value 0 */
4336 
4337  /*
4338  * Since base is 256, need not consider more than about 10 chars (even
4339  * this many seems like overkill)
4340  */
4341  if (valuelen > 10)
4342  valuelen = 10;
4343 
4344  /* Convert initial characters to fraction */
4345  base = rangehi - rangelo + 1;
4346  num = 0.0;
4347  denom = base;
4348  while (valuelen-- > 0)
4349  {
4350  int ch = *value++;
4351 
4352  if (ch < rangelo)
4353  ch = rangelo - 1;
4354  else if (ch > rangehi)
4355  ch = rangehi + 1;
4356  num += ((double) (ch - rangelo)) / denom;
4357  denom *= base;
4358  }
4359 
4360  return num;
4361 }
static struct @121 value
static double convert_one_string_to_scalar ( char *  value,
int  rangelo,
int  rangehi 
)
static

Definition at line 4135 of file selfuncs.c.

Referenced by convert_string_to_scalar().

4136 {
4137  int slen = strlen(value);
4138  double num,
4139  denom,
4140  base;
4141 
4142  if (slen <= 0)
4143  return 0.0; /* empty string has scalar value 0 */
4144 
4145  /*
4146  * There seems little point in considering more than a dozen bytes from
4147  * the string. Since base is at least 10, that will give us nominal
4148  * resolution of at least 12 decimal digits, which is surely far more
4149  * precision than this estimation technique has got anyway (especially in
4150  * non-C locales). Also, even with the maximum possible base of 256, this
4151  * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4152  * overflow on any known machine.
4153  */
4154  if (slen > 12)
4155  slen = 12;
4156 
4157  /* Convert initial characters to fraction */
4158  base = rangehi - rangelo + 1;
4159  num = 0.0;
4160  denom = base;
4161  while (slen-- > 0)
4162  {
4163  int ch = (unsigned char) *value++;
4164 
4165  if (ch < rangelo)
4166  ch = rangelo - 1;
4167  else if (ch > rangehi)
4168  ch = rangehi + 1;
4169  num += ((double) (ch - rangelo)) / denom;
4170  denom *= base;
4171  }
4172 
4173  return num;
4174 }
static struct @121 value
static char * convert_string_datum ( Datum  value,
Oid  typid 
)
static

Definition at line 4183 of file selfuncs.c.

References Assert, BPCHAROID, CHAROID, DatumGetChar, DatumGetPointer, DEFAULT_COLLATION_OID, elog, ERROR, lc_collate_is_c(), NAMEOID, NameStr, NULL, palloc(), pfree(), PG_USED_FOR_ASSERTS_ONLY, pstrdup(), TextDatumGetCString, TEXTOID, val, and VARCHAROID.

Referenced by convert_to_scalar().

4184 {
4185  char *val;
4186 
4187  switch (typid)
4188  {
4189  case CHAROID:
4190  val = (char *) palloc(2);
4191  val[0] = DatumGetChar(value);
4192  val[1] = '\0';
4193  break;
4194  case BPCHAROID:
4195  case VARCHAROID:
4196  case TEXTOID:
4197  val = TextDatumGetCString(value);
4198  break;
4199  case NAMEOID:
4200  {
4202 
4203  val = pstrdup(NameStr(*nm));
4204  break;
4205  }
4206  default:
4207 
4208  /*
4209  * Can't get here unless someone tries to use scalarltsel on an
4210  * operator with one string and one non-string operand.
4211  */
4212  elog(ERROR, "unsupported type: %u", typid);
4213  return NULL;
4214  }
4215 
4217  {
4218  char *xfrmstr;
4219  size_t xfrmlen;
4220  size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4221 
4222  /*
4223  * XXX: We could guess at a suitable output buffer size and only call
4224  * strxfrm twice if our guess is too small.
4225  *
4226  * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4227  * bogus data or set an error. This is not really a problem unless it
4228  * crashes since it will only give an estimation error and nothing
4229  * fatal.
4230  */
4231 #if _MSC_VER == 1400 /* VS.Net 2005 */
4232 
4233  /*
4234  *
4235  * http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?
4236  * FeedbackID=99694 */
4237  {
4238  char x[1];
4239 
4240  xfrmlen = strxfrm(x, val, 0);
4241  }
4242 #else
4243  xfrmlen = strxfrm(NULL, val, 0);
4244 #endif
4245 #ifdef WIN32
4246 
4247  /*
4248  * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4249  * of trying to allocate this much memory (and fail), just return the
4250  * original string unmodified as if we were in the C locale.
4251  */
4252  if (xfrmlen == INT_MAX)
4253  return val;
4254 #endif
4255  xfrmstr = (char *) palloc(xfrmlen + 1);
4256  xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4257 
4258  /*
4259  * Some systems (e.g., glibc) can return a smaller value from the
4260  * second call than the first; thus the Assert must be <= not ==.
4261  */
4262  Assert(xfrmlen2 <= xfrmlen);
4263  pfree(val);
4264  val = xfrmstr;
4265  }
4266 
4267  return val;
4268 }
#define BPCHAROID
Definition: pg_type.h:504
#define NAMEOID
Definition: pg_type.h:300
#define TEXTOID
Definition: pg_type.h:324
char * pstrdup(const char *in)
Definition: mcxt.c:1077
void pfree(void *pointer)
Definition: mcxt.c:950
#define ERROR
Definition: elog.h:43
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1128
Definition: c.h:493
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:74
#define VARCHAROID
Definition: pg_type.h:507
#define TextDatumGetCString(d)
Definition: builtins.h:92
#define DatumGetChar(X)
Definition: postgres.h:415
#define CHAROID
Definition: pg_type.h:296
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define DatumGetPointer(X)
Definition: postgres.h:555
void * palloc(Size size)
Definition: mcxt.c:849
#define NameStr(name)
Definition: c.h:499
#define elog
Definition: elog.h:219
static struct @121 value
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:990
long val
Definition: informix.c:689
static void convert_string_to_scalar ( char *  value,
double *  scaledvalue,
char *  lobound,
double *  scaledlobound,
char *  hibound,
double *  scaledhibound 
)
static

Definition at line 4055 of file selfuncs.c.

References convert_one_string_to_scalar().

Referenced by convert_to_scalar().

4061 {
4062  int rangelo,
4063  rangehi;
4064  char *sptr;
4065 
4066  rangelo = rangehi = (unsigned char) hibound[0];
4067  for (sptr = lobound; *sptr; sptr++)
4068  {
4069  if (rangelo > (unsigned char) *sptr)
4070  rangelo = (unsigned char) *sptr;
4071  if (rangehi < (unsigned char) *sptr)
4072  rangehi = (unsigned char) *sptr;
4073  }
4074  for (sptr = hibound; *sptr; sptr++)
4075  {
4076  if (rangelo > (unsigned char) *sptr)
4077  rangelo = (unsigned char) *sptr;
4078  if (rangehi < (unsigned char) *sptr)
4079  rangehi = (unsigned char) *sptr;
4080  }
4081  /* If range includes any upper-case ASCII chars, make it include all */
4082  if (rangelo <= 'Z' && rangehi >= 'A')
4083  {
4084  if (rangelo > 'A')
4085  rangelo = 'A';
4086  if (rangehi < 'Z')
4087  rangehi = 'Z';
4088  }
4089  /* Ditto lower-case */
4090  if (rangelo <= 'z' && rangehi >= 'a')
4091  {
4092  if (rangelo > 'a')
4093  rangelo = 'a';
4094  if (rangehi < 'z')
4095  rangehi = 'z';
4096  }
4097  /* Ditto digits */
4098  if (rangelo <= '9' && rangehi >= '0')
4099  {
4100  if (rangelo > '0')
4101  rangelo = '0';
4102  if (rangehi < '9')
4103  rangehi = '9';
4104  }
4105 
4106  /*
4107  * If range includes less than 10 chars, assume we have not got enough
4108  * data, and make it include regular ASCII set.
4109  */
4110  if (rangehi - rangelo < 9)
4111  {
4112  rangelo = ' ';
4113  rangehi = 127;
4114  }
4115 
4116  /*
4117  * Now strip any common prefix of the three strings.
4118  */
4119  while (*lobound)
4120  {
4121  if (*lobound != *hibound || *lobound != *value)
4122  break;
4123  lobound++, hibound++, value++;
4124  }
4125 
4126  /*
4127  * Now we can do the conversions.
4128  */
4129  *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4130  *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4131  *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4132 }
static double convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
Definition: selfuncs.c:4135
static struct @121 value
static double convert_timevalue_to_scalar ( Datum  value,
Oid  typid 
)
static

Definition at line 4367 of file selfuncs.c.

References abstime_timestamp(), ABSTIMEOID, TimeIntervalData::data, date2timestamp_no_overflow(), DATEOID, DatumGetDateADT, DatumGetIntervalP, DatumGetRelativeTime, DatumGetTimeADT, DatumGetTimeInterval, DatumGetTimestamp, DatumGetTimestampTz, DatumGetTimeTzADTP, Interval::day, DAYS_PER_YEAR, DirectFunctionCall1, elog, ERROR, INTERVALOID, Interval::month, MONTHS_PER_YEAR, RELTIMEOID, TimeIntervalData::status, TimeTzADT::time, Interval::time, TIMEOID, TIMESTAMPOID, TIMESTAMPTZOID, TIMETZOID, TINTERVALOID, USECS_PER_DAY, and TimeTzADT::zone.

Referenced by convert_to_scalar().

4368 {
4369  switch (typid)
4370  {
4371  case TIMESTAMPOID:
4372  return DatumGetTimestamp(value);
4373  case TIMESTAMPTZOID:
4374  return DatumGetTimestampTz(value);
4375  case ABSTIMEOID:
4377  value));
4378  case DATEOID:
4380  case INTERVALOID:
4381  {
4383 
4384  /*
4385  * Convert the month part of Interval to days using assumed
4386  * average month length of 365.25/12.0 days. Not too
4387  * accurate, but plenty good enough for our purposes.
4388  */
4389  return interval->time + interval->day * (double) USECS_PER_DAY +
4390  interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4391  }
4392  case RELTIMEOID:
4393  return (DatumGetRelativeTime(value) * 1000000.0);
4394  case TINTERVALOID:
4395  {
4397 
4398  if (tinterval->status != 0)
4399  return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
4400  return 0; /* for lack of a better idea */
4401  }
4402  case TIMEOID:
4403  return DatumGetTimeADT(value);
4404  case TIMETZOID:
4405  {
4406  TimeTzADT *timetz = DatumGetTimeTzADTP(value);
4407 
4408  /* use GMT-equivalent time */
4409  return (double) (timetz->time + (timetz->zone * 1000000.0));
4410  }
4411  }
4412 
4413  /*
4414  * Can't get here unless someone tries to use scalarltsel/scalargtsel on
4415  * an operator with one timevalue and one non-timevalue operand.
4416  */
4417  elog(ERROR, "unsupported type: %u", typid);
4418  return 0;
4419 }
#define TIMESTAMPTZOID
Definition: pg_type.h:525
#define TIMEOID
Definition: pg_type.h:514
#define DATEOID
Definition: pg_type.h:511
#define DatumGetDateADT(X)
Definition: date.h:52
#define DatumGetIntervalP(X)
Definition: timestamp.h:29
TimeADT time
Definition: date.h:28
#define DatumGetTimeTzADTP(X)
Definition: date.h:54
#define TINTERVALOID
Definition: pg_type.h:428
double date2timestamp_no_overflow(DateADT dateVal)
Definition: date.c:658
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:584
int32 day
Definition: timestamp.h:47
#define MONTHS_PER_YEAR
Definition: timestamp.h:69
#define DAYS_PER_YEAR
Definition: timestamp.h:68
#define TIMESTAMPOID
Definition: pg_type.h:519
#define ERROR
Definition: elog.h:43
int32 zone
Definition: date.h:29
#define DatumGetRelativeTime(X)
Definition: nabstime.h:51
#define DatumGetTimestampTz(X)
Definition: timestamp.h:28
#define INTERVALOID
Definition: pg_type.h:529
TimeOffset time
Definition: timestamp.h:45
#define USECS_PER_DAY
Definition: timestamp.h:91
int32 month
Definition: timestamp.h:48
Datum abstime_timestamp(PG_FUNCTION_ARGS)
Definition: nabstime.c:467
#define DatumGetTimeADT(X)
Definition: date.h:53
AbsoluteTime data[2]
Definition: nabstime.h:42
#define TIMETZOID
Definition: pg_type.h:536
#define DatumGetTimeInterval(X)
Definition: nabstime.h:52
#define elog
Definition: elog.h:219
static struct @121 value
#define ABSTIMEOID
Definition: pg_type.h:422
Definition: date.h:26
#define DatumGetTimestamp(X)
Definition: timestamp.h:27
#define RELTIMEOID
Definition: pg_type.h:425
static bool convert_to_scalar ( Datum  value,
Oid  valuetypid,
double *  scaledvalue,
Datum  lobound,
Datum  hibound,
Oid  boundstypid,
double *  scaledlobound,
double *  scaledhibound 
)
static

Definition at line 3872 of file selfuncs.c.

References ABSTIMEOID, BOOLOID, BPCHAROID, BYTEAOID, CHAROID, CIDROID, convert_bytea_to_scalar(), convert_network_to_scalar(), convert_numeric_to_scalar(), convert_string_datum(), convert_string_to_scalar(), convert_timevalue_to_scalar(), DATEOID, FLOAT4OID, FLOAT8OID, INETOID, INT2OID, INT4OID, INT8OID, INTERVALOID, MACADDR8OID, MACADDROID, NAMEOID, NUMERICOID, OIDOID, pfree(), REGCLASSOID, REGCONFIGOID, REGDICTIONARYOID, REGNAMESPACEOID, REGOPERATOROID, REGOPEROID, REGPROCEDUREOID, REGPROCOID, REGROLEOID, REGTYPEOID, RELTIMEOID, TEXTOID, TIMEOID, TIMESTAMPOID, TIMESTAMPTZOID, TIMETZOID, TINTERVALOID, and VARCHAROID.

Referenced by ineq_histogram_selectivity().

3875 {
3876  /*
3877  * Both the valuetypid and the boundstypid should exactly match the
3878  * declared input type(s) of the operator we are invoked for, so we just
3879  * error out if either is not recognized.
3880  *
3881  * XXX The histogram we are interpolating between points of could belong
3882  * to a column that's only binary-compatible with the declared type. In
3883  * essence we are assuming that the semantics of binary-compatible types
3884  * are enough alike that we can use a histogram generated with one type's
3885  * operators to estimate selectivity for the other's. This is outright
3886  * wrong in some cases --- in particular signed versus unsigned
3887  * interpretation could trip us up. But it's useful enough in the
3888  * majority of cases that we do it anyway. Should think about more
3889  * rigorous ways to do it.
3890  */
3891  switch (valuetypid)
3892  {
3893  /*
3894  * Built-in numeric types
3895  */
3896  case BOOLOID:
3897  case INT2OID:
3898  case INT4OID:
3899  case INT8OID:
3900  case FLOAT4OID:
3901  case FLOAT8OID:
3902  case NUMERICOID:
3903  case OIDOID:
3904  case REGPROCOID:
3905  case REGPROCEDUREOID:
3906  case REGOPEROID:
3907  case REGOPERATOROID:
3908  case REGCLASSOID:
3909  case REGTYPEOID:
3910  case REGCONFIGOID:
3911  case REGDICTIONARYOID:
3912  case REGROLEOID:
3913  case REGNAMESPACEOID:
3914  *scaledvalue = convert_numeric_to_scalar(value, valuetypid);
3915  *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
3916  *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
3917  return true;
3918 
3919  /*
3920  * Built-in string types
3921  */
3922  case CHAROID:
3923  case BPCHAROID:
3924  case VARCHAROID:
3925  case TEXTOID:
3926  case NAMEOID:
3927  {
3928  char *valstr = convert_string_datum(value, valuetypid);
3929  char *lostr = convert_string_datum(lobound, boundstypid);
3930  char *histr = convert_string_datum(hibound, boundstypid);
3931 
3932  convert_string_to_scalar(valstr, scaledvalue,
3933  lostr, scaledlobound,
3934  histr, scaledhibound);
3935  pfree(valstr);
3936  pfree(lostr);
3937  pfree(histr);
3938  return true;
3939  }
3940 
3941  /*
3942  * Built-in bytea type
3943  */
3944  case BYTEAOID:
3945  {
3946  convert_bytea_to_scalar(value, scaledvalue,
3947  lobound, scaledlobound,
3948  hibound, scaledhibound);
3949  return true;
3950  }
3951 
3952  /*
3953  * Built-in time types
3954  */
3955  case TIMESTAMPOID:
3956  case TIMESTAMPTZOID:
3957  case ABSTIMEOID:
3958  case DATEOID:
3959  case INTERVALOID:
3960  case RELTIMEOID:
3961  case TINTERVALOID:
3962  case TIMEOID:
3963  case TIMETZOID:
3964  *scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
3965  *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
3966  *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
3967  return true;
3968 
3969  /*
3970  * Built-in network types
3971  */
3972  case INETOID:
3973  case CIDROID:
3974  case MACADDROID:
3975  case MACADDR8OID:
3976  *scaledvalue = convert_network_to_scalar(value, valuetypid);
3977  *scaledlobound = convert_network_to_scalar(lobound, boundstypid);
3978  *scaledhibound = convert_network_to_scalar(hibound, boundstypid);
3979  return true;
3980  }
3981  /* Don't know how to convert */
3982  *scaledvalue = *scaledlobound = *scaledhibound = 0;
3983  return false;
3984 }
#define CIDROID
Definition: pg_type.h:451
#define TIMESTAMPTZOID
Definition: pg_type.h:525
#define TIMEOID
Definition: pg_type.h:514
#define REGCLASSOID
Definition: pg_type.h:577
#define BPCHAROID
Definition: pg_type.h:504
#define DATEOID
Definition: pg_type.h:511
#define NAMEOID
Definition: pg_type.h:300
#define REGROLEOID
Definition: pg_type.h:585
#define OIDOID
Definition: pg_type.h:328
#define TEXTOID
Definition: pg_type.h:324
#define INETOID
Definition: pg_type.h:448
#define NUMERICOID
Definition: pg_type.h:554
static double convert_numeric_to_scalar(Datum value, Oid typid)
Definition: selfuncs.c:3990
#define INT4OID
Definition: pg_type.h:316
#define TINTERVALOID
Definition: pg_type.h:428
#define REGTYPEOID
Definition: pg_type.h:581
#define REGOPEROID
Definition: pg_type.h:569
void pfree(void *pointer)
Definition: mcxt.c:950
#define TIMESTAMPOID
Definition: pg_type.h:519
double convert_network_to_scalar(Datum value, Oid typid)
Definition: network.c:897
#define INT2OID
Definition: pg_type.h:308
#define INTERVALOID
Definition: pg_type.h:529
#define REGDICTIONARYOID
Definition: pg_type.h:627
#define VARCHAROID
Definition: pg_type.h:507
#define FLOAT4OID
Definition: pg_type.h:416
#define CHAROID
Definition: pg_type.h:296
#define INT8OID
Definition: pg_type.h:304
static char * convert_string_datum(Datum value, Oid typid)
Definition: selfuncs.c:4183
#define TIMETZOID
Definition: pg_type.h:536
#define FLOAT8OID
Definition: pg_type.h:419
#define BOOLOID
Definition: pg_type.h:288
static double convert_timevalue_to_scalar(Datum value, Oid typid)
Definition: selfuncs.c:4367
#define BYTEAOID
Definition: pg_type.h:292
#define REGCONFIGOID
Definition: pg_type.h:624
#define MACADDR8OID
Definition: pg_type.h:454
static void convert_bytea_to_scalar(Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
Definition: selfuncs.c:4282
#define MACADDROID
Definition: pg_type.h:445
static struct @121 value
#define REGPROCEDUREOID
Definition: pg_type.h:565
#define ABSTIMEOID
Definition: pg_type.h:422
#define REGNAMESPACEOID
Definition: pg_type.h:589
static void convert_string_to_scalar(char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
Definition: selfuncs.c:4055
#define REGOPERATOROID
Definition: pg_type.h:573
#define REGPROCOID
Definition: pg_type.h:320
#define RELTIMEOID
Definition: pg_type.h:425
List* deconstruct_indexquals ( IndexPath path)

Definition at line 6237 of file selfuncs.c.

References arg, ScalarArrayOpExpr::args, Assert, RestrictInfo::clause, IndexQualInfo::clause_op, elog, ERROR, forboth, get_leftop(), get_rightop(), IndexQualInfo::indexcol, IndexPath::indexinfo, IndexPath::indexqualcols, IndexPath::indexquals, InvalidOid, IsA, lappend(), RowCompareExpr::largs, lfirst_int, lfirst_node, linitial, linitial_oid, lsecond, match_index_to_operand(), NIL, nodeTag, NULL, ScalarArrayOpExpr::opno, RowCompareExpr::opnos, IndexQualInfo::other_operand, palloc(), RowCompareExpr::rargs, result, IndexQualInfo::rinfo, and IndexQualInfo::varonleft.

Referenced by blcostestimate(), brincostestimate(), btcostestimate(), gincostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

6238 {
6239  List *result = NIL;
6240  IndexOptInfo *index = path->indexinfo;
6241  ListCell *lcc,
6242  *lci;
6243 
6244  forboth(lcc, path->indexquals, lci, path->indexqualcols)
6245  {
6246  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc);
6247  int indexcol = lfirst_int(lci);
6248  Expr *clause;
6249  Node *leftop,
6250  *rightop;
6251  IndexQualInfo *qinfo;
6252 
6253  clause = rinfo->clause;
6254 
6255  qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
6256  qinfo->rinfo = rinfo;
6257  qinfo->indexcol = indexcol;
6258 
6259  if (IsA(clause, OpExpr))
6260  {
6261  qinfo->clause_op = ((OpExpr *) clause)->opno;
6262  leftop = get_leftop(clause);
6263  rightop = get_rightop(clause);
6264  if (match_index_to_operand(leftop, indexcol, index))
6265  {
6266  qinfo->varonleft = true;
6267  qinfo->other_operand = rightop;
6268  }
6269  else
6270  {
6271  Assert(match_index_to_operand(rightop, indexcol, index));
6272  qinfo->varonleft = false;
6273  qinfo->other_operand = leftop;
6274  }
6275  }
6276  else if (IsA(clause, RowCompareExpr))
6277  {
6278  RowCompareExpr *rc = (RowCompareExpr *) clause;
6279 
6280  qinfo->clause_op = linitial_oid(rc->opnos);
6281  /* Examine only first columns to determine left/right sides */
6283  indexcol, index))
6284  {
6285  qinfo->varonleft = true;
6286  qinfo->other_operand = (Node *) rc->rargs;
6287  }
6288  else
6289  {
6291  indexcol, index));
6292  qinfo->varonleft = false;
6293  qinfo->other_operand = (Node *) rc->largs;
6294  }
6295  }
6296  else if (IsA(clause, ScalarArrayOpExpr))
6297  {
6298  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6299 
6300  qinfo->clause_op = saop->opno;
6301  /* index column is always on the left in this case */
6303  indexcol, index));
6304  qinfo->varonleft = true;
6305  qinfo->other_operand = (Node *) lsecond(saop->args);
6306  }
6307  else if (IsA(clause, NullTest))
6308  {
6309  qinfo->clause_op = InvalidOid;
6310  Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
6311  indexcol, index));
6312  qinfo->varonleft = true;
6313  qinfo->other_operand = NULL;
6314  }
6315  else
6316  {
6317  elog(ERROR, "unsupported indexqual type: %d",
6318  (int) nodeTag(clause));
6319  }
6320 
6321  result = lappend(result, qinfo);
6322  }
6323  return result;
6324 }
#define NIL
Definition: pg_list.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:180
IndexOptInfo * indexinfo
Definition: relation.h:1030
bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index)
Definition: indxpath.c:3179
Definition: nodes.h:509
return result
Definition: formatting.c:1618
RestrictInfo * rinfo
Definition: selfuncs.h:105
#define lsecond(l)
Definition: pg_list.h:116
Definition: type.h:90
List * indexquals
Definition: relation.h:1032
#define linitial(l)
Definition: pg_list.h:111
#define ERROR
Definition: elog.h:43
#define lfirst_int(lc)
Definition: pg_list.h:107
Node * get_leftop(const Expr *clause)
Definition: clauses.c:199
#define lfirst_node(type, lc)
Definition: pg_list.h:109
List * lappend(List *list, void *datum)
Definition: list.c:128
Expr * clause
Definition: relation.h:1746
bool varonleft
Definition: selfuncs.h:107
#define InvalidOid
Definition: postgres_ext.h:36
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define linitial_oid(l)
Definition: pg_list.h:113
#define nodeTag(nodeptr)
Definition: nodes.h:514
Node * get_rightop(const Expr *clause)
Definition: clauses.c:216
List * indexqualcols
Definition: relation.h:1033
void * palloc(Size size)
Definition: mcxt.c:849
Node * other_operand
Definition: selfuncs.h:109
void * arg
#define elog
Definition: elog.h:219
Definition: pg_list.h:45
Datum eqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 2189 of file selfuncs.c.

References generate_unaccent_rules::args, CLAMP_PROBABILITY, elog, eqjoinsel_inner(), eqjoinsel_semi(), ERROR, find_join_input_rel(), get_commutator(), get_join_variables(), JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_SEMI, SpecialJoinInfo::jointype, SpecialJoinInfo::min_righthand, PG_GETARG_INT16, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, and ReleaseVariableStats.

Referenced by neqjoinsel().

2190 {
2191  PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2192  Oid operator = PG_GETARG_OID(1);
2193  List *args = (List *) PG_GETARG_POINTER(2);
2194 
2195 #ifdef NOT_USED
2196  JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2197 #endif
2199  double selec;
2200  VariableStatData vardata1;
2201  VariableStatData vardata2;
2202  bool join_is_reversed;
2203  RelOptInfo *inner_rel;
2204 
2205  get_join_variables(root, args, sjinfo,
2206  &vardata1, &vardata2, &join_is_reversed);
2207 
2208  switch (sjinfo->jointype)
2209  {
2210  case JOIN_INNER:
2211  case JOIN_LEFT:
2212  case JOIN_FULL:
2213  selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2214  break;
2215  case JOIN_SEMI:
2216  case JOIN_ANTI:
2217 
2218  /*
2219  * Look up the join's inner relation. min_righthand is sufficient
2220  * information because neither SEMI nor ANTI joins permit any
2221  * reassociation into or out of their RHS, so the righthand will
2222  * always be exactly that set of rels.
2223  */
2224  inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2225 
2226  if (!join_is_reversed)
2227  selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2228  inner_rel);
2229  else
2230  selec = eqjoinsel_semi(get_commutator(operator),
2231  &vardata2, &vardata1,
2232  inner_rel);
2233  break;
2234  default:
2235  /* other values not expected here */
2236  elog(ERROR, "unrecognized join type: %d",
2237  (int) sjinfo->jointype);
2238  selec = 0; /* keep compiler quiet */
2239  break;
2240  }
2241 
2242  ReleaseVariableStats(vardata1);
2243  ReleaseVariableStats(vardata2);
2244 
2245  CLAMP_PROBABILITY(selec);
2246 
2247  PG_RETURN_FLOAT8((float8) selec);
2248 }
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1313
Relids min_righthand
Definition: relation.h:1917
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:326
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
unsigned int Oid
Definition: postgres_ext.h:31
JoinType
Definition: nodes.h:672
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define ERROR
Definition: elog.h:43
double float8
Definition: c.h:381
void get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
Definition: selfuncs.c:4506
#define PG_GETARG_OID(n)
Definition: fmgr.h:240
static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel)
Definition: selfuncs.c:2483
#define PG_GETARG_INT16(n)
Definition: fmgr.h:236
static RelOptInfo * find_join_input_rel(PlannerInfo *root, Relids relids)
Definition: selfuncs.c:5389
JoinType jointype
Definition: relation.h:1920
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
static double eqjoinsel_inner(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2)
Definition: selfuncs.c:2257
#define elog
Definition: elog.h:219
Definition: pg_list.h:45
static double eqjoinsel_inner ( Oid  operator,
VariableStatData vardata1,
VariableStatData vardata2 
)
static

Definition at line 2257 of file selfuncs.c.

References VariableStatData::atttype, VariableStatData::atttypmod, CLAMP_PROBABILITY, DatumGetBool, DEFAULT_COLLATION_OID, fmgr_info(), free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InvalidOid, NULL, palloc0(), pfree(), STATISTIC_KIND_MCV, and VariableStatData::statsTuple.

Referenced by eqjoinsel().

2259 {
2260  double selec;
2261  double nd1;
2262  double nd2;
2263  bool isdefault1;
2264  bool isdefault2;
2265  Form_pg_statistic stats1 = NULL;
2266  Form_pg_statistic stats2 = NULL;
2267  bool have_mcvs1 = false;
2268  Datum *values1 = NULL;
2269  int nvalues1 = 0;
2270  float4 *numbers1 = NULL;
2271  int nnumbers1 = 0;
2272  bool have_mcvs2 = false;
2273  Datum *values2 = NULL;
2274  int nvalues2 = 0;
2275  float4 *numbers2 = NULL;
2276  int nnumbers2 = 0;
2277 
2278  nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2279  nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2280 
2281  if (HeapTupleIsValid(vardata1->statsTuple))
2282  {
2283  stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2284  have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
2285  vardata1->atttype,
2286  vardata1->atttypmod,
2288  InvalidOid,
2289  NULL,
2290  &values1, &nvalues1,
2291  &numbers1, &nnumbers1);
2292  }
2293 
2294  if (HeapTupleIsValid(vardata2->statsTuple))
2295  {
2296  stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
2297  have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
2298  vardata2->atttype,
2299  vardata2->atttypmod,
2301  InvalidOid,
2302  NULL,
2303  &values2, &nvalues2,
2304  &numbers2, &nnumbers2);
2305  }
2306 
2307  if (have_mcvs1 && have_mcvs2)
2308  {
2309  /*
2310  * We have most-common-value lists for both relations. Run through
2311  * the lists to see which MCVs actually join to each other with the
2312  * given operator. This allows us to determine the exact join
2313  * selectivity for the portion of the relations represented by the MCV
2314  * lists. We still have to estimate for the remaining population, but
2315  * in a skewed distribution this gives us a big leg up in accuracy.
2316  * For motivation see the analysis in Y. Ioannidis and S.
2317  * Christodoulakis, "On the propagation of errors in the size of join
2318  * results", Technical Report 1018, Computer Science Dept., University
2319  * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2320  */
2321  FmgrInfo eqproc;
2322  bool *hasmatch1;
2323  bool *hasmatch2;
2324  double nullfrac1 = stats1->stanullfrac;
2325  double nullfrac2 = stats2->stanullfrac;
2326  double matchprodfreq,
2327  matchfreq1,
2328  matchfreq2,
2329  unmatchfreq1,
2330  unmatchfreq2,
2331  otherfreq1,
2332  otherfreq2,
2333  totalsel1,
2334  totalsel2;
2335  int i,
2336  nmatches;
2337 
2338  fmgr_info(get_opcode(operator), &eqproc);
2339  hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
2340  hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
2341 
2342  /*
2343  * Note we assume that each MCV will match at most one member of the
2344  * other MCV list. If the operator isn't really equality, there could
2345  * be multiple matches --- but we don't look for them, both for speed
2346  * and because the math wouldn't add up...
2347  */
2348  matchprodfreq = 0.0;
2349  nmatches = 0;
2350  for (i = 0; i < nvalues1; i++)
2351  {
2352  int j;
2353 
2354  for (j = 0; j < nvalues2; j++)
2355  {
2356  if (hasmatch2[j])
2357  continue;
2358  if (DatumGetBool(FunctionCall2Coll(&eqproc,
2360  values1[i],
2361  values2[j])))
2362  {
2363  hasmatch1[i] = hasmatch2[j] = true;
2364  matchprodfreq += numbers1[i] * numbers2[j];
2365  nmatches++;
2366  break;
2367  }
2368  }
2369  }
2370  CLAMP_PROBABILITY(matchprodfreq);
2371  /* Sum up frequencies of matched and unmatched MCVs */
2372  matchfreq1 = unmatchfreq1 = 0.0;
2373  for (i = 0; i < nvalues1; i++)
2374  {
2375  if (hasmatch1[i])
2376  matchfreq1 += numbers1[i];
2377  else
2378  unmatchfreq1 += numbers1[i];
2379  }
2380  CLAMP_PROBABILITY(matchfreq1);
2381  CLAMP_PROBABILITY(unmatchfreq1);
2382  matchfreq2 = unmatchfreq2 = 0.0;
2383  for (i = 0; i < nvalues2; i++)
2384  {
2385  if (hasmatch2[i])
2386  matchfreq2 += numbers2[i];
2387  else
2388  unmatchfreq2 += numbers2[i];
2389  }
2390  CLAMP_PROBABILITY(matchfreq2);
2391  CLAMP_PROBABILITY(unmatchfreq2);
2392  pfree(hasmatch1);
2393  pfree(hasmatch2);
2394 
2395  /*
2396  * Compute total frequency of non-null values that are not in the MCV
2397  * lists.
2398  */
2399  otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2400  otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2401  CLAMP_PROBABILITY(otherfreq1);
2402  CLAMP_PROBABILITY(otherfreq2);
2403 
2404  /*
2405  * We can estimate the total selectivity from the point of view of
2406  * relation 1 as: the known selectivity for matched MCVs, plus
2407  * unmatched MCVs that are assumed to match against random members of
2408  * relation 2's non-MCV population, plus non-MCV values that are
2409  * assumed to match against random members of relation 2's unmatched
2410  * MCVs plus non-MCV values.
2411  */
2412  totalsel1 = matchprodfreq;
2413  if (nd2 > nvalues2)
2414  totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2);
2415  if (nd2 > nmatches)
2416  totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2417  (nd2 - nmatches);
2418  /* Same estimate from the point of view of relation 2. */
2419  totalsel2 = matchprodfreq;
2420  if (nd1 > nvalues1)
2421  totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1);
2422  if (nd1 > nmatches)
2423  totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2424  (nd1 - nmatches);
2425 
2426  /*
2427  * Use the smaller of the two estimates. This can be justified in
2428  * essentially the same terms as given below for the no-stats case: to
2429  * a first approximation, we are estimating from the point of view of
2430  * the relation with smaller nd.
2431  */
2432  selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2433  }
2434  else
2435  {
2436  /*
2437  * We do not have MCV lists for both sides. Estimate the join
2438  * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2439  * is plausible if we assume that the join operator is strict and the
2440  * non-null values are about equally distributed: a given non-null
2441  * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2442  * of rel2, so total join rows are at most
2443  * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2444  * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2445  * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2446  * with MIN() is an upper bound. Using the MIN() means we estimate
2447  * from the point of view of the relation with smaller nd (since the
2448  * larger nd is determining the MIN). It is reasonable to assume that
2449  * most tuples in this rel will have join partners, so the bound is
2450  * probably reasonably tight and should be taken as-is.
2451  *
2452  * XXX Can we be smarter if we have an MCV list for just one side? It
2453  * seems that if we assume equal distribution for the other side, we
2454  * end up with the same answer anyway.
2455  */
2456  double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2457  double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2458 
2459  selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2460  if (nd1 > nd2)
2461  selec /= nd1;
2462  else
2463  selec /= nd2;
2464  }
2465 
2466  if (have_mcvs1)
2467  free_attstatsslot(vardata1->atttype, values1, nvalues1,
2468  numbers1, nnumbers1);
2469  if (have_mcvs2)
2470  free_attstatsslot(vardata2->atttype, values2, nvalues2,
2471  numbers2, nnumbers2);
2472 
2473  return selec;
2474 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
HeapTuple statsTuple
Definition: selfuncs.h:71
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2886
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1047
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
int32 atttypmod
Definition: selfuncs.h:76
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
void pfree(void *pointer)
Definition: mcxt.c:950
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4909
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:74
#define DatumGetBool(X)
Definition: postgres.h:399
#define STATISTIC_KIND_MCV
Definition: pg_statistic.h:204
float float4
Definition: c.h:380
void * palloc0(Size size)
Definition: mcxt.c:878
uintptr_t Datum
Definition: postgres.h:372
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1094
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
int i
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:3010
static double eqjoinsel_semi ( Oid  operator,
VariableStatData vardata1,
VariableStatData vardata2,
RelOptInfo inner_rel 
)
static

Definition at line 2483 of file selfuncs.c.

References VariableStatData::atttype, VariableStatData::atttypmod, CLAMP_PROBABILITY, DatumGetBool, DEFAULT_COLLATION_OID, fmgr_info(), free_attstatsslot(), FunctionCall2Coll(), get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, i, InvalidOid, Min, NULL, OidIsValid, palloc0(), pfree(), VariableStatData::rel, RelOptInfo::rows, STATISTIC_KIND_MCV, and VariableStatData::statsTuple.

Referenced by eqjoinsel().

2486 {
2487  double selec;
2488  double nd1;
2489  double nd2;
2490  bool isdefault1;
2491  bool isdefault2;
2492  Form_pg_statistic stats1 = NULL;
2493  bool have_mcvs1 = false;
2494  Datum *values1 = NULL;
2495  int nvalues1 = 0;
2496  float4 *numbers1 = NULL;
2497  int nnumbers1 = 0;
2498  bool have_mcvs2 = false;
2499  Datum *values2 = NULL;
2500  int nvalues2 = 0;
2501  float4 *numbers2 = NULL;
2502  int nnumbers2 = 0;
2503 
2504  nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2505  nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2506 
2507  /*
2508  * We clamp nd2 to be not more than what we estimate the inner relation's
2509  * size to be. This is intuitively somewhat reasonable since obviously
2510  * there can't be more than that many distinct values coming from the
2511  * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2512  * likewise) is that this is the only pathway by which restriction clauses
2513  * applied to the inner rel will affect the join result size estimate,
2514  * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2515  * only the outer rel's size. If we clamped nd1 we'd be double-counting
2516  * the selectivity of outer-rel restrictions.
2517  *
2518  * We can apply this clamping both with respect to the base relation from
2519  * which the join variable comes (if there is just one), and to the
2520  * immediate inner input relation of the current join.
2521  *
2522  * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2523  * great, maybe, but it didn't come out of nowhere either. This is most
2524  * helpful when the inner relation is empty and consequently has no stats.
2525  */
2526  if (vardata2->rel)
2527  {
2528  if (nd2 >= vardata2->rel->rows)
2529  {
2530  nd2 = vardata2->rel->rows;
2531  isdefault2 = false;
2532  }
2533  }
2534  if (nd2 >= inner_rel->rows)
2535  {
2536  nd2 = inner_rel->rows;
2537  isdefault2 = false;
2538  }
2539 
2540  if (HeapTupleIsValid(vardata1->statsTuple))
2541  {
2542  stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2543  have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
2544  vardata1->atttype,
2545  vardata1->atttypmod,
2547  InvalidOid,
2548  NULL,
2549  &values1, &nvalues1,
2550  &numbers1, &nnumbers1);
2551  }
2552 
2553  if (HeapTupleIsValid(vardata2->statsTuple))
2554  {
2555  have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
2556  vardata2->atttype,
2557  vardata2->atttypmod,
2559  InvalidOid,
2560  NULL,
2561  &values2, &nvalues2,
2562  &numbers2, &nnumbers2);
2563  }
2564 
2565  if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
2566  {
2567  /*
2568  * We have most-common-value lists for both relations. Run through
2569  * the lists to see which MCVs actually join to each other with the
2570  * given operator. This allows us to determine the exact join
2571  * selectivity for the portion of the relations represented by the MCV
2572  * lists. We still have to estimate for the remaining population, but
2573  * in a skewed distribution this gives us a big leg up in accuracy.
2574  */
2575  FmgrInfo eqproc;
2576  bool *hasmatch1;
2577  bool *hasmatch2;
2578  double nullfrac1 = stats1->stanullfrac;
2579  double matchfreq1,
2580  uncertainfrac,
2581  uncertain;
2582  int i,
2583  nmatches,
2584  clamped_nvalues2;
2585 
2586  /*
2587  * The clamping above could have resulted in nd2 being less than
2588  * nvalues2; in which case, we assume that precisely the nd2 most
2589  * common values in the relation will appear in the join input, and so
2590  * compare to only the first nd2 members of the MCV list. Of course
2591  * this is frequently wrong, but it's the best bet we can make.
2592  */
2593  clamped_nvalues2 = Min(nvalues2, nd2);
2594 
2595  fmgr_info(get_opcode(operator), &eqproc);
2596  hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
2597  hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2598 
2599  /*
2600  * Note we assume that each MCV will match at most one member of the
2601  * other MCV list. If the operator isn't really equality, there could
2602  * be multiple matches --- but we don't look for them, both for speed
2603  * and because the math wouldn't add up...
2604  */
2605  nmatches = 0;
2606  for (i = 0; i < nvalues1; i++)
2607  {
2608  int j;
2609 
2610  for (j = 0; j < clamped_nvalues2; j++)
2611  {
2612  if (hasmatch2[j])
2613  continue;
2614  if (DatumGetBool(FunctionCall2Coll(&eqproc,
2616  values1[i],
2617  values2[j])))
2618  {
2619  hasmatch1[i] = hasmatch2[j] = true;
2620  nmatches++;
2621  break;
2622  }
2623  }
2624  }
2625  /* Sum up frequencies of matched MCVs */
2626  matchfreq1 = 0.0;
2627  for (i = 0; i < nvalues1; i++)
2628  {
2629  if (hasmatch1[i])
2630  matchfreq1 += numbers1[i];
2631  }
2632  CLAMP_PROBABILITY(matchfreq1);
2633  pfree(hasmatch1);
2634  pfree(hasmatch2);
2635 
2636  /*
2637  * Now we need to estimate the fraction of relation 1 that has at
2638  * least one join partner. We know for certain that the matched MCVs
2639  * do, so that gives us a lower bound, but we're really in the dark
2640  * about everything else. Our crude approach is: if nd1 <= nd2 then
2641  * assume all non-null rel1 rows have join partners, else assume for
2642  * the uncertain rows that a fraction nd2/nd1 have join partners. We
2643  * can discount the known-matched MCVs from the distinct-values counts
2644  * before doing the division.
2645  *
2646  * Crude as the above is, it's completely useless if we don't have
2647  * reliable ndistinct values for both sides. Hence, if either nd1 or
2648  * nd2 is default, punt and assume half of the uncertain rows have
2649  * join partners.
2650  */
2651  if (!isdefault1 && !isdefault2)
2652  {
2653  nd1 -= nmatches;
2654  nd2 -= nmatches;
2655  if (nd1 <= nd2 || nd2 < 0)
2656  uncertainfrac = 1.0;
2657  else
2658  uncertainfrac = nd2 / nd1;
2659  }
2660  else
2661  uncertainfrac = 0.5;
2662  uncertain = 1.0 - matchfreq1 - nullfrac1;
2663  CLAMP_PROBABILITY(uncertain);
2664  selec = matchfreq1 + uncertainfrac * uncertain;
2665  }
2666  else
2667  {
2668  /*
2669  * Without MCV lists for both sides, we can only use the heuristic
2670  * about nd1 vs nd2.
2671  */
2672  double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2673 
2674  if (!isdefault1 && !isdefault2)
2675  {
2676  if (nd1 <= nd2 || nd2 < 0)
2677  selec = 1.0 - nullfrac1;
2678  else
2679  selec = (nd2 / nd1) * (1.0 - nullfrac1);
2680  }
2681  else
2682  selec = 0.5 * (1.0 - nullfrac1);
2683  }
2684 
2685  if (have_mcvs1)
2686  free_attstatsslot(vardata1->atttype, values1, nvalues1,
2687  numbers1, nnumbers1);
2688  if (have_mcvs2)
2689  free_attstatsslot(vardata2->atttype, values2, nvalues2,
2690  numbers2, nnumbers2);
2691 
2692  return selec;
2693 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
HeapTuple statsTuple
Definition: selfuncs.h:71
RelOptInfo * rel
Definition: selfuncs.h:70
#define Min(x, y)
Definition: c.h:806
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2886
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1047
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
#define OidIsValid(objectId)
Definition: c.h:538
int32 atttypmod
Definition: selfuncs.h:76
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
void pfree(void *pointer)
Definition: mcxt.c:950
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4909
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:74
#define DatumGetBool(X)
Definition: postgres.h:399
#define STATISTIC_KIND_MCV
Definition: pg_statistic.h:204
float float4
Definition: c.h:380
void * palloc0(Size size)
Definition: mcxt.c:878
uintptr_t Datum
Definition: postgres.h:372
double rows
Definition: relation.h:527
#define InvalidOid
Definition: postgres_ext.h:36
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1094
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
int i
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:3010
Datum eqsel ( PG_FUNCTION_ARGS  )

Definition at line 226 of file selfuncs.c.

References generate_unaccent_rules::args, DEFAULT_EQ_SEL, get_restriction_variable(), IsA, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, var_eq_const(), and var_eq_non_const().

Referenced by neqsel().

227 {
229  Oid operator = PG_GETARG_OID(1);
230  List *args = (List *) PG_GETARG_POINTER(2);
231  int varRelid = PG_GETARG_INT32(3);
232  VariableStatData vardata;
233  Node *other;
234  bool varonleft;
235  double selec;
236 
237  /*
238  * If expression is not variable = something or something = variable, then
239  * punt and return a default estimate.
240  */
241  if (!get_restriction_variable(root, args, varRelid,
242  &vardata, &other, &varonleft))
244 
245  /*
246  * We can do a lot better if the something is a constant. (Note: the
247  * Const might result from estimation rather than being a simple constant
248  * in the query.)
249  */
250  if (IsA(other, Const))
251  selec = var_eq_const(&vardata, operator,
252  ((Const *) other)->constvalue,
253  ((Const *) other)->constisnull,
254  varonleft);
255  else
256  selec = var_eq_non_const(&vardata, operator, other,
257  varonleft);
258 
259  ReleaseVariableStats(vardata);
260 
261  PG_RETURN_FLOAT8((float8) selec);
262 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4446
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:326
Definition: nodes.h:509
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
unsigned int Oid
Definition: postgres_ext.h:31
double float8
Definition: c.h:381
#define PG_GETARG_OID(n)
Definition: fmgr.h:240
static double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft)
Definition: selfuncs.c:270
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
static double var_eq_non_const(VariableStatData *vardata, Oid operator, Node *other, bool varonleft)
Definition: selfuncs.c:412
Definition: pg_list.h:45
int estimate_array_length ( Node arrayexpr)

Definition at line 2084 of file selfuncs.c.

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), DatumGetArrayTypeP, IsA, list_length(), and strip_array_coercion().

Referenced by btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

2085 {
2086  /* look through any binary-compatible relabeling of arrayexpr */
2087  arrayexpr = strip_array_coercion(arrayexpr);
2088 
2089  if (arrayexpr && IsA(arrayexpr, Const))
2090  {
2091  Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2092  bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2093  ArrayType *arrayval;
2094 
2095  if (arrayisnull)
2096  return 0;
2097  arrayval = DatumGetArrayTypeP(arraydatum);
2098  return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2099  }
2100  else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2101  !((ArrayExpr *) arrayexpr)->multidims)
2102  {
2103  return list_length(((ArrayExpr *) arrayexpr)->elements);
2104  }
2105  else
2106  {
2107  /* default guess --- see also scalararraysel */
2108  return 10;
2109  }
2110 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define ARR_DIMS(a)
Definition: array.h:275
uintptr_t Datum
Definition: postgres.h:372
static int list_length(const List *l)
Definition: pg_list.h:89
#define ARR_NDIM(a)
Definition: array.h:271
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1741
#define DatumGetArrayTypeP(X)
Definition: array.h:242
Selectivity estimate_hash_bucketsize ( PlannerInfo root,
Node hashkey,
double  nbuckets 
)

Definition at line 3601 of file selfuncs.c.

References VariableStatData::atttype, VariableStatData::atttypmod, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT, HeapTupleIsValid, InvalidOid, NULL, VariableStatData::rel, ReleaseVariableStats, RelOptInfo::rows, STATISTIC_KIND_MCV, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

3602 {
3603  VariableStatData vardata;
3604  double estfract,
3605  ndistinct,
3606  stanullfrac,
3607  mcvfreq,
3608  avgfreq;
3609  bool isdefault;
3610  float4 *numbers;
3611  int nnumbers;
3612 
3613  examine_variable(root, hashkey, 0, &vardata);
3614 
3615  /* Get number of distinct values */
3616  ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3617 
3618  /* If ndistinct isn't real, punt and return 0.1, per comments above */
3619  if (isdefault)
3620  {
3621  ReleaseVariableStats(vardata);
3622  return (Selectivity) 0.1;
3623  }
3624 
3625  /* Get fraction that are null */
3626  if (HeapTupleIsValid(vardata.statsTuple))
3627  {
3628  Form_pg_statistic stats;
3629 
3630  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3631  stanullfrac = stats->stanullfrac;
3632  }
3633  else
3634  stanullfrac = 0.0;
3635 
3636  /* Compute avg freq of all distinct data values in raw relation */
3637  avgfreq = (1.0 - stanullfrac) / ndistinct;
3638 
3639  /*
3640  * Adjust ndistinct to account for restriction clauses. Observe we are
3641  * assuming that the data distribution is affected uniformly by the
3642  * restriction clauses!
3643  *
3644  * XXX Possibly better way, but much more expensive: multiply by
3645  * selectivity of rel's restriction clauses that mention the target Var.
3646  */
3647  if (vardata.rel && vardata.rel->tuples > 0)
3648  {
3649  ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3650  ndistinct = clamp_row_est(ndistinct);
3651  }
3652 
3653  /*
3654  * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3655  * number of buckets is less than the expected number of distinct values;
3656  * otherwise it is 1/ndistinct.
3657  */
3658  if (ndistinct > nbuckets)
3659  estfract = 1.0 / nbuckets;
3660  else
3661  estfract = 1.0 / ndistinct;
3662 
3663  /*
3664  * Look up the frequency of the most common value, if available.
3665  */
3666  mcvfreq = 0.0;
3667 
3668  if (HeapTupleIsValid(vardata.statsTuple))
3669  {
3670  if (get_attstatsslot(vardata.statsTuple,
3671  vardata.atttype, vardata.atttypmod,
3673  NULL,
3674  NULL, NULL,
3675  &numbers, &nnumbers))
3676  {
3677  /*
3678  * The first MCV stat is for the most common value.
3679  */
3680  if (nnumbers > 0)
3681  mcvfreq = numbers[0];
3682  free_attstatsslot(vardata.atttype, NULL, 0,
3683  numbers, nnumbers);
3684  }
3685  }
3686 
3687  /*
3688  * Adjust estimated bucketsize upward to account for skewed distribution.
3689  */
3690  if (avgfreq > 0.0 && mcvfreq > avgfreq)
3691  estfract *= mcvfreq / avgfreq;
3692 
3693  /*
3694  * Clamp bucketsize to sane range (the above adjustment could easily
3695  * produce an out-of-range result). We set the lower bound a little above
3696  * zero, since zero isn't a very sane result.
3697  */
3698  if (estfract < 1.0e-6)
3699  estfract = 1.0e-6;
3700  else if (estfract > 1.0)
3701  estfract = 1.0;
3702 
3703  ReleaseVariableStats(vardata);
3704 
3705  return (Selectivity) estfract;
3706 }
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
HeapTuple statsTuple
Definition: selfuncs.h:71
double tuples
Definition: relation.h:564
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:638
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2886
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
int32 atttypmod
Definition: selfuncs.h:76
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:4909
#define STATISTIC_KIND_MCV
Definition: pg_statistic.h:204
float float4
Definition: c.h:380
double rows
Definition: relation.h:527
#define InvalidOid
Definition: postgres_ext.h:36
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4565
#define NULL
Definition: c.h:229
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
e
Definition: preproc-init.c:82
double clamp_row_est(double nrows)
Definition: costsize.c:173
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:3010
static bool estimate_multivariate_ndistinct ( PlannerInfo root,
RelOptInfo rel,
List **  varinfos,
double *  ndistinct 
)
static

Definition at line 3727 of file selfuncs.c.

References Assert, MVNDistinctItem::attrs, bms_add_member(), BMS_EQUAL, bms_intersect(), bms_is_member(), bms_num_members(), bms_subset_compare(), elog, ERROR, i, InvalidOid, IsA, MVNDistinct::items, StatisticExtInfo::keys, StatisticExtInfo::kind, lappend(), lfirst, MVNDistinctItem::ndistinct, NIL, MVNDistinct::nitems, NULL, GroupVarInfo::rel, statext_ndistinct_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, STATS_EXT_NDISTINCT, and GroupVarInfo::var.

Referenced by estimate_num_groups().

3729 {
3730  ListCell *lc;
3731  Bitmapset *attnums = NULL;
3732  int nmatches;
3733  Oid statOid = InvalidOid;
3734  MVNDistinct *stats;
3735  Bitmapset *matched = NULL;
3736 
3737  /* bail out immediately if the table has no extended statistics */
3738  if (!rel->statlist)
3739  return false;
3740 
3741  /* Determine the attnums we're looking for */
3742  foreach(lc, *varinfos)
3743  {
3744  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3745 
3746  Assert(varinfo->rel == rel);
3747 
3748  if (IsA(varinfo->var, Var))
3749  {
3750  attnums = bms_add_member(attnums,
3751  ((Var *) varinfo->var)->varattno);
3752  }
3753  }
3754 
3755  /* look for the ndistinct statistics matching the most vars */
3756  nmatches = 1; /* we require at least two matches */
3757  foreach(lc, rel->statlist)
3758  {
3759  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3760  Bitmapset *shared;
3761 
3762  /* skip statistics of other kinds */
3763  if (info->kind != STATS_EXT_NDISTINCT)
3764  continue;
3765 
3766  /* compute attnums shared by the vars and the statistic */
3767  shared = bms_intersect(info->keys, attnums);
3768 
3769  /*
3770  * Does this statistics matches more columns than the currently
3771  * best statistic? If so, use this one instead.
3772  *
3773  * XXX This should break ties using name of the statistic, or
3774  * something like that, to make the outcome stable.
3775  */
3776  if (bms_num_members(shared) > nmatches)
3777  {
3778  statOid = info->statOid;
3779  nmatches = bms_num_members(shared);
3780  matched = shared;
3781  }
3782  }
3783 
3784  /* No match? */
3785  if (statOid == InvalidOid)
3786  return false;
3787  Assert(nmatches > 1 && matched != NULL);
3788 
3789  stats = statext_ndistinct_load(statOid);
3790 
3791  /*
3792  * If we have a match, search it for the specific item that matches (there
3793  * must be one), and construct the output values.
3794  */
3795  if (stats)
3796  {
3797  int i;
3798  List *newlist = NIL;
3799  MVNDistinctItem *item = NULL;
3800 
3801  /* Find the specific item that exactly matches the combination */
3802  for (i = 0; i < stats->nitems; i++)
3803  {
3804  MVNDistinctItem *tmpitem = &stats->items[i];
3805 
3806  if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
3807  {
3808  item = tmpitem;
3809  break;
3810  }
3811  }
3812 
3813  /* make sure we found an item */
3814  if (!item)
3815  elog(ERROR, "corrupt MVNDistinct entry");
3816 
3817  /* Form the output varinfo list, keeping only unmatched ones */
3818  foreach(lc, *varinfos)
3819  {
3820  GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3821  AttrNumber attnum;
3822 
3823  if (!IsA(varinfo->var, Var))
3824  {
3825  newlist = lappend(newlist, varinfo);
3826  continue;
3827  }
3828 
3829  attnum = ((Var *) varinfo->var)->varattno;
3830  if (!bms_is_member(attnum, matched))
3831  newlist = lappend(newlist, varinfo);
3832  }
3833 
3834  *varinfos = newlist;
3835  *ndistinct = item->ndistinct;
3836  return true;
3837  }
3838 
3839  return false;
3840 }
#define NIL
Definition: pg_list.h:69
#define STATS_EXT_NDISTINCT
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
List * statlist
Definition: relation.h:562
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:41
double ndistinct
Definition: statistics.h:28
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:163
#define ERROR
Definition: elog.h:43
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:605
Node * var
Definition: selfuncs.c:3154
uint32 nitems
Definition: statistics.h:40
List * lappend(List *list, void *datum)
Definition: list.c:128
Bitmapset * bms_intersect(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:252
#define InvalidOid
Definition: postgres_ext.h:36
BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:345
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
Bitmapset * attrs
Definition: statistics.h:29
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:698
Bitmapset * keys
Definition: relation.h:723
int i
#define elog
Definition: elog.h:219
MVNDistinct * statext_ndistinct_load(Oid mvoid)
Definition: mvdistinct.c:126
Definition: pg_list.h:45
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:420
int16 AttrNumber
Definition: attnum.h:21
RelOptInfo * rel
Definition: selfuncs.c:3155
double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset 
)

Definition at line 3278 of file selfuncs.c.

References add_unique_group_var(), Assert, BOOLOID, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), exprType(), for_each_cell, HeapTupleIsValid, i, IS_SIMPLE_REL, VariableStatData::isunique, lcons(), lfirst, linitial, list_head(), list_length(), list_member_int(), lnext, GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), create_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and recurse_set_operations().

3280 {
3281  List *varinfos = NIL;
3282  double numdistinct;
3283  ListCell *l;
3284  int i;
3285 
3286  /*
3287  * We don't ever want to return an estimate of zero groups, as that tends
3288  * to lead to division-by-zero and other unpleasantness. The input_rows
3289  * estimate is usually already at least 1, but clamp it just in case it
3290  * isn't.
3291  */
3292  input_rows = clamp_row_est(input_rows);
3293 
3294  /*
3295  * If no grouping columns, there's exactly one group. (This can't happen
3296  * for normal cases with GROUP BY or DISTINCT, but it is possible for
3297  * corner cases with set operations.)
3298  */
3299  if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3300  return 1.0;
3301 
3302  /*
3303  * Count groups derived from boolean grouping expressions. For other
3304  * expressions, find the unique Vars used, treating an expression as a Var
3305  * if we can find stats for it. For each one, record the statistical
3306  * estimate of number of distinct values (total in its table, without
3307  * regard for filtering).
3308  */
3309  numdistinct = 1.0;
3310 
3311  i = 0;
3312  foreach(l, groupExprs)
3313  {
3314  Node *groupexpr = (Node *) lfirst(l);
3315  VariableStatData vardata;
3316  List *varshere;
3317  ListCell *l2;
3318 
3319  /* is expression in this grouping set? */
3320  if (pgset && !list_member_int(*pgset, i++))
3321  continue;
3322 
3323  /* Short-circuit for expressions returning boolean */
3324  if (exprType(groupexpr) == BOOLOID)
3325  {
3326  numdistinct *= 2.0;
3327  continue;
3328  }
3329 
3330  /*
3331  * If examine_variable is able to deduce anything about the GROUP BY
3332  * expression, treat it as a single variable even if it's really more
3333  * complicated.
3334  */
3335  examine_variable(root, groupexpr, 0, &vardata);
3336  if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3337  {
3338  varinfos = add_unique_group_var(root, varinfos,
3339  groupexpr, &vardata);
3340  ReleaseVariableStats(vardata);
3341  continue;
3342  }
3343  ReleaseVariableStats(vardata);
3344 
3345  /*
3346  * Else pull out the component Vars. Handle PlaceHolderVars by
3347  * recursing into their arguments (effectively assuming that the
3348  * PlaceHolderVar doesn't change the number of groups, which boils
3349  * down to ignoring the possible addition of nulls to the result set).
3350  */
3351  varshere = pull_var_clause(groupexpr,
3355 
3356  /*
3357  * If we find any variable-free GROUP BY item, then either it is a
3358  * constant (and we can ignore it) or it contains a volatile function;
3359  * in the latter case we punt and assume that each input row will
3360  * yield a distinct group.
3361  */
3362  if (varshere == NIL)
3363  {
3364  if (contain_volatile_functions(groupexpr))
3365  return input_rows;
3366  continue;
3367  }
3368 
3369  /*
3370  * Else add variables to varinfos list
3371  */
3372  foreach(l2, varshere)
3373  {
3374  Node *var = (Node *) lfirst(l2);
3375 
3376  examine_variable(root, var, 0, &vardata);
3377  varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3378  ReleaseVariableStats(vardata);
3379  }
3380  }
3381 
3382  /*
3383  * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3384  * list.
3385  */
3386  if (varinfos == NIL)
3387  {
3388  /* Guard against out-of-range answers */
3389  if (numdistinct > input_rows)
3390  numdistinct = input_rows;
3391  return numdistinct;
3392  }
3393 
3394  /*
3395  * Group Vars by relation and estimate total numdistinct.
3396  *
3397  * For each iteration of the outer loop, we process the frontmost Var in
3398  * varinfos, plus all other Vars in the same relation. We remove these
3399  * Vars from the newvarinfos list for the next iteration. This is the
3400  * easiest way to group Vars of same rel together.
3401  */
3402  do
3403  {
3404  GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3405  RelOptInfo *rel = varinfo1->rel;
3406  double reldistinct = 1;
3407  double relmaxndistinct = reldistinct;
3408  int relvarcount = 0;
3409  List *newvarinfos = NIL;
3410  List *relvarinfos = NIL;
3411 
3412  /*
3413  * Split the list of varinfos in two - one for the current rel,
3414  * one for remaining Vars on other rels.
3415  */
3416  relvarinfos = lcons(varinfo1, relvarinfos);
3417  for_each_cell(l, lnext(list_head(varinfos)))
3418  {
3419  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3420 
3421  if (varinfo2->rel == varinfo1->rel)
3422  {
3423  /* varinfos on current rel */
3424  relvarinfos = lcons(varinfo2, relvarinfos);
3425  }
3426  else
3427  {
3428  /* not time to process varinfo2 yet */
3429  newvarinfos = lcons(varinfo2, newvarinfos);
3430  }
3431  }
3432 
3433  /*
3434  * Get the numdistinct estimate for the Vars of this rel. We
3435  * iteratively search for multivariate n-distinct with maximum number
3436  * of vars; assuming that each var group is independent of the others,
3437  * we multiply them together. Any remaining relvarinfos after
3438  * no more multivariate matches are found are assumed independent too,
3439  * so their individual ndistinct estimates are multiplied also.
3440  *
3441  * While iterating, count how many separate numdistinct values we
3442  * apply. We apply a fudge factor below, but only if we multiplied
3443  * more than one such values.
3444  */
3445  while (relvarinfos)
3446  {
3447  double mvndistinct;
3448 
3449  if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3450  &mvndistinct))
3451  {
3452  reldistinct *= mvndistinct;
3453  if (relmaxndistinct < mvndistinct)
3454  relmaxndistinct = mvndistinct;
3455  relvarcount++;
3456  }
3457  else
3458  {
3459  foreach (l, relvarinfos)
3460  {
3461  GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3462 
3463  reldistinct *= varinfo2->ndistinct;
3464  if (relmaxndistinct < varinfo2->ndistinct)
3465  relmaxndistinct = varinfo2->ndistinct;
3466  relvarcount++;
3467  }
3468 
3469  /* we're done with this relation */
3470  relvarinfos = NIL;
3471  }
3472  }
3473 
3474  /*
3475  * Sanity check --- don't divide by zero if empty relation.
3476  */
3477  Assert(IS_SIMPLE_REL(rel));
3478  if (rel->tuples > 0)
3479  {
3480  /*
3481  * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3482  * fudge factor is because the Vars are probably correlated but we
3483  * don't know by how much. We should never clamp to less than the
3484  * largest ndistinct value for any of the Vars, though, since
3485  * there will surely be at least that many groups.
3486  */
3487  double clamp = rel->tuples;
3488 
3489  if (relvarcount > 1)
3490  {
3491  clamp *= 0.1;
3492  if (clamp < relmaxndistinct)
3493  {
3494  clamp = relmaxndistinct;
3495  /* for sanity in case some ndistinct is too large: */
3496  if (clamp > rel->tuples)
3497  clamp = rel->tuples;
3498  }
3499  }
3500  if (reldistinct > clamp)
3501  reldistinct = clamp;
3502 
3503  /*
3504  * Update the estimate based on the restriction selectivity,
3505  * guarding against division by zero when reldistinct is zero.
3506  * Also skip this if we know that we are returning all rows.
3507  */
3508  if (reldistinct > 0 && rel->rows < rel->tuples)
3509  {
3510  /*
3511  * Given a table containing N rows with n distinct values in a
3512  * uniform distribution, if we select p rows at random then
3513  * the expected number of distinct values selected is
3514  *
3515  * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3516  *
3517  * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3518  *
3519  * See "Approximating block accesses in database
3520  * organizations", S. B. Yao, Communications of the ACM,
3521  * Volume 20 Issue 4, April 1977 Pages 260-261.
3522  *
3523  * Alternatively, re-arranging the terms from the factorials,
3524  * this may be written as
3525  *
3526  * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3527  *
3528  * This form of the formula is more efficient to compute in
3529  * the common case where p is larger than N/n. Additionally,
3530  * as pointed out by Dell'Era, if i << N for all terms in the
3531  * product, it can be approximated by
3532  *
3533  * n * (1 - ((N-p)/N)^(N/n))
3534  *
3535  * See "Expected distinct values when selecting from a bag
3536  * without replacement", Alberto Dell'Era,
3537  * http://www.adellera.it/investigations/distinct_balls/.
3538  *
3539  * The condition i << N is equivalent to n >> 1, so this is a
3540  * good approximation when the number of distinct values in
3541  * the table is large. It turns out that this formula also
3542  * works well even when n is small.
3543  */
3544  reldistinct *=
3545  (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3546  rel->tuples / reldistinct));
3547  }
3548  reldistinct = clamp_row_est(reldistinct);
3549 
3550  /*
3551  * Update estimate of total distinct groups.
3552  */
3553  numdistinct *= reldistinct;
3554  }
3555 
3556  varinfos = newvarinfos;
3557  } while (varinfos != NIL);
3558 
3559  numdistinct = ceil(numdistinct);
3560 
3561  /* Guard against out-of-range answers */
3562  if (numdistinct > input_rows)
3563  numdistinct = input_rows;
3564  if (numdistinct < 1.0)
3565  numdistinct = 1.0;
3566 
3567  return numdistinct;
3568 }
#define NIL
Definition: pg_list.h:69
#define PVC_RECURSE_AGGREGATES
Definition: var.h:21
HeapTuple statsTuple
Definition: selfuncs.h:71
double tuples
Definition: relation.h:564
Definition: nodes.h:509
List * pull_var_clause(Node *node, int flags)
Definition: var.c:535
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:951
double ndistinct
Definition: selfuncs.c:3156
#define PVC_RECURSE_PLACEHOLDERS
Definition: var.h:26
#define IS_SIMPLE_REL(rel)
Definition: relation.h:504
#define linitial(l)
Definition: pg_list.h:111
bool list_member_int(const List *list, int datum)
Definition: list.c:485
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
#define PVC_RECURSE_WINDOWFUNCS
Definition: var.h:23
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3727
#define lnext(lc)
Definition: pg_list.h:105
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3160
double rows
Definition: relation.h:527
List * lcons(void *datum, List *list)
Definition: list.c:259
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4565
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
static int list_length(const List *l)
Definition: pg_list.h:89
#define for_each_cell(cell, initcell)
Definition: pg_list.h:169
#define BOOLOID
Definition: pg_type.h:288
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
int i
double clamp_row_est(double nrows)
Definition: costsize.c:173
Definition: pg_list.h:45
RelOptInfo * rel
Definition: selfuncs.c:3155
static void examine_simple_variable ( PlannerInfo root,
Var var,
VariableStatData vardata 
)
static

Definition at line 4752 of file selfuncs.c.

References Alias::aliasname, Assert, BoolGetDatum, Query::distinctClause, elog, RangeTblEntry::eref, ERROR, TargetEntry::expr, find_base_rel(), VariableStatData::freefunc, get_relation_stats_hook, get_tle_by_resno(), Query::groupClause, HeapTupleIsValid, RangeTblEntry::inh, Int16GetDatum, InvalidAttrNumber, InvalidOid, IsA, VariableStatData::isunique, list_length(), NULL, ObjectIdGetDatum, PlannerInfo::parse, ReleaseSysCache(), RangeTblEntry::relid, TargetEntry::resjunk, RTE_RELATION, RTE_SUBQUERY, RangeTblEntry::rtekind, SearchSysCache3, RangeTblEntry::security_barrier, Query::setOperations, PlannerInfo::simple_rte_array, STATRELATTINH, VariableStatData::statsTuple, RangeTblEntry::subquery, RelOptInfo::subroot, targetIsInSortList(), Query::targetList, Var::varattno, Var::varlevelsup, and Var::varno.

Referenced by examine_variable().

4754 {
4755  RangeTblEntry *rte = root->simple_rte_array[var->varno];
4756 
4757  Assert(IsA(rte, RangeTblEntry));
4758 
4760  (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
4761  {
4762  /*
4763  * The hook took control of acquiring a stats tuple. If it did supply
4764  * a tuple, it'd better have supplied a freefunc.
4765  */
4766  if (HeapTupleIsValid(vardata->statsTuple) &&
4767  !vardata->freefunc)
4768  elog(ERROR, "no function provided to release variable stats with");
4769  }
4770  else if (rte->rtekind == RTE_RELATION)
4771  {
4772  /*
4773  * Plain table or parent of an inheritance appendrel, so look up the
4774  * column in pg_statistic
4775  */
4777  ObjectIdGetDatum(rte->relid),
4778  Int16GetDatum(var->varattno),
4779  BoolGetDatum(rte->inh));
4780  vardata->freefunc = ReleaseSysCache;
4781  }
4782  else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
4783  {
4784  /*
4785  * Plain subquery (not one that was converted to an appendrel).
4786  */
4787  Query *subquery = rte->subquery;
4788  RelOptInfo *rel;
4789  TargetEntry *ste;
4790 
4791  /*
4792  * Punt if it's a whole-row var rather than a plain column reference.
4793  */
4794  if (var->varattno == InvalidAttrNumber)
4795  return;
4796 
4797  /*
4798  * Punt if subquery uses set operations or GROUP BY, as these will
4799  * mash underlying columns' stats beyond recognition. (Set ops are
4800  * particularly nasty; if we forged ahead, we would return stats
4801  * relevant to only the leftmost subselect...) DISTINCT is also
4802  * problematic, but we check that later because there is a possibility
4803  * of learning something even with it.
4804  */
4805  if (subquery->setOperations ||
4806  subquery->groupClause)
4807  return;
4808 
4809  /*
4810  * OK, fetch RelOptInfo for subquery. Note that we don't change the
4811  * rel returned in vardata, since caller expects it to be a rel of the
4812  * caller's query level. Because we might already be recursing, we
4813  * can't use that rel pointer either, but have to look up the Var's
4814  * rel afresh.
4815  */
4816  rel = find_base_rel(root, var->varno);
4817 
4818  /* If the subquery hasn't been planned yet, we have to punt */
4819  if (rel->subroot == NULL)
4820  return;
4821  Assert(IsA(rel->subroot, PlannerInfo));
4822 
4823  /*
4824  * Switch our attention to the subquery as mangled by the planner. It
4825  * was okay to look at the pre-planning version for the tests above,
4826  * but now we need a Var that will refer to the subroot's live
4827  * RelOptInfos. For instance, if any subquery pullup happened during
4828  * planning, Vars in the targetlist might have gotten replaced, and we
4829  * need to see the replacement expressions.
4830  */
4831  subquery = rel->subroot->parse;
4832  Assert(IsA(subquery, Query));
4833 
4834  /* Get the subquery output expression referenced by the upper Var */
4835  ste = get_tle_by_resno(subquery->targetList, var->varattno);
4836  if (ste == NULL || ste->resjunk)
4837  elog(ERROR, "subquery %s does not have attribute %d",
4838  rte->eref->aliasname, var->varattno);
4839  var = (Var *) ste->expr;
4840 
4841  /*
4842  * If subquery uses DISTINCT, we can't make use of any stats for the
4843  * variable ... but, if it's the only DISTINCT column, we are entitled
4844  * to consider it unique. We do the test this way so that it works
4845  * for cases involving DISTINCT ON.
4846  */
4847  if (subquery->distinctClause)
4848  {
4849  if (list_length(subquery->distinctClause) == 1 &&
4850  targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
4851  vardata->isunique = true;
4852  /* cannot go further */
4853  return;
4854  }
4855 
4856  /*
4857  * If the sub-query originated from a view with the security_barrier
4858  * attribute, we must not look at the variable's statistics, though it
4859  * seems all right to notice the existence of a DISTINCT clause. So
4860  * stop here.
4861  *
4862  * This is probably a harsher restriction than necessary; it's
4863  * certainly OK for the selectivity estimator (which is a C function,
4864  * and therefore omnipotent anyway) to look at the statistics. But
4865  * many selectivity estimators will happily *invoke the operator
4866  * function* to try to work out a good estimate - and that's not OK.
4867  * So for now, don't dig down for stats.
4868  */
4869  if (rte->security_barrier)
4870  return;
4871 
4872  /* Can only handle a simple Var of subquery's query level */
4873  if (var && IsA(var, Var) &&
4874  var->varlevelsup == 0)
4875  {
4876  /*
4877  * OK, recurse into the subquery. Note that the original setting
4878  * of vardata->isunique (which will surely be false) is left
4879  * unchanged in this situation. That's what we want, since even
4880  * if the underlying column is unique, the subquery may have
4881  * joined to other tables in a way that creates duplicates.
4882  */
4883  examine_simple_variable(rel->subroot, var, vardata);
4884  }
4885  }
4886  else
4887  {
4888  /*
4889  * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
4890  * won't see RTE_JOIN here because join alias Vars have already been
4891  * flattened.) There's not much we can do with function outputs, but
4892  * maybe someday try to be smarter about VALUES and/or CTEs.
4893  */
4894  }
4895 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
Query * parse
Definition: relation.h:154
Index varlevelsup
Definition: primnodes.h:173
HeapTuple statsTuple
Definition: selfuncs.h:71
#define Int16GetDatum(X)
Definition: postgres.h:457
AttrNumber varattno
Definition: primnodes.h:168
Definition: primnodes.h:163
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:4752
List * targetList
Definition: parsenodes.h:138
PlannerInfo * subroot
Definition: relation.h:566
bool resjunk
Definition: primnodes.h:1374
List * distinctClause
Definition: parsenodes.h:154
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:152
RangeTblEntry ** simple_rte_array
Definition: relation.h:187
Index varno
Definition: primnodes.h:166
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1116
bool security_barrier
Definition: parsenodes.h:947
#define BoolGetDatum(X)
Definition: postgres.h:408
#define InvalidOid
Definition: postgres_ext.h:36
bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
char * aliasname
Definition: primnodes.h:42
Expr * expr
Definition: primnodes.h:1367
static int list_length(const List *l)
Definition: pg_list.h:89
#define InvalidAttrNumber
Definition: attnum.h:23
RTEKind rtekind
Definition: parsenodes.h:928
Node * setOperations
Definition: parsenodes.h:163
Query * subquery
Definition: parsenodes.h:946
List * groupClause
Definition: parsenodes.h:146
#define SearchSysCache3(cacheId, key1, key2, key3)
Definition: syscache.h:156
TargetEntry * get_tle_by_resno(List *tlist, AttrNumber resno)
#define elog
Definition: elog.h:219
Alias * eref
Definition: parsenodes.h:1015
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:243
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:73
void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 4565 of file selfuncs.c.

References arg, VariableStatData::atttype, VariableStatData::atttypmod, BMS_EMPTY_SET, bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), BoolGetDatum, elog, equal(), ERROR, examine_simple_variable(), exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, has_unique_index(), HeapTupleIsValid, IndexOptInfo::indexkeys, RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indexprs, IndexOptInfo::indpred, Int16GetDatum, IsA, VariableStatData::isunique, lfirst, list_head(), lnext, MemSet, IndexOptInfo::ncolumns, NIL, NULL, ObjectIdGetDatum, IndexOptInfo::predOK, pull_varnos(), VariableStatData::rel, ReleaseSysCache(), SearchSysCache3, STATRELATTINH, VariableStatData::statsTuple, IndexOptInfo::unique, VariableStatData::var, Var::varattno, Var::varno, VariableStatData::vartype, Var::vartype, and Var::vartypmod.

Referenced by booltestsel(), boolvarsel(), estimate_hash_bucketsize(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

4567 {
4568  Node *basenode;
4569  Relids varnos;
4570  RelOptInfo *onerel;
4571 
4572  /* Make sure we don't return dangling pointers in vardata */
4573  MemSet(vardata, 0, sizeof(VariableStatData));
4574 
4575  /* Save the exposed type of the expression */
4576  vardata->vartype = exprType(node);
4577 
4578  /* Look inside any binary-compatible relabeling */
4579 
4580  if (IsA(node, RelabelType))
4581  basenode = (Node *) ((RelabelType *) node)->arg;
4582  else
4583  basenode = node;
4584 
4585  /* Fast path for a simple Var */
4586 
4587  if (IsA(basenode, Var) &&
4588  (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4589  {
4590  Var *var = (Var *) basenode;
4591 
4592  /* Set up result fields other than the stats tuple */
4593  vardata->var = basenode; /* return Var without relabeling */
4594  vardata->rel = find_base_rel(root, var->varno);
4595  vardata->atttype = var->vartype;
4596  vardata->atttypmod = var->vartypmod;
4597  vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4598 
4599  /* Try to locate some stats */
4600  examine_simple_variable(root, var, vardata);
4601 
4602  return;
4603  }
4604 
4605  /*
4606  * Okay, it's a more complicated expression. Determine variable
4607  * membership. Note that when varRelid isn't zero, only vars of that
4608  * relation are considered "real" vars.
4609  */
4610  varnos = pull_varnos(basenode);
4611 
4612  onerel = NULL;
4613 
4614  switch (bms_membership(varnos))
4615  {
4616  case BMS_EMPTY_SET:
4617  /* No Vars at all ... must be pseudo-constant clause */
4618  break;
4619  case BMS_SINGLETON:
4620  if (varRelid == 0 || bms_is_member(varRelid, varnos))
4621  {
4622  onerel = find_base_rel(root,
4623  (varRelid ? varRelid : bms_singleton_member(varnos)));
4624  vardata->rel = onerel;
4625  node = basenode; /* strip any relabeling */
4626  }
4627  /* else treat it as a constant */
4628  break;
4629  case BMS_MULTIPLE:
4630  if (varRelid == 0)
4631  {
4632  /* treat it as a variable of a join relation */
4633  vardata->rel = find_join_rel(root, varnos);
4634  node = basenode; /* strip any relabeling */
4635  }
4636  else if (bms_is_member(varRelid, varnos))
4637  {
4638  /* ignore the vars belonging to other relations */
4639  vardata->rel = find_base_rel(root, varRelid);
4640  node = basenode; /* strip any relabeling */
4641  /* note: no point in expressional-index search here */
4642  }
4643  /* else treat it as a constant */
4644  break;
4645  }
4646 
4647  bms_free(varnos);
4648 
4649  vardata->var = node;
4650  vardata->atttype = exprType(node);
4651  vardata->atttypmod = exprTypmod(node);
4652 
4653  if (onerel)
4654  {
4655  /*
4656  * We have an expression in vars of a single relation. Try to match
4657  * it to expressional index columns, in hopes of finding some
4658  * statistics.
4659  *
4660  * XXX it's conceivable that there are multiple matches with different
4661  * index opfamilies; if so, we need to pick one that matches the
4662  * operator we are estimating for. FIXME later.
4663  */
4664  ListCell *ilist;
4665 
4666  foreach(ilist, onerel->indexlist)
4667  {
4668  IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4669  ListCell *indexpr_item;
4670  int pos;
4671 
4672  indexpr_item = list_head(index->indexprs);
4673  if (indexpr_item == NULL)
4674  continue; /* no expressions here... */
4675 
4676  for (pos = 0; pos < index->ncolumns; pos++)
4677  {
4678  if (index->indexkeys[pos] == 0)
4679  {
4680  Node *indexkey;
4681 
4682  if (indexpr_item == NULL)
4683  elog(ERROR, "too few entries in indexprs list");
4684  indexkey = (Node *) lfirst(indexpr_item);
4685  if (indexkey && IsA(indexkey, RelabelType))
4686  indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4687  if (equal(node, indexkey))
4688  {
4689  /*
4690  * Found a match ... is it a unique index? Tests here
4691  * should match has_unique_index().
4692  */
4693  if (index->unique &&
4694  index->ncolumns == 1 &&
4695  (index->indpred == NIL || index->predOK))
4696  vardata->isunique = true;
4697 
4698  /*
4699  * Has it got stats? We only consider stats for
4700  * non-partial indexes, since partial indexes probably
4701  * don't reflect whole-relation statistics; the above
4702  * check for uniqueness is the only info we take from
4703  * a partial index.
4704  *
4705  * An index stats hook, however, must make its own
4706  * decisions about what to do with partial indexes.
4707  */
4708  if (get_index_stats_hook &&
4709  (*get_index_stats_hook) (root, index->indexoid,
4710  pos + 1, vardata))
4711  {
4712  /*
4713  * The hook took control of acquiring a stats
4714  * tuple. If it did supply a tuple, it'd better
4715  * have supplied a freefunc.
4716  */
4717  if (HeapTupleIsValid(vardata->statsTuple) &&
4718  !vardata->freefunc)
4719  elog(ERROR, "no function provided to release variable stats with");
4720  }
4721  else if (index->indpred == NIL)
4722  {
4723  vardata->statsTuple =
4725  ObjectIdGetDatum(index->indexoid),
4726  Int16GetDatum(pos + 1),
4727  BoolGetDatum(false));
4728  vardata->freefunc = ReleaseSysCache;
4729  }
4730  if (vardata->statsTuple)
4731  break;
4732  }
4733  indexpr_item = lnext(indexpr_item);
4734  }
4735  }
4736  if (vardata->statsTuple)
4737  break;
4738  }
4739  }
4740 }
#define NIL
Definition: pg_list.h:69
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
bool predOK
Definition: relation.h:662
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:308
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:2961
HeapTuple statsTuple
Definition: selfuncs.h:71
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:276
RelOptInfo * rel
Definition: selfuncs.h:70
#define Int16GetDatum(X)
Definition: postgres.h:457
Definition: nodes.h:509
#define MemSet(start, val, len)
Definition: c.h:857
AttrNumber varattno
Definition: primnodes.h:168
Definition: primnodes.h:163
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:4752
int32 atttypmod
Definition: selfuncs.h:76
bool unique
Definition: relation.h:663
Definition: type.h:90
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:1733
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
Oid vartype
Definition: primnodes.h:170
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
int ncolumns
Definition: relation.h:640
#define lnext(lc)
Definition: pg_list.h:105
Relids pull_varnos(Node *node)
Definition: var.c:95
Index varno
Definition: primnodes.h:166
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:634
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1116
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:526
List * indexlist
Definition: relation.h:561
#define BoolGetDatum(X)
Definition: postgres.h:408
void bms_free(Bitmapset *a)
Definition: bitmapset.c:201
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
#define lfirst(lc)
Definition: pg_list.h:106
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:153
#define SearchSysCache3(cacheId, key1, key2, key3)
Definition: syscache.h:156
void * arg
int * indexkeys
Definition: relation.h:641
#define elog
Definition: elog.h:219
Oid indexoid
Definition: relation.h:630
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:243
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:73
List * indpred
Definition: relation.h:653
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:420
List * indexprs
Definition: relation.h:652
int32 vartypmod
Definition: primnodes.h:171
static RelOptInfo * find_join_input_rel ( PlannerInfo root,
Relids  relids 
)
static

Definition at line 5389 of file selfuncs.c.

References BMS_EMPTY_SET, bms_membership(), BMS_MULTIPLE, BMS_SINGLETON, bms_singleton_member(), elog, ERROR, find_base_rel(), find_join_rel(), and NULL.

Referenced by eqjoinsel().

5390 {
5391  RelOptInfo *rel = NULL;
5392 
5393  switch (bms_membership(relids))
5394  {
5395  case BMS_EMPTY_SET:
5396  /* should not happen */
5397  break;
5398  case BMS_SINGLETON:
5399  rel = find_base_rel(root, bms_singleton_member(relids));
5400  break;
5401  case BMS_MULTIPLE:
5402  rel = find_join_rel(root, relids);
5403  break;
5404  }
5405 
5406  if (rel == NULL)
5407  elog(ERROR, "could not find RelOptInfo for given relids");
5408 
5409  return rel;
5410 }
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:308
#define ERROR
Definition: elog.h:43
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:634
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:526
#define NULL
Definition: c.h:229
#define elog
Definition: elog.h:219
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:243
void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
List qinfos,
GenericCosts costs 
)

Definition at line 6386 of file selfuncs.c.

References add_predicate_to_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_tablespace_page_costs(), index_pages_fetched(), GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, IndexPath::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, NULL, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, orderby_operands_eval_cost(), other_operands_eval_cost(), IndexOptInfo::pages, IndexOptInfo::rel, RelOptInfo::relid, IndexOptInfo::reltablespace, rint(), GenericCosts::spc_random_page_cost, RelOptInfo::tuples, and IndexOptInfo::tuples.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

6391 {
6392  IndexOptInfo *index = path->indexinfo;
6393  List *indexQuals = path->indexquals;
6394  List *indexOrderBys = path->indexorderbys;
6395  Cost indexStartupCost;
6396  Cost indexTotalCost;
6397  Selectivity indexSelectivity;
6398  double indexCorrelation;
6399  double numIndexPages;
6400  double numIndexTuples;
6401  double spc_random_page_cost;
6402  double num_sa_scans;
6403  double num_outer_scans;
6404  double num_scans;
6405  double qual_op_cost;
6406  double qual_arg_cost;
6407  List *selectivityQuals;
6408  ListCell *l;
6409 
6410  /*
6411  * If the index is partial, AND the index predicate with the explicitly
6412  * given indexquals to produce a more accurate idea of the index
6413  * selectivity.
6414  */
6415  selectivityQuals = add_predicate_to_quals(index, indexQuals);
6416 
6417  /*
6418  * Check for ScalarArrayOpExpr index quals, and estimate the number of
6419  * index scans that will be performed.
6420  */
6421  num_sa_scans = 1;
6422  foreach(l, indexQuals)
6423  {
6424  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6425 
6426  if (IsA(rinfo->clause, ScalarArrayOpExpr))
6427  {
6428  ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6429  int alength = estimate_array_length(lsecond(saop->args));
6430 
6431  if (alength > 1)
6432  num_sa_scans *= alength;
6433  }
6434  }
6435 
6436  /* Estimate the fraction of main-table tuples that will be visited */
6437  indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6438  index->rel->relid,
6439  JOIN_INNER,
6440  NULL);
6441 
6442  /*
6443  * If caller didn't give us an estimate, estimate the number of index
6444  * tuples that will be visited. We do it in this rather peculiar-looking
6445  * way in order to get the right answer for partial indexes.
6446  */
6447  numIndexTuples = costs->numIndexTuples;
6448  if (numIndexTuples <= 0.0)
6449  {
6450  numIndexTuples = indexSelectivity * index->rel->tuples;
6451 
6452  /*
6453  * The above calculation counts all the tuples visited across all
6454  * scans induced by ScalarArrayOpExpr nodes. We want to consider the
6455  * average per-indexscan number, so adjust. This is a handy place to
6456  * round to integer, too. (If caller supplied tuple estimate, it's
6457  * responsible for handling these considerations.)
6458  */
6459  numIndexTuples = rint(numIndexTuples / num_sa_scans);
6460  }
6461 
6462  /*
6463  * We can bound the number of tuples by the index size in any case. Also,
6464  * always estimate at least one tuple is touched, even when
6465  * indexSelectivity estimate is tiny.
6466  */
6467  if (numIndexTuples > index->tuples)
6468  numIndexTuples = index->tuples;
6469  if (numIndexTuples < 1.0)
6470  numIndexTuples = 1.0;
6471 
6472  /*
6473  * Estimate the number of index pages that will be retrieved.
6474  *
6475  * We use the simplistic method of taking a pro-rata fraction of the total
6476  * number of index pages. In effect, this counts only leaf pages and not
6477  * any overhead such as index metapage or upper tree levels.
6478  *
6479  * In practice access to upper index levels is often nearly free because
6480  * those tend to stay in cache under load; moreover, the cost involved is
6481  * highly dependent on index type. We therefore ignore such costs here
6482  * and leave it to the caller to add a suitable charge if needed.
6483  */
6484  if (index->pages > 1 && index->tuples > 1)
6485  numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6486  else
6487  numIndexPages = 1.0;
6488 
6489  /* fetch estimated page cost for tablespace containing index */
6491  &spc_random_page_cost,
6492  NULL);
6493 
6494  /*
6495  * Now compute the disk access costs.
6496  *
6497  * The above calculations are all per-index-scan. However, if we are in a
6498  * nestloop inner scan, we can expect the scan to be repeated (with
6499  * different search keys) for each row of the outer relation. Likewise,
6500  * ScalarArrayOpExpr quals result in multiple index scans. This creates
6501  * the potential for cache effects to reduce the number of disk page
6502  * fetches needed. We want to estimate the average per-scan I/O cost in
6503  * the presence of caching.
6504  *
6505  * We use the Mackert-Lohman formula (see costsize.c for details) to
6506  * estimate the total number of page fetches that occur. While this
6507  * wasn't what it was designed for, it seems a reasonable model anyway.
6508  * Note that we are counting pages not tuples anymore, so we take N = T =
6509  * index size, as if there were one "tuple" per page.
6510  */
6511  num_outer_scans = loop_count;
6512  num_scans = num_sa_scans * num_outer_scans;
6513 
6514  if (num_scans > 1)
6515  {
6516  double pages_fetched;
6517 
6518  /* total page fetches ignoring cache effects */
6519  pages_fetched = numIndexPages * num_scans;
6520 
6521  /* use Mackert and Lohman formula to adjust for cache effects */
6522  pages_fetched = index_pages_fetched(pages_fetched,
6523  index->pages,
6524  (double) index->pages,
6525  root);
6526 
6527  /*
6528  * Now compute the total disk access cost, and then report a pro-rated
6529  * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
6530  * since that's internal to the indexscan.)
6531  */
6532  indexTotalCost = (pages_fetched * spc_random_page_cost)
6533  / num_outer_scans;
6534  }
6535  else
6536  {
6537  /*
6538  * For a single index scan, we just charge spc_random_page_cost per
6539  * page touched.
6540  */
6541  indexTotalCost = numIndexPages * spc_random_page_cost;
6542  }
6543 
6544  /*
6545  * CPU cost: any complex expressions in the indexquals will need to be
6546  * evaluated once at the start of the scan to reduce them to runtime keys
6547  * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
6548  * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6549  * indexqual operator. Because we have numIndexTuples as a per-scan
6550  * number, we have to multiply by num_sa_scans to get the correct result
6551  * for ScalarArrayOpExpr cases. Similarly add in costs for any index
6552  * ORDER BY expressions.
6553  *
6554  * Note: this neglects the possible costs of rechecking lossy operators.
6555  * Detecting that that might be needed seems more expensive than it's
6556  * worth, though, considering all the other inaccuracies here ...
6557  */
6558  qual_arg_cost = other_operands_eval_cost(root, qinfos) +
6559  orderby_operands_eval_cost(root, path);
6560  qual_op_cost = cpu_operator_cost *
6561  (list_length(indexQuals) + list_length(indexOrderBys));
6562 
6563  indexStartupCost = qual_arg_cost;
6564  indexTotalCost += qual_arg_cost;
6565  indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6566 
6567  /*
6568  * Generic assumption about index correlation: there isn't any.
6569  */
6570  indexCorrelation = 0.0;
6571 
6572  /*
6573  * Return everything to caller.
6574  */
6575  costs->indexStartupCost = indexStartupCost;
6576  costs->indexTotalCost = indexTotalCost;
6577  costs->indexSelectivity = indexSelectivity;
6578  costs->indexCorrelation = indexCorrelation;
6579  costs->numIndexPages = numIndexPages;
6580  costs->numIndexTuples = numIndexTuples;
6581  costs->spc_random_page_cost = spc_random_page_cost;
6582  costs->num_sa_scans = num_sa_scans;
6583 }
Selectivity indexSelectivity
Definition: selfuncs.h:130
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
IndexOptInfo * indexinfo
Definition: relation.h:1030
double tuples
Definition: relation.h:564
static List * add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6605
Oid reltablespace
Definition: relation.h:631
static Cost other_operands_eval_cost(PlannerInfo *root, List *qinfos)
Definition: selfuncs.c:6332
double Selectivity
Definition: nodes.h:638
double tuples
Definition: relation.h:636
#define lsecond(l)
Definition: pg_list.h:116
static Cost orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
Definition: selfuncs.c:6357
Definition: type.h:90
BlockNumber pages
Definition: relation.h:635
List * indexquals
Definition: relation.h:1032
int estimate_array_length(Node *arrayexpr)
Definition: selfuncs.c:2084
RelOptInfo * rel
Definition: relation.h:632
double num_sa_scans
Definition: selfuncs.h:137
double cpu_operator_cost
Definition: costsize.c:108
Cost indexTotalCost
Definition: selfuncs.h:129
double rint(double x)
Definition: rint.c:22
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
Index relid
Definition: relation.h:552
Expr * clause
Definition: relation.h:1746
double indexCorrelation
Definition: selfuncs.h:131
List * indexorderbys
Definition: relation.h:1034
double spc_random_page_cost
Definition: selfuncs.h:136
double numIndexTuples
Definition: selfuncs.h:135
#define NULL
Definition: c.h:229
#define lfirst(lc)
Definition: pg_list.h:106
static int list_length(const List *l)
Definition: pg_list.h:89
Cost indexStartupCost
Definition: selfuncs.h:128
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:99
Definition: pg_list.h:45
double cpu_index_tuple_cost
Definition: costsize.c:107
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:813
double Cost
Definition: nodes.h:639
double numIndexPages
Definition: selfuncs.h:134
static bool get_actual_variable_range ( PlannerInfo root,
VariableStatData vardata,
Oid  sortop,
Datum min,
Datum max 
)
static

Definition at line 5163 of file selfuncs.c.

References AccessShareLock, Assert, VariableStatData::atttype, BackwardScanDirection, BTGreaterStrategyNumber, BTLessStrategyNumber, BTREE_AM_OID, BuildIndexInfo(), CreateExecutorState(), datumCopy(), ExprContext::ecxt_per_tuple_memory, ExprContext::ecxt_scantuple, elog, ERROR, ExecDropSingleTupleTableSlot(), ExecStoreTuple(), FormIndexDatum(), ForwardScanDirection, FreeExecutorState(), get_op_opfamily_strategy(), get_typlenbyval(), GetPerTupleExprContext, heap_close, heap_open(), IndexOptInfo::hypothetical, index_beginscan(), index_close(), index_endscan(), index_getnext(), INDEX_MAX_KEYS, index_open(), index_rescan(), RelOptInfo::indexlist, IndexOptInfo::indexoid, IndexOptInfo::indpred, InitDirtySnapshot, InvalidBuffer, InvalidOid, InvalidStrategy, lfirst, MakeSingleTupleTableSlot(), match_index_to_operand(), MemoryContextSwitchTo(), NIL, NoLock, NULL, VariableStatData::rel, IndexOptInfo::relam, RelationGetDescr, RelationGetRelationName, RelOptInfo::relid, RangeTblEntry::relid, IndexOptInfo::reverse_sort, RTE_RELATION, RangeTblEntry::rtekind, ScanKeyEntryInitialize(), PlannerInfo::simple_rte_array, SK_ISNULL, SK_SEARCHNOTNULL, IndexOptInfo::sortopfamily, values, and VariableStatData::var.

Referenced by get_variable_range(), and ineq_histogram_selectivity().

5166 {
5167  bool have_data = false;
5168  RelOptInfo *rel = vardata->rel;
5169  RangeTblEntry *rte;
5170  ListCell *lc;
5171 
5172  /* No hope if no relation or it doesn't have indexes */
5173  if (rel == NULL || rel->indexlist == NIL)
5174  return false;
5175  /* If it has indexes it must be a plain relation */
5176  rte = root->simple_rte_array[rel->relid];
5177  Assert(rte->rtekind == RTE_RELATION);
5178 
5179  /* Search through the indexes to see if any match our problem */
5180  foreach(lc, rel->indexlist)
5181  {
5183  ScanDirection indexscandir;
5184 
5185  /* Ignore non-btree indexes */
5186  if (index->relam != BTREE_AM_OID)
5187  continue;
5188 
5189  /*
5190  * Ignore partial indexes --- we only want stats that cover the entire
5191  * relation.
5192  */
5193  if (index->indpred != NIL)
5194  continue;
5195 
5196  /*
5197  * The index list might include hypothetical indexes inserted by a
5198  * get_relation_info hook --- don't try to access them.
5199  */
5200  if (index->hypothetical)
5201  continue;
5202 
5203  /*
5204  * The first index column must match the desired variable and sort
5205  * operator --- but we can use a descending-order index.
5206  */
5207  if (!match_index_to_operand(vardata->var, 0, index))
5208  continue;
5209  switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
5210  {
5211  case BTLessStrategyNumber:
5212  if (index->reverse_sort[0])
5213  indexscandir = BackwardScanDirection;
5214  else
5215  indexscandir = ForwardScanDirection;
5216  break;
5218  if (index->reverse_sort[0])
5219  indexscandir = ForwardScanDirection;
5220  else
5221  indexscandir = BackwardScanDirection;
5222  break;
5223  default:
5224  /* index doesn't match the sortop */
5225  continue;
5226  }
5227 
5228  /*
5229  * Found a suitable index to extract data from. We'll need an EState
5230  * and a bunch of other infrastructure.
5231  */
5232  {
5233  EState *estate;
5234  ExprContext *econtext;
5235  MemoryContext tmpcontext;
5236  MemoryContext oldcontext;
5237  Relation heapRel;
5238  Relation indexRel;
5239  IndexInfo *indexInfo;
5240  TupleTableSlot *slot;
5241  int16 typLen;
5242  bool typByVal;
5243  ScanKeyData scankeys[1];
5244  IndexScanDesc index_scan;
5245  HeapTuple tup;
5247  bool isnull[INDEX_MAX_KEYS];
5248  SnapshotData SnapshotDirty;
5249 
5250  estate = CreateExecutorState();
5251  econtext = GetPerTupleExprContext(estate);
5252  /* Make sure any cruft is generated in the econtext's memory */
5253  tmpcontext = econtext->ecxt_per_tuple_memory;
5254  oldcontext = MemoryContextSwitchTo(tmpcontext);
5255 
5256  /*
5257  * Open the table and index so we can read from them. We should
5258  * already have at least AccessShareLock on the table, but not
5259  * necessarily on the index.
5260  */
5261  heapRel = heap_open(rte->relid, NoLock);
5262  indexRel = index_open(index->indexoid, AccessShareLock);
5263 
5264  /* extract index key information from the index's pg_index info */
5265  indexInfo = BuildIndexInfo(indexRel);
5266 
5267  /* some other stuff */
5268  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel));
5269  econtext->ecxt_scantuple = slot;
5270  get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5271  InitDirtySnapshot(SnapshotDirty);
5272 
5273  /* set up an IS NOT NULL scan key so that we ignore nulls */
5274  ScanKeyEntryInitialize(&scankeys[0],
5276  1, /* index col to scan */
5277  InvalidStrategy, /* no strategy */
5278  InvalidOid, /* no strategy subtype */
5279  InvalidOid, /* no collation */
5280  InvalidOid, /* no reg proc for this */
5281  (Datum) 0); /* constant */
5282 
5283  have_data = true;
5284 
5285  /* If min is requested ... */
5286  if (min)
5287  {
5288  /*
5289  * In principle, we should scan the index with our current
5290  * active snapshot, which is the best approximation we've got
5291  * to what the query will see when executed. But that won't
5292  * be exact if a new snap is taken before running the query,
5293  * and it can be very expensive if a lot of uncommitted rows
5294  * exist at the end of the index (because we'll laboriously
5295  * fetch each one and reject it). What seems like a good
5296  * compromise is to use SnapshotDirty. That will accept
5297  * uncommitted rows, and thus avoid fetching multiple heap
5298  * tuples in this scenario. On the other hand, it will reject
5299  * known-dead rows, and thus not give a bogus answer when the
5300  * extreme value has been deleted; that case motivates not
5301  * using SnapshotAny here.
5302  */
5303  index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5304  1, 0);
5305  index_rescan(index_scan, scankeys, 1, NULL, 0);
5306 
5307  /* Fetch first tuple in sortop's direction */
5308  if ((tup = index_getnext(index_scan,
5309  indexscandir)) != NULL)
5310  {
5311  /* Extract the index column values from the heap tuple */
5312  ExecStoreTuple(tup, slot, InvalidBuffer, false);
5313  FormIndexDatum(indexInfo, slot, estate,
5314  values, isnull);
5315 
5316  /* Shouldn't have got a null, but be careful */
5317  if (isnull[0])
5318  elog(ERROR, "found unexpected null value in index \"%s\"",
5319  RelationGetRelationName(indexRel));
5320 
5321  /* Copy the index column value out to caller's context */
5322  MemoryContextSwitchTo(oldcontext);
5323  *min = datumCopy(values[0], typByVal, typLen);
5324  MemoryContextSwitchTo(tmpcontext);
5325  }
5326  else
5327  have_data = false;
5328 
5329  index_endscan(index_scan);
5330  }
5331 
5332  /* If max is requested, and we didn't find the index is empty */
5333  if (max && have_data)
5334  {
5335  index_scan = index_beginscan(heapRel, indexRel, &SnapshotDirty,
5336  1, 0);
5337  index_rescan(index_scan, scankeys, 1, NULL, 0);
5338 
5339  /* Fetch first tuple in reverse direction */
5340  if ((tup = index_getnext(index_scan,
5341  -indexscandir)) != NULL)
5342  {
5343  /* Extract the index column values from the heap tuple */
5344  ExecStoreTuple(tup, slot, InvalidBuffer, false);
5345  FormIndexDatum(indexInfo, slot, estate,
5346  values, isnull);
5347 
5348  /* Shouldn't have got a null, but be careful */
5349  if (isnull[0])
5350  elog(ERROR, "found unexpected null value in index \"%s\"",
5351  RelationGetRelationName(indexRel));
5352 
5353  /* Copy the index column value out to caller's context */
5354  MemoryContextSwitchTo(oldcontext);
5355  *max = datumCopy(values[0], typByVal, typLen);
5356  MemoryContextSwitchTo(tmpcontext);
5357  }
5358  else
5359  have_data = false;
5360 
5361  index_endscan(index_scan);
5362  }
5363 
5364  /* Clean everything up */
5366 
5367  index_close(indexRel, AccessShareLock);
5368  heap_close(heapRel, NoLock);
5369 
5370  MemoryContextSwitchTo(oldcontext);
5371  FreeExecutorState(estate);
5372 
5373  /* And we're done */
5374  break;
5375  }
5376  }
5377 
5378  return have_data;
5379 }
signed short int16
Definition: c.h:255
void FormIndexDatum(IndexInfo *indexInfo, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
Definition: index.c:1765
#define InvalidStrategy
Definition: stratnum.h:24
#define NIL
Definition: pg_list.h:69
TupleTableSlot * ExecStoreTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer, bool shouldFree)
Definition: execTuples.c:320
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define RelationGetDescr(relation)
Definition: rel.h:429
bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index)
Definition: indxpath.c:3179
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:203
RelOptInfo * rel
Definition: selfuncs.h:70
#define BTREE_AM_OID
Definition: pg_am.h:70
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define AccessShareLock
Definition: lockdefs.h:36
#define InvalidBuffer
Definition: buf.h:25
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:310
Oid * sortopfamily
Definition: relation.h:645
bool hypothetical
Definition: relation.h:665
#define heap_close(r, l)
Definition: heapam.h:97
IndexInfo * BuildIndexInfo(Relation index)
Definition: index.c:1640
Definition: type.h:90
void FreeExecutorState(EState *estate)
Definition: execUtils.c:178
#define GetPerTupleExprContext(estate)
Definition: executor.h:455
#define ERROR
Definition: elog.h:43
#define InitDirtySnapshot(snapshotdata)
Definition: tqual.h:100
#define NoLock
Definition: lockdefs.h:34
void ScanKeyEntryInitialize(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, RegProcedure procedure, Datum argument)
Definition: scankey.c:32
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:216
ScanDirection
Definition: sdir.h:22
#define RelationGetRelationName(relation)
Definition: rel.h:437
#define SK_SEARCHNOTNULL
Definition: skey.h:122
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc)
Definition: execTuples.c:199
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:340
#define SK_ISNULL
Definition: skey.h:115
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:128
EState * CreateExecutorState(void)
Definition: execUtils.c:80
Index relid
Definition: relation.h:552
RangeTblEntry ** simple_rte_array
Definition: relation.h:187
uintptr_t Datum
Definition: postgres.h:372
Relation heap_open(Oid relationId, LOCKMODE lockmode)
Definition: heapam.c:1284
List * indexlist
Definition: relation.h:561
#define InvalidOid
Definition: postgres_ext.h:36
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
#define INDEX_MAX_KEYS
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2001
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:197
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:176
RTEKind rtekind
Definition: parsenodes.h:928
static Datum values[MAXATTR]
Definition: bootstrap.c:163
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:80
#define elog
Definition: elog.h:219
Oid indexoid
Definition: relation.h:630
bool * reverse_sort
Definition: relation.h:646
#define BTLessStrategyNumber
Definition: stratnum.h:29
List * indpred
Definition: relation.h:653
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:151
HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction)
Definition: indexam.c:659
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys)
Definition: indexam.c:221
void get_join_variables ( PlannerInfo root,
List args,
SpecialJoinInfo sjinfo,
VariableStatData vardata1,
VariableStatData vardata2,
bool join_is_reversed 
)

Definition at line 4506 of file selfuncs.c.

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), and networkjoinsel().

4509 {
4510  Node *left,
4511  *right;
4512 
4513  if (list_length(args) != 2)
4514  elog(ERROR, "join operator should take two arguments");
4515 
4516  left = (Node *) linitial(args);
4517  right = (Node *) lsecond(args);
4518 
4519  examine_variable(root, left, 0, vardata1);
4520  examine_variable(root, right, 0, vardata2);
4521 
4522  if (vardata1->rel &&
4523  bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4524  *join_is_reversed = true; /* var1 is on RHS */
4525  else if (vardata2->rel &&
4526  bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4527  *join_is_reversed = true; /* var2 is on LHS */
4528  else
4529  *join_is_reversed = false;
4530 }
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: nodes.h:509
#define lsecond(l)
Definition: pg_list.h:116
Relids syn_lefthand
Definition: relation.h:1918
Relids syn_righthand
Definition: relation.h:1919
#define linitial(l)
Definition: pg_list.h:111
#define ERROR
Definition: elog.h:43
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:308
Relids relids
Definition: relation.h:524
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4565
static int list_length(const List *l)
Definition: pg_list.h:89
#define elog
Definition: elog.h:219
bool get_restriction_variable ( PlannerInfo root,
List args,
int  varRelid,
VariableStatData vardata,
Node **  other,
bool varonleft 
)

Definition at line 4446 of file selfuncs.c.

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, NULL, VariableStatData::rel, ReleaseVariableStats, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel(), ltreeparentsel(), networksel(), patternsel(), rangesel(), scalargtsel(), scalarltsel(), and tsmatchsel().

4449 {
4450  Node *left,
4451  *right;
4452  VariableStatData rdata;
4453 
4454  /* Fail if not a binary opclause (probably shouldn't happen) */
4455  if (list_length(args) != 2)
4456  return false;
4457 
4458  left = (Node *) linitial(args);
4459  right = (Node *) lsecond(args);
4460 
4461  /*
4462  * Examine both sides. Note that when varRelid is nonzero, Vars of other
4463  * relations will be treated as pseudoconstants.
4464  */
4465  examine_variable(root, left, varRelid, vardata);
4466  examine_variable(root, right, varRelid, &rdata);
4467 
4468  /*
4469  * If one side is a variable and the other not, we win.
4470  */
4471  if (vardata->rel && rdata.rel == NULL)
4472  {
4473  *varonleft = true;
4474  *other = estimate_expression_value(root, rdata.var);
4475  /* Assume we need no ReleaseVariableStats(rdata) here */
4476  return true;
4477  }
4478 
4479  if (vardata->rel == NULL && rdata.rel)
4480  {
4481  *varonleft = false;
4482  *other = estimate_expression_value(root, vardata->var);
4483  /* Assume we need no ReleaseVariableStats(*vardata) here */
4484  *vardata = rdata;
4485  return true;
4486  }
4487 
4488  /* Oops, clause has wrong structure (probably var op var) */
4489  ReleaseVariableStats(*vardata);
4490  ReleaseVariableStats(rdata);
4491 
4492  return false;
4493 }
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2433
RelOptInfo * rel
Definition: selfuncs.h:70
Definition: nodes.h:509
#define lsecond(l)
Definition: pg_list.h:116
#define linitial(l)
Definition: pg_list.h:111
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4565
#define NULL
Definition: c.h:229
static int list_length(const List *l)
Definition: pg_list.h:89
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
double get_variable_numdistinct ( VariableStatData vardata,
bool isdefault 
)

Definition at line 4909 of file selfuncs.c.

References BOOLOID, clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT, HeapTupleIsValid, IsA, VariableStatData::isunique, NULL, ObjectIdAttributeNumber, VariableStatData::rel, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel_inner(), eqjoinsel_semi(), estimate_hash_bucketsize(), var_eq_const(), and var_eq_non_const().

4910 {
4911  double stadistinct;
4912  double stanullfrac = 0.0;
4913  double ntuples;
4914 
4915  *isdefault = false;
4916 
4917  /*
4918  * Determine the stadistinct value to use. There are cases where we can
4919  * get an estimate even without a pg_statistic entry, or can get a better
4920  * value than is in pg_statistic. Grab stanullfrac too if we can find it
4921  * (otherwise, assume no nulls, for lack of any better idea).
4922  */
4923  if (HeapTupleIsValid(vardata->statsTuple))
4924  {
4925  /* Use the pg_statistic entry */
4926  Form_pg_statistic stats;
4927 
4928  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4929  stadistinct = stats->stadistinct;
4930  stanullfrac = stats->stanullfrac;
4931  }
4932