PostgreSQL Source Code git master
selfuncs.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include "access/brin.h"
#include "access/brin_page.h"
#include "access/gin.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/visibilitymap.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "parser/parse_clause.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "utils/acl.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/typcache.h"

Data Structures

struct  GroupVarInfo
 
struct  GinQualCounts
 

Macros

#define DEFAULT_PAGE_CPU_MULTIPLIER   50.0
 
#define VISITED_PAGES_LIMIT   100
 

Functions

static double eqsel_internal (PG_FUNCTION_ARGS, bool negate)
 
static double eqjoinsel_inner (Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2)
 
static double eqjoinsel_semi (Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, RelOptInfo *inner_rel)
 
static bool estimate_multivariate_ndistinct (PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
 
static bool convert_to_scalar (Datum value, Oid valuetypid, Oid collid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound)
 
static double convert_numeric_to_scalar (Datum value, Oid typid, bool *failure)
 
static void convert_string_to_scalar (char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
 
static void convert_bytea_to_scalar (Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
 
static double convert_one_string_to_scalar (char *value, int rangelo, int rangehi)
 
static double convert_one_bytea_to_scalar (unsigned char *value, int valuelen, int rangelo, int rangehi)
 
static char * convert_string_datum (Datum value, Oid typid, Oid collid, bool *failure)
 
static double convert_timevalue_to_scalar (Datum value, Oid typid, bool *failure)
 
static void examine_simple_variable (PlannerInfo *root, Var *var, VariableStatData *vardata)
 
static bool get_variable_range (PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
 
static void get_stats_slot_range (AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc, Oid collation, int16 typLen, bool typByVal, Datum *min, Datum *max, bool *p_have_data)
 
static bool get_actual_variable_range (PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
 
static bool get_actual_variable_endpoint (Relation heapRel, Relation indexRel, ScanDirection indexscandir, ScanKey scankeys, int16 typLen, bool typByVal, TupleTableSlot *tableslot, MemoryContext outercontext, Datum *endpointDatum)
 
static RelOptInfo * find_join_input_rel (PlannerInfo *root, Relids relids)
 
Datum eqsel (PG_FUNCTION_ARGS)
 
double var_eq_const (VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
 
double var_eq_non_const (VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
 
Datum neqsel (PG_FUNCTION_ARGS)
 
static double scalarineqsel (PlannerInfo *root, Oid operator, bool isgt, bool iseq, Oid collation, VariableStatData *vardata, Datum constval, Oid consttype)
 
double mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
 
double histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
 
double generic_restriction_selectivity (PlannerInfo *root, Oid oproid, Oid collation, List *args, int varRelid, double default_selectivity)
 
double ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)
 
static Datum scalarineqsel_wrapper (PG_FUNCTION_ARGS, bool isgt, bool iseq)
 
Datum scalarltsel (PG_FUNCTION_ARGS)
 
Datum scalarlesel (PG_FUNCTION_ARGS)
 
Datum scalargtsel (PG_FUNCTION_ARGS)
 
Datum scalargesel (PG_FUNCTION_ARGS)
 
Selectivity boolvarsel (PlannerInfo *root, Node *arg, int varRelid)
 
Selectivity booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Selectivity nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
static Node * strip_array_coercion (Node *node)
 
Selectivity scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
double estimate_array_length (PlannerInfo *root, Node *arrayexpr)
 
Selectivity rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 
Datum eqjoinsel (PG_FUNCTION_ARGS)
 
Datum neqjoinsel (PG_FUNCTION_ARGS)
 
Datum scalarltjoinsel (PG_FUNCTION_ARGS)
 
Datum scalarlejoinsel (PG_FUNCTION_ARGS)
 
Datum scalargtjoinsel (PG_FUNCTION_ARGS)
 
Datum scalargejoinsel (PG_FUNCTION_ARGS)
 
void mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)
 
Datum matchingsel (PG_FUNCTION_ARGS)
 
Datum matchingjoinsel (PG_FUNCTION_ARGS)
 
static List * add_unique_group_var (PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
 
double estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)
 
void estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)
 
double estimate_hashagg_tablesize (PlannerInfo *root, Path *path, const AggClauseCosts *agg_costs, double dNumGroups)
 
bool get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
 
void get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
 
static void ReleaseDummy (HeapTuple tuple)
 
void examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
 
bool statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)
 
double get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)
 
List * get_quals_from_indexclauses (List *indexclauses)
 
Cost index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)
 
void genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
 
List * add_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)
 
void btcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void hashcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gistcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void spgcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
static bool gincost_pattern (IndexOptInfo *index, int indexcol, Oid clause_op, Datum query, GinQualCounts *counts)
 
static bool gincost_opexpr (PlannerInfo *root, IndexOptInfo *index, int indexcol, OpExpr *clause, GinQualCounts *counts)
 
static bool gincost_scalararrayopexpr (PlannerInfo *root, IndexOptInfo *index, int indexcol, ScalarArrayOpExpr *clause, double numIndexEntries, GinQualCounts *counts)
 
void gincostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void brincostestimate (PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 

Variables

get_relation_stats_hook_type get_relation_stats_hook = NULL
 
get_index_stats_hook_type get_index_stats_hook = NULL
 

Macro Definition Documentation

◆ DEFAULT_PAGE_CPU_MULTIPLIER

#define DEFAULT_PAGE_CPU_MULTIPLIER   50.0

Definition at line 145 of file selfuncs.c.

◆ VISITED_PAGES_LIMIT

#define VISITED_PAGES_LIMIT   100

Function Documentation

◆ add_predicate_to_index_quals()

List * add_predicate_to_index_quals ( IndexOptInfo *  index,
List *  indexQuals 
)

Definition at line 6810 of file selfuncs.c.

6811{
6812 List *predExtraQuals = NIL;
6813 ListCell *lc;
6814
6815 if (index->indpred == NIL)
6816 return indexQuals;
6817
6818 foreach(lc, index->indpred)
6819 {
6820 Node *predQual = (Node *) lfirst(lc);
6821 List *oneQual = list_make1(predQual);
6822
6823 if (!predicate_implied_by(oneQual, indexQuals, false))
6824 predExtraQuals = list_concat(predExtraQuals, oneQual);
6825 }
6826 return list_concat(predExtraQuals, indexQuals);
6827}
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
#define lfirst(lc)
Definition: pg_list.h:172
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
bool predicate_implied_by(List *predicate_list, List *clause_list, bool weak)
Definition: predtest.c:152
Definition: pg_list.h:54
Definition: nodes.h:129
Definition: type.h:96

References lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().
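
The effect is easy to picture outside the planner: every partial-index predicate clause that is not already implied by the given index quals is prepended to the list that later goes to clauselist_selectivity(). A minimal standalone sketch of that control flow, using plain strings in place of Node trees and a deliberately naive stand-in for predicate_implied_by() (both hypothetical, not PostgreSQL APIs):

#include <stdio.h>
#include <string.h>

/* Naive stand-in for predicate_implied_by(): a predicate counts as
 * implied only if it appears verbatim among the quals.  The real
 * function reasons about operator semantics. */
static int implied_by(const char *pred, const char **quals, int nquals)
{
    for (int i = 0; i < nquals; i++)
        if (strcmp(pred, quals[i]) == 0)
            return 1;
    return 0;
}

int main(void)
{
    const char *indexQuals[] = {"a > 10", "b = 5"};      /* given index quals */
    const char *indpred[] = {"a > 10", "c IS NOT NULL"}; /* partial-index predicate */
    const char *result[4];
    int         n = 0;

    /* Collect predicate clauses not implied by the quals ... */
    for (int i = 0; i < 2; i++)
        if (!implied_by(indpred[i], indexQuals, 2))
            result[n++] = indpred[i];
    /* ... then append the original quals, mirroring list_concat(). */
    for (int i = 0; i < 2; i++)
        result[n++] = indexQuals[i];

    for (int i = 0; i < n; i++)
        printf("%s\n", result[i]);   /* c IS NOT NULL, a > 10, b = 5 */
    return 0;
}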

◆ add_unique_group_var()

static List * add_unique_group_var ( PlannerInfo *  root,
List *  varinfos,
Node *  var,
VariableStatData *  vardata 
)
static

Definition at line 3300 of file selfuncs.c.

3302{
3303 GroupVarInfo *varinfo;
3304 double ndistinct;
3305 bool isdefault;
3306 ListCell *lc;
3307
3308 ndistinct = get_variable_numdistinct(vardata, &isdefault);
3309
3310 /*
3311 * The nullingrels bits within the var could cause the same var to be
3312 * counted multiple times if it's marked with different nullingrels. They
3313 * could also prevent us from matching the var to the expressions in
3314 * extended statistics (see estimate_multivariate_ndistinct). So strip
3315 * them out first.
3316 */
3317 var = remove_nulling_relids(var, root->outer_join_rels, NULL);
3318
3319 foreach(lc, varinfos)
3320 {
3321 varinfo = (GroupVarInfo *) lfirst(lc);
3322
3323 /* Drop exact duplicates */
3324 if (equal(var, varinfo->var))
3325 return varinfos;
3326
3327 /*
3328 * Drop known-equal vars, but only if they belong to different
3329 * relations (see comments for estimate_num_groups). We aren't too
3330 * fussy about the semantics of "equal" here.
3331 */
3332 if (vardata->rel != varinfo->rel &&
3333 exprs_known_equal(root, var, varinfo->var, InvalidOid))
3334 {
3335 if (varinfo->ndistinct <= ndistinct)
3336 {
3337 /* Keep older item, forget new one */
3338 return varinfos;
3339 }
3340 else
3341 {
3342 /* Delete the older item */
3343 varinfos = foreach_delete_current(varinfos, lc);
3344 }
3345 }
3346 }
3347
3348 varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3349
3350 varinfo->var = var;
3351 varinfo->rel = vardata->rel;
3352 varinfo->ndistinct = ndistinct;
3353 varinfo->isdefault = isdefault;
3354 varinfos = lappend(varinfos, varinfo);
3355 return varinfos;
3356}
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2, Oid opfamily)
Definition: equivclass.c:2499
List * lappend(List *list, void *datum)
Definition: list.c:339
void * palloc(Size size)
Definition: mcxt.c:1317
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
#define InvalidOid
Definition: postgres_ext.h:37
tree ctl root
Definition: radixtree.h:1857
Node * remove_nulling_relids(Node *node, const Bitmapset *removable_relids, const Bitmapset *except_relids)
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:5807
RelOptInfo * rel
Definition: selfuncs.c:3294
double ndistinct
Definition: selfuncs.c:3295
bool isdefault
Definition: selfuncs.c:3296
Node * var
Definition: selfuncs.c:3293
RelOptInfo * rel
Definition: selfuncs.h:88

References equal(), exprs_known_equal(), foreach_delete_current, get_variable_numdistinct(), InvalidOid, GroupVarInfo::isdefault, lappend(), lfirst, GroupVarInfo::ndistinct, palloc(), GroupVarInfo::rel, VariableStatData::rel, remove_nulling_relids(), root, and GroupVarInfo::var.

Referenced by estimate_num_groups().
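
The deduplication rule reduces to: drop exact duplicates, and when two grouping vars from different relations are known equal, keep whichever carries the smaller ndistinct estimate, since it constrains the group count more. A toy illustration of that rule with made-up numbers (the struct and values are hypothetical, not planner types):

#include <stdio.h>

/* Hypothetical stand-in for GroupVarInfo: a relation id plus the
 * estimated number of distinct values of the grouping expression. */
typedef struct { int relid; double ndistinct; } GVar;

int main(void)
{
    GVar existing = {1, 1000.0};  /* t1.x, ~1000 distinct values */
    GVar incoming = {2, 50.0};    /* t2.y, known equal to t1.x, ~50 distinct */

    /* Same policy as add_unique_group_var(): for known-equal vars of
     * different rels, keep whichever promises fewer groups. */
    GVar kept = (existing.ndistinct <= incoming.ndistinct) ? existing : incoming;

    printf("kept rel %d, ndistinct %.0f\n", kept.relid, kept.ndistinct);
    return 0;
}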

◆ booltestsel()

Selectivity booltestsel ( PlannerInfo *  root,
BoolTestType  booltesttype,
Node *  arg,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo *  sjinfo 
)

Definition at line 1541 of file selfuncs.c.

1543{
1544 VariableStatData vardata;
1545 double selec;
1546
1547 examine_variable(root, arg, varRelid, &vardata);
1548
1549 if (HeapTupleIsValid(vardata.statsTuple))
1550 {
1551 Form_pg_statistic stats;
1552 double freq_null;
1553 AttStatsSlot sslot;
1554
1555 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1556 freq_null = stats->stanullfrac;
1557
1558 if (get_attstatsslot(&sslot, vardata.statsTuple,
1559 STATISTIC_KIND_MCV, InvalidOid,
1560 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
1561 && sslot.nnumbers > 0)
1562 {
1563 double freq_true;
1564 double freq_false;
1565
1566 /*
1567 * Get first MCV frequency and derive frequency for true.
1568 */
1569 if (DatumGetBool(sslot.values[0]))
1570 freq_true = sslot.numbers[0];
1571 else
1572 freq_true = 1.0 - sslot.numbers[0] - freq_null;
1573
1574 /*
1575 * Next derive frequency for false. Then use these as appropriate
1576 * to derive frequency for each case.
1577 */
1578 freq_false = 1.0 - freq_true - freq_null;
1579
1580 switch (booltesttype)
1581 {
1582 case IS_UNKNOWN:
1583 /* select only NULL values */
1584 selec = freq_null;
1585 break;
1586 case IS_NOT_UNKNOWN:
1587 /* select non-NULL values */
1588 selec = 1.0 - freq_null;
1589 break;
1590 case IS_TRUE:
1591 /* select only TRUE values */
1592 selec = freq_true;
1593 break;
1594 case IS_NOT_TRUE:
1595 /* select non-TRUE values */
1596 selec = 1.0 - freq_true;
1597 break;
1598 case IS_FALSE:
1599 /* select only FALSE values */
1600 selec = freq_false;
1601 break;
1602 case IS_NOT_FALSE:
1603 /* select non-FALSE values */
1604 selec = 1.0 - freq_false;
1605 break;
1606 default:
1607 elog(ERROR, "unrecognized booltesttype: %d",
1608 (int) booltesttype);
1609 selec = 0.0; /* Keep compiler quiet */
1610 break;
1611 }
1612
1613 free_attstatsslot(&sslot);
1614 }
1615 else
1616 {
1617 /*
1618 * No most-common-value info available. Still have null fraction
1619 * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1620 * for null fraction and assume a 50-50 split of TRUE and FALSE.
1621 */
1622 switch (booltesttype)
1623 {
1624 case IS_UNKNOWN:
1625 /* select only NULL values */
1626 selec = freq_null;
1627 break;
1628 case IS_NOT_UNKNOWN:
1629 /* select non-NULL values */
1630 selec = 1.0 - freq_null;
1631 break;
1632 case IS_TRUE:
1633 case IS_FALSE:
1634 /* Assume we select half of the non-NULL values */
1635 selec = (1.0 - freq_null) / 2.0;
1636 break;
1637 case IS_NOT_TRUE:
1638 case IS_NOT_FALSE:
1639 /* Assume we select NULLs plus half of the non-NULLs */
1640 /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1641 selec = (freq_null + 1.0) / 2.0;
1642 break;
1643 default:
1644 elog(ERROR, "unrecognized booltesttype: %d",
1645 (int) booltesttype);
1646 selec = 0.0; /* Keep compiler quiet */
1647 break;
1648 }
1649 }
1650 }
1651 else
1652 {
1653 /*
1654 * If we can't get variable statistics for the argument, perhaps
1655 * clause_selectivity can do something with it. We ignore the
1656 * possibility of a NULL value when using clause_selectivity, and just
1657 * assume the value is either TRUE or FALSE.
1658 */
1659 switch (booltesttype)
1660 {
1661 case IS_UNKNOWN:
1662 selec = DEFAULT_UNK_SEL;
1663 break;
1664 case IS_NOT_UNKNOWN:
1665 selec = DEFAULT_NOT_UNK_SEL;
1666 break;
1667 case IS_TRUE:
1668 case IS_NOT_FALSE:
1669 selec = (double) clause_selectivity(root, arg,
1670 varRelid,
1671 jointype, sjinfo);
1672 break;
1673 case IS_FALSE:
1674 case IS_NOT_TRUE:
1675 selec = 1.0 - (double) clause_selectivity(root, arg,
1676 varRelid,
1677 jointype, sjinfo);
1678 break;
1679 default:
1680 elog(ERROR, "unrecognized booltesttype: %d",
1681 (int) booltesttype);
1682 selec = 0.0; /* Keep compiler quiet */
1683 break;
1684 }
1685 }
1686
1687 ReleaseVariableStats(vardata);
1688
1689 /* result should be in range, but make sure... */
1690 CLAMP_PROBABILITY(selec);
1691
1692 return (Selectivity) selec;
1693}
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:667
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
Definition: htup_details.h:728
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3371
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3261
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
double Selectivity
Definition: nodes.h:250
void * arg
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
@ IS_NOT_TRUE
Definition: primnodes.h:1981
@ IS_NOT_FALSE
Definition: primnodes.h:1981
@ IS_NOT_UNKNOWN
Definition: primnodes.h:1981
@ IS_TRUE
Definition: primnodes.h:1981
@ IS_UNKNOWN
Definition: primnodes.h:1981
@ IS_FALSE
Definition: primnodes.h:1981
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:5033
#define DEFAULT_NOT_UNK_SEL
Definition: selfuncs.h:56
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:100
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_UNK_SEL
Definition: selfuncs.h:55
Datum * values
Definition: lsyscache.h:53
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57
HeapTuple statsTuple
Definition: selfuncs.h:89

References arg, ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool(), DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, root, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity_ext().
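
When an MCV slot is available, the whole estimate reduces to a little arithmetic on the null fraction and the frequency of the most common value. A standalone sketch of that arithmetic with made-up statistics (10% NULLs, most common value 'true' at frequency 0.60):

#include <stdio.h>

int main(void)
{
    /* Hypothetical pg_statistic values, mirroring the case where
     * DatumGetBool(sslot.values[0]) is true. */
    double freq_null = 0.10;
    double freq_true = 0.60;
    double freq_false = 1.0 - freq_true - freq_null;      /* 0.30 */

    printf("IS TRUE        -> %.2f\n", freq_true);        /* 0.60 */
    printf("IS NOT TRUE    -> %.2f\n", 1.0 - freq_true);  /* 0.40 */
    printf("IS FALSE       -> %.2f\n", freq_false);       /* 0.30 */
    printf("IS UNKNOWN     -> %.2f\n", freq_null);        /* 0.10 */
    printf("IS NOT UNKNOWN -> %.2f\n", 1.0 - freq_null);  /* 0.90 */
    return 0;
}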

◆ boolvarsel()

Selectivity boolvarsel ( PlannerInfo *  root,
Node *  arg,
int  varRelid 
)

Definition at line 1513 of file selfuncs.c.

1514{
1515 VariableStatData vardata;
1516 double selec;
1517
1518 examine_variable(root, arg, varRelid, &vardata);
1519 if (HeapTupleIsValid(vardata.statsTuple))
1520 {
1521 /*
1522 * A boolean variable V is equivalent to the clause V = 't', so we
1523 * compute the selectivity as if that is what we have.
1524 */
1525 selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
1526 BoolGetDatum(true), false, true, false);
1527 }
1528 else
1529 {
1530 /* Otherwise, the default estimate is 0.5 */
1531 selec = 0.5;
1532 }
1533 ReleaseVariableStats(vardata);
1534 return selec;
1535}
static Datum BoolGetDatum(bool X)
Definition: postgres.h:107
double var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)
Definition: selfuncs.c:296

References arg, BoolGetDatum(), examine_variable(), HeapTupleIsValid, InvalidOid, ReleaseVariableStats, root, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity_ext().

◆ brincostestimate()

void brincostestimate ( PlannerInfo *  root,
IndexPath *  path,
double  loop_count,
Cost *  indexStartupCost,
Cost *  indexTotalCost,
Selectivity *  indexSelectivity,
double *  indexCorrelation,
double *  indexPages 
)

Definition at line 8016 of file selfuncs.c.

8020{
8021 IndexOptInfo *index = path->indexinfo;
8022 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8023 double numPages = index->pages;
8024 RelOptInfo *baserel = index->rel;
8025 RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
8026 Cost spc_seq_page_cost;
8027 Cost spc_random_page_cost;
8028 double qual_arg_cost;
8029 double qualSelectivity;
8030 BrinStatsData statsData;
8031 double indexRanges;
8032 double minimalRanges;
8033 double estimatedRanges;
8034 double selec;
8035 Relation indexRel;
8036 ListCell *l;
8037 VariableStatData vardata;
8038
8039 Assert(rte->rtekind == RTE_RELATION);
8040
8041 /* fetch estimated page cost for the tablespace containing the index */
8042 get_tablespace_page_costs(index->reltablespace,
8043 &spc_random_page_cost,
8044 &spc_seq_page_cost);
8045
8046 /*
8047 * Obtain some data from the index itself, if possible. Otherwise invent
8048 * some plausible internal statistics based on the relation page count.
8049 */
8050 if (!index->hypothetical)
8051 {
8052 /*
8053 * A lock should have already been obtained on the index in plancat.c.
8054 */
8055 indexRel = index_open(index->indexoid, NoLock);
8056 brinGetStats(indexRel, &statsData);
8057 index_close(indexRel, NoLock);
8058
8059 /* work out the actual number of ranges in the index */
8060 indexRanges = Max(ceil((double) baserel->pages /
8061 statsData.pagesPerRange), 1.0);
8062 }
8063 else
8064 {
8065 /*
8066 * Assume default number of pages per range, and estimate the number
8067 * of ranges based on that.
8068 */
8069 indexRanges = Max(ceil((double) baserel->pages /
8070 BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);
8071
8072 statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
8073 statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
8074 }
8075
8076 /*
8077 * Compute index correlation
8078 *
8079 * Because we can use all index quals equally when scanning, we can use
8080 * the largest correlation (in absolute value) among columns used by the
8081 * query. Start at zero, the worst possible case. If we cannot find any
8082 * correlation statistics, we will keep it as 0.
8083 */
8084 *indexCorrelation = 0;
8085
8086 foreach(l, path->indexclauses)
8087 {
8088 IndexClause *iclause = lfirst_node(IndexClause, l);
8089 AttrNumber attnum = index->indexkeys[iclause->indexcol];
8090
8091 /* attempt to lookup stats in relation for this index column */
8092 if (attnum != 0)
8093 {
8094 /* Simple variable -- look to stats for the underlying table */
8095 if (get_relation_stats_hook &&
8096 (*get_relation_stats_hook) (root, rte, attnum, &vardata))
8097 {
8098 /*
8099 * The hook took control of acquiring a stats tuple. If it
8100 * did supply a tuple, it'd better have supplied a freefunc.
8101 */
8102 if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
8103 elog(ERROR,
8104 "no function provided to release variable stats with");
8105 }
8106 else
8107 {
8108 vardata.statsTuple =
8109 SearchSysCache3(STATRELATTINH,
8110 ObjectIdGetDatum(rte->relid),
8111 Int16GetDatum(attnum),
8112 BoolGetDatum(false));
8113 vardata.freefunc = ReleaseSysCache;
8114 }
8115 }
8116 else
8117 {
8118 /*
8119 * Looks like we've found an expression column in the index. Let's
8120 * see if there's any stats for it.
8121 */
8122
8123 /* get the attnum from the 0-based index. */
8124 attnum = iclause->indexcol + 1;
8125
8127 (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
8128 {
8129 /*
8130 * The hook took control of acquiring a stats tuple. If it
8131 * did supply a tuple, it'd better have supplied a freefunc.
8132 */
8133 if (HeapTupleIsValid(vardata.statsTuple) &&
8134 !vardata.freefunc)
8135 elog(ERROR, "no function provided to release variable stats with");
8136 }
8137 else
8138 {
8139 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
8140 ObjectIdGetDatum(index->indexoid),
8141 Int16GetDatum(attnum),
8142 BoolGetDatum(false));
8143 vardata.freefunc = ReleaseSysCache;
8144 }
8145 }
8146
8147 if (HeapTupleIsValid(vardata.statsTuple))
8148 {
8149 AttStatsSlot sslot;
8150
8151 if (get_attstatsslot(&sslot, vardata.statsTuple,
8152 STATISTIC_KIND_CORRELATION, InvalidOid,
8153 ATTSTATSSLOT_NUMBERS))
8154 {
8155 double varCorrelation = 0.0;
8156
8157 if (sslot.nnumbers > 0)
8158 varCorrelation = fabs(sslot.numbers[0]);
8159
8160 if (varCorrelation > *indexCorrelation)
8161 *indexCorrelation = varCorrelation;
8162
8163 free_attstatsslot(&sslot);
8164 }
8165 }
8166
8167 ReleaseVariableStats(vardata);
8168 }
8169
8170 qualSelectivity = clauselist_selectivity(root, indexQuals,
8171 baserel->relid,
8172 JOIN_INNER, NULL);
8173
8174 /*
8175 * Now calculate the minimum possible ranges we could match with if all of
8176 * the rows were in the perfect order in the table's heap.
8177 */
8178 minimalRanges = ceil(indexRanges * qualSelectivity);
8179
8180 /*
8181 * Now estimate the number of ranges that we'll touch by using the
8182 * indexCorrelation from the stats. Careful not to divide by zero (note
8183 * we're using the absolute value of the correlation).
8184 */
8185 if (*indexCorrelation < 1.0e-10)
8186 estimatedRanges = indexRanges;
8187 else
8188 estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
8189
8190 /* we expect to visit this portion of the table */
8191 selec = estimatedRanges / indexRanges;
8192
8193 CLAMP_PROBABILITY(selec);
8194
8195 *indexSelectivity = selec;
8196
8197 /*
8198 * Compute the index qual costs, much as in genericcostestimate, to add to
8199 * the index costs. We can disregard indexorderbys, since BRIN doesn't
8200 * support those.
8201 */
8202 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8203
8204 /*
8205 * Compute the startup cost as the cost to read the whole revmap
8206 * sequentially, including the cost to execute the index quals.
8207 */
8208 *indexStartupCost =
8209 spc_seq_page_cost * statsData.revmapNumPages * loop_count;
8210 *indexStartupCost += qual_arg_cost;
8211
8212 /*
8213 * To read a BRIN index there might be a bit of back and forth over
8214 * regular pages, as revmap might point to them out of sequential order;
8215 * calculate the total cost as reading the whole index in random order.
8216 */
8217 *indexTotalCost = *indexStartupCost +
8218 spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
8219
8220 /*
8221 * Charge a small amount per range tuple which we expect to match to. This
8222 * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
8223 * will set a bit for each page in the range when we find a matching
8224 * range, so we must multiply the charge by the number of pages in the
8225 * range.
8226 */
8227 *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
8228 statsData.pagesPerRange;
8229
8230 *indexPages = index->pages;
8231}
int16 AttrNumber
Definition: attnum.h:21
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1642
#define BRIN_DEFAULT_PAGES_PER_RANGE
Definition: brin.h:39
#define REVMAP_PAGE_MAXITEMS
Definition: brin_page.h:93
#define Min(x, y)
Definition: c.h:961
#define Max(x, y)
Definition: c.h:955
#define Assert(condition)
Definition: c.h:815
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:100
double cpu_operator_cost
Definition: costsize.c:134
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
#define NoLock
Definition: lockdefs.h:34
double Cost
Definition: nodes.h:251
@ JOIN_INNER
Definition: nodes.h:293
@ RTE_RELATION
Definition: parsenodes.h:1026
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:594
int16 attnum
Definition: pg_attribute.h:74
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:177
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:6503
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:149
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:6533
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:148
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
BlockNumber revmapNumPages
Definition: brin.h:35
BlockNumber pagesPerRange
Definition: brin.h:34
AttrNumber indexcol
Definition: pathnodes.h:1797
List * indexclauses
Definition: pathnodes.h:1747
IndexOptInfo * indexinfo
Definition: pathnodes.h:1746
RTEKind rtekind
Definition: parsenodes.h:1056
Index relid
Definition: pathnodes.h:942
BlockNumber pages
Definition: pathnodes.h:972
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:91
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:243

References Assert, attnum, ATTSTATSSLOT_NUMBERS, BoolGetDatum(), BRIN_DEFAULT_PAGES_PER_RANGE, brinGetStats(), CLAMP_PROBABILITY, clauselist_selectivity(), cpu_operator_cost, elog, ERROR, free_attstatsslot(), VariableStatData::freefunc, get_attstatsslot(), get_index_stats_hook, get_quals_from_indexclauses(), get_relation_stats_hook, get_tablespace_page_costs(), HeapTupleIsValid, index_close(), index_open(), index_other_operands_eval_cost(), IndexPath::indexclauses, IndexClause::indexcol, IndexPath::indexinfo, Int16GetDatum(), InvalidOid, JOIN_INNER, lfirst_node, Max, Min, AttStatsSlot::nnumbers, NoLock, AttStatsSlot::numbers, ObjectIdGetDatum(), RelOptInfo::pages, BrinStatsData::pagesPerRange, planner_rt_fetch, ReleaseSysCache(), ReleaseVariableStats, RangeTblEntry::relid, RelOptInfo::relid, REVMAP_PAGE_MAXITEMS, BrinStatsData::revmapNumPages, root, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), and VariableStatData::statsTuple.

Referenced by brinhandler().
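
The heart of the selectivity estimate is a handful of arithmetic steps over block ranges. A standalone sketch with made-up inputs (10,000 heap pages, 128 pages per range, qual selectivity 0.01, correlation 0.9), compile with -lm:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double relPages = 10000.0, pagesPerRange = 128.0;
    double qualSelectivity = 0.01, indexCorrelation = 0.9;

    /* Number of ranges covered by the index. */
    double indexRanges = fmax(ceil(relPages / pagesPerRange), 1.0);
    /* Fewest ranges we could match if the heap were perfectly ordered. */
    double minimalRanges = ceil(indexRanges * qualSelectivity);
    /* Scale up by (lack of) correlation, but never beyond all ranges. */
    double estimatedRanges = (indexCorrelation < 1.0e-10)
        ? indexRanges
        : fmin(minimalRanges / indexCorrelation, indexRanges);
    double selec = estimatedRanges / indexRanges;

    printf("ranges=%.0f minimal=%.0f estimated=%.2f selectivity=%.4f\n",
           indexRanges, minimalRanges, estimatedRanges, selec);
    return 0;
}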

◆ btcostestimate()

void btcostestimate ( PlannerInfo *  root,
IndexPath *  path,
double  loop_count,
Cost *  indexStartupCost,
Cost *  indexTotalCost,
Selectivity *  indexSelectivity,
double *  indexCorrelation,
double *  indexPages 
)

Definition at line 6831 of file selfuncs.c.

6835{
6836 IndexOptInfo *index = path->indexinfo;
6837 GenericCosts costs = {0};
6838 Oid relid;
6839 AttrNumber colnum;
6840 VariableStatData vardata = {0};
6841 double numIndexTuples;
6842 Cost descentCost;
6843 List *indexBoundQuals;
6844 int indexcol;
6845 bool eqQualHere;
6846 bool found_saop;
6847 bool found_is_null_op;
6848 double num_sa_scans;
6849 ListCell *lc;
6850
6851 /*
6852 * For a btree scan, only leading '=' quals plus inequality quals for the
6853 * immediately next attribute contribute to index selectivity (these are
6854 * the "boundary quals" that determine the starting and stopping points of
6855 * the index scan). Additional quals can suppress visits to the heap, so
6856 * it's OK to count them in indexSelectivity, but they should not count
6857 * for estimating numIndexTuples. So we must examine the given indexquals
6858 * to find out which ones count as boundary quals. We rely on the
6859 * knowledge that they are given in index column order.
6860 *
6861 * For a RowCompareExpr, we consider only the first column, just as
6862 * rowcomparesel() does.
6863 *
6864 * If there's a ScalarArrayOpExpr in the quals, we'll actually perform up
6865 * to N index descents (not just one), but the ScalarArrayOpExpr's
6866 * operator can be considered to act the same as it normally does.
6867 */
6868 indexBoundQuals = NIL;
6869 indexcol = 0;
6870 eqQualHere = false;
6871 found_saop = false;
6872 found_is_null_op = false;
6873 num_sa_scans = 1;
6874 foreach(lc, path->indexclauses)
6875 {
6876 IndexClause *iclause = lfirst_node(IndexClause, lc);
6877 ListCell *lc2;
6878
6879 if (indexcol != iclause->indexcol)
6880 {
6881 /* Beginning of a new column's quals */
6882 if (!eqQualHere)
6883 break; /* done if no '=' qual for indexcol */
6884 eqQualHere = false;
6885 indexcol++;
6886 if (indexcol != iclause->indexcol)
6887 break; /* no quals at all for indexcol */
6888 }
6889
6890 /* Examine each indexqual associated with this index clause */
6891 foreach(lc2, iclause->indexquals)
6892 {
6893 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6894 Expr *clause = rinfo->clause;
6895 Oid clause_op = InvalidOid;
6896 int op_strategy;
6897
6898 if (IsA(clause, OpExpr))
6899 {
6900 OpExpr *op = (OpExpr *) clause;
6901
6902 clause_op = op->opno;
6903 }
6904 else if (IsA(clause, RowCompareExpr))
6905 {
6906 RowCompareExpr *rc = (RowCompareExpr *) clause;
6907
6908 clause_op = linitial_oid(rc->opnos);
6909 }
6910 else if (IsA(clause, ScalarArrayOpExpr))
6911 {
6912 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6913 Node *other_operand = (Node *) lsecond(saop->args);
6914 double alength = estimate_array_length(root, other_operand);
6915
6916 clause_op = saop->opno;
6917 found_saop = true;
6918 /* estimate SA descents by indexBoundQuals only */
6919 if (alength > 1)
6920 num_sa_scans *= alength;
6921 }
6922 else if (IsA(clause, NullTest))
6923 {
6924 NullTest *nt = (NullTest *) clause;
6925
6926 if (nt->nulltesttype == IS_NULL)
6927 {
6928 found_is_null_op = true;
6929 /* IS NULL is like = for selectivity purposes */
6930 eqQualHere = true;
6931 }
6932 }
6933 else
6934 elog(ERROR, "unsupported indexqual type: %d",
6935 (int) nodeTag(clause));
6936
6937 /* check for equality operator */
6938 if (OidIsValid(clause_op))
6939 {
6940 op_strategy = get_op_opfamily_strategy(clause_op,
6941 index->opfamily[indexcol]);
6942 Assert(op_strategy != 0); /* not a member of opfamily?? */
6943 if (op_strategy == BTEqualStrategyNumber)
6944 eqQualHere = true;
6945 }
6946
6947 indexBoundQuals = lappend(indexBoundQuals, rinfo);
6948 }
6949 }
6950
6951 /*
6952 * If index is unique and we found an '=' clause for each column, we can
6953 * just assume numIndexTuples = 1 and skip the expensive
6954 * clauselist_selectivity calculations. However, a ScalarArrayOp or
6955 * NullTest invalidates that theory, even though it sets eqQualHere.
6956 */
6957 if (index->unique &&
6958 indexcol == index->nkeycolumns - 1 &&
6959 eqQualHere &&
6960 !found_saop &&
6961 !found_is_null_op)
6962 numIndexTuples = 1.0;
6963 else
6964 {
6965 List *selectivityQuals;
6966 Selectivity btreeSelectivity;
6967
6968 /*
6969 * If the index is partial, AND the index predicate with the
6970 * index-bound quals to produce a more accurate idea of the number of
6971 * rows covered by the bound conditions.
6972 */
6973 selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
6974
6975 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6976 index->rel->relid,
6977 JOIN_INNER,
6978 NULL);
6979 numIndexTuples = btreeSelectivity * index->rel->tuples;
6980
6981 /*
6982 * btree automatically combines individual ScalarArrayOpExpr primitive
6983 * index scans whenever the tuples covered by the next set of array
6984 * keys are close to tuples covered by the current set. That puts a
6985 * natural ceiling on the worst case number of descents -- there
6986 * cannot possibly be more than one descent per leaf page scanned.
6987 *
6988 * Clamp the number of descents to at most 1/3 the number of index
6989 * pages. This avoids implausibly high estimates with low selectivity
6990 * paths, where scans usually require only one or two descents. This
6991 * is most likely to help when there are several SAOP clauses, where
6992 * naively accepting the total number of distinct combinations of
6993 * array elements as the number of descents would frequently lead to
6994 * wild overestimates.
6995 *
6996 * We somewhat arbitrarily don't just make the cutoff the total number
6997 * of leaf pages (we make it 1/3 the total number of pages instead) to
6998 * give the btree code credit for its ability to continue on the leaf
6999 * level with low selectivity scans.
7000 */
7001 num_sa_scans = Min(num_sa_scans, ceil(index->pages * 0.3333333));
7002 num_sa_scans = Max(num_sa_scans, 1);
7003
7004 /*
7005 * As in genericcostestimate(), we have to adjust for any
7006 * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
7007 * to integer.
7008 *
7009 * It is tempting to make genericcostestimate behave as if SAOP
7010 * clauses work in almost the same way as scalar operators during
7011 * btree scans, making the top-level scan look like a continuous scan
7012 * (as opposed to num_sa_scans-many primitive index scans). After
7013 * all, btree scans mostly work like that at runtime. However, such a
7014 * scheme would badly bias genericcostestimate's simplistic approach
7015 * to calculating numIndexPages through prorating.
7016 *
7017 * Stick with the approach taken by non-native SAOP scans for now.
7018 * genericcostestimate will use the Mackert-Lohman formula to
7019 * compensate for repeat page fetches, even though that definitely
7020 * won't happen during btree scans (not for leaf pages, at least).
7021 * We're usually very pessimistic about the number of primitive index
7022 * scans that will be required, but it's not clear how to do better.
7023 */
7024 numIndexTuples = rint(numIndexTuples / num_sa_scans);
7025 }
7026
7027 /*
7028 * Now do generic index cost estimation.
7029 */
7030 costs.numIndexTuples = numIndexTuples;
7031 costs.num_sa_scans = num_sa_scans;
7032
7033 genericcostestimate(root, path, loop_count, &costs);
7034
7035 /*
7036 * Add a CPU-cost component to represent the costs of initial btree
7037 * descent. We don't charge any I/O cost for touching upper btree levels,
7038 * since they tend to stay in cache, but we still have to do about log2(N)
7039 * comparisons to descend a btree of N leaf tuples. We charge one
7040 * cpu_operator_cost per comparison.
7041 *
7042 * If there are ScalarArrayOpExprs, charge this once per estimated SA
7043 * index descent. The ones after the first one are not startup cost so
7044 * far as the overall plan goes, so just add them to "total" cost.
7045 */
7046 if (index->tuples > 1) /* avoid computing log(0) */
7047 {
7048 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
7049 costs.indexStartupCost += descentCost;
7050 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7051 }
7052
7053 /*
7054 * Even though we're not charging I/O cost for touching upper btree pages,
7055 * it's still reasonable to charge some CPU cost per page descended
7056 * through. Moreover, if we had no such charge at all, bloated indexes
7057 * would appear to have the same search cost as unbloated ones, at least
7058 * in cases where only a single leaf page is expected to be visited. This
7059 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
7060 * touched. The number of such pages is btree tree height plus one (ie,
7061 * we charge for the leaf page too). As above, charge once per estimated
7062 * SA index descent.
7063 */
7064 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7065 costs.indexStartupCost += descentCost;
7066 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7067
7068 /*
7069 * If we can get an estimate of the first column's ordering correlation C
7070 * from pg_statistic, estimate the index correlation as C for a
7071 * single-column index, or C * 0.75 for multiple columns. (The idea here
7072 * is that multiple columns dilute the importance of the first column's
7073 * ordering, but don't negate it entirely. Before 8.0 we divided the
7074 * correlation by the number of columns, but that seems too strong.)
7075 */
7076 if (index->indexkeys[0] != 0)
7077 {
7078 /* Simple variable --- look to stats for the underlying table */
7079 RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
7080
7081 Assert(rte->rtekind == RTE_RELATION);
7082 relid = rte->relid;
7083 Assert(relid != InvalidOid);
7084 colnum = index->indexkeys[0];
7085
7087 (*get_relation_stats_hook) (root, rte, colnum, &vardata))
7088 {
7089 /*
7090 * The hook took control of acquiring a stats tuple. If it did
7091 * supply a tuple, it'd better have supplied a freefunc.
7092 */
7093 if (HeapTupleIsValid(vardata.statsTuple) &&
7094 !vardata.freefunc)
7095 elog(ERROR, "no function provided to release variable stats with");
7096 }
7097 else
7098 {
7099 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7100 ObjectIdGetDatum(relid),
7101 Int16GetDatum(colnum),
7102 BoolGetDatum(rte->inh));
7103 vardata.freefunc = ReleaseSysCache;
7104 }
7105 }
7106 else
7107 {
7108 /* Expression --- maybe there are stats for the index itself */
7109 relid = index->indexoid;
7110 colnum = 1;
7111
7113 (*get_index_stats_hook) (root, relid, colnum, &vardata))
7114 {
7115 /*
7116 * The hook took control of acquiring a stats tuple. If it did
7117 * supply a tuple, it'd better have supplied a freefunc.
7118 */
7119 if (HeapTupleIsValid(vardata.statsTuple) &&
7120 !vardata.freefunc)
7121 elog(ERROR, "no function provided to release variable stats with");
7122 }
7123 else
7124 {
7125 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7126 ObjectIdGetDatum(relid),
7127 Int16GetDatum(colnum),
7128 BoolGetDatum(false));
7129 vardata.freefunc = ReleaseSysCache;
7130 }
7131 }
7132
7133 if (HeapTupleIsValid(vardata.statsTuple))
7134 {
7135 Oid sortop;
7136 AttStatsSlot sslot;
7137
7138 sortop = get_opfamily_member(index->opfamily[0],
7139 index->opcintype[0],
7140 index->opcintype[0],
7141 BTLessStrategyNumber);
7142 if (OidIsValid(sortop) &&
7143 get_attstatsslot(&sslot, vardata.statsTuple,
7144 STATISTIC_KIND_CORRELATION, sortop,
7145 ATTSTATSSLOT_NUMBERS))
7146 {
7147 double varCorrelation;
7148
7149 Assert(sslot.nnumbers == 1);
7150 varCorrelation = sslot.numbers[0];
7151
7152 if (index->reverse_sort[0])
7153 varCorrelation = -varCorrelation;
7154
7155 if (index->nkeycolumns > 1)
7156 costs.indexCorrelation = varCorrelation * 0.75;
7157 else
7158 costs.indexCorrelation = varCorrelation;
7159
7160 free_attstatsslot(&sslot);
7161 }
7162 }
7163
7164 ReleaseVariableStats(vardata);
7165
7166 *indexStartupCost = costs.indexStartupCost;
7167 *indexTotalCost = costs.indexTotalCost;
7168 *indexSelectivity = costs.indexSelectivity;
7169 *indexCorrelation = costs.indexCorrelation;
7170 *indexPages = costs.numIndexPages;
7171}
#define OidIsValid(objectId)
Definition: c.h:732
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:84
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:167
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define nodeTag(nodeptr)
Definition: nodes.h:133
#define lsecond(l)
Definition: pg_list.h:183
#define linitial_oid(l)
Definition: pg_list.h:180
unsigned int Oid
Definition: postgres_ext.h:32
@ IS_NULL
Definition: primnodes.h:1957
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:6810
#define DEFAULT_PAGE_CPU_MULTIPLIER
Definition: selfuncs.c:145
double estimate_array_length(PlannerInfo *root, Node *arrayexpr)
Definition: selfuncs.c:2140
void genericcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
Definition: selfuncs.c:6587
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Selectivity indexSelectivity
Definition: selfuncs.h:128
Cost indexStartupCost
Definition: selfuncs.h:126
double indexCorrelation
Definition: selfuncs.h:129
double num_sa_scans
Definition: selfuncs.h:135
Cost indexTotalCost
Definition: selfuncs.h:127
double numIndexPages
Definition: selfuncs.h:132
double numIndexTuples
Definition: selfuncs.h:133
List * indexquals
Definition: pathnodes.h:1795
NullTestType nulltesttype
Definition: primnodes.h:1964
Oid opno
Definition: primnodes.h:835
Expr * clause
Definition: pathnodes.h:2599

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, Assert, ATTSTATSSLOT_NUMBERS, BoolGetDatum(), BTEqualStrategyNumber, BTLessStrategyNumber, RestrictInfo::clause, clauselist_selectivity(), cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, elog, ERROR, estimate_array_length(), free_attstatsslot(), VariableStatData::freefunc, genericcostestimate(), get_attstatsslot(), get_index_stats_hook, get_op_opfamily_strategy(), get_opfamily_member(), get_relation_stats_hook, HeapTupleIsValid, IndexPath::indexclauses, IndexClause::indexcol, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexClause::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, RangeTblEntry::inh, Int16GetDatum(), InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), lfirst_node, linitial_oid, lsecond, Max, Min, NIL, AttStatsSlot::nnumbers, nodeTag, NullTest::nulltesttype, GenericCosts::num_sa_scans, AttStatsSlot::numbers, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, ObjectIdGetDatum(), OidIsValid, OpExpr::opno, ScalarArrayOpExpr::opno, planner_rt_fetch, ReleaseSysCache(), ReleaseVariableStats, RangeTblEntry::relid, root, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), and VariableStatData::statsTuple.

Referenced by bthandler().
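
The two CPU descent charges layered on top of genericcostestimate() follow the comments in the code above: roughly log2(N) comparisons to locate the starting leaf, plus DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost for each of tree_height + 1 pages descended. A standalone sketch with illustrative numbers (1,000,000 index tuples, tree height 2, PostgreSQL's default cpu_operator_cost of 0.0025), compile with -lm:

#include <math.h>
#include <stdio.h>

#define DEFAULT_PAGE_CPU_MULTIPLIER 50.0

int main(void)
{
    double indexTuples = 1000000.0;
    int    treeHeight = 2;
    double cpu_operator_cost = 0.0025;   /* default value of the GUC */

    /* About log2(N) comparisons to descend to the first leaf page. */
    double comparisonCost = ceil(log(indexTuples) / log(2.0)) * cpu_operator_cost;
    /* Plus a per-page CPU charge for tree_height + 1 pages touched. */
    double pageCost = (treeHeight + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;

    printf("descent comparisons: %.4f\n", comparisonCost);  /* 20 * 0.0025 = 0.0500 */
    printf("descent pages:       %.4f\n", pageCost);        /* 3 * 50 * 0.0025 = 0.3750 */
    return 0;
}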

◆ convert_bytea_to_scalar()

static void convert_bytea_to_scalar ( Datum  value,
double *  scaledvalue,
Datum  lobound,
double *  scaledlobound,
Datum  hibound,
double *  scaledhibound 
)
static

Definition at line 4747 of file selfuncs.c.

4753{
4754 bytea *valuep = DatumGetByteaPP(value);
4755 bytea *loboundp = DatumGetByteaPP(lobound);
4756 bytea *hiboundp = DatumGetByteaPP(hibound);
4757 int rangelo,
4758 rangehi,
4759 valuelen = VARSIZE_ANY_EXHDR(valuep),
4760 loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4761 hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
4762 i,
4763 minlen;
4764 unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
4765 unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
4766 unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
4767
4768 /*
4769 * Assume bytea data is uniformly distributed across all byte values.
4770 */
4771 rangelo = 0;
4772 rangehi = 255;
4773
4774 /*
4775 * Now strip any common prefix of the three strings.
4776 */
4777 minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4778 for (i = 0; i < minlen; i++)
4779 {
4780 if (*lostr != *histr || *lostr != *valstr)
4781 break;
4782 lostr++, histr++, valstr++;
4783 loboundlen--, hiboundlen--, valuelen--;
4784 }
4785
4786 /*
4787 * Now we can do the conversions.
4788 */
4789 *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4790 *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4791 *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4792}
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
static struct @162 value
int i
Definition: isn.c:72
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi)
Definition: selfuncs.c:4795
Definition: c.h:644
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References convert_one_bytea_to_scalar(), DatumGetByteaPP, i, Min, value, VARDATA_ANY, and VARSIZE_ANY_EXHDR.

Referenced by convert_to_scalar().

◆ convert_numeric_to_scalar()

static double convert_numeric_to_scalar ( Datum  value,
Oid  typid,
bool *  failure 
)
static

Definition at line 4466 of file selfuncs.c.

4467{
4468 switch (typid)
4469 {
4470 case BOOLOID:
4471 return (double) DatumGetBool(value);
4472 case INT2OID:
4473 return (double) DatumGetInt16(value);
4474 case INT4OID:
4475 return (double) DatumGetInt32(value);
4476 case INT8OID:
4477 return (double) DatumGetInt64(value);
4478 case FLOAT4OID:
4479 return (double) DatumGetFloat4(value);
4480 case FLOAT8OID:
4481 return (double) DatumGetFloat8(value);
4482 case NUMERICOID:
4483 /* Note: out-of-range values will be clamped to +-HUGE_VAL */
4484 return (double)
4485 DirectFunctionCall1(numeric_float8_no_overflow,
4486 value));
4487 case OIDOID:
4488 case REGPROCOID:
4489 case REGPROCEDUREOID:
4490 case REGOPEROID:
4491 case REGOPERATOROID:
4492 case REGCLASSOID:
4493 case REGTYPEOID:
4494 case REGCOLLATIONOID:
4495 case REGCONFIGOID:
4496 case REGDICTIONARYOID:
4497 case REGROLEOID:
4498 case REGNAMESPACEOID:
4499 /* we can treat OIDs as integers... */
4500 return (double) DatumGetObjectId(value);
4501 }
4502
4503 *failure = true;
4504 return 0;
4505}
Datum numeric_float8_no_overflow(PG_FUNCTION_ARGS)
Definition: numeric.c:4779
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:390
static float4 DatumGetFloat4(Datum X)
Definition: postgres.h:463
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:247
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:499
static int16 DatumGetInt16(Datum X)
Definition: postgres.h:167
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:207

References DatumGetBool(), DatumGetFloat4(), DatumGetFloat8(), DatumGetInt16(), DatumGetInt32(), DatumGetInt64(), DatumGetObjectId(), DirectFunctionCall1, numeric_float8_no_overflow(), and value.

Referenced by convert_to_scalar().

◆ convert_one_bytea_to_scalar()

static double convert_one_bytea_to_scalar ( unsigned char *  value,
int  valuelen,
int  rangelo,
int  rangehi 
)
static

Definition at line 4795 of file selfuncs.c.

4797{
4798 double num,
4799 denom,
4800 base;
4801
4802 if (valuelen <= 0)
4803 return 0.0; /* empty string has scalar value 0 */
4804
4805 /*
4806 * Since base is 256, need not consider more than about 10 chars (even
4807 * this many seems like overkill)
4808 */
4809 if (valuelen > 10)
4810 valuelen = 10;
4811
4812 /* Convert initial characters to fraction */
4813 base = rangehi - rangelo + 1;
4814 num = 0.0;
4815 denom = base;
4816 while (valuelen-- > 0)
4817 {
4818 int ch = *value++;
4819
4820 if (ch < rangelo)
4821 ch = rangelo - 1;
4822 else if (ch > rangehi)
4823 ch = rangehi + 1;
4824 num += ((double) (ch - rangelo)) / denom;
4825 denom *= base;
4826 }
4827
4828 return num;
4829}

References value.

Referenced by convert_bytea_to_scalar().
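
The conversion treats the bytes as the fractional digits of a number in base rangehi - rangelo + 1 (256 with the bytea defaults), so ordering is preserved and bytes beyond the first few only refine low-order digits. A standalone recap of the same arithmetic:

#include <stdio.h>

/* Positional-fraction scheme of convert_one_bytea_to_scalar(), with the
 * bytea defaults rangelo = 0, rangehi = 255. */
static double bytes_to_scalar(const unsigned char *s, int len)
{
    double base = 256.0, num = 0.0, denom = base;

    if (len > 10)               /* further bytes add no useful precision */
        len = 10;
    for (int i = 0; i < len; i++)
    {
        num += (double) s[i] / denom;
        denom *= base;
    }
    return num;
}

int main(void)
{
    unsigned char a[] = {0x41, 0x42};   /* "AB" */
    unsigned char b[] = {0x41, 0x43};   /* "AC" */

    /* 0x41/256 + 0x42/65536 = 0.2549... < 0x41/256 + 0x43/65536 */
    printf("%f < %f\n", bytes_to_scalar(a, 2), bytes_to_scalar(b, 2));
    return 0;
}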

◆ convert_one_string_to_scalar()

static double convert_one_string_to_scalar ( char *  value,
int  rangelo,
int  rangehi 
)
static

Definition at line 4608 of file selfuncs.c.

4609{
4610 int slen = strlen(value);
4611 double num,
4612 denom,
4613 base;
4614
4615 if (slen <= 0)
4616 return 0.0; /* empty string has scalar value 0 */
4617
4618 /*
4619 * There seems little point in considering more than a dozen bytes from
4620 * the string. Since base is at least 10, that will give us nominal
4621 * resolution of at least 12 decimal digits, which is surely far more
4622 * precision than this estimation technique has got anyway (especially in
4623 * non-C locales). Also, even with the maximum possible base of 256, this
4624 * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4625 * overflow on any known machine.
4626 */
4627 if (slen > 12)
4628 slen = 12;
4629
4630 /* Convert initial characters to fraction */
4631 base = rangehi - rangelo + 1;
4632 num = 0.0;
4633 denom = base;
4634 while (slen-- > 0)
4635 {
4636 int ch = (unsigned char) *value++;
4637
4638 if (ch < rangelo)
4639 ch = rangelo - 1;
4640 else if (ch > rangehi)
4641 ch = rangehi + 1;
4642 num += ((double) (ch - rangelo)) / denom;
4643 denom *= base;
4644 }
4645
4646 return num;
4647}

References value.

Referenced by convert_string_to_scalar().

◆ convert_string_datum()

static char * convert_string_datum ( Datum  value,
Oid  typid,
Oid  collid,
bool *  failure 
)
static

Definition at line 4659 of file selfuncs.c.

4660{
4661 char *val;
4662 pg_locale_t mylocale;
4663
4664 switch (typid)
4665 {
4666 case CHAROID:
4667 val = (char *) palloc(2);
4668 val[0] = DatumGetChar(value);
4669 val[1] = '\0';
4670 break;
4671 case BPCHAROID:
4672 case VARCHAROID:
4673 case TEXTOID:
4674 val = TextDatumGetCString(value);
4675 break;
4676 case NAMEOID:
4677 {
4678 NameData *nm = (NameData *) DatumGetPointer(value);
4679
4680 val = pstrdup(NameStr(*nm));
4681 break;
4682 }
4683 default:
4684 *failure = true;
4685 return NULL;
4686 }
4687
4688 mylocale = pg_newlocale_from_collation(collid);
4689
4690 if (!mylocale->collate_is_c)
4691 {
4692 char *xfrmstr;
4693 size_t xfrmlen;
4694 size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4695
4696 /*
4697 * XXX: We could guess at a suitable output buffer size and only call
4698 * pg_strxfrm() twice if our guess is too small.
4699 *
4700 * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4701 * bogus data or set an error. This is not really a problem unless it
4702 * crashes since it will only give an estimation error and nothing
4703 * fatal.
4704 *
4705 * XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
4706 * some cases, libc strxfrm() may return the wrong results, but that
4707 * will only lead to an estimation error.
4708 */
4709 xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
4710#ifdef WIN32
4711
4712 /*
4713 * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4714 * of trying to allocate this much memory (and fail), just return the
4715 * original string unmodified as if we were in the C locale.
4716 */
4717 if (xfrmlen == INT_MAX)
4718 return val;
4719#endif
4720 xfrmstr = (char *) palloc(xfrmlen + 1);
4721 xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
4722
4723 /*
4724 * Some systems (e.g., glibc) can return a smaller value from the
4725 * second call than the first; thus the Assert must be <= not ==.
4726 */
4727 Assert(xfrmlen2 <= xfrmlen);
4728 pfree(val);
4729 val = xfrmstr;
4730 }
4731
4732 return val;
4733}
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:703
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:204
Oid collid
long val
Definition: informix.c:689
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1332
size_t pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
Definition: pg_locale.c:1530
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
static char DatumGetChar(Datum X)
Definition: postgres.h:117
Definition: c.h:698

References Assert, pg_locale_struct::collate_is_c, collid, DatumGetChar(), DatumGetPointer(), NameStr, palloc(), pfree(), pg_newlocale_from_collation(), pg_strxfrm(), PG_USED_FOR_ASSERTS_ONLY, pstrdup(), TextDatumGetCString, val, and value.

Referenced by convert_to_scalar().
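
For non-C collations the function relies on the usual two-call strxfrm pattern: call once with a zero-sized buffer to learn the transformed length, then allocate and transform for real. A standalone sketch of that pattern using plain libc strxfrm() (pg_strxfrm() wraps the same idea behind a pg_locale_t):

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    setlocale(LC_COLLATE, "");          /* use the environment's collation */

    const char *val = "Alpha";
    /* First call: a NULL/0 buffer just reports the required length. */
    size_t xfrmlen = strxfrm(NULL, val, 0);
    char  *xfrmstr = malloc(xfrmlen + 1);
    /* Second call: fill the buffer; the result fits in xfrmlen + 1 bytes. */
    size_t xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);

    /* Transformed strings compare bytewise the way the originals compare
     * under the collation, which is all the estimator needs. */
    printf("needed %zu bytes, wrote %zu\n", xfrmlen, xfrmlen2);
    free(xfrmstr);
    return 0;
}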

◆ convert_string_to_scalar()

static void convert_string_to_scalar ( char *  value,
double *  scaledvalue,
char *  lobound,
double *  scaledlobound,
char *  hibound,
double *  scaledhibound 
)
static

Definition at line 4528 of file selfuncs.c.

4534{
4535 int rangelo,
4536 rangehi;
4537 char *sptr;
4538
4539 rangelo = rangehi = (unsigned char) hibound[0];
4540 for (sptr = lobound; *sptr; sptr++)
4541 {
4542 if (rangelo > (unsigned char) *sptr)
4543 rangelo = (unsigned char) *sptr;
4544 if (rangehi < (unsigned char) *sptr)
4545 rangehi = (unsigned char) *sptr;
4546 }
4547 for (sptr = hibound; *sptr; sptr++)
4548 {
4549 if (rangelo > (unsigned char) *sptr)
4550 rangelo = (unsigned char) *sptr;
4551 if (rangehi < (unsigned char) *sptr)
4552 rangehi = (unsigned char) *sptr;
4553 }
4554 /* If range includes any upper-case ASCII chars, make it include all */
4555 if (rangelo <= 'Z' && rangehi >= 'A')
4556 {
4557 if (rangelo > 'A')
4558 rangelo = 'A';
4559 if (rangehi < 'Z')
4560 rangehi = 'Z';
4561 }
4562 /* Ditto lower-case */
4563 if (rangelo <= 'z' && rangehi >= 'a')
4564 {
4565 if (rangelo > 'a')
4566 rangelo = 'a';
4567 if (rangehi < 'z')
4568 rangehi = 'z';
4569 }
4570 /* Ditto digits */
4571 if (rangelo <= '9' && rangehi >= '0')
4572 {
4573 if (rangelo > '0')
4574 rangelo = '0';
4575 if (rangehi < '9')
4576 rangehi = '9';
4577 }
4578
4579 /*
4580 * If the range includes fewer than 10 chars, assume we have not got
4581 * enough data, and make it include the regular ASCII set.
4582 */
4583 if (rangehi - rangelo < 9)
4584 {
4585 rangelo = ' ';
4586 rangehi = 127;
4587 }
4588
4589 /*
4590 * Now strip any common prefix of the three strings.
4591 */
4592 while (*lobound)
4593 {
4594 if (*lobound != *hibound || *lobound != *value)
4595 break;
4596 lobound++, hibound++, value++;
4597 }
4598
4599 /*
4600 * Now we can do the conversions.
4601 */
4602 *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4603 *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4604 *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4605}
static double convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
Definition: selfuncs.c:4608

References convert_one_string_to_scalar(), and value.

Referenced by convert_to_scalar().
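
After the range detection and common-prefix stripping above, each remaining string is handed to convert_one_string_to_scalar() (defined at line 4608, not shown in this section). The following is a simplified standalone sketch of that base-conversion idea, not the exact selfuncs.c code; the 'a'..'z' range and the sample strings are assumptions.

#include <stdio.h>
#include <string.h>

/* map a string to a fraction in [0, 1), one byte per base-(range) digit */
static double
string_to_fraction(const char *s, int rangelo, int rangehi)
{
    double base = rangehi - rangelo + 1;
    double num = 0.0;
    double denom = base;
    int    len = (int) strlen(s);
    int    i;

    if (len > 12)               /* further bytes add no useful resolution */
        len = 12;

    for (i = 0; i < len; i++)
    {
        int ch = (unsigned char) s[i];

        if (ch < rangelo)
            ch = rangelo - 1;
        else if (ch > rangehi)
            ch = rangehi + 1;
        num += (ch - rangelo) / denom;
        denom *= base;
    }
    return num;
}

int
main(void)
{
    printf("%f %f %f\n",
           string_to_fraction("apple", 'a', 'z'),
           string_to_fraction("mango", 'a', 'z'),
           string_to_fraction("zebra", 'a', 'z'));
    return 0;
}

Each byte becomes one digit in base (rangehi - rangelo + 1), so the three outputs land in [0, 1) in byte order, which is all the later histogram interpolation needs.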

◆ convert_timevalue_to_scalar()

static double convert_timevalue_to_scalar ( Datum  value,
Oid  typid,
bool *  failure 
)
static

Definition at line 4838 of file selfuncs.c.

4839{
4840 switch (typid)
4841 {
4842 case TIMESTAMPOID:
4843 return DatumGetTimestamp(value);
4844 case TIMESTAMPTZOID:
4845 return DatumGetTimestampTz(value);
4846 case DATEOID:
4847 return date2timestamp_no_overflow(DatumGetDateADT(value));
4848 case INTERVALOID:
4849 {
4850 Interval *interval = DatumGetIntervalP(value);
4851
4852 /*
4853 * Convert the month part of Interval to days using assumed
4854 * average month length of 365.25/12.0 days. Not too
4855 * accurate, but plenty good enough for our purposes.
4856 *
4857 * This also works for infinite intervals, which just have all
4858 * fields set to INT_MIN/INT_MAX, and so will produce a result
4859 * smaller/larger than any finite interval.
4860 */
4861 return interval->time + interval->day * (double) USECS_PER_DAY +
4862 interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4863 }
4864 case TIMEOID:
4865 return DatumGetTimeADT(value);
4866 case TIMETZOID:
4867 {
4868 TimeTzADT *timetz = DatumGetTimeTzADTP(value);
4869
4870 /* use GMT-equivalent time */
4871 return (double) (timetz->time + (timetz->zone * 1000000.0));
4872 }
4873 }
4874
4875 *failure = true;
4876 return 0;
4877}
#define MONTHS_PER_YEAR
Definition: timestamp.h:108
#define USECS_PER_DAY
Definition: timestamp.h:131
#define DAYS_PER_YEAR
Definition: timestamp.h:107
double date2timestamp_no_overflow(DateADT dateVal)
Definition: date.c:739
static TimeTzADT * DatumGetTimeTzADTP(Datum X)
Definition: date.h:66
static DateADT DatumGetDateADT(Datum X)
Definition: date.h:54
static TimeADT DatumGetTimeADT(Datum X)
Definition: date.h:60
Definition: date.h:28
TimeADT time
Definition: date.h:29
int32 zone
Definition: date.h:30
static Interval * DatumGetIntervalP(Datum X)
Definition: timestamp.h:40
static Timestamp DatumGetTimestamp(Datum X)
Definition: timestamp.h:28
static TimestampTz DatumGetTimestampTz(Datum X)
Definition: timestamp.h:34

References date2timestamp_no_overflow(), DatumGetDateADT(), DatumGetIntervalP(), DatumGetTimeADT(), DatumGetTimestamp(), DatumGetTimestampTz(), DatumGetTimeTzADTP(), DAYS_PER_YEAR, interval::month, MONTHS_PER_YEAR, TimeTzADT::time, interval::time, USECS_PER_DAY, value, and TimeTzADT::zone.

Referenced by convert_to_scalar().
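
As a worked example of the INTERVALOID branch, an interval of 1 month 2 days 03:00:00 scales to a plain microsecond count using the assumed average month length of 365.25/12 days noted in the comment. This is standalone arithmetic with hypothetical values, not PostgreSQL code; the macro values mirror timestamp.h.

#include <stdio.h>

#define USECS_PER_DAY   86400000000.0
#define DAYS_PER_YEAR   365.25
#define MONTHS_PER_YEAR 12.0

int
main(void)
{
    double time = 3.0 * 3600.0 * 1000000.0;     /* 03:00:00 as microseconds */
    int    day = 2;
    int    month = 1;
    double scaled;

    scaled = time + day * USECS_PER_DAY +
        month * ((DAYS_PER_YEAR / MONTHS_PER_YEAR) * USECS_PER_DAY);

    printf("scaled interval = %.0f microseconds\n", scaled);
    return 0;
}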

◆ convert_to_scalar()

static bool convert_to_scalar ( Datum  value,
Oid  valuetypid,
Oid  collid,
double *  scaledvalue,
Datum  lobound,
Datum  hibound,
Oid  boundstypid,
double *  scaledlobound,
double *  scaledhibound 
)
static

Definition at line 4319 of file selfuncs.c.

4322{
4323 bool failure = false;
4324
4325 /*
4326 * Both the valuetypid and the boundstypid should exactly match the
4327 * declared input type(s) of the operator we are invoked for. However,
4328 * extensions might try to use scalarineqsel as estimator for operators
4329 * with input type(s) we don't handle here; in such cases, we want to
4330 * return false, not fail. In any case, we mustn't assume that valuetypid
4331 * and boundstypid are identical.
4332 *
4333 * XXX The histogram we are interpolating between points of could belong
4334 * to a column that's only binary-compatible with the declared type. In
4335 * essence we are assuming that the semantics of binary-compatible types
4336 * are enough alike that we can use a histogram generated with one type's
4337 * operators to estimate selectivity for the other's. This is outright
4338 * wrong in some cases --- in particular signed versus unsigned
4339 * interpretation could trip us up. But it's useful enough in the
4340 * majority of cases that we do it anyway. Should think about more
4341 * rigorous ways to do it.
4342 */
4343 switch (valuetypid)
4344 {
4345 /*
4346 * Built-in numeric types
4347 */
4348 case BOOLOID:
4349 case INT2OID:
4350 case INT4OID:
4351 case INT8OID:
4352 case FLOAT4OID:
4353 case FLOAT8OID:
4354 case NUMERICOID:
4355 case OIDOID:
4356 case REGPROCOID:
4357 case REGPROCEDUREOID:
4358 case REGOPEROID:
4359 case REGOPERATOROID:
4360 case REGCLASSOID:
4361 case REGTYPEOID:
4362 case REGCOLLATIONOID:
4363 case REGCONFIGOID:
4364 case REGDICTIONARYOID:
4365 case REGROLEOID:
4366 case REGNAMESPACEOID:
4367 *scaledvalue = convert_numeric_to_scalar(value, valuetypid,
4368 &failure);
4369 *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
4370 &failure);
4371 *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
4372 &failure);
4373 return !failure;
4374
4375 /*
4376 * Built-in string types
4377 */
4378 case CHAROID:
4379 case BPCHAROID:
4380 case VARCHAROID:
4381 case TEXTOID:
4382 case NAMEOID:
4383 {
4384 char *valstr = convert_string_datum(value, valuetypid,
4385 collid, &failure);
4386 char *lostr = convert_string_datum(lobound, boundstypid,
4387 collid, &failure);
4388 char *histr = convert_string_datum(hibound, boundstypid,
4389 collid, &failure);
4390
4391 /*
4392 * Bail out if any of the values is not of string type. We
4393 * might leak converted strings for the other value(s), but
4394 * that's not worth troubling over.
4395 */
4396 if (failure)
4397 return false;
4398
4399 convert_string_to_scalar(valstr, scaledvalue,
4400 lostr, scaledlobound,
4401 histr, scaledhibound);
4402 pfree(valstr);
4403 pfree(lostr);
4404 pfree(histr);
4405 return true;
4406 }
4407
4408 /*
4409 * Built-in bytea type
4410 */
4411 case BYTEAOID:
4412 {
4413 /* We only support bytea vs bytea comparison */
4414 if (boundstypid != BYTEAOID)
4415 return false;
4416 convert_bytea_to_scalar(value, scaledvalue,
4417 lobound, scaledlobound,
4418 hibound, scaledhibound);
4419 return true;
4420 }
4421
4422 /*
4423 * Built-in time types
4424 */
4425 case TIMESTAMPOID:
4426 case TIMESTAMPTZOID:
4427 case DATEOID:
4428 case INTERVALOID:
4429 case TIMEOID:
4430 case TIMETZOID:
4431 *scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
4432 &failure);
4433 *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
4434 &failure);
4435 *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
4436 &failure);
4437 return !failure;
4438
4439 /*
4440 * Built-in network types
4441 */
4442 case INETOID:
4443 case CIDROID:
4444 case MACADDROID:
4445 case MACADDR8OID:
4446 *scaledvalue = convert_network_to_scalar(value, valuetypid,
4447 &failure);
4448 *scaledlobound = convert_network_to_scalar(lobound, boundstypid,
4449 &failure);
4450 *scaledhibound = convert_network_to_scalar(hibound, boundstypid,
4451 &failure);
4452 return !failure;
4453 }
4454 /* Don't know how to convert */
4455 *scaledvalue = *scaledlobound = *scaledhibound = 0;
4456 return false;
4457}
double convert_network_to_scalar(Datum value, Oid typid, bool *failure)
Definition: network.c:1496
static void convert_string_to_scalar(char *value, double *scaledvalue, char *lobound, double *scaledlobound, char *hibound, double *scaledhibound)
Definition: selfuncs.c:4528
static double convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
Definition: selfuncs.c:4838
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
Definition: selfuncs.c:4466
static void convert_bytea_to_scalar(Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound)
Definition: selfuncs.c:4747
static char * convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
Definition: selfuncs.c:4659

References collid, convert_bytea_to_scalar(), convert_network_to_scalar(), convert_numeric_to_scalar(), convert_string_datum(), convert_string_to_scalar(), convert_timevalue_to_scalar(), pfree(), and value.

Referenced by ineq_histogram_selectivity().
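
The scaled outputs only need to be mutually comparable: the caller, ineq_histogram_selectivity(), uses them to interpolate the value's position within one histogram bin. The sketch below is a standalone illustration of that final step with hypothetical numbers, not the actual selfuncs.c code.

#include <stdio.h>

int
main(void)
{
    double scaledvalue = 42.0;
    double scaledlobound = 10.0;    /* scaled lower histogram boundary */
    double scaledhibound = 90.0;    /* scaled upper histogram boundary */
    double binfrac;

    if (scaledhibound <= scaledlobound)
        binfrac = 0.5;              /* degenerate bin: assume the middle */
    else
    {
        binfrac = (scaledvalue - scaledlobound) /
            (scaledhibound - scaledlobound);
        if (binfrac < 0.0)
            binfrac = 0.0;
        else if (binfrac > 1.0)
            binfrac = 1.0;
    }

    printf("fraction of the bin below the value = %g\n", binfrac);  /* 0.4 */
    return 0;
}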

◆ eqjoinsel()

Datum eqjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 2273 of file selfuncs.c.

2274{
2275 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2276 Oid operator = PG_GETARG_OID(1);
2277 List *args = (List *) PG_GETARG_POINTER(2);
2278
2279#ifdef NOT_USED
2280 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2281#endif
2282 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2283 Oid collation = PG_GET_COLLATION();
2284 double selec;
2285 double selec_inner;
2286 VariableStatData vardata1;
2287 VariableStatData vardata2;
2288 double nd1;
2289 double nd2;
2290 bool isdefault1;
2291 bool isdefault2;
2292 Oid opfuncoid;
2293 AttStatsSlot sslot1;
2294 AttStatsSlot sslot2;
2295 Form_pg_statistic stats1 = NULL;
2296 Form_pg_statistic stats2 = NULL;
2297 bool have_mcvs1 = false;
2298 bool have_mcvs2 = false;
2299 bool get_mcv_stats;
2300 bool join_is_reversed;
2301 RelOptInfo *inner_rel;
2302
2303 get_join_variables(root, args, sjinfo,
2304 &vardata1, &vardata2, &join_is_reversed);
2305
2306 nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2307 nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2308
2309 opfuncoid = get_opcode(operator);
2310
2311 memset(&sslot1, 0, sizeof(sslot1));
2312 memset(&sslot2, 0, sizeof(sslot2));
2313
2314 /*
2315 * There is no use in fetching one side's MCVs if we lack MCVs for the
2316 * other side, so do a quick check to verify that both stats exist.
2317 */
2318 get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) &&
2319 HeapTupleIsValid(vardata2.statsTuple) &&
2320 get_attstatsslot(&sslot1, vardata1.statsTuple,
2321 STATISTIC_KIND_MCV, InvalidOid,
2322 0) &&
2323 get_attstatsslot(&sslot2, vardata2.statsTuple,
2324 STATISTIC_KIND_MCV, InvalidOid,
2325 0));
2326
2327 if (HeapTupleIsValid(vardata1.statsTuple))
2328 {
2329 /* note we allow use of nullfrac regardless of security check */
2330 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2331 if (get_mcv_stats &&
2332 statistic_proc_security_check(&vardata1, opfuncoid))
2333 have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2334 STATISTIC_KIND_MCV, InvalidOid,
2335 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2336 }
2337
2338 if (HeapTupleIsValid(vardata2.statsTuple))
2339 {
2340 /* note we allow use of nullfrac regardless of security check */
2341 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2342 if (get_mcv_stats &&
2343 statistic_proc_security_check(&vardata2, opfuncoid))
2344 have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2345 STATISTIC_KIND_MCV, InvalidOid,
2346 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2347 }
2348
2349 /* We need to compute the inner-join selectivity in all cases */
2350 selec_inner = eqjoinsel_inner(opfuncoid, collation,
2351 &vardata1, &vardata2,
2352 nd1, nd2,
2353 isdefault1, isdefault2,
2354 &sslot1, &sslot2,
2355 stats1, stats2,
2356 have_mcvs1, have_mcvs2);
2357
2358 switch (sjinfo->jointype)
2359 {
2360 case JOIN_INNER:
2361 case JOIN_LEFT:
2362 case JOIN_FULL:
2363 selec = selec_inner;
2364 break;
2365 case JOIN_SEMI:
2366 case JOIN_ANTI:
2367
2368 /*
2369 * Look up the join's inner relation. min_righthand is sufficient
2370 * information because neither SEMI nor ANTI joins permit any
2371 * reassociation into or out of their RHS, so the righthand will
2372 * always be exactly that set of rels.
2373 */
2374 inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2375
2376 if (!join_is_reversed)
2377 selec = eqjoinsel_semi(opfuncoid, collation,
2378 &vardata1, &vardata2,
2379 nd1, nd2,
2380 isdefault1, isdefault2,
2381 &sslot1, &sslot2,
2382 stats1, stats2,
2383 have_mcvs1, have_mcvs2,
2384 inner_rel);
2385 else
2386 {
2387 Oid commop = get_commutator(operator);
2388 Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2389
2390 selec = eqjoinsel_semi(commopfuncoid, collation,
2391 &vardata2, &vardata1,
2392 nd2, nd1,
2393 isdefault2, isdefault1,
2394 &sslot2, &sslot1,
2395 stats2, stats1,
2396 have_mcvs2, have_mcvs1,
2397 inner_rel);
2398 }
2399
2400 /*
2401 * We should never estimate the output of a semijoin to be more
2402 * rows than we estimate for an inner join with the same input
2403 * rels and join condition; it's obviously impossible for that to
2404 * happen. The former estimate is N1 * Ssemi while the latter is
2405 * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
2406 * this is worthwhile because of the shakier estimation rules we
2407 * use in eqjoinsel_semi, particularly in cases where it has to
2408 * punt entirely.
2409 */
2410 selec = Min(selec, inner_rel->rows * selec_inner);
2411 break;
2412 default:
2413 /* other values not expected here */
2414 elog(ERROR, "unrecognized join type: %d",
2415 (int) sjinfo->jointype);
2416 selec = 0; /* keep compiler quiet */
2417 break;
2418 }
2419
2420 free_attstatsslot(&sslot1);
2421 free_attstatsslot(&sslot2);
2422
2423 ReleaseVariableStats(vardata1);
2424 ReleaseVariableStats(vardata2);
2425
2426 CLAMP_PROBABILITY(selec);
2427
2428 PG_RETURN_FLOAT8((float8) selec);
2429}
double float8
Definition: c.h:587
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1312
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1536
JoinType
Definition: nodes.h:288
@ JOIN_SEMI
Definition: nodes.h:307
@ JOIN_FULL
Definition: nodes.h:295
@ JOIN_LEFT
Definition: nodes.h:294
@ JOIN_ANTI
Definition: nodes.h:308
static RelOptInfo * find_join_input_rel(PlannerInfo *root, Relids relids)
Definition: selfuncs.c:6471
static double eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2)
Definition: selfuncs.c:2438
static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, RelOptInfo *inner_rel)
Definition: selfuncs.c:2635
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5778
void get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
Definition: selfuncs.c:4964
Cardinality rows
Definition: pathnodes.h:901
Relids min_righthand
Definition: pathnodes.h:2930
JoinType jointype
Definition: pathnodes.h:2933

References generate_unaccent_rules::args, ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, elog, eqjoinsel_inner(), eqjoinsel_semi(), ERROR, find_join_input_rel(), free_attstatsslot(), get_attstatsslot(), get_commutator(), get_join_variables(), get_opcode(), get_variable_numdistinct(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, JOIN_ANTI, JOIN_FULL, JOIN_INNER, JOIN_LEFT, JOIN_SEMI, SpecialJoinInfo::jointype, Min, SpecialJoinInfo::min_righthand, OidIsValid, PG_GET_COLLATION, PG_GETARG_INT16, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, root, RelOptInfo::rows, statistic_proc_security_check(), and VariableStatData::statsTuple.

Referenced by neqjoinsel().
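
The SEMI/ANTI branch ends by clamping the semi-join selectivity to N2 * Sinner, as explained in the comment above. The following is a standalone illustration of that clamp with hypothetical numbers.

#include <stdio.h>

#define Min(x, y) ((x) < (y) ? (x) : (y))

int
main(void)
{
    double inner_rows = 1000.0;     /* estimated size of the inner rel (N2) */
    double selec_inner = 0.0002;    /* Sinner from eqjoinsel_inner() */
    double selec_semi = 0.5;        /* raw Ssemi from eqjoinsel_semi() */
    double selec = Min(selec_semi, inner_rows * selec_inner);

    printf("clamped semi-join selectivity = %g\n", selec);  /* 0.2 */
    return 0;
}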

◆ eqjoinsel_inner()

static double eqjoinsel_inner ( Oid  opfuncoid,
Oid  collation,
VariableStatData vardata1,
VariableStatData vardata2,
double  nd1,
double  nd2,
bool  isdefault1,
bool  isdefault2,
AttStatsSlot sslot1,
AttStatsSlot sslot2,
Form_pg_statistic  stats1,
Form_pg_statistic  stats2,
bool  have_mcvs1,
bool  have_mcvs2 
)
static

Definition at line 2438 of file selfuncs.c.

2445{
2446 double selec;
2447
2448 if (have_mcvs1 && have_mcvs2)
2449 {
2450 /*
2451 * We have most-common-value lists for both relations. Run through
2452 * the lists to see which MCVs actually join to each other with the
2453 * given operator. This allows us to determine the exact join
2454 * selectivity for the portion of the relations represented by the MCV
2455 * lists. We still have to estimate for the remaining population, but
2456 * in a skewed distribution this gives us a big leg up in accuracy.
2457 * For motivation see the analysis in Y. Ioannidis and S.
2458 * Christodoulakis, "On the propagation of errors in the size of join
2459 * results", Technical Report 1018, Computer Science Dept., University
2460 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2461 */
2462 LOCAL_FCINFO(fcinfo, 2);
2463 FmgrInfo eqproc;
2464 bool *hasmatch1;
2465 bool *hasmatch2;
2466 double nullfrac1 = stats1->stanullfrac;
2467 double nullfrac2 = stats2->stanullfrac;
2468 double matchprodfreq,
2469 matchfreq1,
2470 matchfreq2,
2471 unmatchfreq1,
2472 unmatchfreq2,
2473 otherfreq1,
2474 otherfreq2,
2475 totalsel1,
2476 totalsel2;
2477 int i,
2478 nmatches;
2479
2480 fmgr_info(opfuncoid, &eqproc);
2481
2482 /*
2483 * Save a few cycles by setting up the fcinfo struct just once. Using
2484 * FunctionCallInvoke directly also avoids failure if the eqproc
2485 * returns NULL, though really equality functions should never do
2486 * that.
2487 */
2488 InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2489 NULL, NULL);
2490 fcinfo->args[0].isnull = false;
2491 fcinfo->args[1].isnull = false;
2492
2493 hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2494 hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
2495
2496 /*
2497 * Note we assume that each MCV will match at most one member of the
2498 * other MCV list. If the operator isn't really equality, there could
2499 * be multiple matches --- but we don't look for them, both for speed
2500 * and because the math wouldn't add up...
2501 */
2502 matchprodfreq = 0.0;
2503 nmatches = 0;
2504 for (i = 0; i < sslot1->nvalues; i++)
2505 {
2506 int j;
2507
2508 fcinfo->args[0].value = sslot1->values[i];
2509
2510 for (j = 0; j < sslot2->nvalues; j++)
2511 {
2512 Datum fresult;
2513
2514 if (hasmatch2[j])
2515 continue;
2516 fcinfo->args[1].value = sslot2->values[j];
2517 fcinfo->isnull = false;
2518 fresult = FunctionCallInvoke(fcinfo);
2519 if (!fcinfo->isnull && DatumGetBool(fresult))
2520 {
2521 hasmatch1[i] = hasmatch2[j] = true;
2522 matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2523 nmatches++;
2524 break;
2525 }
2526 }
2527 }
2528 CLAMP_PROBABILITY(matchprodfreq);
2529 /* Sum up frequencies of matched and unmatched MCVs */
2530 matchfreq1 = unmatchfreq1 = 0.0;
2531 for (i = 0; i < sslot1->nvalues; i++)
2532 {
2533 if (hasmatch1[i])
2534 matchfreq1 += sslot1->numbers[i];
2535 else
2536 unmatchfreq1 += sslot1->numbers[i];
2537 }
2538 CLAMP_PROBABILITY(matchfreq1);
2539 CLAMP_PROBABILITY(unmatchfreq1);
2540 matchfreq2 = unmatchfreq2 = 0.0;
2541 for (i = 0; i < sslot2->nvalues; i++)
2542 {
2543 if (hasmatch2[i])
2544 matchfreq2 += sslot2->numbers[i];
2545 else
2546 unmatchfreq2 += sslot2->numbers[i];
2547 }
2548 CLAMP_PROBABILITY(matchfreq2);
2549 CLAMP_PROBABILITY(unmatchfreq2);
2550 pfree(hasmatch1);
2551 pfree(hasmatch2);
2552
2553 /*
2554 * Compute total frequency of non-null values that are not in the MCV
2555 * lists.
2556 */
2557 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2558 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2559 CLAMP_PROBABILITY(otherfreq1);
2560 CLAMP_PROBABILITY(otherfreq2);
2561
2562 /*
2563 * We can estimate the total selectivity from the point of view of
2564 * relation 1 as: the known selectivity for matched MCVs, plus
2565 * unmatched MCVs that are assumed to match against random members of
2566 * relation 2's non-MCV population, plus non-MCV values that are
2567 * assumed to match against random members of relation 2's unmatched
2568 * MCVs plus non-MCV values.
2569 */
2570 totalsel1 = matchprodfreq;
2571 if (nd2 > sslot2->nvalues)
2572 totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2573 if (nd2 > nmatches)
2574 totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2575 (nd2 - nmatches);
2576 /* Same estimate from the point of view of relation 2. */
2577 totalsel2 = matchprodfreq;
2578 if (nd1 > sslot1->nvalues)
2579 totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2580 if (nd1 > nmatches)
2581 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2582 (nd1 - nmatches);
2583
2584 /*
2585 * Use the smaller of the two estimates. This can be justified in
2586 * essentially the same terms as given below for the no-stats case: to
2587 * a first approximation, we are estimating from the point of view of
2588 * the relation with smaller nd.
2589 */
2590 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2591 }
2592 else
2593 {
2594 /*
2595 * We do not have MCV lists for both sides. Estimate the join
2596 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2597 * is plausible if we assume that the join operator is strict and the
2598 * non-null values are about equally distributed: a given non-null
2599 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2600 * of rel2, so total join rows are at most
2601 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2602 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2603 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2604 * with MIN() is an upper bound. Using the MIN() means we estimate
2605 * from the point of view of the relation with smaller nd (since the
2606 * larger nd is determining the MIN). It is reasonable to assume that
2607 * most tuples in this rel will have join partners, so the bound is
2608 * probably reasonably tight and should be taken as-is.
2609 *
2610 * XXX Can we be smarter if we have an MCV list for just one side? It
2611 * seems that if we assume equal distribution for the other side, we
2612 * end up with the same answer anyway.
2613 */
2614 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2615 double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2616
2617 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2618 if (nd1 > nd2)
2619 selec /= nd1;
2620 else
2621 selec /= nd2;
2622 }
2623
2624 return selec;
2625}
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
#define InitFunctionCallInfoData(Fcinfo, Flinfo, Nargs, Collation, Context, Resultinfo)
Definition: fmgr.h:150
#define LOCAL_FCINFO(name, nargs)
Definition: fmgr.h:110
#define FunctionCallInvoke(fcinfo)
Definition: fmgr.h:172
void * palloc0(Size size)
Definition: mcxt.c:1347
uintptr_t Datum
Definition: postgres.h:69
Definition: fmgr.h:57

References CLAMP_PROBABILITY, DatumGetBool(), fmgr_info(), FunctionCallInvoke, i, InitFunctionCallInfoData, j, LOCAL_FCINFO, AttStatsSlot::numbers, AttStatsSlot::nvalues, palloc0(), pfree(), and AttStatsSlot::values.

Referenced by eqjoinsel().
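
When MCV lists are not available for both sides, the fallback above is MIN(1/nd1, 1/nd2) * (1 - nullfrac1) * (1 - nullfrac2). A worked standalone example with hypothetical statistics:

#include <stdio.h>

int
main(void)
{
    double nd1 = 50.0;          /* distinct values on side 1 */
    double nd2 = 2000.0;        /* distinct values on side 2 */
    double nullfrac1 = 0.10;
    double nullfrac2 = 0.00;
    double selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);

    if (nd1 > nd2)
        selec /= nd1;
    else
        selec /= nd2;

    printf("fallback join selectivity = %g\n", selec);  /* 0.9 / 2000 = 0.00045 */
    return 0;
}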

◆ eqjoinsel_semi()

static double eqjoinsel_semi ( Oid  opfuncoid,
Oid  collation,
VariableStatData vardata1,
VariableStatData vardata2,
double  nd1,
double  nd2,
bool  isdefault1,
bool  isdefault2,
AttStatsSlot sslot1,
AttStatsSlot sslot2,
Form_pg_statistic  stats1,
Form_pg_statistic  stats2,
bool  have_mcvs1,
bool  have_mcvs2,
RelOptInfo inner_rel 
)
static

Definition at line 2635 of file selfuncs.c.

2643{
2644 double selec;
2645
2646 /*
2647 * We clamp nd2 to be not more than what we estimate the inner relation's
2648 * size to be. This is intuitively somewhat reasonable since obviously
2649 * there can't be more than that many distinct values coming from the
2650 * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2651 * likewise) is that this is the only pathway by which restriction clauses
2652 * applied to the inner rel will affect the join result size estimate,
2653 * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2654 * only the outer rel's size. If we clamped nd1 we'd be double-counting
2655 * the selectivity of outer-rel restrictions.
2656 *
2657 * We can apply this clamping both with respect to the base relation from
2658 * which the join variable comes (if there is just one), and to the
2659 * immediate inner input relation of the current join.
2660 *
2661 * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2662 * great, maybe, but it didn't come out of nowhere either. This is most
2663 * helpful when the inner relation is empty and consequently has no stats.
2664 */
2665 if (vardata2->rel)
2666 {
2667 if (nd2 >= vardata2->rel->rows)
2668 {
2669 nd2 = vardata2->rel->rows;
2670 isdefault2 = false;
2671 }
2672 }
2673 if (nd2 >= inner_rel->rows)
2674 {
2675 nd2 = inner_rel->rows;
2676 isdefault2 = false;
2677 }
2678
2679 if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2680 {
2681 /*
2682 * We have most-common-value lists for both relations. Run through
2683 * the lists to see which MCVs actually join to each other with the
2684 * given operator. This allows us to determine the exact join
2685 * selectivity for the portion of the relations represented by the MCV
2686 * lists. We still have to estimate for the remaining population, but
2687 * in a skewed distribution this gives us a big leg up in accuracy.
2688 */
2689 LOCAL_FCINFO(fcinfo, 2);
2690 FmgrInfo eqproc;
2691 bool *hasmatch1;
2692 bool *hasmatch2;
2693 double nullfrac1 = stats1->stanullfrac;
2694 double matchfreq1,
2695 uncertainfrac,
2696 uncertain;
2697 int i,
2698 nmatches,
2699 clamped_nvalues2;
2700
2701 /*
2702 * The clamping above could have resulted in nd2 being less than
2703 * sslot2->nvalues; in which case, we assume that precisely the nd2
2704 * most common values in the relation will appear in the join input,
2705 * and so compare to only the first nd2 members of the MCV list. Of
2706 * course this is frequently wrong, but it's the best bet we can make.
2707 */
2708 clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2709
2710 fmgr_info(opfuncoid, &eqproc);
2711
2712 /*
2713 * Save a few cycles by setting up the fcinfo struct just once. Using
2714 * FunctionCallInvoke directly also avoids failure if the eqproc
2715 * returns NULL, though really equality functions should never do
2716 * that.
2717 */
2718 InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2719 NULL, NULL);
2720 fcinfo->args[0].isnull = false;
2721 fcinfo->args[1].isnull = false;
2722
2723 hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2724 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2725
2726 /*
2727 * Note we assume that each MCV will match at most one member of the
2728 * other MCV list. If the operator isn't really equality, there could
2729 * be multiple matches --- but we don't look for them, both for speed
2730 * and because the math wouldn't add up...
2731 */
2732 nmatches = 0;
2733 for (i = 0; i < sslot1->nvalues; i++)
2734 {
2735 int j;
2736
2737 fcinfo->args[0].value = sslot1->values[i];
2738
2739 for (j = 0; j < clamped_nvalues2; j++)
2740 {
2741 Datum fresult;
2742
2743 if (hasmatch2[j])
2744 continue;
2745 fcinfo->args[1].value = sslot2->values[j];
2746 fcinfo->isnull = false;
2747 fresult = FunctionCallInvoke(fcinfo);
2748 if (!fcinfo->isnull && DatumGetBool(fresult))
2749 {
2750 hasmatch1[i] = hasmatch2[j] = true;
2751 nmatches++;
2752 break;
2753 }
2754 }
2755 }
2756 /* Sum up frequencies of matched MCVs */
2757 matchfreq1 = 0.0;
2758 for (i = 0; i < sslot1->nvalues; i++)
2759 {
2760 if (hasmatch1[i])
2761 matchfreq1 += sslot1->numbers[i];
2762 }
2763 CLAMP_PROBABILITY(matchfreq1);
2764 pfree(hasmatch1);
2765 pfree(hasmatch2);
2766
2767 /*
2768 * Now we need to estimate the fraction of relation 1 that has at
2769 * least one join partner. We know for certain that the matched MCVs
2770 * do, so that gives us a lower bound, but we're really in the dark
2771 * about everything else. Our crude approach is: if nd1 <= nd2 then
2772 * assume all non-null rel1 rows have join partners, else assume for
2773 * the uncertain rows that a fraction nd2/nd1 have join partners. We
2774 * can discount the known-matched MCVs from the distinct-values counts
2775 * before doing the division.
2776 *
2777 * Crude as the above is, it's completely useless if we don't have
2778 * reliable ndistinct values for both sides. Hence, if either nd1 or
2779 * nd2 is default, punt and assume half of the uncertain rows have
2780 * join partners.
2781 */
2782 if (!isdefault1 && !isdefault2)
2783 {
2784 nd1 -= nmatches;
2785 nd2 -= nmatches;
2786 if (nd1 <= nd2 || nd2 < 0)
2787 uncertainfrac = 1.0;
2788 else
2789 uncertainfrac = nd2 / nd1;
2790 }
2791 else
2792 uncertainfrac = 0.5;
2793 uncertain = 1.0 - matchfreq1 - nullfrac1;
2794 CLAMP_PROBABILITY(uncertain);
2795 selec = matchfreq1 + uncertainfrac * uncertain;
2796 }
2797 else
2798 {
2799 /*
2800 * Without MCV lists for both sides, we can only use the heuristic
2801 * about nd1 vs nd2.
2802 */
2803 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2804
2805 if (!isdefault1 && !isdefault2)
2806 {
2807 if (nd1 <= nd2 || nd2 < 0)
2808 selec = 1.0 - nullfrac1;
2809 else
2810 selec = (nd2 / nd1) * (1.0 - nullfrac1);
2811 }
2812 else
2813 selec = 0.5 * (1.0 - nullfrac1);
2814 }
2815
2816 return selec;
2817}

References CLAMP_PROBABILITY, DatumGetBool(), fmgr_info(), FunctionCallInvoke, i, InitFunctionCallInfoData, j, LOCAL_FCINFO, Min, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, palloc0(), pfree(), VariableStatData::rel, RelOptInfo::rows, and AttStatsSlot::values.

Referenced by eqjoinsel().
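
The MCV branch above combines an exact matched-MCV frequency with a crude guess for the remaining "uncertain" rows: after discounting the matched MCVs from both ndistinct counts, a fraction nd2/nd1 of them is assumed to find a join partner. A worked standalone example with hypothetical numbers:

#include <stdio.h>

int
main(void)
{
    double nd1 = 1000.0;        /* distinct values on the outer side */
    double nd2 = 300.0;         /* distinct values on the inner side */
    int    nmatches = 20;       /* MCVs that joined to each other */
    double matchfreq1 = 0.25;   /* summed frequency of matched outer MCVs */
    double nullfrac1 = 0.05;
    double uncertain = 1.0 - matchfreq1 - nullfrac1;
    double uncertainfrac;

    nd1 -= nmatches;
    nd2 -= nmatches;
    uncertainfrac = (nd1 <= nd2 || nd2 < 0) ? 1.0 : nd2 / nd1;

    printf("semi-join selectivity = %g\n",
           matchfreq1 + uncertainfrac * uncertain);  /* 0.25 + (280/980)*0.70 = 0.45 */
    return 0;
}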

◆ eqsel()

Datum eqsel ( PG_FUNCTION_ARGS  )

Definition at line 228 of file selfuncs.c.

229{
230 PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
231}
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate)
Definition: selfuncs.c:237

References eqsel_internal(), and PG_RETURN_FLOAT8.

◆ eqsel_internal()

static double eqsel_internal ( PG_FUNCTION_ARGS  ,
bool  negate 
)
static

Definition at line 237 of file selfuncs.c.

238{
239 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
240 Oid operator = PG_GETARG_OID(1);
241 List *args = (List *) PG_GETARG_POINTER(2);
242 int varRelid = PG_GETARG_INT32(3);
243 Oid collation = PG_GET_COLLATION();
244 VariableStatData vardata;
245 Node *other;
246 bool varonleft;
247 double selec;
248
249 /*
250 * When asked about <>, we do the estimation using the corresponding =
251 * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
252 */
253 if (negate)
254 {
255 operator = get_negator(operator);
256 if (!OidIsValid(operator))
257 {
258 /* Use default selectivity (should we raise an error instead?) */
259 return 1.0 - DEFAULT_EQ_SEL;
260 }
261 }
262
263 /*
264 * If expression is not variable = something or something = variable, then
265 * punt and return a default estimate.
266 */
267 if (!get_restriction_variable(root, args, varRelid,
268 &vardata, &other, &varonleft))
269 return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
270
271 /*
272 * We can do a lot better if the something is a constant. (Note: the
273 * Const might result from estimation rather than being a simple constant
274 * in the query.)
275 */
276 if (IsA(other, Const))
277 selec = var_eq_const(&vardata, operator, collation,
278 ((Const *) other)->constvalue,
279 ((Const *) other)->constisnull,
280 varonleft, negate);
281 else
282 selec = var_eq_non_const(&vardata, operator, collation, other,
283 varonleft, negate);
284
285 ReleaseVariableStats(vardata);
286
287 return selec;
288}
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
Oid get_negator(Oid opno)
Definition: lsyscache.c:1560
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4904
double var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)
Definition: selfuncs.c:467
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34

References generate_unaccent_rules::args, DEFAULT_EQ_SEL, get_negator(), get_restriction_variable(), IsA, OidIsValid, PG_GET_COLLATION, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, ReleaseVariableStats, root, var_eq_const(), and var_eq_non_const().

Referenced by eqsel(), and neqsel().
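
Note that the negate path here only swaps in the corresponding "=" operator; the actual "1.0 - eq_selectivity - nullfrac" conversion mentioned in the comment happens inside var_eq_const()/var_eq_non_const(). The following is a standalone illustration of that identity with hypothetical numbers: every row is either NULL, equal, or not equal, so the three fractions sum to one.

#include <stdio.h>

int
main(void)
{
    double eq_selec = 0.02;     /* estimated selectivity of col = const */
    double nullfrac = 0.10;     /* stanullfrac of the column */
    double neq_selec = 1.0 - eq_selec - nullfrac;

    printf("selectivity of col <> const = %g\n", neq_selec);    /* 0.88 */
    return 0;
}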

◆ estimate_array_length()

double estimate_array_length ( PlannerInfo root,
Node arrayexpr 
)

Definition at line 2140 of file selfuncs.c.

2141{
2142 /* look through any binary-compatible relabeling of arrayexpr */
2143 arrayexpr = strip_array_coercion(arrayexpr);
2144
2145 if (arrayexpr && IsA(arrayexpr, Const))
2146 {
2147 Datum arraydatum = ((Const *) arrayexpr)->constvalue;
2148 bool arrayisnull = ((Const *) arrayexpr)->constisnull;
2149 ArrayType *arrayval;
2150
2151 if (arrayisnull)
2152 return 0;
2153 arrayval = DatumGetArrayTypeP(arraydatum);
2154 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2155 }
2156 else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2157 !((ArrayExpr *) arrayexpr)->multidims)
2158 {
2159 return list_length(((ArrayExpr *) arrayexpr)->elements);
2160 }
2161 else if (arrayexpr && root)
2162 {
2163 /* See if we can find any statistics about it */
2164 VariableStatData vardata;
2165 AttStatsSlot sslot;
2166 double nelem = 0;
2167
2168 examine_variable(root, arrayexpr, 0, &vardata);
2169 if (HeapTupleIsValid(vardata.statsTuple))
2170 {
2171 /*
2172 * Found stats, so use the average element count, which is stored
2173 * in the last stanumbers element of the DECHIST statistics.
2174 * Actually that is the average count of *distinct* elements;
2175 * perhaps we should scale it up somewhat?
2176 */
2177 if (get_attstatsslot(&sslot, vardata.statsTuple,
2178 STATISTIC_KIND_DECHIST, InvalidOid,
2179 ATTSTATSSLOT_NUMBERS))
2180 {
2181 if (sslot.nnumbers > 0)
2182 nelem = clamp_row_est(sslot.numbers[sslot.nnumbers - 1]);
2183 free_attstatsslot(&sslot);
2184 }
2185 }
2186 ReleaseVariableStats(vardata);
2187
2188 if (nelem > 0)
2189 return nelem;
2190 }
2191
2192 /* Else use a default guess --- this should match scalararraysel */
2193 return 10;
2194}
#define ARR_NDIM(a)
Definition: array.h:290
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define ARR_DIMS(a)
Definition: array.h:294
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
double clamp_row_est(double nrows)
Definition: costsize.c:213
static int list_length(const List *l)
Definition: pg_list.h:152
static Node * strip_array_coercion(Node *node)
Definition: selfuncs.c:1784

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), ATTSTATSSLOT_NUMBERS, clamp_row_est(), DatumGetArrayTypeP, examine_variable(), free_attstatsslot(), get_attstatsslot(), HeapTupleIsValid, InvalidOid, IsA, list_length(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, root, VariableStatData::statsTuple, and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats ( PlannerInfo root,
Node hashkey,
double  nbuckets,
Selectivity mcv_freq,
Selectivity bucketsize_frac 
)

Definition at line 3812 of file selfuncs.c.

3815{
3816 VariableStatData vardata;
3817 double estfract,
3818 ndistinct,
3819 stanullfrac,
3820 avgfreq;
3821 bool isdefault;
3822 AttStatsSlot sslot;
3823
3824 examine_variable(root, hashkey, 0, &vardata);
3825
3826 /* Look up the frequency of the most common value, if available */
3827 *mcv_freq = 0.0;
3828
3829 if (HeapTupleIsValid(vardata.statsTuple))
3830 {
3831 if (get_attstatsslot(&sslot, vardata.statsTuple,
3832 STATISTIC_KIND_MCV, InvalidOid,
3833 ATTSTATSSLOT_NUMBERS))
3834 {
3835 /*
3836 * The first MCV stat is for the most common value.
3837 */
3838 if (sslot.nnumbers > 0)
3839 *mcv_freq = sslot.numbers[0];
3840 free_attstatsslot(&sslot);
3841 }
3842 }
3843
3844 /* Get number of distinct values */
3845 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3846
3847 /*
3848 * If ndistinct isn't real, punt. We normally return 0.1, but if the
3849 * mcv_freq is known to be even higher than that, use it instead.
3850 */
3851 if (isdefault)
3852 {
3853 *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3854 ReleaseVariableStats(vardata);
3855 return;
3856 }
3857
3858 /* Get fraction that are null */
3859 if (HeapTupleIsValid(vardata.statsTuple))
3860 {
3861 Form_pg_statistic stats;
3862
3863 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3864 stanullfrac = stats->stanullfrac;
3865 }
3866 else
3867 stanullfrac = 0.0;
3868
3869 /* Compute avg freq of all distinct data values in raw relation */
3870 avgfreq = (1.0 - stanullfrac) / ndistinct;
3871
3872 /*
3873 * Adjust ndistinct to account for restriction clauses. Observe we are
3874 * assuming that the data distribution is affected uniformly by the
3875 * restriction clauses!
3876 *
3877 * XXX Possibly better way, but much more expensive: multiply by
3878 * selectivity of rel's restriction clauses that mention the target Var.
3879 */
3880 if (vardata.rel && vardata.rel->tuples > 0)
3881 {
3882 ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3883 ndistinct = clamp_row_est(ndistinct);
3884 }
3885
3886 /*
3887 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3888 * number of buckets is less than the expected number of distinct values;
3889 * otherwise it is 1/ndistinct.
3890 */
3891 if (ndistinct > nbuckets)
3892 estfract = 1.0 / nbuckets;
3893 else
3894 estfract = 1.0 / ndistinct;
3895
3896 /*
3897 * Adjust estimated bucketsize upward to account for skewed distribution.
3898 */
3899 if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3900 estfract *= *mcv_freq / avgfreq;
3901
3902 /*
3903 * Clamp bucketsize to sane range (the above adjustment could easily
3904 * produce an out-of-range result). We set the lower bound a little above
3905 * zero, since zero isn't a very sane result.
3906 */
3907 if (estfract < 1.0e-6)
3908 estfract = 1.0e-6;
3909 else if (estfract > 1.0)
3910 estfract = 1.0;
3911
3912 *bucketsize_frac = (Selectivity) estfract;
3913
3914 ReleaseVariableStats(vardata);
3915}
Cardinality tuples
Definition: pathnodes.h:973

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, root, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().
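
Putting the steps above together with hypothetical statistics (standalone arithmetic, not the selfuncs.c code): start from 1/nbuckets, or 1/ndistinct when there are fewer distinct values than buckets, then inflate by mcv_freq/avgfreq to account for skew, clamping the result to [1e-6, 1.0].

#include <stdio.h>

int
main(void)
{
    double nbuckets = 1024.0;
    double ndistinct = 5000.0;  /* after scaling by the restriction clauses */
    double stanullfrac = 0.0;
    double mcv_freq = 0.05;     /* frequency of the most common value */
    double avgfreq = (1.0 - stanullfrac) / ndistinct;
    double estfract;

    estfract = (ndistinct > nbuckets) ? 1.0 / nbuckets : 1.0 / ndistinct;

    if (avgfreq > 0.0 && mcv_freq > avgfreq)
        estfract *= mcv_freq / avgfreq;     /* inflate for skew */

    if (estfract < 1.0e-6)
        estfract = 1.0e-6;
    else if (estfract > 1.0)
        estfract = 1.0;

    printf("bucketsize fraction = %g\n", estfract);     /* about 0.24 */
    return 0;
}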

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize ( PlannerInfo root,
Path path,
const AggClauseCosts agg_costs,
double  dNumGroups 
)

Definition at line 3931 of file selfuncs.c.

3933{
3934 Size hashentrysize;
3935
3936 hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
3937 path->pathtarget->width,
3938 agg_costs->transitionSpace);
3939
3940 /*
3941 * Note that this disregards the effect of fill-factor and growth policy
3942 * of the hash table. That's probably ok, given that the default
3943 * fill-factor is relatively high. It'd be hard to meaningfully factor in
3944 * "double-in-size" growth policies here.
3945 */
3946 return hashentrysize * dNumGroups;
3947}
size_t Size
Definition: c.h:562
Size hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
Definition: nodeAgg.c:1703
Size transitionSpace
Definition: pathnodes.h:62

References hash_agg_entry_size(), list_length(), root, and AggClauseCosts::transitionSpace.

Referenced by consider_groupingsets_paths().
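
A trivial standalone illustration of the estimate; the per-entry size and group count are assumptions, whereas the real function derives the entry size from the transition state and tuple width via hash_agg_entry_size().

#include <stddef.h>
#include <stdio.h>

int
main(void)
{
    size_t hashentrysize = 64 + 16;     /* assumed tuple width plus per-entry overhead */
    double dNumGroups = 200000.0;

    printf("estimated hash table size = %.0f bytes\n",
           (double) hashentrysize * dNumGroups);    /* 16 million bytes */
    return 0;
}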

◆ estimate_multivariate_ndistinct()

static bool estimate_multivariate_ndistinct ( PlannerInfo root,
RelOptInfo rel,
List **  varinfos,
double *  ndistinct 
)
static

Definition at line 3968 of file selfuncs.c.

3970{
3971 ListCell *lc;
3972 int nmatches_vars;
3973 int nmatches_exprs;
3974 Oid statOid = InvalidOid;
3975 MVNDistinct *stats;
3976 StatisticExtInfo *matched_info = NULL;
3977 RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
3978
3979 /* bail out immediately if the table has no extended statistics */
3980 if (!rel->statlist)
3981 return false;
3982
3983 /* look for the ndistinct statistics object matching the most vars */
3984 nmatches_vars = 0; /* we require at least two matches */
3985 nmatches_exprs = 0;
3986 foreach(lc, rel->statlist)
3987 {
3988 ListCell *lc2;
3989 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3990 int nshared_vars = 0;
3991 int nshared_exprs = 0;
3992
3993 /* skip statistics of other kinds */
3994 if (info->kind != STATS_EXT_NDISTINCT)
3995 continue;
3996
3997 /* skip statistics with mismatching stxdinherit value */
3998 if (info->inherit != rte->inh)
3999 continue;
4000
4001 /*
4002 * Determine how many expressions (and variables in non-matched
4003 * expressions) match. We'll then use these numbers to pick the
4004 * statistics object that best matches the clauses.
4005 */
4006 foreach(lc2, *varinfos)
4007 {
4008 ListCell *lc3;
4009 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
4010 AttrNumber attnum;
4011
4012 Assert(varinfo->rel == rel);
4013
4014 /* simple Var, search in statistics keys directly */
4015 if (IsA(varinfo->var, Var))
4016 {
4017 attnum = ((Var *) varinfo->var)->varattno;
4018
4019 /*
4020 * Ignore system attributes - we don't support statistics on
4021 * them, so can't match them (and it'd fail as the values are
4022 * negative).
4023 */
4024 if (!AttrNumberIsForUserDefinedAttr(attnum))
4025 continue;
4026
4027 if (bms_is_member(attnum, info->keys))
4028 nshared_vars++;
4029
4030 continue;
4031 }
4032
4033 /* expression - see if it's in the statistics object */
4034 foreach(lc3, info->exprs)
4035 {
4036 Node *expr = (Node *) lfirst(lc3);
4037
4038 if (equal(varinfo->var, expr))
4039 {
4040 nshared_exprs++;
4041 break;
4042 }
4043 }
4044 }
4045
4046 if (nshared_vars + nshared_exprs < 2)
4047 continue;
4048
4049 /*
4050 * Does this statistics object match more columns than the currently
4051 * best object? If so, use this one instead.
4052 *
4053 * XXX This should break ties using the name of the object, or something
4054 * like that, to make the outcome stable.
4055 */
4056 if ((nshared_exprs > nmatches_exprs) ||
4057 (((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars)))
4058 {
4059 statOid = info->statOid;
4060 nmatches_vars = nshared_vars;
4061 nmatches_exprs = nshared_exprs;
4062 matched_info = info;
4063 }
4064 }
4065
4066 /* No match? */
4067 if (statOid == InvalidOid)
4068 return false;
4069
4070 Assert(nmatches_vars + nmatches_exprs > 1);
4071
4072 stats = statext_ndistinct_load(statOid, rte->inh);
4073
4074 /*
4075 * If we have a match, search it for the specific item that matches (there
4076 * must be one), and construct the output values.
4077 */
4078 if (stats)
4079 {
4080 int i;
4081 List *newlist = NIL;
4082 MVNDistinctItem *item = NULL;
4083 ListCell *lc2;
4084 Bitmapset *matched = NULL;
4085 AttrNumber attnum_offset;
4086
4087 /*
4088 * How much do we need to offset the attnums? If there are no
4089 * expressions, no offset is needed. Otherwise offset enough to move
4090 * the lowest one (which is equal to number of expressions) to 1.
4091 */
4092 if (matched_info->exprs)
4093 attnum_offset = (list_length(matched_info->exprs) + 1);
4094 else
4095 attnum_offset = 0;
4096
4097 /* see what actually matched */
4098 foreach(lc2, *varinfos)
4099 {
4100 ListCell *lc3;
4101 int idx;
4102 bool found = false;
4103
4104 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
4105
4106 /*
4107 * Process a simple Var expression, by matching it to keys
4108 * directly. If there's a matching expression, we'll try matching
4109 * it later.
4110 */
4111 if (IsA(varinfo->var, Var))
4112 {
4113 AttrNumber attnum = ((Var *) varinfo->var)->varattno;
4114
4115 /*
4116 * Ignore expressions on system attributes. Can't rely on the
4117 * bms check for negative values.
4118 */
4119 if (!AttrNumberIsForUserDefinedAttr(attnum))
4120 continue;
4121
4122 /* Is the variable covered by the statistics object? */
4123 if (!bms_is_member(attnum, matched_info->keys))
4124 continue;
4125
4126 attnum = attnum + attnum_offset;
4127
4128 /* ensure sufficient offset */
4129 Assert(AttrNumberIsForUserDefinedAttr(attnum));
4130
4131 matched = bms_add_member(matched, attnum);
4132
4133 found = true;
4134 }
4135
4136 /*
4137 * XXX Maybe we should allow searching the expressions even if we
4138 * found an attribute matching the expression? That would handle
4139 * trivial expressions like "(a)" but it seems fairly useless.
4140 */
4141 if (found)
4142 continue;
4143
4144 /* expression - see if it's in the statistics object */
4145 idx = 0;
4146 foreach(lc3, matched_info->exprs)
4147 {
4148 Node *expr = (Node *) lfirst(lc3);
4149
4150 if (equal(varinfo->var, expr))
4151 {
4152 AttrNumber attnum = -(idx + 1);
4153
4154 attnum = attnum + attnum_offset;
4155
4156 /* ensure sufficient offset */
4157 Assert(AttrNumberIsForUserDefinedAttr(attnum));
4158
4159 matched = bms_add_member(matched, attnum);
4160
4161 /* there should be just one matching expression */
4162 break;
4163 }
4164
4165 idx++;
4166 }
4167 }
4168
4169 /* Find the specific item that exactly matches the combination */
4170 for (i = 0; i < stats->nitems; i++)
4171 {
4172 int j;
4173 MVNDistinctItem *tmpitem = &stats->items[i];
4174
4175 if (tmpitem->nattributes != bms_num_members(matched))
4176 continue;
4177
4178 /* assume it's the right item */
4179 item = tmpitem;
4180
4181 /* check that all item attributes/expressions fit the match */
4182 for (j = 0; j < tmpitem->nattributes; j++)
4183 {
4184 AttrNumber attnum = tmpitem->attributes[j];
4185
4186 /*
4187 * Thanks to how we constructed the matched bitmap above, we
4188 * can just offset all attnums the same way.
4189 */
4190 attnum = attnum + attnum_offset;
4191
4192 if (!bms_is_member(attnum, matched))
4193 {
4194 /* nah, it's not this item */
4195 item = NULL;
4196 break;
4197 }
4198 }
4199
4200 /*
4201 * If the item has all the matched attributes, we know it's the
4202 * right one; there can't be a better one matching more attributes.
4203 */
4204 if (item)
4205 break;
4206 }
4207
4208 /*
4209 * Make sure we found an item. There has to be one, because ndistinct
4210 * statistics includes all combinations of attributes.
4211 */
4212 if (!item)
4213 elog(ERROR, "corrupt MVNDistinct entry");
4214
4215 /* Form the output varinfo list, keeping only unmatched ones */
4216 foreach(lc, *varinfos)
4217 {
4218 GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
4219 ListCell *lc3;
4220 bool found = false;
4221
4222 /*
4223 * Let's look at plain variables first, because it's the most
4224 * common case and the check is quite cheap. We can simply get the
4225 * attnum and check (with an offset) matched bitmap.
4226 */
4227 if (IsA(varinfo->var, Var))
4228 {
4229 AttrNumber attnum = ((Var *) varinfo->var)->varattno;
4230
4231 /*
4232 * If it's a system attribute, we're done. We don't support
4233 * extended statistics on system attributes, so it's clearly
4234 * not matched. Just keep the expression and continue.
4235 */
4236 if (!AttrNumberIsForUserDefinedAttr(attnum))
4237 {
4238 newlist = lappend(newlist, varinfo);
4239 continue;
4240 }
4241
4242 /* apply the same offset as above */
4243 attnum += attnum_offset;
4244
4245 /* if it's not matched, keep the varinfo */
4246 if (!bms_is_member(attnum, matched))
4247 newlist = lappend(newlist, varinfo);
4248
4249 /* The rest of the loop deals with complex expressions. */
4250 continue;
4251 }
4252
4253 /*
4254 * Process complex expressions, not just simple Vars.
4255 *
4256 * First, we search for an exact match of an expression. If we
4257 * find one, we can just discard the whole GroupVarInfo, with all
4258 * the variables we extracted from it.
4259 *
4260 * Otherwise we inspect the individual vars, and try matching it
4261 * to variables in the item.
4262 */
4263 foreach(lc3, matched_info->exprs)
4264 {
4265 Node *expr = (Node *) lfirst(lc3);
4266
4267 if (equal(varinfo->var, expr))
4268 {
4269 found = true;
4270 break;
4271 }
4272 }
4273
4274 /* found exact match, skip */
4275 if (found)
4276 continue;
4277
4278 newlist = lappend(newlist, varinfo);
4279 }
4280
4281 *varinfos = newlist;
4282 *ndistinct = item->ndistinct;
4283 return true;
4284 }
4285
4286 return false;
4287}
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:815
MVNDistinct * statext_ndistinct_load(Oid mvoid, bool inh)
Definition: mvdistinct.c:148
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39
List * statlist
Definition: pathnodes.h:970
Bitmapset * keys
Definition: pathnodes.h:1318
Definition: primnodes.h:262

References Assert, attnum, MVNDistinctItem::attributes, AttrNumberIsForUserDefinedAttr, bms_add_member(), bms_is_member(), bms_num_members(), elog, equal(), ERROR, StatisticExtInfo::exprs, i, idx(), RangeTblEntry::inh, StatisticExtInfo::inherit, InvalidOid, IsA, MVNDistinct::items, j, StatisticExtInfo::keys, StatisticExtInfo::kind, lappend(), lfirst, list_length(), MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, NIL, MVNDistinct::nitems, planner_rt_fetch, GroupVarInfo::rel, RelOptInfo::relid, root, statext_ndistinct_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, and GroupVarInfo::var.

Referenced by estimate_num_groups().

◆ estimate_num_groups()

double estimate_num_groups ( PlannerInfo root,
List groupExprs,
double  input_rows,
List **  pgset,
EstimationInfo estinfo 
)

Definition at line 3430 of file selfuncs.c.

3432{
3433 List *varinfos = NIL;
3434 double srf_multiplier = 1.0;
3435 double numdistinct;
3436 ListCell *l;
3437 int i;
3438
3439 /* Zero the estinfo output parameter, if non-NULL */
3440 if (estinfo != NULL)
3441 memset(estinfo, 0, sizeof(EstimationInfo));
3442
3443 /*
3444 * We don't ever want to return an estimate of zero groups, as that tends
3445 * to lead to division-by-zero and other unpleasantness. The input_rows
3446 * estimate is usually already at least 1, but clamp it just in case it
3447 * isn't.
3448 */
3449 input_rows = clamp_row_est(input_rows);
3450
3451 /*
3452 * If no grouping columns, there's exactly one group. (This can't happen
3453 * for normal cases with GROUP BY or DISTINCT, but it is possible for
3454 * corner cases with set operations.)
3455 */
3456 if (groupExprs == NIL || (pgset && *pgset == NIL))
3457 return 1.0;
3458
3459 /*
3460 * Count groups derived from boolean grouping expressions. For other
3461 * expressions, find the unique Vars used, treating an expression as a Var
3462 * if we can find stats for it. For each one, record the statistical
3463 * estimate of number of distinct values (total in its table, without
3464 * regard for filtering).
3465 */
3466 numdistinct = 1.0;
3467
3468 i = 0;
3469 foreach(l, groupExprs)
3470 {
3471 Node *groupexpr = (Node *) lfirst(l);
3472 double this_srf_multiplier;
3473 VariableStatData vardata;
3474 List *varshere;
3475 ListCell *l2;
3476
3477 /* is expression in this grouping set? */
3478 if (pgset && !list_member_int(*pgset, i++))
3479 continue;
3480
3481 /*
3482 * Set-returning functions in grouping columns are a bit problematic.
3483 * The code below will effectively ignore their SRF nature and come up
3484 * with a numdistinct estimate as though they were scalar functions.
3485 * We compensate by scaling up the end result by the largest SRF
3486 * rowcount estimate. (This will be an overestimate if the SRF
3487 * produces multiple copies of any output value, but it seems best to
3488 * assume the SRF's outputs are distinct. In any case, it's probably
3489 * pointless to worry too much about this without much better
3490 * estimates for SRF output rowcounts than we have today.)
3491 */
3492 this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3493 if (srf_multiplier < this_srf_multiplier)
3494 srf_multiplier = this_srf_multiplier;
3495
3496 /* Short-circuit for expressions returning boolean */
3497 if (exprType(groupexpr) == BOOLOID)
3498 {
3499 numdistinct *= 2.0;
3500 continue;
3501 }
3502
3503 /*
3504 * If examine_variable is able to deduce anything about the GROUP BY
3505 * expression, treat it as a single variable even if it's really more
3506 * complicated.
3507 *
3508 * XXX This has the consequence that if there's a statistics object on
3509 * the expression, we don't split it into individual Vars. This
3510 * affects our selection of statistics in
3511 * estimate_multivariate_ndistinct, because it's probably better to
3512 * use more accurate estimate for each expression and treat them as
3513 * independent, than to combine estimates for the extracted variables
3514 * when we don't know how that relates to the expressions.
3515 */
3516 examine_variable(root, groupexpr, 0, &vardata);
3517 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3518 {
3519 varinfos = add_unique_group_var(root, varinfos,
3520 groupexpr, &vardata);
3521 ReleaseVariableStats(vardata);
3522 continue;
3523 }
3524 ReleaseVariableStats(vardata);
3525
3526 /*
3527 * Else pull out the component Vars. Handle PlaceHolderVars by
3528 * recursing into their arguments (effectively assuming that the
3529 * PlaceHolderVar doesn't change the number of groups, which boils
3530 * down to ignoring the possible addition of nulls to the result set).
3531 */
3532 varshere = pull_var_clause(groupexpr,
3533 PVC_RECURSE_AGGREGATES |
3534 PVC_RECURSE_WINDOWFUNCS |
3535 PVC_RECURSE_PLACEHOLDERS);
3536
3537 /*
3538 * If we find any variable-free GROUP BY item, then either it is a
3539 * constant (and we can ignore it) or it contains a volatile function;
3540 * in the latter case we punt and assume that each input row will
3541 * yield a distinct group.
3542 */
3543 if (varshere == NIL)
3544 {
3545 if (contain_volatile_functions(groupexpr))
3546 return input_rows;
3547 continue;
3548 }
3549
3550 /*
3551 * Else add variables to varinfos list
3552 */
3553 foreach(l2, varshere)
3554 {
3555 Node *var = (Node *) lfirst(l2);
3556
3557 examine_variable(root, var, 0, &vardata);
3558 varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3559 ReleaseVariableStats(vardata);
3560 }
3561 }
3562
3563 /*
3564 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3565 * list.
3566 */
3567 if (varinfos == NIL)
3568 {
3569 /* Apply SRF multiplier as we would do in the long path */
3570 numdistinct *= srf_multiplier;
3571 /* Round off */
3572 numdistinct = ceil(numdistinct);
3573 /* Guard against out-of-range answers */
3574 if (numdistinct > input_rows)
3575 numdistinct = input_rows;
3576 if (numdistinct < 1.0)
3577 numdistinct = 1.0;
3578 return numdistinct;
3579 }
3580
3581 /*
3582 * Group Vars by relation and estimate total numdistinct.
3583 *
3584 * For each iteration of the outer loop, we process the frontmost Var in
3585 * varinfos, plus all other Vars in the same relation. We remove these
3586 * Vars from the newvarinfos list for the next iteration. This is the
3587 * easiest way to group Vars of same rel together.
3588 */
3589 do
3590 {
3591 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3592 RelOptInfo *rel = varinfo1->rel;
3593 double reldistinct = 1;
3594 double relmaxndistinct = reldistinct;
3595 int relvarcount = 0;
3596 List *newvarinfos = NIL;
3597 List *relvarinfos = NIL;
3598
3599 /*
3600 * Split the list of varinfos in two - one for the current rel, one
3601 * for remaining Vars on other rels.
3602 */
3603 relvarinfos = lappend(relvarinfos, varinfo1);
3604 for_each_from(l, varinfos, 1)
3605 {
3606 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3607
3608 if (varinfo2->rel == varinfo1->rel)
3609 {
3610 /* varinfos on current rel */
3611 relvarinfos = lappend(relvarinfos, varinfo2);
3612 }
3613 else
3614 {
3615 /* not time to process varinfo2 yet */
3616 newvarinfos = lappend(newvarinfos, varinfo2);
3617 }
3618 }
3619
3620 /*
3621 * Get the numdistinct estimate for the Vars of this rel. We
3622 * iteratively search for multivariate n-distinct with maximum number
3623 * of vars; assuming that each var group is independent of the others,
3624 * we multiply them together. Any remaining relvarinfos after no more
3625 * multivariate matches are found are assumed independent too, so
3626 * their individual ndistinct estimates are multiplied also.
3627 *
3628 * While iterating, count how many separate numdistinct values we
3629 * apply. We apply a fudge factor below, but only if we multiplied
3630 * more than one such value.
3631 */
3632 while (relvarinfos)
3633 {
3634 double mvndistinct;
3635
3636 if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3637 &mvndistinct))
3638 {
3639 reldistinct *= mvndistinct;
3640 if (relmaxndistinct < mvndistinct)
3641 relmaxndistinct = mvndistinct;
3642 relvarcount++;
3643 }
3644 else
3645 {
3646 foreach(l, relvarinfos)
3647 {
3648 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3649
3650 reldistinct *= varinfo2->ndistinct;
3651 if (relmaxndistinct < varinfo2->ndistinct)
3652 relmaxndistinct = varinfo2->ndistinct;
3653 relvarcount++;
3654
3655 /*
3656 * When varinfo2's isdefault is set then we'd better set
3657 * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
3658 */
3659 if (estinfo != NULL && varinfo2->isdefault)
3660 estinfo->flags |= SELFLAG_USED_DEFAULT;
3661 }
3662
3663 /* we're done with this relation */
3664 relvarinfos = NIL;
3665 }
3666 }
3667
3668 /*
3669 * Sanity check --- don't divide by zero if empty relation.
3670 */
3671 Assert(IS_SIMPLE_REL(rel));
3672 if (rel->tuples > 0)
3673 {
3674 /*
3675 * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3676 * fudge factor is because the Vars are probably correlated but we
3677 * don't know by how much. We should never clamp to less than the
3678 * largest ndistinct value for any of the Vars, though, since
3679 * there will surely be at least that many groups.
3680 */
3681 double clamp = rel->tuples;
3682
3683 if (relvarcount > 1)
3684 {
3685 clamp *= 0.1;
3686 if (clamp < relmaxndistinct)
3687 {
3688 clamp = relmaxndistinct;
3689 /* for sanity in case some ndistinct is too large: */
3690 if (clamp > rel->tuples)
3691 clamp = rel->tuples;
3692 }
3693 }
3694 if (reldistinct > clamp)
3695 reldistinct = clamp;
3696
3697 /*
3698 * Update the estimate based on the restriction selectivity,
3699 * guarding against division by zero when reldistinct is zero.
3700 * Also skip this if we know that we are returning all rows.
3701 */
3702 if (reldistinct > 0 && rel->rows < rel->tuples)
3703 {
3704 /*
3705 * Given a table containing N rows with n distinct values in a
3706 * uniform distribution, if we select p rows at random then
3707 * the expected number of distinct values selected is
3708 *
3709 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3710 *
3711 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3712 *
3713 * See "Approximating block accesses in database
3714 * organizations", S. B. Yao, Communications of the ACM,
3715 * Volume 20 Issue 4, April 1977 Pages 260-261.
3716 *
3717 * Alternatively, re-arranging the terms from the factorials,
3718 * this may be written as
3719 *
3720 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3721 *
3722 * This form of the formula is more efficient to compute in
3723 * the common case where p is larger than N/n. Additionally,
3724 * as pointed out by Dell'Era, if i << N for all terms in the
3725 * product, it can be approximated by
3726 *
3727 * n * (1 - ((N-p)/N)^(N/n))
3728 *
3729 * See "Expected distinct values when selecting from a bag
3730 * without replacement", Alberto Dell'Era,
3731 * http://www.adellera.it/investigations/distinct_balls/.
3732 *
3733 * The condition i << N is equivalent to n >> 1, so this is a
3734 * good approximation when the number of distinct values in
3735 * the table is large. It turns out that this formula also
3736 * works well even when n is small.
3737 */
3738 reldistinct *=
3739 (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3740 rel->tuples / reldistinct));
3741 }
3742 reldistinct = clamp_row_est(reldistinct);
3743
3744 /*
3745 * Update estimate of total distinct groups.
3746 */
3747 numdistinct *= reldistinct;
3748 }
3749
3750 varinfos = newvarinfos;
3751 } while (varinfos != NIL);
3752
3753 /* Now we can account for the effects of any SRFs */
3754 numdistinct *= srf_multiplier;
3755
3756 /* Round off */
3757 numdistinct = ceil(numdistinct);
3758
3759 /* Guard against out-of-range answers */
3760 if (numdistinct > input_rows)
3761 numdistinct = input_rows;
3762 if (numdistinct < 1.0)
3763 numdistinct = 1.0;
3764
3765 return numdistinct;
3766}
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:537
double expression_returns_set_rows(PlannerInfo *root, Node *clause)
Definition: clauses.c:288
bool list_member_int(const List *list, int datum)
Definition: list.c:702
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:42
#define PVC_RECURSE_AGGREGATES
Definition: optimizer.h:188
#define PVC_RECURSE_PLACEHOLDERS
Definition: optimizer.h:192
#define PVC_RECURSE_WINDOWFUNCS
Definition: optimizer.h:190
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:863
#define for_each_from(cell, lst, N)
Definition: pg_list.h:414
#define linitial(l)
Definition: pg_list.h:178
static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct)
Definition: selfuncs.c:3968
static List * add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata)
Definition: selfuncs.c:3300
#define SELFLAG_USED_DEFAULT
Definition: selfuncs.h:76
uint32 flags
Definition: selfuncs.h:80
List * pull_var_clause(Node *node, int flags)
Definition: var.c:653

References add_unique_group_var(), Assert, clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), EstimationInfo::flags, for_each_from, HeapTupleIsValid, i, IS_SIMPLE_REL, GroupVarInfo::isdefault, VariableStatData::isunique, lappend(), lfirst, linitial, list_member_int(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, root, RelOptInfo::rows, SELFLAG_USED_DEFAULT, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), build_setop_child_paths(), cost_incremental_sort(), cost_memoize_rescan(), create_final_distinct_paths(), create_partial_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and get_windowclause_startup_tuples().
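
To make the per-relation arithmetic in the loop above easier to follow, here is a small standalone sketch (not part of selfuncs.c) of the Dell'Era approximation and the clamps the code applies. The table sizes and counts below are invented purely for illustration.

#include <math.h>
#include <stdio.h>

/*
 * Illustrative restatement of the per-relation distinct-group estimate used
 * above: Dell'Era's approximation of the expected number of distinct values
 * when p of N rows are selected, followed by the same clamps the planner
 * applies.  This is a sketch, not PostgreSQL code.
 */
static double
expected_distinct(double N, double n, double p)
{
    double      d = n;

    if (N > 0 && p < N && d > 0)
        d *= 1.0 - pow((N - p) / N, N / d);

    /* clamp as the code above does: at least one group, at most N */
    if (d > N)
        d = N;
    if (d < 1.0)
        d = 1.0;
    return ceil(d);
}

int
main(void)
{
    /* 1M-row table, 10k distinct values, restrictions keep 50k rows */
    printf("%.0f\n", expected_distinct(1000000.0, 10000.0, 50000.0));
    return 0;
}

Compile with -lm; the example prints an estimate a little below 10000, reflecting that selecting 5% of the rows is still very likely to hit nearly every distinct value when n is large.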

◆ examine_simple_variable()

static void examine_simple_variable ( PlannerInfo root,
Var var,
VariableStatData vardata 
)
static

Definition at line 5440 of file selfuncs.c.

5442{
5443 RangeTblEntry *rte = root->simple_rte_array[var->varno];
5444
5445 Assert(IsA(rte, RangeTblEntry));
5446
5447 if (get_relation_stats_hook &&
5448 (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
5449 {
5450 /*
5451 * The hook took control of acquiring a stats tuple. If it did supply
5452 * a tuple, it'd better have supplied a freefunc.
5453 */
5454 if (HeapTupleIsValid(vardata->statsTuple) &&
5455 !vardata->freefunc)
5456 elog(ERROR, "no function provided to release variable stats with");
5457 }
5458 else if (rte->rtekind == RTE_RELATION)
5459 {
5460 /*
5461 * Plain table or parent of an inheritance appendrel, so look up the
5462 * column in pg_statistic
5463 */
5464 vardata->statsTuple = SearchSysCache3(STATRELATTINH,
5465 ObjectIdGetDatum(rte->relid),
5466 Int16GetDatum(var->varattno),
5467 BoolGetDatum(rte->inh));
5468 vardata->freefunc = ReleaseSysCache;
5469
5470 if (HeapTupleIsValid(vardata->statsTuple))
5471 {
5472 RelOptInfo *onerel = find_base_rel_noerr(root, var->varno);
5473 Oid userid;
5474
5475 /*
5476 * Check if user has permission to read this column. We require
5477 * all rows to be accessible, so there must be no securityQuals
5478 * from security barrier views or RLS policies.
5479 *
5480 * Normally the Var will have an associated RelOptInfo from which
5481 * we can find out which userid to do the check as; but it might
5482 * not if it's a RETURNING Var for an INSERT target relation. In
5483 * that case use the RTEPermissionInfo associated with the RTE.
5484 */
5485 if (onerel)
5486 userid = onerel->userid;
5487 else
5488 {
5489 RTEPermissionInfo *perminfo;
5490
5491 perminfo = getRTEPermissionInfo(root->parse->rteperminfos, rte);
5492 userid = perminfo->checkAsUser;
5493 }
5494 if (!OidIsValid(userid))
5495 userid = GetUserId();
5496
5497 vardata->acl_ok =
5498 rte->securityQuals == NIL &&
5499 ((pg_class_aclcheck(rte->relid, userid,
5500 ACL_SELECT) == ACLCHECK_OK) ||
5501 (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
5502 ACL_SELECT) == ACLCHECK_OK));
5503
5504 /*
5505 * If the user doesn't have permissions to access an inheritance
5506 * child relation or specifically this attribute, check the
5507 * permissions of the table/column actually mentioned in the
5508 * query, since most likely the user does have that permission
5509 * (else the query will fail at runtime), and if the user can read
5510 * the column there then he can get the values of the child table
5511 * too. To do that, we must find out which of the root parent's
5512 * attributes the child relation's attribute corresponds to.
5513 */
5514 if (!vardata->acl_ok && var->varattno > 0 &&
5515 root->append_rel_array != NULL)
5516 {
5517 AppendRelInfo *appinfo;
5518 Index varno = var->varno;
5519 int varattno = var->varattno;
5520 bool found = false;
5521
5522 appinfo = root->append_rel_array[varno];
5523
5524 /*
5525 * Partitions are mapped to their immediate parent, not the
5526 * root parent, so must be ready to walk up multiple
5527 * AppendRelInfos. But stop if we hit a parent that is not
5528 * RTE_RELATION --- that's a flattened UNION ALL subquery, not
5529 * an inheritance parent.
5530 */
5531 while (appinfo &&
5532 planner_rt_fetch(appinfo->parent_relid,
5533 root)->rtekind == RTE_RELATION)
5534 {
5535 int parent_varattno;
5536
5537 found = false;
5538 if (varattno <= 0 || varattno > appinfo->num_child_cols)
5539 break; /* safety check */
5540 parent_varattno = appinfo->parent_colnos[varattno - 1];
5541 if (parent_varattno == 0)
5542 break; /* Var is local to child */
5543
5544 varno = appinfo->parent_relid;
5545 varattno = parent_varattno;
5546 found = true;
5547
5548 /* If the parent is itself a child, continue up. */
5549 appinfo = root->append_rel_array[varno];
5550 }
5551
5552 /*
5553 * In rare cases, the Var may be local to the child table, in
5554 * which case, we've got to live with having no access to this
5555 * column's stats.
5556 */
5557 if (!found)
5558 return;
5559
5560 /* Repeat the access check on this parent rel & column */
5561 rte = planner_rt_fetch(varno, root);
5562 Assert(rte->rtekind == RTE_RELATION);
5563
5564 /*
5565 * Fine to use the same userid as it's the same in all
5566 * relations of a given inheritance tree.
5567 */
5568 vardata->acl_ok =
5569 rte->securityQuals == NIL &&
5570 ((pg_class_aclcheck(rte->relid, userid,
5571 ACL_SELECT) == ACLCHECK_OK) ||
5572 (pg_attribute_aclcheck(rte->relid, varattno, userid,
5573 ACL_SELECT) == ACLCHECK_OK));
5574 }
5575 }
5576 else
5577 {
5578 /* suppress any possible leakproofness checks later */
5579 vardata->acl_ok = true;
5580 }
5581 }
5582 else if ((rte->rtekind == RTE_SUBQUERY && !rte->inh) ||
5583 (rte->rtekind == RTE_CTE && !rte->self_reference))
5584 {
5585 /*
5586 * Plain subquery (not one that was converted to an appendrel) or
5587 * non-recursive CTE. In either case, we can try to find out what the
5588 * Var refers to within the subquery. We skip this for appendrel and
5589 * recursive-CTE cases because any column stats we did find would
5590 * likely not be very relevant.
5591 */
5592 PlannerInfo *subroot;
5593 Query *subquery;
5594 List *subtlist;
5595 TargetEntry *ste;
5596
5597 /*
5598 * Punt if it's a whole-row var rather than a plain column reference.
5599 */
5600 if (var->varattno == InvalidAttrNumber)
5601 return;
5602
5603 /*
5604 * Otherwise, find the subquery's planner subroot.
5605 */
5606 if (rte->rtekind == RTE_SUBQUERY)
5607 {
5608 RelOptInfo *rel;
5609
5610 /*
5611 * Fetch RelOptInfo for subquery. Note that we don't change the
5612 * rel returned in vardata, since caller expects it to be a rel of
5613 * the caller's query level. Because we might already be
5614 * recursing, we can't use that rel pointer either, but have to
5615 * look up the Var's rel afresh.
5616 */
5617 rel = find_base_rel(root, var->varno);
5618
5619 subroot = rel->subroot;
5620 }
5621 else
5622 {
5623 /* CTE case is more difficult */
5624 PlannerInfo *cteroot;
5625 Index levelsup;
5626 int ndx;
5627 int plan_id;
5628 ListCell *lc;
5629
5630 /*
5631 * Find the referenced CTE, and locate the subroot previously made
5632 * for it.
5633 */
5634 levelsup = rte->ctelevelsup;
5635 cteroot = root;
5636 while (levelsup-- > 0)
5637 {
5638 cteroot = cteroot->parent_root;
5639 if (!cteroot) /* shouldn't happen */
5640 elog(ERROR, "bad levelsup for CTE \"%s\"", rte->ctename);
5641 }
5642
5643 /*
5644 * Note: cte_plan_ids can be shorter than cteList, if we are still
5645 * working on planning the CTEs (ie, this is a side-reference from
5646 * another CTE). So we mustn't use forboth here.
5647 */
5648 ndx = 0;
5649 foreach(lc, cteroot->parse->cteList)
5650 {
5651 CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
5652
5653 if (strcmp(cte->ctename, rte->ctename) == 0)
5654 break;
5655 ndx++;
5656 }
5657 if (lc == NULL) /* shouldn't happen */
5658 elog(ERROR, "could not find CTE \"%s\"", rte->ctename);
5659 if (ndx >= list_length(cteroot->cte_plan_ids))
5660 elog(ERROR, "could not find plan for CTE \"%s\"", rte->ctename);
5661 plan_id = list_nth_int(cteroot->cte_plan_ids, ndx);
5662 if (plan_id <= 0)
5663 elog(ERROR, "no plan was made for CTE \"%s\"", rte->ctename);
5664 subroot = list_nth(root->glob->subroots, plan_id - 1);
5665 }
5666
5667 /* If the subquery hasn't been planned yet, we have to punt */
5668 if (subroot == NULL)
5669 return;
5670 Assert(IsA(subroot, PlannerInfo));
5671
5672 /*
5673 * We must use the subquery parsetree as mangled by the planner, not
5674 * the raw version from the RTE, because we need a Var that will refer
5675 * to the subroot's live RelOptInfos. For instance, if any subquery
5676 * pullup happened during planning, Vars in the targetlist might have
5677 * gotten replaced, and we need to see the replacement expressions.
5678 */
5679 subquery = subroot->parse;
5680 Assert(IsA(subquery, Query));
5681
5682 /*
5683 * Punt if subquery uses set operations or grouping sets, as these
5684 * will mash underlying columns' stats beyond recognition. (Set ops
5685 * are particularly nasty; if we forged ahead, we would return stats
5686 * relevant to only the leftmost subselect...) DISTINCT is also
5687 * problematic, but we check that later because there is a possibility
5688 * of learning something even with it.
5689 */
5690 if (subquery->setOperations ||
5691 subquery->groupingSets)
5692 return;
5693
5694 /* Get the subquery output expression referenced by the upper Var */
5695 if (subquery->returningList)
5696 subtlist = subquery->returningList;
5697 else
5698 subtlist = subquery->targetList;
5699 ste = get_tle_by_resno(subtlist, var->varattno);
5700 if (ste == NULL || ste->resjunk)
5701 elog(ERROR, "subquery %s does not have attribute %d",
5702 rte->eref->aliasname, var->varattno);
5703 var = (Var *) ste->expr;
5704
5705 /*
5706 * If subquery uses DISTINCT, we can't make use of any stats for the
5707 * variable ... but, if it's the only DISTINCT column, we are entitled
5708 * to consider it unique. We do the test this way so that it works
5709 * for cases involving DISTINCT ON.
5710 */
5711 if (subquery->distinctClause)
5712 {
5713 if (list_length(subquery->distinctClause) == 1 &&
5714 targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
5715 vardata->isunique = true;
5716 /* cannot go further */
5717 return;
5718 }
5719
5720 /* The same idea as with DISTINCT clause works for a GROUP-BY too */
5721 if (subquery->groupClause)
5722 {
5723 if (list_length(subquery->groupClause) == 1 &&
5724 targetIsInSortList(ste, InvalidOid, subquery->groupClause))
5725 vardata->isunique = true;
5726 /* cannot go further */
5727 return;
5728 }
5729
5730 /*
5731 * If the sub-query originated from a view with the security_barrier
5732 * attribute, we must not look at the variable's statistics, though it
5733 * seems all right to notice the existence of a DISTINCT clause. So
5734 * stop here.
5735 *
5736 * This is probably a harsher restriction than necessary; it's
5737 * certainly OK for the selectivity estimator (which is a C function,
5738 * and therefore omnipotent anyway) to look at the statistics. But
5739 * many selectivity estimators will happily *invoke the operator
5740 * function* to try to work out a good estimate - and that's not OK.
5741 * So for now, don't dig down for stats.
5742 */
5743 if (rte->security_barrier)
5744 return;
5745
5746 /* Can only handle a simple Var of subquery's query level */
5747 if (var && IsA(var, Var) &&
5748 var->varlevelsup == 0)
5749 {
5750 /*
5751 * OK, recurse into the subquery. Note that the original setting
5752 * of vardata->isunique (which will surely be false) is left
5753 * unchanged in this situation. That's what we want, since even
5754 * if the underlying column is unique, the subquery may have
5755 * joined to other tables in a way that creates duplicates.
5756 */
5757 examine_simple_variable(subroot, var, vardata);
5758 }
5759 }
5760 else
5761 {
5762 /*
5763 * Otherwise, the Var comes from a FUNCTION or VALUES RTE. (We won't
5764 * see RTE_JOIN here because join alias Vars have already been
5765 * flattened.) There's not much we can do with function outputs, but
5766 * maybe someday try to be smarter about VALUES.
5767 */
5768 }
5769}
@ ACLCHECK_OK
Definition: acl.h:183
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:3836
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4007
#define InvalidAttrNumber
Definition: attnum.h:23
unsigned int Index
Definition: c.h:571
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
Oid GetUserId(void)
Definition: miscinit.c:517
bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
RTEPermissionInfo * getRTEPermissionInfo(List *rteperminfos, RangeTblEntry *rte)
TargetEntry * get_tle_by_resno(List *tlist, AttrNumber resno)
@ RTE_CTE
Definition: parsenodes.h:1032
@ RTE_SUBQUERY
Definition: parsenodes.h:1027
#define ACL_SELECT
Definition: parsenodes.h:77
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static int list_nth_int(const List *list, int n)
Definition: pg_list.h:310
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:414
RelOptInfo * find_base_rel_noerr(PlannerInfo *root, int relid)
Definition: relnode.c:436
static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata)
Definition: selfuncs.c:5440
Index parent_relid
Definition: pathnodes.h:3004
int num_child_cols
Definition: pathnodes.h:3040
List * cte_plan_ids
Definition: pathnodes.h:326
Query * parse
Definition: pathnodes.h:223
List * returningList
Definition: parsenodes.h:209
Node * setOperations
Definition: parsenodes.h:230
List * cteList
Definition: parsenodes.h:168
List * groupClause
Definition: parsenodes.h:211
List * targetList
Definition: parsenodes.h:193
List * groupingSets
Definition: parsenodes.h:214
List * distinctClause
Definition: parsenodes.h:220
char * ctename
Definition: parsenodes.h:1205
Index ctelevelsup
Definition: parsenodes.h:1207
Oid userid
Definition: pathnodes.h:990
PlannerInfo * subroot
Definition: pathnodes.h:977
Expr * expr
Definition: primnodes.h:2219
AttrNumber varattno
Definition: primnodes.h:274
int varno
Definition: primnodes.h:269
Index varlevelsup
Definition: primnodes.h:294

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, Assert, BoolGetDatum(), RTEPermissionInfo::checkAsUser, PlannerInfo::cte_plan_ids, RangeTblEntry::ctelevelsup, Query::cteList, RangeTblEntry::ctename, CommonTableExpr::ctename, Query::distinctClause, elog, ERROR, examine_simple_variable(), TargetEntry::expr, find_base_rel(), find_base_rel_noerr(), VariableStatData::freefunc, get_relation_stats_hook, get_tle_by_resno(), getRTEPermissionInfo(), GetUserId(), Query::groupClause, Query::groupingSets, HeapTupleIsValid, if(), RangeTblEntry::inh, Int16GetDatum(), InvalidAttrNumber, InvalidOid, IsA, VariableStatData::isunique, lfirst, list_length(), list_nth(), list_nth_int(), NIL, AppendRelInfo::num_child_cols, ObjectIdGetDatum(), OidIsValid, AppendRelInfo::parent_relid, PlannerInfo::parse, pg_attribute_aclcheck(), pg_class_aclcheck(), planner_rt_fetch, ReleaseSysCache(), RangeTblEntry::relid, Query::returningList, root, RTE_CTE, RTE_RELATION, RTE_SUBQUERY, RangeTblEntry::rtekind, SearchSysCache3(), Query::setOperations, VariableStatData::statsTuple, RelOptInfo::subroot, targetIsInSortList(), Query::targetList, RelOptInfo::userid, Var::varattno, Var::varlevelsup, and Var::varno.

Referenced by examine_simple_variable(), and examine_variable().
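
The hook check at the top of this function expects any get_relation_stats_hook that supplies a tuple to also supply a freefunc. The following is a minimal sketch of such a hook, as an extension might install it; for illustration it simply performs the same syscache lookup the default path would (ignoring inheritance), so it only uses APIs shown in the listing above. It is an assumption-laden example, not an existing PostgreSQL hook implementation.

#include "postgres.h"

#include "nodes/parsenodes.h"
#include "nodes/pathnodes.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"

/* Sketch: install with  get_relation_stats_hook = my_relation_stats_hook; */
static bool
my_relation_stats_hook(PlannerInfo *root, RangeTblEntry *rte,
                       AttrNumber attnum, VariableStatData *vardata)
{
    if (rte->rtekind != RTE_RELATION)
        return false;           /* decline; let the default logic run */

    /* same pg_statistic lookup as the built-in path, without inheritance */
    vardata->statsTuple = SearchSysCache3(STATRELATTINH,
                                          ObjectIdGetDatum(rte->relid),
                                          Int16GetDatum(attnum),
                                          BoolGetDatum(false));
    vardata->freefunc = ReleaseSysCache;    /* mandatory whenever a tuple is set */
    return true;
}

Returning false lets examine_simple_variable() fall through to its normal RTE_RELATION, subquery, or CTE handling.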

◆ examine_variable()

void examine_variable ( PlannerInfo root,
Node node,
int  varRelid,
VariableStatData vardata 
)

Definition at line 5033 of file selfuncs.c.

5035{
5036 Node *basenode;
5037 Relids varnos;
5038 Relids basevarnos;
5039 RelOptInfo *onerel;
5040
5041 /* Make sure we don't return dangling pointers in vardata */
5042 MemSet(vardata, 0, sizeof(VariableStatData));
5043
5044 /* Save the exposed type of the expression */
5045 vardata->vartype = exprType(node);
5046
5047 /* Look inside any binary-compatible relabeling */
5048
5049 if (IsA(node, RelabelType))
5050 basenode = (Node *) ((RelabelType *) node)->arg;
5051 else
5052 basenode = node;
5053
5054 /* Fast path for a simple Var */
5055
5056 if (IsA(basenode, Var) &&
5057 (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
5058 {
5059 Var *var = (Var *) basenode;
5060
5061 /* Set up result fields other than the stats tuple */
5062 vardata->var = basenode; /* return Var without relabeling */
5063 vardata->rel = find_base_rel(root, var->varno);
5064 vardata->atttype = var->vartype;
5065 vardata->atttypmod = var->vartypmod;
5066 vardata->isunique = has_unique_index(vardata->rel, var->varattno);
5067
5068 /* Try to locate some stats */
5069 examine_simple_variable(root, var, vardata);
5070
5071 return;
5072 }
5073
5074 /*
5075 * Okay, it's a more complicated expression. Determine variable
5076 * membership. Note that when varRelid isn't zero, only vars of that
5077 * relation are considered "real" vars.
5078 */
5079 varnos = pull_varnos(root, basenode);
5080 basevarnos = bms_difference(varnos, root->outer_join_rels);
5081
5082 onerel = NULL;
5083
5084 if (bms_is_empty(basevarnos))
5085 {
5086 /* No Vars at all ... must be pseudo-constant clause */
5087 }
5088 else
5089 {
5090 int relid;
5091
5092 /* Check if the expression is in vars of a single base relation */
5093 if (bms_get_singleton_member(basevarnos, &relid))
5094 {
5095 if (varRelid == 0 || varRelid == relid)
5096 {
5097 onerel = find_base_rel(root, relid);
5098 vardata->rel = onerel;
5099 node = basenode; /* strip any relabeling */
5100 }
5101 /* else treat it as a constant */
5102 }
5103 else
5104 {
5105 /* varnos has multiple relids */
5106 if (varRelid == 0)
5107 {
5108 /* treat it as a variable of a join relation */
5109 vardata->rel = find_join_rel(root, varnos);
5110 node = basenode; /* strip any relabeling */
5111 }
5112 else if (bms_is_member(varRelid, varnos))
5113 {
5114 /* ignore the vars belonging to other relations */
5115 vardata->rel = find_base_rel(root, varRelid);
5116 node = basenode; /* strip any relabeling */
5117 /* note: no point in expressional-index search here */
5118 }
5119 /* else treat it as a constant */
5120 }
5121 }
5122
5123 bms_free(basevarnos);
5124
5125 vardata->var = node;
5126 vardata->atttype = exprType(node);
5127 vardata->atttypmod = exprTypmod(node);
5128
5129 if (onerel)
5130 {
5131 /*
5132 * We have an expression in vars of a single relation. Try to match
5133 * it to expressional index columns, in hopes of finding some
5134 * statistics.
5135 *
5136 * Note that we consider all index columns including INCLUDE columns,
5137 * since there could be stats for such columns. But the test for
5138 * uniqueness needs to be warier.
5139 *
5140 * XXX it's conceivable that there are multiple matches with different
5141 * index opfamilies; if so, we need to pick one that matches the
5142 * operator we are estimating for. FIXME later.
5143 */
5144 ListCell *ilist;
5145 ListCell *slist;
5146 Oid userid;
5147
5148 /*
5149 * The nullingrels bits within the expression could prevent us from
5150 * matching it to expressional index columns or to the expressions in
5151 * extended statistics. So strip them out first.
5152 */
5153 if (bms_overlap(varnos, root->outer_join_rels))
5154 node = remove_nulling_relids(node, root->outer_join_rels, NULL);
5155
5156 /*
5157 * Determine the user ID to use for privilege checks: either
5158 * onerel->userid if it's set (e.g., in case we're accessing the table
5159 * via a view), or the current user otherwise.
5160 *
5161 * If we drill down to child relations, we keep using the same userid:
5162 * it's going to be the same anyway, due to how we set up the relation
5163 * tree (q.v. build_simple_rel).
5164 */
5165 userid = OidIsValid(onerel->userid) ? onerel->userid : GetUserId();
5166
5167 foreach(ilist, onerel->indexlist)
5168 {
5169 IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
5170 ListCell *indexpr_item;
5171 int pos;
5172
5173 indexpr_item = list_head(index->indexprs);
5174 if (indexpr_item == NULL)
5175 continue; /* no expressions here... */
5176
5177 for (pos = 0; pos < index->ncolumns; pos++)
5178 {
5179 if (index->indexkeys[pos] == 0)
5180 {
5181 Node *indexkey;
5182
5183 if (indexpr_item == NULL)
5184 elog(ERROR, "too few entries in indexprs list");
5185 indexkey = (Node *) lfirst(indexpr_item);
5186 if (indexkey && IsA(indexkey, RelabelType))
5187 indexkey = (Node *) ((RelabelType *) indexkey)->arg;
5188 if (equal(node, indexkey))
5189 {
5190 /*
5191 * Found a match ... is it a unique index? Tests here
5192 * should match has_unique_index().
5193 */
5194 if (index->unique &&
5195 index->nkeycolumns == 1 &&
5196 pos == 0 &&
5197 (index->indpred == NIL || index->predOK))
5198 vardata->isunique = true;
5199
5200 /*
5201 * Has it got stats? We only consider stats for
5202 * non-partial indexes, since partial indexes probably
5203 * don't reflect whole-relation statistics; the above
5204 * check for uniqueness is the only info we take from
5205 * a partial index.
5206 *
5207 * An index stats hook, however, must make its own
5208 * decisions about what to do with partial indexes.
5209 */
5210 if (get_index_stats_hook &&
5211 (*get_index_stats_hook) (root, index->indexoid,
5212 pos + 1, vardata))
5213 {
5214 /*
5215 * The hook took control of acquiring a stats
5216 * tuple. If it did supply a tuple, it'd better
5217 * have supplied a freefunc.
5218 */
5219 if (HeapTupleIsValid(vardata->statsTuple) &&
5220 !vardata->freefunc)
5221 elog(ERROR, "no function provided to release variable stats with");
5222 }
5223 else if (index->indpred == NIL)
5224 {
5225 vardata->statsTuple =
5226 SearchSysCache3(STATRELATTINH,
5227 ObjectIdGetDatum(index->indexoid),
5228 Int16GetDatum(pos + 1),
5229 BoolGetDatum(false));
5230 vardata->freefunc = ReleaseSysCache;
5231
5232 if (HeapTupleIsValid(vardata->statsTuple))
5233 {
5234 /* Get index's table for permission check */
5235 RangeTblEntry *rte;
5236
5237 rte = planner_rt_fetch(index->rel->relid, root);
5238 Assert(rte->rtekind == RTE_RELATION);
5239
5240 /*
5241 * For simplicity, we insist on the whole
5242 * table being selectable, rather than trying
5243 * to identify which column(s) the index
5244 * depends on. Also require all rows to be
5245 * selectable --- there must be no
5246 * securityQuals from security barrier views
5247 * or RLS policies.
5248 */
5249 vardata->acl_ok =
5250 rte->securityQuals == NIL &&
5251 (pg_class_aclcheck(rte->relid, userid,
5252 ACL_SELECT) == ACLCHECK_OK);
5253
5254 /*
5255 * If the user doesn't have permissions to
5256 * access an inheritance child relation, check
5257 * the permissions of the table actually
5258 * mentioned in the query, since most likely
5259 * the user does have that permission. Note
5260 * that whole-table select privilege on the
5261 * parent doesn't quite guarantee that the
5262 * user could read all columns of the child.
5263 * But in practice it's unlikely that any
5264 * interesting security violation could result
5265 * from allowing access to the expression
5266 * index's stats, so we allow it anyway. See
5267 * similar code in examine_simple_variable()
5268 * for additional comments.
5269 */
5270 if (!vardata->acl_ok &&
5271 root->append_rel_array != NULL)
5272 {
5273 AppendRelInfo *appinfo;
5274 Index varno = index->rel->relid;
5275
5276 appinfo = root->append_rel_array[varno];
5277 while (appinfo &&
5278 planner_rt_fetch(appinfo->parent_relid,
5279 root)->rtekind == RTE_RELATION)
5280 {
5281 varno = appinfo->parent_relid;
5282 appinfo = root->append_rel_array[varno];
5283 }
5284 if (varno != index->rel->relid)
5285 {
5286 /* Repeat access check on this rel */
5287 rte = planner_rt_fetch(varno, root);
5288 Assert(rte->rtekind == RTE_RELATION);
5289
5290 vardata->acl_ok =
5291 rte->securityQuals == NIL &&
5292 (pg_class_aclcheck(rte->relid,
5293 userid,
5294 ACL_SELECT) == ACLCHECK_OK);
5295 }
5296 }
5297 }
5298 else
5299 {
5300 /* suppress leakproofness checks later */
5301 vardata->acl_ok = true;
5302 }
5303 }
5304 if (vardata->statsTuple)
5305 break;
5306 }
5307 indexpr_item = lnext(index->indexprs, indexpr_item);
5308 }
5309 }
5310 if (vardata->statsTuple)
5311 break;
5312 }
5313
5314 /*
5315 * Search extended statistics for one with a matching expression.
5316 * There might be multiple ones, so just grab the first one. In the
5317 * future, we might consider the statistics target (and pick the most
5318 * accurate statistics) and maybe some other parameters.
5319 */
5320 foreach(slist, onerel->statlist)
5321 {
5322 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
5323 RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
5324 ListCell *expr_item;
5325 int pos;
5326
5327 /*
5328 * Stop once we've found statistics for the expression (either
5329 * from extended stats, or for an index in the preceding loop).
5330 */
5331 if (vardata->statsTuple)
5332 break;
5333
5334 /* skip stats without per-expression stats */
5335 if (info->kind != STATS_EXT_EXPRESSIONS)
5336 continue;
5337
5338 /* skip stats with mismatching stxdinherit value */
5339 if (info->inherit != rte->inh)
5340 continue;
5341
5342 pos = 0;
5343 foreach(expr_item, info->exprs)
5344 {
5345 Node *expr = (Node *) lfirst(expr_item);
5346
5347 Assert(expr);
5348
5349 /* strip RelabelType before comparing it */
5350 if (expr && IsA(expr, RelabelType))
5351 expr = (Node *) ((RelabelType *) expr)->arg;
5352
5353 /* found a match, see if we can extract pg_statistic row */
5354 if (equal(node, expr))
5355 {
5356 /*
5357 * XXX Not sure if we should cache the tuple somewhere.
5358 * Now we just create a new copy every time.
5359 */
5360 vardata->statsTuple =
5361 statext_expressions_load(info->statOid, rte->inh, pos);
5362
5363 vardata->freefunc = ReleaseDummy;
5364
5365 /*
5366 * For simplicity, we insist on the whole table being
5367 * selectable, rather than trying to identify which
5368 * column(s) the statistics object depends on. Also
5369 * require all rows to be selectable --- there must be no
5370 * securityQuals from security barrier views or RLS
5371 * policies.
5372 */
5373 vardata->acl_ok =
5374 rte->securityQuals == NIL &&
5375 (pg_class_aclcheck(rte->relid, userid,
5376 ACL_SELECT) == ACLCHECK_OK);
5377
5378 /*
5379 * If the user doesn't have permissions to access an
5380 * inheritance child relation, check the permissions of
5381 * the table actually mentioned in the query, since most
5382 * likely the user does have that permission. Note that
5383 * whole-table select privilege on the parent doesn't
5384 * quite guarantee that the user could read all columns of
5385 * the child. But in practice it's unlikely that any
5386 * interesting security violation could result from
5387 * allowing access to the expression stats, so we allow it
5388 * anyway. See similar code in examine_simple_variable()
5389 * for additional comments.
5390 */
5391 if (!vardata->acl_ok &&
5392 root->append_rel_array != NULL)
5393 {
5394 AppendRelInfo *appinfo;
5395 Index varno = onerel->relid;
5396
5397 appinfo = root->append_rel_array[varno];
5398 while (appinfo &&
5399 planner_rt_fetch(appinfo->parent_relid,
5400 root)->rtekind == RTE_RELATION)
5401 {
5402 varno = appinfo->parent_relid;
5403 appinfo = root->append_rel_array[varno];
5404 }
5405 if (varno != onerel->relid)
5406 {
5407 /* Repeat access check on this rel */
5408 rte = planner_rt_fetch(varno, root);
5409 Assert(rte->rtekind == RTE_RELATION);
5410
5411 vardata->acl_ok =
5412 rte->securityQuals == NIL &&
5413 (pg_class_aclcheck(rte->relid,
5414 userid,
5415 ACL_SELECT) == ACLCHECK_OK);
5416 }
5417 }
5418
5419 break;
5420 }
5421
5422 pos++;
5423 }
5424 }
5425 }
5426
5427 bms_free(varnos);
5428}
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:346
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:715
#define bms_is_empty(a)
Definition: bitmapset.h:118
#define MemSet(start, val, len)
Definition: c.h:977
HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx)
int32 exprTypmod(const Node *expr)
Definition: nodeFuncs.c:301
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
bool has_unique_index(RelOptInfo *rel, AttrNumber attno)
Definition: plancat.c:2228
RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids)
Definition: relnode.c:527
static void ReleaseDummy(HeapTuple tuple)
Definition: selfuncs.c:4992
List * indexlist
Definition: pathnodes.h:968
int32 atttypmod
Definition: selfuncs.h:94
Relids pull_varnos(PlannerInfo *root, Node *node)
Definition: var.c:114

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, arg, Assert, VariableStatData::atttype, VariableStatData::atttypmod, bms_difference(), bms_free(), bms_get_singleton_member(), bms_is_empty, bms_is_member(), bms_overlap(), BoolGetDatum(), elog, equal(), ERROR, examine_simple_variable(), StatisticExtInfo::exprs, exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, if(), RelOptInfo::indexlist, RangeTblEntry::inh, StatisticExtInfo::inherit, Int16GetDatum(), IsA, VariableStatData::isunique, StatisticExtInfo::kind, lfirst, list_head(), lnext(), MemSet, NIL, ObjectIdGetDatum(), OidIsValid, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, pull_varnos(), VariableStatData::rel, ReleaseDummy(), ReleaseSysCache(), RangeTblEntry::relid, RelOptInfo::relid, remove_nulling_relids(), root, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), statext_expressions_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, VariableStatData::statsTuple, RelOptInfo::userid, VariableStatData::var, Var::varattno, Var::varno, and VariableStatData::vartype.

Referenced by booltestsel(), boolvarsel(), estimate_array_length(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().
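
All of the callers listed above follow the same pattern: examine the expression, use the stats tuple if one was found, and always release it with ReleaseVariableStats(). The sketch below shows that pattern with a hypothetical helper (example_nullfrac is not a PostgreSQL function); it mirrors the null-fraction lookup that generic_restriction_selectivity() performs later in this file.

#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
#include "utils/selfuncs.h"

/* Sketch of the caller-side protocol for examine_variable(). */
static double
example_nullfrac(PlannerInfo *root, Node *expr, int varRelid)
{
    VariableStatData vardata;
    double      nullfrac = 0.0;     /* illustrative fallback when no stats */

    examine_variable(root, expr, varRelid, &vardata);
    if (HeapTupleIsValid(vardata.statsTuple))
        nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
    ReleaseVariableStats(vardata);

    return nullfrac;
}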

◆ find_join_input_rel()

static RelOptInfo * find_join_input_rel ( PlannerInfo root,
Relids  relids 
)
static

Definition at line 6471 of file selfuncs.c.

6472{
6473 RelOptInfo *rel = NULL;
6474
6475 if (!bms_is_empty(relids))
6476 {
6477 int relid;
6478
6479 if (bms_get_singleton_member(relids, &relid))
6480 rel = find_base_rel(root, relid);
6481 else
6482 rel = find_join_rel(root, relids);
6483 }
6484
6485 if (rel == NULL)
6486 elog(ERROR, "could not find RelOptInfo for given relids");
6487
6488 return rel;
6489}

References bms_get_singleton_member(), bms_is_empty, elog, ERROR, find_base_rel(), find_join_rel(), and root.

Referenced by eqjoinsel().

◆ generic_restriction_selectivity()

double generic_restriction_selectivity ( PlannerInfo root,
Oid  oproid,
Oid  collation,
List args,
int  varRelid,
double  default_selectivity 
)

Definition at line 915 of file selfuncs.c.

918{
919 double selec;
920 VariableStatData vardata;
921 Node *other;
922 bool varonleft;
923
924 /*
925 * If expression is not variable OP something or something OP variable,
926 * then punt and return the default estimate.
927 */
928 if (!get_restriction_variable(root, args, varRelid,
929 &vardata, &other, &varonleft))
930 return default_selectivity;
931
932 /*
933 * If the something is a NULL constant, assume operator is strict and
934 * return zero, ie, operator will never return TRUE.
935 */
936 if (IsA(other, Const) &&
937 ((Const *) other)->constisnull)
938 {
939 ReleaseVariableStats(vardata);
940 return 0.0;
941 }
942
943 if (IsA(other, Const))
944 {
945 /* Variable is being compared to a known non-null constant */
946 Datum constval = ((Const *) other)->constvalue;
947 FmgrInfo opproc;
948 double mcvsum;
949 double mcvsel;
950 double nullfrac;
951 int hist_size;
952
953 fmgr_info(get_opcode(oproid), &opproc);
954
955 /*
956 * Calculate the selectivity for the column's most common values.
957 */
958 mcvsel = mcv_selectivity(&vardata, &opproc, collation,
959 constval, varonleft,
960 &mcvsum);
961
962 /*
963 * If the histogram is large enough, see what fraction of it matches
964 * the query, and assume that's representative of the non-MCV
965 * population. Otherwise use the default selectivity for the non-MCV
966 * population.
967 */
968 selec = histogram_selectivity(&vardata, &opproc, collation,
969 constval, varonleft,
970 10, 1, &hist_size);
971 if (selec < 0)
972 {
973 /* Nope, fall back on default */
974 selec = default_selectivity;
975 }
976 else if (hist_size < 100)
977 {
978 /*
979 * For histogram sizes from 10 to 100, we combine the histogram
980 * and default selectivities, putting increasingly more trust in
981 * the histogram for larger sizes.
982 */
983 double hist_weight = hist_size / 100.0;
984
985 selec = selec * hist_weight +
986 default_selectivity * (1.0 - hist_weight);
987 }
988
989 /* In any case, don't believe extremely small or large estimates. */
990 if (selec < 0.0001)
991 selec = 0.0001;
992 else if (selec > 0.9999)
993 selec = 0.9999;
994
995 /* Don't forget to account for nulls. */
996 if (HeapTupleIsValid(vardata.statsTuple))
997 nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
998 else
999 nullfrac = 0.0;
1000
1001 /*
1002 * Now merge the results from the MCV and histogram calculations,
1003 * realizing that the histogram covers only the non-null values that
1004 * are not listed in MCV.
1005 */
1006 selec *= 1.0 - nullfrac - mcvsum;
1007 selec += mcvsel;
1008 }
1009 else
1010 {
1011 /* Comparison value is not constant, so we can't do anything */
1012 selec = default_selectivity;
1013 }
1014
1015 ReleaseVariableStats(vardata);
1016
1017 /* result should be in range, but make sure... */
1018 CLAMP_PROBABILITY(selec);
1019
1020 return selec;
1021}
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:733
double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)
Definition: selfuncs.c:824

References generate_unaccent_rules::args, CLAMP_PROBABILITY, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT(), HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), ReleaseVariableStats, root, and VariableStatData::statsTuple.

Referenced by ltreeparentsel(), and matchingsel().
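
The blending done above is easiest to see with numbers. The standalone sketch below (not PostgreSQL code; all figures are made up) shows a 40-entry histogram getting weight 0.4 against the default selectivity, after which the result is scaled to the non-null, non-MCV population and the MCV selectivity is added back.

#include <stdio.h>

int
main(void)
{
    double      hist_selec = 0.02;      /* fraction of histogram entries matched */
    double      default_selec = 0.005;  /* caller-supplied default */
    int         hist_size = 40;         /* 10 <= size < 100, so blend */
    double      nullfrac = 0.10;        /* stanullfrac */
    double      mcvsum = 0.25;          /* total frequency of MCV entries */
    double      mcvsel = 0.01;          /* frequency of matching MCV entries */
    double      hist_weight = hist_size / 100.0;
    double      selec;

    selec = hist_selec * hist_weight + default_selec * (1.0 - hist_weight);
    selec *= 1.0 - nullfrac - mcvsum;   /* scale to non-null, non-MCV rows */
    selec += mcvsel;                    /* add back the MCV contribution */
    printf("estimated selectivity = %.6f\n", selec);
    return 0;
}

With these inputs the blended histogram/default estimate is 0.011, which becomes roughly 0.017 after the MCV and null adjustments.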

◆ genericcostestimate()

void genericcostestimate ( PlannerInfo root,
IndexPath path,
double  loop_count,
GenericCosts costs 
)

Definition at line 6587 of file selfuncs.c.

6591{
6592 IndexOptInfo *index = path->indexinfo;
6593 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6594 List *indexOrderBys = path->indexorderbys;
6595 Cost indexStartupCost;
6596 Cost indexTotalCost;
6597 Selectivity indexSelectivity;
6598 double indexCorrelation;
6599 double numIndexPages;
6600 double numIndexTuples;
6601 double spc_random_page_cost;
6602 double num_sa_scans;
6603 double num_outer_scans;
6604 double num_scans;
6605 double qual_op_cost;
6606 double qual_arg_cost;
6607 List *selectivityQuals;
6608 ListCell *l;
6609
6610 /*
6611 * If the index is partial, AND the index predicate with the explicitly
6612 * given indexquals to produce a more accurate idea of the index
6613 * selectivity.
6614 */
6615 selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
6616
6617 /*
6618 * If caller didn't give us an estimate for ScalarArrayOpExpr index scans,
6619 * just assume that the number of index descents is the number of distinct
6620 * combinations of array elements from all of the scan's SAOP clauses.
6621 */
6622 num_sa_scans = costs->num_sa_scans;
6623 if (num_sa_scans < 1)
6624 {
6625 num_sa_scans = 1;
6626 foreach(l, indexQuals)
6627 {
6628 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6629
6630 if (IsA(rinfo->clause, ScalarArrayOpExpr))
6631 {
6632 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6633 double alength = estimate_array_length(root, lsecond(saop->args));
6634
6635 if (alength > 1)
6636 num_sa_scans *= alength;
6637 }
6638 }
6639 }
6640
6641 /* Estimate the fraction of main-table tuples that will be visited */
6642 indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6643 index->rel->relid,
6644 JOIN_INNER,
6645 NULL);
6646
6647 /*
6648 * If caller didn't give us an estimate, estimate the number of index
6649 * tuples that will be visited. We do it in this rather peculiar-looking
6650 * way in order to get the right answer for partial indexes.
6651 */
6652 numIndexTuples = costs->numIndexTuples;
6653 if (numIndexTuples <= 0.0)
6654 {
6655 numIndexTuples = indexSelectivity * index->rel->tuples;
6656
6657 /*
6658 * The above calculation counts all the tuples visited across all
6659 * scans induced by ScalarArrayOpExpr nodes. We want to consider the
6660 * average per-indexscan number, so adjust. This is a handy place to
6661 * round to integer, too. (If caller supplied tuple estimate, it's
6662 * responsible for handling these considerations.)
6663 */
6664 numIndexTuples = rint(numIndexTuples / num_sa_scans);
6665 }
6666
6667 /*
6668 * We can bound the number of tuples by the index size in any case. Also,
6669 * always estimate at least one tuple is touched, even when
6670 * indexSelectivity estimate is tiny.
6671 */
6672 if (numIndexTuples > index->tuples)
6673 numIndexTuples = index->tuples;
6674 if (numIndexTuples < 1.0)
6675 numIndexTuples = 1.0;
6676
6677 /*
6678 * Estimate the number of index pages that will be retrieved.
6679 *
6680 * We use the simplistic method of taking a pro-rata fraction of the total
6681 * number of index pages. In effect, this counts only leaf pages and not
6682 * any overhead such as index metapage or upper tree levels.
6683 *
6684 * In practice access to upper index levels is often nearly free because
6685 * those tend to stay in cache under load; moreover, the cost involved is
6686 * highly dependent on index type. We therefore ignore such costs here
6687 * and leave it to the caller to add a suitable charge if needed.
6688 */
6689 if (index->pages > 1 && index->tuples > 1)
6690 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6691 else
6692 numIndexPages = 1.0;
6693
6694 /* fetch estimated page cost for tablespace containing index */
6695 get_tablespace_page_costs(index->reltablespace,
6696 &spc_random_page_cost,
6697 NULL);
6698
6699 /*
6700 * Now compute the disk access costs.
6701 *
6702 * The above calculations are all per-index-scan. However, if we are in a
6703 * nestloop inner scan, we can expect the scan to be repeated (with
6704 * different search keys) for each row of the outer relation. Likewise,
6705 * ScalarArrayOpExpr quals result in multiple index scans. This creates
6706 * the potential for cache effects to reduce the number of disk page
6707 * fetches needed. We want to estimate the average per-scan I/O cost in
6708 * the presence of caching.
6709 *
6710 * We use the Mackert-Lohman formula (see costsize.c for details) to
6711 * estimate the total number of page fetches that occur. While this
6712 * wasn't what it was designed for, it seems a reasonable model anyway.
6713 * Note that we are counting pages not tuples anymore, so we take N = T =
6714 * index size, as if there were one "tuple" per page.
6715 */
6716 num_outer_scans = loop_count;
6717 num_scans = num_sa_scans * num_outer_scans;
6718
6719 if (num_scans > 1)
6720 {
6721 double pages_fetched;
6722
6723 /* total page fetches ignoring cache effects */
6724 pages_fetched = numIndexPages * num_scans;
6725
6726 /* use Mackert and Lohman formula to adjust for cache effects */
6727 pages_fetched = index_pages_fetched(pages_fetched,
6728 index->pages,
6729 (double) index->pages,
6730 root);
6731
6732 /*
6733 * Now compute the total disk access cost, and then report a pro-rated
6734 * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
6735 * since that's internal to the indexscan.)
6736 */
6737 indexTotalCost = (pages_fetched * spc_random_page_cost)
6738 / num_outer_scans;
6739 }
6740 else
6741 {
6742 /*
6743 * For a single index scan, we just charge spc_random_page_cost per
6744 * page touched.
6745 */
6746 indexTotalCost = numIndexPages * spc_random_page_cost;
6747 }
6748
6749 /*
6750 * CPU cost: any complex expressions in the indexquals will need to be
6751 * evaluated once at the start of the scan to reduce them to runtime keys
6752 * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
6753 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6754 * indexqual operator. Because we have numIndexTuples as a per-scan
6755 * number, we have to multiply by num_sa_scans to get the correct result
6756 * for ScalarArrayOpExpr cases. Similarly add in costs for any index
6757 * ORDER BY expressions.
6758 *
6759 * Note: this neglects the possible costs of rechecking lossy operators.
6760 * Detecting that that might be needed seems more expensive than it's
6761 * worth, though, considering all the other inaccuracies here ...
6762 */
6763 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
6764 index_other_operands_eval_cost(root, indexOrderBys);
6765 qual_op_cost = cpu_operator_cost *
6766 (list_length(indexQuals) + list_length(indexOrderBys));
6767
6768 indexStartupCost = qual_arg_cost;
6769 indexTotalCost += qual_arg_cost;
6770 indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6771
6772 /*
6773 * Generic assumption about index correlation: there isn't any.
6774 */
6775 indexCorrelation = 0.0;
6776
6777 /*
6778 * Return everything to caller.
6779 */
6780 costs->indexStartupCost = indexStartupCost;
6781 costs->indexTotalCost = indexTotalCost;
6782 costs->indexSelectivity = indexSelectivity;
6783 costs->indexCorrelation = indexCorrelation;
6784 costs->numIndexPages = numIndexPages;
6785 costs->numIndexTuples = numIndexTuples;
6786 costs->spc_random_page_cost = spc_random_page_cost;
6787 costs->num_sa_scans = num_sa_scans;
6788}
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:908
double cpu_index_tuple_cost
Definition: costsize.c:133
double spc_random_page_cost
Definition: selfuncs.h:134
List * indexorderbys
Definition: pathnodes.h:1748

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, root, and GenericCosts::spc_random_page_cost.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().
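
For a single scan with one ScalarArrayOpExpr-free indexqual and no ORDER BY, the cost arithmetic above reduces to a few lines. The sketch below restates it outside PostgreSQL with made-up statistics and cost parameters; the multi-scan path would additionally run the page count through the Mackert-Lohman formula via index_pages_fetched().

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double      index_tuples = 1000000.0;   /* index->tuples */
    double      index_pages = 2500.0;       /* index->pages */
    double      selectivity = 0.001;        /* clauselist_selectivity result */
    double      spc_random_page_cost = 4.0;
    double      cpu_index_tuple_cost = 0.005;
    double      cpu_operator_cost = 0.0025;
    double      qual_arg_cost = 0.0;        /* no expensive runtime keys */

    double      num_index_tuples = rint(selectivity * index_tuples);
    double      num_index_pages = ceil(num_index_tuples * index_pages / index_tuples);
    double      qual_op_cost = cpu_operator_cost * 1;   /* one indexqual operator */
    double      total = num_index_pages * spc_random_page_cost
        + qual_arg_cost
        + num_index_tuples * (cpu_index_tuple_cost + qual_op_cost);

    printf("pages=%.0f tuples=%.0f total cost=%.2f\n",
           num_index_pages, num_index_tuples, total);
    return 0;
}

Here 1000 index tuples map to 3 leaf pages, giving 12 units of I/O cost plus 7.5 units of per-tuple CPU cost.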

◆ get_actual_variable_endpoint()

static bool get_actual_variable_endpoint ( Relation  heapRel,
Relation  indexRel,
ScanDirection  indexscandir,
ScanKey  scankeys,
int16  typLen,
bool  typByVal,
TupleTableSlot tableslot,
MemoryContext  outercontext,
Datum endpointDatum 
)
static

Definition at line 6310 of file selfuncs.c.

6319{
6320 bool have_data = false;
6321 SnapshotData SnapshotNonVacuumable;
6322 IndexScanDesc index_scan;
6323 Buffer vmbuffer = InvalidBuffer;
6324 BlockNumber last_heap_block = InvalidBlockNumber;
6325 int n_visited_heap_pages = 0;
6326 ItemPointer tid;
6327 Datum values[INDEX_MAX_KEYS];
6328 bool isnull[INDEX_MAX_KEYS];
6329 MemoryContext oldcontext;
6330
6331 /*
6332 * We use the index-only-scan machinery for this. With mostly-static
6333 * tables that's a win because it avoids a heap visit. It's also a win
6334 * for dynamic data, but the reason is less obvious; read on for details.
6335 *
6336 * In principle, we should scan the index with our current active
6337 * snapshot, which is the best approximation we've got to what the query
6338 * will see when executed. But that won't be exact if a new snap is taken
6339 * before running the query, and it can be very expensive if a lot of
6340 * recently-dead or uncommitted rows exist at the beginning or end of the
6341 * index (because we'll laboriously fetch each one and reject it).
6342 * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
6343 * and uncommitted rows as well as normal visible rows. On the other
6344 * hand, it will reject known-dead rows, and thus not give a bogus answer
6345 * when the extreme value has been deleted (unless the deletion was quite
6346 * recent); that case motivates not using SnapshotAny here.
6347 *
6348 * A crucial point here is that SnapshotNonVacuumable, with
6349 * GlobalVisTestFor(heapRel) as horizon, yields the inverse of the
6350 * condition that the indexscan will use to decide that index entries are
6351 * killable (see heap_hot_search_buffer()). Therefore, if the snapshot
6352 * rejects a tuple (or more precisely, all tuples of a HOT chain) and we
6353 * have to continue scanning past it, we know that the indexscan will mark
6354 * that index entry killed. That means that the next
6355 * get_actual_variable_endpoint() call will not have to re-consider that
6356 * index entry. In this way we avoid repetitive work when this function
6357 * is used a lot during planning.
6358 *
6359 * But using SnapshotNonVacuumable creates a hazard of its own. In a
6360 * recently-created index, some index entries may point at "broken" HOT
6361 * chains in which not all the tuple versions contain data matching the
6362 * index entry. The live tuple version(s) certainly do match the index,
6363 * but SnapshotNonVacuumable can accept recently-dead tuple versions that
6364 * don't match. Hence, if we took data from the selected heap tuple, we
6365 * might get a bogus answer that's not close to the index extremal value,
6366 * or could even be NULL. We avoid this hazard because we take the data
6367 * from the index entry not the heap.
6368 *
6369 * Despite all this care, there are situations where we might find many
6370 * non-visible tuples near the end of the index. We don't want to expend
6371 * a huge amount of time here, so we give up once we've read too many heap
6372 * pages. When we fail for that reason, the caller will end up using
6373 * whatever extremal value is recorded in pg_statistic.
6374 */
6375 InitNonVacuumableSnapshot(SnapshotNonVacuumable,
6376 GlobalVisTestFor(heapRel));
6377
6378 index_scan = index_beginscan(heapRel, indexRel,
6379 &SnapshotNonVacuumable,
6380 1, 0);
6381 /* Set it up for index-only scan */
6382 index_scan->xs_want_itup = true;
6383 index_rescan(index_scan, scankeys, 1, NULL, 0);
6384
6385 /* Fetch first/next tuple in specified direction */
6386 while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
6387 {
6388 BlockNumber block = ItemPointerGetBlockNumber(tid);
6389
6390 if (!VM_ALL_VISIBLE(heapRel,
6391 block,
6392 &vmbuffer))
6393 {
6394 /* Rats, we have to visit the heap to check visibility */
6395 if (!index_fetch_heap(index_scan, tableslot))
6396 {
6397 /*
6398 * No visible tuple for this index entry, so we need to
6399 * advance to the next entry. Before doing so, count heap
6400 * page fetches and give up if we've done too many.
6401 *
6402 * We don't charge a page fetch if this is the same heap page
6403 * as the previous tuple. This is on the conservative side,
6404 * since other recently-accessed pages are probably still in
6405 * buffers too; but it's good enough for this heuristic.
6406 */
6407#define VISITED_PAGES_LIMIT 100
6408
6409 if (block != last_heap_block)
6410 {
6411 last_heap_block = block;
6412 n_visited_heap_pages++;
6413 if (n_visited_heap_pages > VISITED_PAGES_LIMIT)
6414 break;
6415 }
6416
6417 continue; /* no visible tuple, try next index entry */
6418 }
6419
6420 /* We don't actually need the heap tuple for anything */
6421 ExecClearTuple(tableslot);
6422
6423 /*
6424 * We don't care whether there's more than one visible tuple in
6425 * the HOT chain; if any are visible, that's good enough.
6426 */
6427 }
6428
6429 /*
6430 * We expect that btree will return data in IndexTuple not HeapTuple
6431 * format. It's not lossy either.
6432 */
6433 if (!index_scan->xs_itup)
6434 elog(ERROR, "no data returned for index-only scan");
6435 if (index_scan->xs_recheck)
6436 elog(ERROR, "unexpected recheck indication from btree");
6437
6438 /* OK to deconstruct the index tuple */
6439 index_deform_tuple(index_scan->xs_itup,
6440 index_scan->xs_itupdesc,
6441 values, isnull);
6442
6443 /* Shouldn't have got a null, but be careful */
6444 if (isnull[0])
6445 elog(ERROR, "found unexpected null value in index \"%s\"",
6446 RelationGetRelationName(indexRel));
6447
6448 /* Copy the index column value out to caller's context */
6449 oldcontext = MemoryContextSwitchTo(outercontext);
6450 *endpointDatum = datumCopy(values[0], typByVal, typLen);
6451 MemoryContextSwitchTo(oldcontext);
6452 have_data = true;
6453 break;
6454 }
6455
6456 if (vmbuffer != InvalidBuffer)
6457 ReleaseBuffer(vmbuffer);
6458 index_endscan(index_scan);
6459
6460 return have_data;
6461}
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:151
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4864
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
Definition: indexam.c:576
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys)
Definition: indexam.c:256
bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
Definition: indexam.c:634
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:378
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:352
void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: indextuple.c:456
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define INDEX_MAX_KEYS
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4107
#define RelationGetRelationName(relation)
Definition: rel.h:546
#define VISITED_PAGES_LIMIT
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:50
IndexTuple xs_itup
Definition: relscan.h:159
struct TupleDescData * xs_itupdesc
Definition: relscan.h:160
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24

References datumCopy(), elog, ERROR, ExecClearTuple(), GlobalVisTestFor(), index_beginscan(), index_deform_tuple(), index_endscan(), index_fetch_heap(), index_getnext_tid(), INDEX_MAX_KEYS, index_rescan(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, ItemPointerGetBlockNumber(), MemoryContextSwitchTo(), RelationGetRelationName, ReleaseBuffer(), values, VISITED_PAGES_LIMIT, VM_ALL_VISIBLE, IndexScanDescData::xs_itup, IndexScanDescData::xs_itupdesc, IndexScanDescData::xs_recheck, and IndexScanDescData::xs_want_itup.

Referenced by get_actual_variable_range().

◆ get_actual_variable_range()

static bool get_actual_variable_range ( PlannerInfo *  root,
VariableStatData *  vardata,
Oid  sortop,
Oid  collation,
Datum *  min,
Datum *  max 
)
static

Definition at line 6130 of file selfuncs.c.

6133{
6134 bool have_data = false;
6135 RelOptInfo *rel = vardata->rel;
6136 RangeTblEntry *rte;
6137 ListCell *lc;
6138
6139 /* No hope if no relation or it doesn't have indexes */
6140 if (rel == NULL || rel->indexlist == NIL)
6141 return false;
6142 /* If it has indexes it must be a plain relation */
6143 rte = root->simple_rte_array[rel->relid];
6144 Assert(rte->rtekind == RTE_RELATION);
6145
6146 /* ignore partitioned tables. Any indexes here are not real indexes */
6147 if (rte->relkind == RELKIND_PARTITIONED_TABLE)
6148 return false;
6149
6150 /* Search through the indexes to see if any match our problem */
6151 foreach(lc, rel->indexlist)
6152 {
6153 IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
6154 ScanDirection indexscandir;
6155
6156 /* Ignore non-btree indexes */
6157 if (index->relam != BTREE_AM_OID)
6158 continue;
6159
6160 /*
6161 * Ignore partial indexes --- we only want stats that cover the entire
6162 * relation.
6163 */
6164 if (index->indpred != NIL)
6165 continue;
6166
6167 /*
6168 * The index list might include hypothetical indexes inserted by a
6169 * get_relation_info hook --- don't try to access them.
6170 */
6171 if (index->hypothetical)
6172 continue;
6173
6174 /*
6175 * The first index column must match the desired variable, sortop, and
6176 * collation --- but we can use a descending-order index.
6177 */
6178 if (collation != index->indexcollations[0])
6179 continue; /* test first 'cause it's cheapest */
6180 if (!match_index_to_operand(vardata->var, 0, index))
6181 continue;
6182 switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
6183 {
6184 case BTLessStrategyNumber:
6185 if (index->reverse_sort[0])
6186 indexscandir = BackwardScanDirection;
6187 else
6188 indexscandir = ForwardScanDirection;
6189 break;
6190 case BTGreaterStrategyNumber:
6191 if (index->reverse_sort[0])
6192 indexscandir = ForwardScanDirection;
6193 else
6194 indexscandir = BackwardScanDirection;
6195 break;
6196 default:
6197 /* index doesn't match the sortop */
6198 continue;
6199 }
6200
6201 /*
6202 * Found a suitable index to extract data from. Set up some data that
6203 * can be used by both invocations of get_actual_variable_endpoint.
6204 */
6205 {
6206 MemoryContext tmpcontext;
6207 MemoryContext oldcontext;
6208 Relation heapRel;
6209 Relation indexRel;
6210 TupleTableSlot *slot;
6211 int16 typLen;
6212 bool typByVal;
6213 ScanKeyData scankeys[1];
6214
6215 /* Make sure any cruft gets recycled when we're done */
6216 tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
6217 "get_actual_variable_range workspace",
6218 ALLOCSET_DEFAULT_SIZES);
6219 oldcontext = MemoryContextSwitchTo(tmpcontext);
6220
6221 /*
6222 * Open the table and index so we can read from them. We should
6223 * already have some type of lock on each.
6224 */
6225 heapRel = table_open(rte->relid, NoLock);
6226 indexRel = index_open(index->indexoid, NoLock);
6227
6228 /* build some stuff needed for indexscan execution */
6229 slot = table_slot_create(heapRel, NULL);
6230 get_typlenbyval(vardata->atttype, &typLen, &typByVal);
6231
6232 /* set up an IS NOT NULL scan key so that we ignore nulls */
6233 ScanKeyEntryInitialize(&scankeys[0],
6234 SK_ISNULL | SK_SEARCHNOTNULL,
6235 1, /* index col to scan */
6236 InvalidStrategy, /* no strategy */
6237 InvalidOid, /* no strategy subtype */
6238 InvalidOid, /* no collation */
6239 InvalidOid, /* no reg proc for this */
6240 (Datum) 0); /* constant */
6241
6242 /* If min is requested ... */
6243 if (min)
6244 {
6245 have_data = get_actual_variable_endpoint(heapRel,
6246 indexRel,
6247 indexscandir,
6248 scankeys,
6249 typLen,
6250 typByVal,
6251 slot,
6252 oldcontext,
6253 min);
6254 }
6255 else
6256 {
6257 /* If min not requested, still want to fetch max */
6258 have_data = true;
6259 }
6260
6261 /* If max is requested, and we didn't already fail ... */
6262 if (max && have_data)
6263 {
6264 /* scan in the opposite direction; all else is the same */
6265 have_data = get_actual_variable_endpoint(heapRel,
6266 indexRel,
6267 -indexscandir,
6268 scankeys,
6269 typLen,
6270 typByVal,
6271 slot,
6272 oldcontext,
6273 max);
6274 }
6275
6276 /* Clean everything up */
6277 ExecDropSingleTupleTableSlot(slot);
6278
6279 index_close(indexRel, NoLock);
6280 table_close(heapRel, NoLock);
6281
6282 MemoryContextSwitchTo(oldcontext);
6283 MemoryContextDelete(tmpcontext);
6284
6285 /* And we're done */
6286 break;
6287 }
6288 }
6289
6290 return have_data;
6291}
int16_t int16
Definition: c.h:483
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1441
bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index)
Definition: indxpath.c:4426
void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval)
Definition: lsyscache.c:2278
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void ScanKeyEntryInitialize(ScanKey entry, int flags, AttrNumber attributeNumber, StrategyNumber strategy, Oid subtype, Oid collation, RegProcedure procedure, Datum argument)
Definition: scankey.c:32
ScanDirection
Definition: sdir.h:25
@ BackwardScanDirection
Definition: sdir.h:26
@ ForwardScanDirection
Definition: sdir.h:28
static bool get_actual_variable_endpoint(Relation heapRel, Relation indexRel, ScanDirection indexscandir, ScanKey scankeys, int16 typLen, bool typByVal, TupleTableSlot *tableslot, MemoryContext outercontext, Datum *endpointDatum)
Definition: selfuncs.c:6310
#define SK_SEARCHNOTNULL
Definition: skey.h:122
#define SK_ISNULL
Definition: skey.h:115
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define InvalidStrategy
Definition: stratnum.h:24
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:126
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:40
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, VariableStatData::atttype, BackwardScanDirection, BTGreaterStrategyNumber, BTLessStrategyNumber, CurrentMemoryContext, ExecDropSingleTupleTableSlot(), ForwardScanDirection, get_actual_variable_endpoint(), get_op_opfamily_strategy(), get_typlenbyval(), index_close(), index_open(), RelOptInfo::indexlist, InvalidOid, InvalidStrategy, lfirst, match_index_to_operand(), MemoryContextDelete(), MemoryContextSwitchTo(), NIL, NoLock, VariableStatData::rel, RangeTblEntry::relid, RelOptInfo::relid, root, RTE_RELATION, RangeTblEntry::rtekind, ScanKeyEntryInitialize(), SK_ISNULL, SK_SEARCHNOTNULL, table_close(), table_open(), table_slot_create(), and VariableStatData::var.

Referenced by get_variable_range(), and ineq_histogram_selectivity().
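
A hedged usage sketch, modeled on how ineq_histogram_selectivity() refreshes a histogram endpoint; vardata, sortop and collation are assumed to be set up by the caller:

    Datum   current_max;

    /* Ask only for the upper endpoint; passing NULL for min skips that probe. */
    if (get_actual_variable_range(root, &vardata, sortop, collation,
                                  NULL, &current_max))
    {
        /* current_max was datumCopy'd into the caller's memory context. */
    }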

◆ get_join_variables()

void get_join_variables ( PlannerInfo *  root,
List *  args,
SpecialJoinInfo *  sjinfo,
VariableStatData *  vardata1,
VariableStatData *  vardata2,
bool *  join_is_reversed 
)

Definition at line 4964 of file selfuncs.c.

4967{
4968 Node *left,
4969 *right;
4970
4971 if (list_length(args) != 2)
4972 elog(ERROR, "join operator should take two arguments");
4973
4974 left = (Node *) linitial(args);
4975 right = (Node *) lsecond(args);
4976
4977 examine_variable(root, left, 0, vardata1);
4978 examine_variable(root, right, 0, vardata2);
4979
4980 if (vardata1->rel &&
4981 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4982 *join_is_reversed = true; /* var1 is on RHS */
4983 else if (vardata2->rel &&
4984 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4985 *join_is_reversed = true; /* var2 is on LHS */
4986 else
4987 *join_is_reversed = false;
4988}
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:412
Relids relids
Definition: pathnodes.h:895
Relids syn_lefthand
Definition: pathnodes.h:2931
Relids syn_righthand
Definition: pathnodes.h:2932

References generate_unaccent_rules::args, bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, root, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().
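
A hedged sketch of the usual caller pattern, mirroring eqjoinsel(); root, args and sjinfo are assumed to come from the operator's join-selectivity arguments:

    VariableStatData vardata1;
    VariableStatData vardata2;
    bool        join_is_reversed;

    get_join_variables(root, args, sjinfo,
                       &vardata1, &vardata2, &join_is_reversed);
    /* ... compute the join selectivity from vardata1 and vardata2 ... */
    ReleaseVariableStats(vardata1);
    ReleaseVariableStats(vardata2);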

◆ get_quals_from_indexclauses()

List * get_quals_from_indexclauses ( List *  indexclauses)

Definition at line 6503 of file selfuncs.c.

6504{
6505 List *result = NIL;
6506 ListCell *lc;
6507
6508 foreach(lc, indexclauses)
6509 {
6510 IndexClause *iclause = lfirst_node(IndexClause, lc);
6511 ListCell *lc2;
6512
6513 foreach(lc2, iclause->indexquals)
6514 {
6515 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6516
6517 result = lappend(result, rinfo);
6518 }
6519 }
6520 return result;
6521}

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().
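
A short sketch of the typical use, loosely following genericcostestimate(); path is assumed to be the IndexPath being costed:

    List       *indexQuals = get_quals_from_indexclauses(path->indexclauses);
    QualCost    index_qual_cost;

    cost_qual_eval(&index_qual_cost, indexQuals, root);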

◆ get_restriction_variable()

bool get_restriction_variable ( PlannerInfo *  root,
List *  args,
int  varRelid,
VariableStatData *  vardata,
Node **  other,
bool *  varonleft 
)

Definition at line 4904 of file selfuncs.c.

4907{
4908 Node *left,
4909 *right;
4910 VariableStatData rdata;
4911
4912 /* Fail if not a binary opclause (probably shouldn't happen) */
4913 if (list_length(args) != 2)
4914 return false;
4915
4916 left = (Node *) linitial(args);
4917 right = (Node *) lsecond(args);
4918
4919 /*
4920 * Examine both sides. Note that when varRelid is nonzero, Vars of other
4921 * relations will be treated as pseudoconstants.
4922 */
4923 examine_variable(root, left, varRelid, vardata);
4924 examine_variable(root, right, varRelid, &rdata);
4925
4926 /*
4927 * If one side is a variable and the other not, we win.
4928 */
4929 if (vardata->rel && rdata.rel == NULL)
4930 {
4931 *varonleft = true;
4932 *other = estimate_expression_value(root, rdata.var);
4933 /* Assume we need no ReleaseVariableStats(rdata) here */
4934 return true;
4935 }
4936
4937 if (vardata->rel == NULL && rdata.rel)
4938 {
4939 *varonleft = false;
4940 *other = estimate_expression_value(root, vardata->var);
4941 /* Assume we need no ReleaseVariableStats(*vardata) here */
4942 *vardata = rdata;
4943 return true;
4944 }
4945
4946 /* Oops, clause has wrong structure (probably var op var) */
4947 ReleaseVariableStats(*vardata);
4948 ReleaseVariableStats(rdata);
4949
4950 return false;
4951}
Node * estimate_expression_value(PlannerInfo *root, Node *node)
Definition: clauses.c:2395

References generate_unaccent_rules::args, estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, root, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), generic_restriction_selectivity(), multirangesel(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().
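
A hedged sketch of the common caller pattern (cf. eqsel_internal(); the DEFAULT_EQ_SEL fallback is what the equality estimator uses, other callers pick their own default):

    VariableStatData vardata;
    Node       *other;
    bool        varonleft;

    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        return DEFAULT_EQ_SEL;  /* clause isn't Var OP pseudoconstant */
    /* ... estimate using vardata and the pseudoconstant "other" ... */
    ReleaseVariableStats(vardata);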

◆ get_stats_slot_range()

static void get_stats_slot_range ( AttStatsSlot *  sslot,
Oid  opfuncoid,
FmgrInfo *  opproc,
Oid  collation,
int16  typLen,
bool  typByVal,
Datum *  min,
Datum *  max,
bool *  p_have_data 
)
static

Definition at line 6067 of file selfuncs.c.

6070{
6071 Datum tmin = *min;
6072 Datum tmax = *max;
6073 bool have_data = *p_have_data;
6074 bool found_tmin = false;
6075 bool found_tmax = false;
6076
6077 /* Look up the comparison function, if we didn't already do so */
6078 if (opproc->fn_oid != opfuncoid)
6079 fmgr_info(opfuncoid, opproc);
6080
6081 /* Scan all the slot's values */
6082 for (int i = 0; i < sslot->nvalues; i++)
6083 {
6084 if (!have_data)
6085 {
6086 tmin = tmax = sslot->values[i];
6087 found_tmin = found_tmax = true;
6088 *p_have_data = have_data = true;
6089 continue;
6090 }
6091 if (DatumGetBool(FunctionCall2Coll(opproc,
6092 collation,
6093 sslot->values[i], tmin)))
6094 {
6095 tmin = sslot->values[i];
6096 found_tmin = true;
6097 }
6098 if (DatumGetBool(FunctionCall2Coll(opproc,
6099 collation,
6100 tmax, sslot->values[i])))
6101 {
6102 tmax = sslot->values[i];
6103 found_tmax = true;
6104 }
6105 }
6106
6107 /*
6108 * Copy the slot's values, if we found new extreme values.
6109 */
6110 if (found_tmin)
6111 *min = datumCopy(tmin, typByVal, typLen);
6112 if (found_tmax)
6113 *max = datumCopy(tmax, typByVal, typLen);
6114}
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
Oid fn_oid
Definition: fmgr.h:59

References datumCopy(), DatumGetBool(), fmgr_info(), FmgrInfo::fn_oid, FunctionCall2Coll(), i, AttStatsSlot::nvalues, and AttStatsSlot::values.

Referenced by get_variable_range().
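
A minimal sketch of the calling convention used by get_variable_range(): the caller keeps one FmgrInfo cache so the comparison function is looked up at most once across several slots (sslot, opfuncoid and the other locals are assumed to be set up already):

    FmgrInfo    opproc;

    opproc.fn_oid = InvalidOid;     /* mark as not looked up yet */
    get_stats_slot_range(&sslot, opfuncoid, &opproc, collation,
                         typLen, typByVal, &tmin, &tmax, &have_data);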

◆ get_variable_numdistinct()

double get_variable_numdistinct ( VariableStatData *  vardata,
bool *  isdefault 
)

Definition at line 5807 of file selfuncs.c.

5808{
5809 double stadistinct;
5810 double stanullfrac = 0.0;
5811 double ntuples;
5812
5813 *isdefault = false;
5814
5815 /*
5816 * Determine the stadistinct value to use. There are cases where we can
5817 * get an estimate even without a pg_statistic entry, or can get a better
5818 * value than is in pg_statistic. Grab stanullfrac too if we can find it
5819 * (otherwise, assume no nulls, for lack of any better idea).
5820 */
5821 if (HeapTupleIsValid(vardata->statsTuple))
5822 {
5823 /* Use the pg_statistic entry */
5824 Form_pg_statistic stats;
5825
5826 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5827 stadistinct = stats->stadistinct;
5828 stanullfrac = stats->stanullfrac;
5829 }
5830 else if (vardata->vartype == BOOLOID)
5831 {
5832 /*
5833 * Special-case boolean columns: presumably, two distinct values.
5834 *
5835 * Are there any other datatypes we should wire in special estimates
5836 * for?
5837 */
5838 stadistinct = 2.0;
5839 }
5840 else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5841 {
5842 /*
5843 * If the Var represents a column of a VALUES RTE, assume it's unique.
5844 * This could of course be very wrong, but it should tend to be true
5845 * in well-written queries. We could consider examining the VALUES'
5846 * contents to get some real statistics; but that only works if the
5847 * entries are all constants, and it would be pretty expensive anyway.
5848 */
5849 stadistinct = -1.0; /* unique (and all non null) */
5850 }
5851 else
5852 {
5853 /*
5854 * We don't keep statistics for system columns, but in some cases we
5855 * can infer distinctness anyway.
5856 */
5857 if (vardata->var && IsA(vardata->var, Var))
5858 {
5859 switch (((Var *) vardata->var)->varattno)
5860 {
5861 case SelfItemPointerAttributeNumber:
5862 stadistinct = -1.0; /* unique (and all non null) */
5863 break;
5864 case TableOidAttributeNumber:
5865 stadistinct = 1.0; /* only 1 value */
5866 break;
5867 default:
5868 stadistinct = 0.0; /* means "unknown" */
5869 break;
5870 }
5871 }
5872 else
5873 stadistinct = 0.0; /* means "unknown" */
5874
5875 /*
5876 * XXX consider using estimate_num_groups on expressions?
5877 */
5878 }
5879
5880 /*
5881 * If there is a unique index, DISTINCT or GROUP-BY clause for the
5882 * variable, assume it is unique no matter what pg_statistic says; the
5883 * statistics could be out of date, or we might have found a partial
5884 * unique index that proves the var is unique for this query. However,
5885 * we'd better still believe the null-fraction statistic.
5886 */
5887 if (vardata->isunique)
5888 stadistinct = -1.0 * (1.0 - stanullfrac);
5889
5890 /*
5891 * If we had an absolute estimate, use that.
5892 */
5893 if (stadistinct > 0.0)
5894 return clamp_row_est(stadistinct);
5895
5896 /*
5897 * Otherwise we need to get the relation size; punt if not available.
5898 */
5899 if (vardata->rel == NULL)
5900 {
5901 *isdefault = true;
5902 return DEFAULT_NUM_DISTINCT;
5903 }
5904 ntuples = vardata->rel->tuples;
5905 if (ntuples <= 0.0)
5906 {
5907 *isdefault = true;
5908 return DEFAULT_NUM_DISTINCT;
5909 }
5910
5911 /*
5912 * If we had a relative estimate, use that.
5913 */
5914 if (stadistinct < 0.0)
5915 return clamp_row_est(-stadistinct * ntuples);
5916
5917 /*
5918 * With no data, estimate ndistinct = ntuples if the table is small, else
5919 * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5920 * that the behavior isn't discontinuous.
5921 */
5922 if (ntuples < DEFAULT_NUM_DISTINCT)
5923 return clamp_row_est(ntuples);
5924
5925 *isdefault = true;
5926 return DEFAULT_NUM_DISTINCT;
5927}
@ RTE_VALUES
Definition: parsenodes.h:1031
#define DEFAULT_NUM_DISTINCT
Definition: selfuncs.h:52
RTEKind rtekind
Definition: pathnodes.h:946
#define TableOidAttributeNumber
Definition: sysattr.h:26
#define SelfItemPointerAttributeNumber
Definition: sysattr.h:21

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT(), HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().
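
A hedged sketch of a typical call, roughly what var_eq_non_const() does when it has no better information (vardata and selec are assumed to exist in the caller):

    bool        isdefault;
    double      nd = get_variable_numdistinct(&vardata, &isdefault);

    selec = 1.0 / nd;   /* treat the comparison value as a "typical" one */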

◆ get_variable_range()

static bool get_variable_range ( PlannerInfo *  root,
VariableStatData *  vardata,
Oid  sortop,
Oid  collation,
Datum *  min,
Datum *  max 
)
static

Definition at line 5940 of file selfuncs.c.

5943{
5944 Datum tmin = 0;
5945 Datum tmax = 0;
5946 bool have_data = false;
5947 int16 typLen;
5948 bool typByVal;
5949 Oid opfuncoid;
5950 FmgrInfo opproc;
5951 AttStatsSlot sslot;
5952
5953 /*
5954 * XXX It's very tempting to try to use the actual column min and max, if
5955 * we can get them relatively-cheaply with an index probe. However, since
5956 * this function is called many times during join planning, that could
5957 * have unpleasant effects on planning speed. Need more investigation
5958 * before enabling this.
5959 */
5960#ifdef NOT_USED
5961 if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
5962 return true;
5963#endif
5964
5965 if (!HeapTupleIsValid(vardata->statsTuple))
5966 {
5967 /* no stats available, so default result */
5968 return false;
5969 }
5970
5971 /*
5972 * If we can't apply the sortop to the stats data, just fail. In
5973 * principle, if there's a histogram and no MCVs, we could return the
5974 * histogram endpoints without ever applying the sortop ... but it's
5975 * probably not worth trying, because whatever the caller wants to do with
5976 * the endpoints would likely fail the security check too.
5977 */
5978 if (!statistic_proc_security_check(vardata,
5979 (opfuncoid = get_opcode(sortop))))
5980 return false;
5981
5982 opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
5983
5984 get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5985
5986 /*
5987 * If there is a histogram with the ordering we want, grab the first and
5988 * last values.
5989 */
5990 if (get_attstatsslot(&sslot, vardata->statsTuple,
5991 STATISTIC_KIND_HISTOGRAM, sortop,
5992 ATTSTATSSLOT_VALUES))
5993 {
5994 if (sslot.stacoll == collation && sslot.nvalues > 0)
5995 {
5996 tmin = datumCopy(sslot.values[0], typByVal, typLen);
5997 tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
5998 have_data = true;
5999 }
6000 free_attstatsslot(&sslot);
6001 }
6002
6003 /*
6004 * Otherwise, if there is a histogram with some other ordering, scan it
6005 * and get the min and max values according to the ordering we want. This
6006 * of course may not find values that are really extremal according to our
6007 * ordering, but it beats ignoring available data.
6008 */
6009 if (!have_data &&
6010 get_attstatsslot(&sslot, vardata->statsTuple,
6011 STATISTIC_KIND_HISTOGRAM, InvalidOid,
6012 ATTSTATSSLOT_VALUES))
6013 {
6014 get_stats_slot_range(&sslot, opfuncoid, &opproc,
6015 collation, typLen, typByVal,
6016 &tmin, &tmax, &have_data);
6017 free_attstatsslot(&sslot);
6018 }
6019
6020 /*
6021 * If we have most-common-values info, look for extreme MCVs. This is
6022 * needed even if we also have a histogram, since the histogram excludes
6023 * the MCVs. However, if we *only* have MCVs and no histogram, we should
6024 * be pretty wary of deciding that that is a full representation of the
6025 * data. Proceed only if the MCVs represent the whole table (to within
6026 * roundoff error).
6027 */
6028 if (get_attstatsslot(&sslot, vardata->statsTuple,
6029 STATISTIC_KIND_MCV, InvalidOid,
6030 have_data ? ATTSTATSSLOT_VALUES :
6031 (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)))
6032 {
6033 bool use_mcvs = have_data;
6034
6035 if (!have_data)
6036 {
6037 double sumcommon = 0.0;
6038 double nullfrac;
6039 int i;
6040
6041 for (i = 0; i < sslot.nnumbers; i++)
6042 sumcommon += sslot.numbers[i];
6043 nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac;
6044 if (sumcommon + nullfrac > 0.99999)
6045 use_mcvs = true;
6046 }
6047
6048 if (use_mcvs)
6049 get_stats_slot_range(&sslot, opfuncoid, &opproc,
6050 collation, typLen, typByVal,
6051 &tmin, &tmax, &have_data);
6052 free_attstatsslot(&sslot);
6053 }
6054
6055 *min = tmin;
6056 *max = tmax;
6057 return have_data;
6058}
static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, Datum *min, Datum *max)
Definition: selfuncs.c:6130
static void get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc, Oid collation, int16 typLen, bool typByVal, Datum *min, Datum *max, bool *p_have_data)
Definition: selfuncs.c:6067

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, VariableStatData::atttype, datumCopy(), FmgrInfo::fn_oid, free_attstatsslot(), get_actual_variable_range(), get_attstatsslot(), get_opcode(), get_stats_slot_range(), get_typlenbyval(), GETSTRUCT(), HeapTupleIsValid, i, InvalidOid, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, root, AttStatsSlot::stacoll, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by mergejoinscansel().
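
A minimal sketch in the spirit of mergejoinscansel(); the "<" sort operator and the collation are assumed to be resolved already:

    Datum   min_val;
    Datum   max_val;

    if (get_variable_range(root, &vardata, sortop, collation,
                           &min_val, &max_val))
    {
        /* min_val/max_val are freshly palloc'd copies for by-reference types */
    }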

◆ gincost_opexpr()

static bool gincost_opexpr ( PlannerInfo *  root,
IndexOptInfo *  index,
int  indexcol,
OpExpr *  clause,
GinQualCounts *  counts 
)
static

Definition at line 7460 of file selfuncs.c.

7465{
7466 Oid clause_op = clause->opno;
7467 Node *operand = (Node *) lsecond(clause->args);
7468
7469 /* aggressively reduce to a constant, and look through relabeling */
7470 operand = estimate_expression_value(root, operand);
7471
7472 if (IsA(operand, RelabelType))
7473 operand = (Node *) ((RelabelType *) operand)->arg;
7474
7475 /*
7476 * It's impossible to call the extractQuery method with an unknown
7477 * operand, so unless the operand is a Const we can't do much; just assume
7478 * there will be one ordinary search entry from the operand at runtime.
7479 */
7480 if (!IsA(operand, Const))
7481 {
7482 counts->exactEntries++;
7483 counts->searchEntries++;
7484 return true;
7485 }
7486
7487 /* If Const is null, there can be no matches */
7488 if (((Const *) operand)->constisnull)
7489 return false;
7490
7491 /* Otherwise, apply extractQuery and get the actual term counts */
7492 return gincost_pattern(index, indexcol, clause_op,
7493 ((Const *) operand)->constvalue,
7494 counts);
7495}
static bool gincost_pattern(IndexOptInfo *index, int indexcol, Oid clause_op, Datum query, GinQualCounts *counts)
Definition: selfuncs.c:7346
double exactEntries
Definition: selfuncs.c:7335
double searchEntries
Definition: selfuncs.c:7336
List * args
Definition: primnodes.h:853

References arg, OpExpr::args, estimate_expression_value(), GinQualCounts::exactEntries, gincost_pattern(), IsA, lsecond, OpExpr::opno, root, and GinQualCounts::searchEntries.

Referenced by gincostestimate().
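
A hedged fragment showing how gincostestimate()-style code reacts to the result; counts is assumed to be a caller-local GinQualCounts that has been zeroed:

    memset(&counts, 0, sizeof(counts));
    if (!gincost_opexpr(root, index, indexcol, (OpExpr *) clause, &counts))
    {
        /* the qual is provably unsatisfiable: the index scan matches nothing */
    }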

◆ gincost_pattern()

static bool gincost_pattern ( IndexOptInfo *  index,
int  indexcol,
Oid  clause_op,
Datum  query,
GinQualCounts *  counts 
)
static

Definition at line 7346 of file selfuncs.c.

7349{
7350 FmgrInfo flinfo;
7351 Oid extractProcOid;
7352 Oid collation;
7353 int strategy_op;
7354 Oid lefttype,
7355 righttype;
7356 int32 nentries = 0;
7357 bool *partial_matches = NULL;
7358 Pointer *extra_data = NULL;
7359 bool *nullFlags = NULL;
7360 int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
7361 int32 i;
7362
7363 Assert(indexcol < index->nkeycolumns);
7364
7365 /*
7366 * Get the operator's strategy number and declared input data types within
7367 * the index opfamily. (We don't need the latter, but we use
7368 * get_op_opfamily_properties because it will throw error if it fails to
7369 * find a matching pg_amop entry.)
7370 */
7371 get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
7372 &strategy_op, &lefttype, &righttype);
7373
7374 /*
7375 * GIN always uses the "default" support functions, which are those with
7376 * lefttype == righttype == the opclass' opcintype (see
7377 * IndexSupportInitialize in relcache.c).
7378 */
7379 extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
7380 index->opcintype[indexcol],
7381 index->opcintype[indexcol],
7382 GIN_EXTRACTQUERY_PROC);
7383
7384 if (!OidIsValid(extractProcOid))
7385 {
7386 /* should not happen; throw same error as index_getprocinfo */
7387 elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
7388 GIN_EXTRACTQUERY_PROC, indexcol + 1,
7389 get_rel_name(index->indexoid));
7390 }
7391
7392 /*
7393 * Choose collation to pass to extractProc (should match initGinState).
7394 */
7395 if (OidIsValid(index->indexcollations[indexcol]))
7396 collation = index->indexcollations[indexcol];
7397 else
7398 collation = DEFAULT_COLLATION_OID;
7399
7400 fmgr_info(extractProcOid, &flinfo);
7401
7402 set_fn_opclass_options(&flinfo, index->opclassoptions[indexcol]);
7403
7404 FunctionCall7Coll(&flinfo,
7405 collation,
7406 query,
7407 PointerGetDatum(&nentries),
7408 UInt16GetDatum(strategy_op),
7409 PointerGetDatum(&partial_matches),
7410 PointerGetDatum(&extra_data),
7411 PointerGetDatum(&nullFlags),
7412 PointerGetDatum(&searchMode));
7413
7414 if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
7415 {
7416 /* No match is possible */
7417 return false;
7418 }
7419
7420 for (i = 0; i < nentries; i++)
7421 {
7422 /*
7423 * For a partial match we have no information with which to estimate the
7424 * number of matched index entries, so we just estimate it as 100.
7425 */
7426 if (partial_matches && partial_matches[i])
7427 counts->partialEntries += 100;
7428 else
7429 counts->exactEntries++;
7430
7431 counts->searchEntries++;
7432 }
7433
7434 if (searchMode == GIN_SEARCH_MODE_DEFAULT)
7435 {
7436 counts->attHasNormalScan[indexcol] = true;
7437 }
7438 else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
7439 {
7440 /* Treat "include empty" like an exact-match item */
7441 counts->attHasNormalScan[indexcol] = true;
7442 counts->exactEntries++;
7443 counts->searchEntries++;
7444 }
7445 else
7446 {
7447 /* It's GIN_SEARCH_MODE_ALL */
7448 counts->attHasFullScan[indexcol] = true;
7449 }
7450
7451 return true;
7452}
char * Pointer
Definition: c.h:479
int32_t int32
Definition: c.h:484
void set_fn_opclass_options(FmgrInfo *flinfo, bytea *options)
Definition: fmgr.c:2070
Datum FunctionCall7Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3, Datum arg4, Datum arg5, Datum arg6, Datum arg7)
Definition: fmgr.c:1284
#define GIN_EXTRACTQUERY_PROC
Definition: gin.h:24
#define GIN_SEARCH_MODE_DEFAULT
Definition: gin.h:34
#define GIN_SEARCH_MODE_INCLUDE_EMPTY
Definition: gin.h:35
char * get_rel_name(Oid relid)
Definition: lsyscache.c:1955
void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op, int *strategy, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:137
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:797
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
static Datum UInt16GetDatum(uint16 X)
Definition: postgres.h:197
bool attHasNormalScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:7333
double partialEntries
Definition: selfuncs.c:7334
bool attHasFullScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:7332

References Assert, GinQualCounts::attHasFullScan, GinQualCounts::attHasNormalScan, elog, ERROR, GinQualCounts::exactEntries, fmgr_info(), FunctionCall7Coll(), get_op_opfamily_properties(), get_opfamily_proc(), get_rel_name(), GIN_EXTRACTQUERY_PROC, GIN_SEARCH_MODE_DEFAULT, GIN_SEARCH_MODE_INCLUDE_EMPTY, i, OidIsValid, GinQualCounts::partialEntries, PointerGetDatum(), GinQualCounts::searchEntries, set_fn_opclass_options(), and UInt16GetDatum().

Referenced by gincost_opexpr(), and gincost_scalararrayopexpr().
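
A brief sketch of a direct call, assuming the qual's operand has already been reduced to a non-null Const (which is what gincost_opexpr() guarantees before it calls this function):

    GinQualCounts counts;

    memset(&counts, 0, sizeof(counts));
    if (gincost_pattern(index, indexcol, clause_op,
                        ((Const *) operand)->constvalue, &counts))
    {
        /* counts.searchEntries/exactEntries/partialEntries now reflect what
         * the opclass extractQuery function reported for this qual */
    }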

◆ gincost_scalararrayopexpr()

static bool gincost_scalararrayopexpr ( PlannerInfo *  root,
IndexOptInfo *  index,
int  indexcol,
ScalarArrayOpExpr *  clause,
double  numIndexEntries,
GinQualCounts *  counts 
)
static

Definition at line 7510 of file selfuncs.c.

7516{
7517 Oid clause_op = clause->opno;
7518 Node *rightop = (Node *) lsecond(clause->