PostgreSQL Source Code git master
index_selfuncs.h File Reference
#include "access/amapi.h"

Go to the source code of this file.

Functions

void brincostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void btcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void hashcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gistcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void spgcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gincostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 

Function Documentation
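
None of these estimators is called by name from the planner; each index access method publishes its estimator through the amcostestimate field of its IndexAmRoutine. Below is a minimal sketch of that hookup, assuming a hypothetical handler function (the real handlers are the ones listed under "Referenced by" for each estimator in this file):

#include "postgres.h"

#include "access/amapi.h"
#include "fmgr.h"
#include "nodes/nodes.h"
#include "utils/index_selfuncs.h"

/*
 * Hypothetical handler; only the cost-estimator hookup is shown.  The real
 * handlers (brinhandler(), bthandler(), and so on) assign the matching
 * estimator declared in this header in the same way.
 */
Datum
my_am_handler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* ... all other IndexAmRoutine fields omitted ... */
    amroutine->amcostestimate = btcostestimate;

    PG_RETURN_POINTER(amroutine);
}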

◆ brincostestimate()

void brincostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8910 of file selfuncs.c.

8914{
8915 IndexOptInfo *index = path->indexinfo;
8916 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8917 double numPages = index->pages;
8918 RelOptInfo *baserel = index->rel;
8919 RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
8920 Cost spc_seq_page_cost;
8921 Cost spc_random_page_cost;
8922 double qual_arg_cost;
8923 double qualSelectivity;
8924 BrinStatsData statsData;
8925 double indexRanges;
8926 double minimalRanges;
8927 double estimatedRanges;
8928 double selec;
8929 Relation indexRel;
8930 ListCell *l;
8931 VariableStatData vardata;
8932
8933 Assert(rte->rtekind == RTE_RELATION);
8934
8935 /* fetch estimated page cost for the tablespace containing the index */
8936 get_tablespace_page_costs(index->reltablespace,
8937 &spc_random_page_cost,
8938 &spc_seq_page_cost);
8939
8940 /*
8941 * Obtain some data from the index itself, if possible. Otherwise invent
8942 * some plausible internal statistics based on the relation page count.
8943 */
8944 if (!index->hypothetical)
8945 {
8946 /*
8947 * A lock should have already been obtained on the index in plancat.c.
8948 */
8949 indexRel = index_open(index->indexoid, NoLock);
8950 brinGetStats(indexRel, &statsData);
8951 index_close(indexRel, NoLock);
8952
8953 /* work out the actual number of ranges in the index */
8954 indexRanges = Max(ceil((double) baserel->pages /
8955 statsData.pagesPerRange), 1.0);
8956 }
8957 else
8958 {
8959 /*
8960 * Assume default number of pages per range, and estimate the number
8961 * of ranges based on that.
8962 */
8963 indexRanges = Max(ceil((double) baserel->pages /
8964 BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);
8965
8966 statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
8967 statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
8968 }
8969
8970 /*
8971 * Compute index correlation
8972 *
8973 * Because we can use all index quals equally when scanning, we can use
8974 * the largest correlation (in absolute value) among columns used by the
8975 * query. Start at zero, the worst possible case. If we cannot find any
8976 * correlation statistics, we will keep it as 0.
8977 */
8978 *indexCorrelation = 0;
8979
8980 foreach(l, path->indexclauses)
8981 {
8982 IndexClause *iclause = lfirst_node(IndexClause, l);
8983 AttrNumber attnum = index->indexkeys[iclause->indexcol];
8984
8985 /* attempt to lookup stats in relation for this index column */
8986 if (attnum != 0)
8987 {
8988 /* Simple variable -- look to stats for the underlying table */
8989 if (get_relation_stats_hook &&
8990 (*get_relation_stats_hook) (root, rte, attnum, &vardata))
8991 {
8992 /*
8993 * The hook took control of acquiring a stats tuple. If it
8994 * did supply a tuple, it'd better have supplied a freefunc.
8995 */
8996 if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
8997 elog(ERROR,
8998 "no function provided to release variable stats with");
8999 }
9000 else
9001 {
9002 vardata.statsTuple =
9003 SearchSysCache3(STATRELATTINH,
9004 ObjectIdGetDatum(rte->relid),
9005 Int16GetDatum(attnum),
9006 BoolGetDatum(false));
9007 vardata.freefunc = ReleaseSysCache;
9008 }
9009 }
9010 else
9011 {
9012 /*
9013 * Looks like we've found an expression column in the index. Let's
9014 * see if there's any stats for it.
9015 */
9016
9017 /* get the attnum from the 0-based index. */
9018 attnum = iclause->indexcol + 1;
9019
9020 if (get_index_stats_hook &&
9021 (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
9022 {
9023 /*
9024 * The hook took control of acquiring a stats tuple. If it
9025 * did supply a tuple, it'd better have supplied a freefunc.
9026 */
9027 if (HeapTupleIsValid(vardata.statsTuple) &&
9028 !vardata.freefunc)
9029 elog(ERROR, "no function provided to release variable stats with");
9030 }
9031 else
9032 {
9033 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
9034 ObjectIdGetDatum(index->indexoid),
9035 Int16GetDatum(attnum),
9036 BoolGetDatum(false));
9037 vardata.freefunc = ReleaseSysCache;
9038 }
9039 }
9040
9041 if (HeapTupleIsValid(vardata.statsTuple))
9042 {
9043 AttStatsSlot sslot;
9044
9045 if (get_attstatsslot(&sslot, vardata.statsTuple,
9046 STATISTIC_KIND_CORRELATION, InvalidOid,
9047 ATTSTATSSLOT_NUMBERS))
9048 {
9049 double varCorrelation = 0.0;
9050
9051 if (sslot.nnumbers > 0)
9052 varCorrelation = fabs(sslot.numbers[0]);
9053
9054 if (varCorrelation > *indexCorrelation)
9055 *indexCorrelation = varCorrelation;
9056
9057 free_attstatsslot(&sslot);
9058 }
9059 }
9060
9061 ReleaseVariableStats(vardata);
9062 }
9063
9064 qualSelectivity = clauselist_selectivity(root, indexQuals,
9065 baserel->relid,
9066 JOIN_INNER, NULL);
9067
9068 /*
9069 * Now calculate the minimum possible ranges we could match with if all of
9070 * the rows were in the perfect order in the table's heap.
9071 */
9072 minimalRanges = ceil(indexRanges * qualSelectivity);
9073
9074 /*
9075 * Now estimate the number of ranges that we'll touch by using the
9076 * indexCorrelation from the stats. Careful not to divide by zero (note
9077 * we're using the absolute value of the correlation).
9078 */
9079 if (*indexCorrelation < 1.0e-10)
9080 estimatedRanges = indexRanges;
9081 else
9082 estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
9083
9084 /* we expect to visit this portion of the table */
9085 selec = estimatedRanges / indexRanges;
9086
9087 CLAMP_PROBABILITY(selec);
9088
9089 *indexSelectivity = selec;
9090
9091 /*
9092 * Compute the index qual costs, much as in genericcostestimate, to add to
9093 * the index costs. We can disregard indexorderbys, since BRIN doesn't
9094 * support those.
9095 */
9096 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
9097
9098 /*
9099 * Compute the startup cost as the cost to read the whole revmap
9100 * sequentially, including the cost to execute the index quals.
9101 */
9102 *indexStartupCost =
9103 spc_seq_page_cost * statsData.revmapNumPages * loop_count;
9104 *indexStartupCost += qual_arg_cost;
9105
9106 /*
9107 * To read a BRIN index there might be a bit of back and forth over
9108 * regular pages, as revmap might point to them out of sequential order;
9109 * calculate the total cost as reading the whole index in random order.
9110 */
9111 *indexTotalCost = *indexStartupCost +
9112 spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
9113
9114 /*
9115 * Charge a small amount per range tuple which we expect to match to. This
9116 * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
9117 * will set a bit for each page in the range when we find a matching
9118 * range, so we must multiply the charge by the number of pages in the
9119 * range.
9120 */
9121 *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
9122 statsData.pagesPerRange;
9123
9124 *indexPages = index->pages;
9125}
int16 AttrNumber
Definition: attnum.h:21
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1648
#define BRIN_DEFAULT_PAGES_PER_RANGE
Definition: brin.h:40
#define REVMAP_PAGE_MAXITEMS
Definition: brin_page.h:93
#define Min(x, y)
Definition: c.h:1006
#define Max(x, y)
Definition: c.h:1000
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:100
double cpu_operator_cost
Definition: costsize.c:134
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
Assert(PointerIsAligned(start, uint64))
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
#define NoLock
Definition: lockdefs.h:34
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3511
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3401
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:44
double Cost
Definition: nodes.h:261
@ JOIN_INNER
Definition: nodes.h:303
@ RTE_RELATION
Definition: parsenodes.h:1043
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:610
int16 attnum
Definition: pg_attribute.h:74
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:182
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
#define InvalidOid
Definition: postgres_ext.h:37
tree ctl root
Definition: radixtree.h:1857
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:7230
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:184
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:7260
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:183
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
float4 * numbers
Definition: lsyscache.h:57
int nnumbers
Definition: lsyscache.h:58
BlockNumber revmapNumPages
Definition: brin.h:36
BlockNumber pagesPerRange
Definition: brin.h:35
AttrNumber indexcol
Definition: pathnodes.h:2009
List * indexclauses
Definition: pathnodes.h:1959
IndexOptInfo * indexinfo
Definition: pathnodes.h:1958
Definition: pg_list.h:54
RTEKind rtekind
Definition: parsenodes.h:1078
Index relid
Definition: pathnodes.h:973
BlockNumber pages
Definition: pathnodes.h:999
HeapTuple statsTuple
Definition: selfuncs.h:89
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:91
Definition: type.h:96
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:240

References Assert(), attnum, ATTSTATSSLOT_NUMBERS, BoolGetDatum(), BRIN_DEFAULT_PAGES_PER_RANGE, brinGetStats(), CLAMP_PROBABILITY, clauselist_selectivity(), cpu_operator_cost, elog, ERROR, free_attstatsslot(), VariableStatData::freefunc, get_attstatsslot(), get_index_stats_hook, get_quals_from_indexclauses(), get_relation_stats_hook, get_tablespace_page_costs(), HeapTupleIsValid, index_close(), index_open(), index_other_operands_eval_cost(), IndexPath::indexclauses, IndexClause::indexcol, IndexPath::indexinfo, Int16GetDatum(), InvalidOid, JOIN_INNER, lfirst_node, Max, Min, AttStatsSlot::nnumbers, NoLock, AttStatsSlot::numbers, ObjectIdGetDatum(), RelOptInfo::pages, BrinStatsData::pagesPerRange, planner_rt_fetch, ReleaseSysCache(), ReleaseVariableStats, RelOptInfo::relid, REVMAP_PAGE_MAXITEMS, BrinStatsData::revmapNumPages, root, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), and VariableStatData::statsTuple.

Referenced by brinhandler().
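
As a concrete illustration of the range arithmetic above, the following standalone sketch (plain C; all inputs are invented and come from no real index) reproduces the indexRanges, minimalRanges, and estimatedRanges steps and the resulting selectivity. A strong correlation keeps the estimate near the minimal range count, while a correlation near zero degrades it to scanning every range.

#include <math.h>
#include <stdio.h>

/* Invented inputs; this only mirrors the arithmetic of brincostestimate(). */
int
main(void)
{
    double  heap_pages = 100000.0;      /* baserel->pages */
    double  pages_per_range = 128.0;    /* statsData.pagesPerRange */
    double  qual_selectivity = 0.01;    /* clauselist_selectivity() result */
    double  correlation = 0.90;         /* largest |correlation| among quals */
    double  index_ranges,
            minimal_ranges,
            estimated_ranges;

    index_ranges = fmax(ceil(heap_pages / pages_per_range), 1.0);
    minimal_ranges = ceil(index_ranges * qual_selectivity);

    if (correlation < 1.0e-10)
        estimated_ranges = index_ranges;    /* no usable correlation */
    else
        estimated_ranges = fmin(minimal_ranges / correlation, index_ranges);

    printf("ranges=%.0f minimal=%.0f estimated=%.0f selectivity=%.4f\n",
           index_ranges, minimal_ranges, estimated_ranges,
           estimated_ranges / index_ranges);
    return 0;
}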

◆ btcostestimate()

void btcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 7605 of file selfuncs.c.

7609{
7610 IndexOptInfo *index = path->indexinfo;
7611 GenericCosts costs = {0};
7612 VariableStatData vardata = {0};
7613 double numIndexTuples;
7614 Cost descentCost;
7615 List *indexBoundQuals;
7616 List *indexSkipQuals;
7617 int indexcol;
7618 bool eqQualHere;
7619 bool found_row_compare;
7620 bool found_array;
7621 bool found_is_null_op;
7622 bool have_correlation = false;
7623 double num_sa_scans;
7624 double correlation = 0.0;
7625 ListCell *lc;
7626
7627 /*
7628 * For a btree scan, only leading '=' quals plus inequality quals for the
7629 * immediately next attribute contribute to index selectivity (these are
7630 * the "boundary quals" that determine the starting and stopping points of
7631 * the index scan). Additional quals can suppress visits to the heap, so
7632 * it's OK to count them in indexSelectivity, but they should not count
7633 * for estimating numIndexTuples. So we must examine the given indexquals
7634 * to find out which ones count as boundary quals. We rely on the
7635 * knowledge that they are given in index column order. Note that nbtree
7636 * preprocessing can add skip arrays that act as leading '=' quals in the
7637 * absence of ordinary input '=' quals, so in practice _most_ input quals
7638 * are able to act as index bound quals (which we take into account here).
7639 *
7640 * For a RowCompareExpr, we consider only the first column, just as
7641 * rowcomparesel() does.
7642 *
7643 * If there's a SAOP or skip array in the quals, we'll actually perform up
7644 * to N index descents (not just one), but the underlying array key's
7645 * operator can be considered to act the same as it normally does.
7646 */
7647 indexBoundQuals = NIL;
7648 indexSkipQuals = NIL;
7649 indexcol = 0;
7650 eqQualHere = false;
7651 found_row_compare = false;
7652 found_array = false;
7653 found_is_null_op = false;
7654 num_sa_scans = 1;
7655 foreach(lc, path->indexclauses)
7656 {
7657 IndexClause *iclause = lfirst_node(IndexClause, lc);
7658 ListCell *lc2;
7659
7660 if (indexcol < iclause->indexcol)
7661 {
7662 double num_sa_scans_prev_cols = num_sa_scans;
7663
7664 /*
7665 * Beginning of a new column's quals.
7666 *
7667 * Skip scans use skip arrays, which are ScalarArrayOp style
7668 * arrays that generate their elements procedurally and on demand.
7669 * Given a multi-column index on "(a, b)", and an SQL WHERE clause
7670 * "WHERE b = 42", a skip scan will effectively use an indexqual
7671 * "WHERE a = ANY('{every col a value}') AND b = 42". (Obviously,
7672 * the array on "a" must also return "IS NULL" matches, since our
7673 * WHERE clause used no strict operator on "a").
7674 *
7675 * Here we consider how nbtree will backfill skip arrays for any
7676 * index columns that lacked an '=' qual. This maintains our
7677 * num_sa_scans estimate, and determines if this new column (the
7678 * "iclause->indexcol" column, not the prior "indexcol" column)
7679 * can have its RestrictInfos/quals added to indexBoundQuals.
7680 *
7681 * We'll need to handle columns that have inequality quals, where
7682 * the skip array generates values from a range constrained by the
7683 * quals (not every possible value). We've been maintaining
7684 * indexSkipQuals to help with this; it will now contain all of
7685 * the prior column's quals (that is, indexcol's quals) when they
7686 * might be used for this.
7687 */
7688 if (found_row_compare)
7689 {
7690 /*
7691 * Skip arrays can't be added after a RowCompare input qual
7692 * due to limitations in nbtree
7693 */
7694 break;
7695 }
7696 if (eqQualHere)
7697 {
7698 /*
7699 * Don't need to add a skip array for an indexcol that already
7700 * has an '=' qual/equality constraint
7701 */
7702 indexcol++;
7703 indexSkipQuals = NIL;
7704 }
7705 eqQualHere = false;
7706
7707 while (indexcol < iclause->indexcol)
7708 {
7709 double ndistinct;
7710 bool isdefault = true;
7711
7712 found_array = true;
7713
7714 /*
7715 * A skipped attribute's ndistinct forms the basis of our
7716 * estimate of the total number of "array elements" used by
7717 * its skip array at runtime. Look that up first.
7718 */
7719 examine_indexcol_variable(root, index, indexcol, &vardata);
7720 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
7721
7722 if (indexcol == 0)
7723 {
7724 /*
7725 * Get an estimate of the leading column's correlation in
7726 * passing (avoids rereading variable stats below)
7727 */
7728 if (HeapTupleIsValid(vardata.statsTuple))
7729 correlation = btcost_correlation(index, &vardata);
7730 have_correlation = true;
7731 }
7732
7733 ReleaseVariableStats(vardata);
7734
7735 /*
7736 * If ndistinct is a default estimate, conservatively assume
7737 * that no skipping will happen at runtime
7738 */
7739 if (isdefault)
7740 {
7741 num_sa_scans = num_sa_scans_prev_cols;
7742 break; /* done building indexBoundQuals */
7743 }
7744
7745 /*
7746 * Apply indexcol's indexSkipQuals selectivity to ndistinct
7747 */
7748 if (indexSkipQuals != NIL)
7749 {
7750 List *partialSkipQuals;
7751 Selectivity ndistinctfrac;
7752
7753 /*
7754 * If the index is partial, AND the index predicate with
7755 * the index-bound quals to produce a more accurate idea
7756 * of the number of distinct values for prior indexcol
7757 */
7758 partialSkipQuals = add_predicate_to_index_quals(index,
7759 indexSkipQuals);
7760
7761 ndistinctfrac = clauselist_selectivity(root, partialSkipQuals,
7762 index->rel->relid,
7763 JOIN_INNER,
7764 NULL);
7765
7766 /*
7767 * If ndistinctfrac is selective (on its own), the scan is
7768 * unlikely to benefit from repositioning itself using
7769 * later quals. Do not allow iclause->indexcol's quals to
7770 * be added to indexBoundQuals (it would increase descent
7771 * costs, without lowering numIndexTuples costs by much).
7772 */
7773 if (ndistinctfrac < DEFAULT_RANGE_INEQ_SEL)
7774 {
7775 num_sa_scans = num_sa_scans_prev_cols;
7776 break; /* done building indexBoundQuals */
7777 }
7778
7779 /* Adjust ndistinct downward */
7780 ndistinct = rint(ndistinct * ndistinctfrac);
7781 ndistinct = Max(ndistinct, 1);
7782 }
7783
7784 /*
7785 * When there's no inequality quals, account for the need to
7786 * find an initial value by counting -inf/+inf as a value.
7787 *
7788 * We don't charge anything extra for possible next/prior key
7789 * index probes, which are sometimes used to find the next
7790 * valid skip array element (ahead of using the located
7791 * element value to relocate the scan to the next position
7792 * that might contain matching tuples). It seems hard to do
7793 * better here. Use of the skip support infrastructure often
7794 * avoids most next/prior key probes. But even when it can't,
7795 * there's a decent chance that most individual next/prior key
7796 * probes will locate a leaf page whose key space overlaps all
7797 * of the scan's keys (even the lower-order keys) -- which
7798 * also avoids the need for a separate, extra index descent.
7799 * Note also that these probes are much cheaper than non-probe
7800 * primitive index scans: they're reliably very selective.
7801 */
7802 if (indexSkipQuals == NIL)
7803 ndistinct += 1;
7804
7805 /*
7806 * Update num_sa_scans estimate by multiplying by ndistinct.
7807 *
7808 * We make the pessimistic assumption that there is no
7809 * naturally occurring cross-column correlation. This is
7810 * often wrong, but it seems best to err on the side of not
7811 * expecting skipping to be helpful...
7812 */
7813 num_sa_scans *= ndistinct;
7814
7815 /*
7816 * ...but back out of adding this latest group of 1 or more
7817 * skip arrays when num_sa_scans exceeds the total number of
7818 * index pages (revert to num_sa_scans from before indexcol).
7819 * This causes a sharp discontinuity in cost (as a function of
7820 * the indexcol's ndistinct), but that is representative of
7821 * actual runtime costs.
7822 *
7823 * Note that skipping is helpful when each primitive index
7824 * scan only manages to skip over 1 or 2 irrelevant leaf pages
7825 * on average. Skip arrays bring savings in CPU costs due to
7826 * the scan not needing to evaluate indexquals against every
7827 * tuple, which can greatly exceed any savings in I/O costs.
7828 * This test is a test of whether num_sa_scans implies that
7829 * we're past the point where the ability to skip ceases to
7830 * lower the scan's costs (even qual evaluation CPU costs).
7831 */
7832 if (index->pages < num_sa_scans)
7833 {
7834 num_sa_scans = num_sa_scans_prev_cols;
7835 break; /* done building indexBoundQuals */
7836 }
7837
7838 indexcol++;
7839 indexSkipQuals = NIL;
7840 }
7841
7842 /*
7843 * Finished considering the need to add skip arrays to bridge an
7844 * initial eqQualHere gap between the old and new index columns
7845 * (or there was no initial eqQualHere gap in the first place).
7846 *
7847 * If an initial gap could not be bridged, then new column's quals
7848 * (i.e. iclause->indexcol's quals) won't go into indexBoundQuals,
7849 * and so won't affect our final numIndexTuples estimate.
7850 */
7851 if (indexcol != iclause->indexcol)
7852 break; /* done building indexBoundQuals */
7853 }
7854
7855 Assert(indexcol == iclause->indexcol);
7856
7857 /* Examine each indexqual associated with this index clause */
7858 foreach(lc2, iclause->indexquals)
7859 {
7860 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
7861 Expr *clause = rinfo->clause;
7862 Oid clause_op = InvalidOid;
7863 int op_strategy;
7864
7865 if (IsA(clause, OpExpr))
7866 {
7867 OpExpr *op = (OpExpr *) clause;
7868
7869 clause_op = op->opno;
7870 }
7871 else if (IsA(clause, RowCompareExpr))
7872 {
7873 RowCompareExpr *rc = (RowCompareExpr *) clause;
7874
7875 clause_op = linitial_oid(rc->opnos);
7876 found_row_compare = true;
7877 }
7878 else if (IsA(clause, ScalarArrayOpExpr))
7879 {
7880 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
7881 Node *other_operand = (Node *) lsecond(saop->args);
7882 double alength = estimate_array_length(root, other_operand);
7883
7884 clause_op = saop->opno;
7885 found_array = true;
7886 /* estimate SA descents by indexBoundQuals only */
7887 if (alength > 1)
7888 num_sa_scans *= alength;
7889 }
7890 else if (IsA(clause, NullTest))
7891 {
7892 NullTest *nt = (NullTest *) clause;
7893
7894 if (nt->nulltesttype == IS_NULL)
7895 {
7896 found_is_null_op = true;
7897 /* IS NULL is like = for selectivity/skip scan purposes */
7898 eqQualHere = true;
7899 }
7900 }
7901 else
7902 elog(ERROR, "unsupported indexqual type: %d",
7903 (int) nodeTag(clause));
7904
7905 /* check for equality operator */
7906 if (OidIsValid(clause_op))
7907 {
7908 op_strategy = get_op_opfamily_strategy(clause_op,
7909 index->opfamily[indexcol]);
7910 Assert(op_strategy != 0); /* not a member of opfamily?? */
7911 if (op_strategy == BTEqualStrategyNumber)
7912 eqQualHere = true;
7913 }
7914
7915 indexBoundQuals = lappend(indexBoundQuals, rinfo);
7916
7917 /*
7918 * We apply inequality selectivities to estimate index descent
7919 * costs with scans that use skip arrays. Save this indexcol's
7920 * RestrictInfos if it looks like they'll be needed for that.
7921 */
7922 if (!eqQualHere && !found_row_compare &&
7923 indexcol < index->nkeycolumns - 1)
7924 indexSkipQuals = lappend(indexSkipQuals, rinfo);
7925 }
7926 }
7927
7928 /*
7929 * If index is unique and we found an '=' clause for each column, we can
7930 * just assume numIndexTuples = 1 and skip the expensive
7931 * clauselist_selectivity calculations. However, an array or NullTest
7932 * always invalidates that theory (even when eqQualHere has been set).
7933 */
7934 if (index->unique &&
7935 indexcol == index->nkeycolumns - 1 &&
7936 eqQualHere &&
7937 !found_array &&
7938 !found_is_null_op)
7939 numIndexTuples = 1.0;
7940 else
7941 {
7942 List *selectivityQuals;
7943 Selectivity btreeSelectivity;
7944
7945 /*
7946 * If the index is partial, AND the index predicate with the
7947 * index-bound quals to produce a more accurate idea of the number of
7948 * rows covered by the bound conditions.
7949 */
7950 selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
7951
7952 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
7953 index->rel->relid,
7954 JOIN_INNER,
7955 NULL);
7956 numIndexTuples = btreeSelectivity * index->rel->tuples;
7957
7958 /*
7959 * btree automatically combines individual array element primitive
7960 * index scans whenever the tuples covered by the next set of array
7961 * keys are close to tuples covered by the current set. That puts a
7962 * natural ceiling on the worst case number of descents -- there
7963 * cannot possibly be more than one descent per leaf page scanned.
7964 *
7965 * Clamp the number of descents to at most 1/3 the number of index
7966 * pages. This avoids implausibly high estimates with low selectivity
7967 * paths, where scans usually require only one or two descents. This
7968 * is most likely to help when there are several SAOP clauses, where
7969 * naively accepting the total number of distinct combinations of
7970 * array elements as the number of descents would frequently lead to
7971 * wild overestimates.
7972 *
7973 * We somewhat arbitrarily don't just make the cutoff the total number
7974 * of leaf pages (we make it 1/3 the total number of pages instead) to
7975 * give the btree code credit for its ability to continue on the leaf
7976 * level with low selectivity scans.
7977 *
7978 * Note: num_sa_scans includes both ScalarArrayOp array elements and
7979 * skip array elements whose qual affects our numIndexTuples estimate.
7980 */
7981 num_sa_scans = Min(num_sa_scans, ceil(index->pages * 0.3333333));
7982 num_sa_scans = Max(num_sa_scans, 1);
7983
7984 /*
7985 * As in genericcostestimate(), we have to adjust for any array quals
7986 * included in indexBoundQuals, and then round to integer.
7987 *
7988 * It is tempting to make genericcostestimate behave as if array
7989 * clauses work in almost the same way as scalar operators during
7990 * btree scans, making the top-level scan look like a continuous scan
7991 * (as opposed to num_sa_scans-many primitive index scans). After
7992 * all, btree scans mostly work like that at runtime. However, such a
7993 * scheme would badly bias genericcostestimate's simplistic approach
7994 * to calculating numIndexPages through prorating.
7995 *
7996 * Stick with the approach taken by non-native SAOP scans for now.
7997 * genericcostestimate will use the Mackert-Lohman formula to
7998 * compensate for repeat page fetches, even though that definitely
7999 * won't happen during btree scans (not for leaf pages, at least).
8000 * We're usually very pessimistic about the number of primitive index
8001 * scans that will be required, but it's not clear how to do better.
8002 */
8003 numIndexTuples = rint(numIndexTuples / num_sa_scans);
8004 }
8005
8006 /*
8007 * Now do generic index cost estimation.
8008 */
8009 costs.numIndexTuples = numIndexTuples;
8010 costs.num_sa_scans = num_sa_scans;
8011
8012 genericcostestimate(root, path, loop_count, &costs);
8013
8014 /*
8015 * Add a CPU-cost component to represent the costs of initial btree
8016 * descent. We don't charge any I/O cost for touching upper btree levels,
8017 * since they tend to stay in cache, but we still have to do about log2(N)
8018 * comparisons to descend a btree of N leaf tuples. We charge one
8019 * cpu_operator_cost per comparison.
8020 *
8021 * If there are SAOP or skip array keys, charge this once per estimated
8022 * index descent. The ones after the first one are not startup cost so
8023 * far as the overall plan goes, so just add them to "total" cost.
8024 */
8025 if (index->tuples > 1) /* avoid computing log(0) */
8026 {
8027 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
8028 costs.indexStartupCost += descentCost;
8029 costs.indexTotalCost += costs.num_sa_scans * descentCost;
8030 }
8031
8032 /*
8033 * Even though we're not charging I/O cost for touching upper btree pages,
8034 * it's still reasonable to charge some CPU cost per page descended
8035 * through. Moreover, if we had no such charge at all, bloated indexes
8036 * would appear to have the same search cost as unbloated ones, at least
8037 * in cases where only a single leaf page is expected to be visited. This
8038 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
8039 * touched. The number of such pages is btree tree height plus one (ie,
8040 * we charge for the leaf page too). As above, charge once per estimated
8041 * SAOP/skip array descent.
8042 */
8043 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8044 costs.indexStartupCost += descentCost;
8045 costs.indexTotalCost += costs.num_sa_scans * descentCost;
8046
8047 if (!have_correlation)
8048 {
8049 examine_indexcol_variable(root, index, 0, &vardata);
8050 if (HeapTupleIsValid(vardata.statsTuple))
8051 costs.indexCorrelation = btcost_correlation(index, &vardata);
8052 ReleaseVariableStats(vardata);
8053 }
8054 else
8055 {
8056 /* btcost_correlation already called earlier on */
8057 costs.indexCorrelation = correlation;
8058 }
8059
8060 *indexStartupCost = costs.indexStartupCost;
8061 *indexTotalCost = costs.indexTotalCost;
8062 *indexSelectivity = costs.indexSelectivity;
8063 *indexCorrelation = costs.indexCorrelation;
8064 *indexPages = costs.numIndexPages;
8065}
#define OidIsValid(objectId)
Definition: c.h:777
List * lappend(List *list, void *datum)
Definition: list.c:339
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:85
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
#define nodeTag(nodeptr)
Definition: nodes.h:139
double Selectivity
Definition: nodes.h:260
#define NIL
Definition: pg_list.h:68
#define lsecond(l)
Definition: pg_list.h:183
#define linitial_oid(l)
Definition: pg_list.h:180
unsigned int Oid
Definition: postgres_ext.h:32
@ IS_NULL
Definition: primnodes.h:1977
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:7537
#define DEFAULT_PAGE_CPU_MULTIPLIER
Definition: selfuncs.c:144
double estimate_array_length(PlannerInfo *root, Node *arrayexpr)
Definition: selfuncs.c:2220
void genericcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
Definition: selfuncs.c:7314
static void examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, int indexcol, VariableStatData *vardata)
Definition: selfuncs.c:6418
static double btcost_correlation(IndexOptInfo *index, VariableStatData *vardata)
Definition: selfuncs.c:7568
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:6521
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Selectivity indexSelectivity
Definition: selfuncs.h:129
Cost indexStartupCost
Definition: selfuncs.h:127
double indexCorrelation
Definition: selfuncs.h:130
double num_sa_scans
Definition: selfuncs.h:136
Cost indexTotalCost
Definition: selfuncs.h:128
double numIndexPages
Definition: selfuncs.h:133
double numIndexTuples
Definition: selfuncs.h:134
List * indexquals
Definition: pathnodes.h:2007
Definition: nodes.h:135
NullTestType nulltesttype
Definition: primnodes.h:1984
Oid opno
Definition: primnodes.h:850
Expr * clause
Definition: pathnodes.h:2792

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, Assert(), btcost_correlation(), BTEqualStrategyNumber, RestrictInfo::clause, clauselist_selectivity(), cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, DEFAULT_RANGE_INEQ_SEL, elog, ERROR, estimate_array_length(), examine_indexcol_variable(), genericcostestimate(), get_op_opfamily_strategy(), get_variable_numdistinct(), HeapTupleIsValid, IndexPath::indexclauses, IndexClause::indexcol, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexClause::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), lfirst_node, linitial_oid, lsecond, Max, Min, NIL, nodeTag, NullTest::nulltesttype, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, OidIsValid, OpExpr::opno, ScalarArrayOpExpr::opno, ReleaseVariableStats, root, and VariableStatData::statsTuple.

Referenced by bthandler().
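
The descent-cost charge above can be illustrated with a standalone sketch (all inputs invented): roughly log2(N) comparisons per descent at one cpu_operator_cost each, charged once per estimated array or skip-array descent, with the descent count clamped to about a third of the index pages as described in the comments.

#include <math.h>
#include <stdio.h>

/* Invented inputs; mirrors only the descent-cost charging of btcostestimate(). */
int
main(void)
{
    double  index_tuples = 1000000.0;
    double  index_pages = 2745.0;
    double  num_sa_scans = 100.0;       /* e.g. "col = ANY(<100 elements>)" */
    double  cpu_operator_cost = 0.0025; /* default GUC value */
    double  descent_cost,
            startup_cost = 0.0,
            total_cost = 0.0;

    /* clamp descents to at most 1/3 of the index pages, and at least 1 */
    num_sa_scans = fmin(num_sa_scans, ceil(index_pages * 0.3333333));
    num_sa_scans = fmax(num_sa_scans, 1.0);

    /* about log2(N) comparisons to descend a btree of N leaf tuples */
    descent_cost = ceil(log(index_tuples) / log(2.0)) * cpu_operator_cost;
    startup_cost += descent_cost;               /* first descent only */
    total_cost += num_sa_scans * descent_cost;  /* every estimated descent */

    printf("descents=%.0f startup+=%.4f total+=%.4f\n",
           num_sa_scans, startup_cost, total_cost);
    return 0;
}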

◆ gincostestimate()

void gincostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8520 of file selfuncs.c.

8524{
8525 IndexOptInfo *index = path->indexinfo;
8526 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8527 List *selectivityQuals;
8528 double numPages = index->pages,
8529 numTuples = index->tuples;
8530 double numEntryPages,
8531 numDataPages,
8532 numPendingPages,
8533 numEntries;
8534 GinQualCounts counts;
8535 bool matchPossible;
8536 bool fullIndexScan;
8537 double partialScale;
8538 double entryPagesFetched,
8539 dataPagesFetched,
8540 dataPagesFetchedBySel;
8541 double qual_op_cost,
8542 qual_arg_cost,
8543 spc_random_page_cost,
8544 outer_scans;
8545 Cost descentCost;
8546 Relation indexRel;
8547 GinStatsData ginStats;
8548 ListCell *lc;
8549 int i;
8550
8551 /*
8552 * Obtain statistical information from the meta page, if possible. Else
8553 * set ginStats to zeroes, and we'll cope below.
8554 */
8555 if (!index->hypothetical)
8556 {
8557 /* Lock should have already been obtained in plancat.c */
8558 indexRel = index_open(index->indexoid, NoLock);
8559 ginGetStats(indexRel, &ginStats);
8560 index_close(indexRel, NoLock);
8561 }
8562 else
8563 {
8564 memset(&ginStats, 0, sizeof(ginStats));
8565 }
8566
8567 /*
8568 * Assuming we got valid (nonzero) stats at all, nPendingPages can be
8569 * trusted, but the other fields are data as of the last VACUUM. We can
8570 * scale them up to account for growth since then, but that method only
8571 * goes so far; in the worst case, the stats might be for a completely
8572 * empty index, and scaling them will produce pretty bogus numbers.
8573 * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
8574 * it's grown more than that, fall back to estimating things only from the
8575 * assumed-accurate index size. But we'll trust nPendingPages in any case
8576 * so long as it's not clearly insane, ie, more than the index size.
8577 */
8578 if (ginStats.nPendingPages < numPages)
8579 numPendingPages = ginStats.nPendingPages;
8580 else
8581 numPendingPages = 0;
8582
8583 if (numPages > 0 && ginStats.nTotalPages <= numPages &&
8584 ginStats.nTotalPages > numPages / 4 &&
8585 ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
8586 {
8587 /*
8588 * OK, the stats seem close enough to sane to be trusted. But we
8589 * still need to scale them by the ratio numPages / nTotalPages to
8590 * account for growth since the last VACUUM.
8591 */
8592 double scale = numPages / ginStats.nTotalPages;
8593
8594 numEntryPages = ceil(ginStats.nEntryPages * scale);
8595 numDataPages = ceil(ginStats.nDataPages * scale);
8596 numEntries = ceil(ginStats.nEntries * scale);
8597 /* ensure we didn't round up too much */
8598 numEntryPages = Min(numEntryPages, numPages - numPendingPages);
8599 numDataPages = Min(numDataPages,
8600 numPages - numPendingPages - numEntryPages);
8601 }
8602 else
8603 {
8604 /*
8605 * We might get here because it's a hypothetical index, or an index
8606 * created pre-9.1 and never vacuumed since upgrading (in which case
8607 * its stats would read as zeroes), or just because it's grown too
8608 * much since the last VACUUM for us to put our faith in scaling.
8609 *
8610 * Invent some plausible internal statistics based on the index page
8611 * count (and clamp that to at least 10 pages, just in case). We
8612 * estimate that 90% of the index is entry pages, and the rest is data
8613 * pages. Estimate 100 entries per entry page; this is rather bogus
8614 * since it'll depend on the size of the keys, but it's more robust
8615 * than trying to predict the number of entries per heap tuple.
8616 */
8617 numPages = Max(numPages, 10);
8618 numEntryPages = floor((numPages - numPendingPages) * 0.90);
8619 numDataPages = numPages - numPendingPages - numEntryPages;
8620 numEntries = floor(numEntryPages * 100);
8621 }
8622
8623 /* In an empty index, numEntries could be zero. Avoid divide-by-zero */
8624 if (numEntries < 1)
8625 numEntries = 1;
8626
8627 /*
8628 * If the index is partial, AND the index predicate with the index-bound
8629 * quals to produce a more accurate idea of the number of rows covered by
8630 * the bound conditions.
8631 */
8632 selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
8633
8634 /* Estimate the fraction of main-table tuples that will be visited */
8635 *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
8636 index->rel->relid,
8637 JOIN_INNER,
8638 NULL);
8639
8640 /* fetch estimated page cost for tablespace containing index */
8641 get_tablespace_page_costs(index->reltablespace,
8642 &spc_random_page_cost,
8643 NULL);
8644
8645 /*
8646 * Generic assumption about index correlation: there isn't any.
8647 */
8648 *indexCorrelation = 0.0;
8649
8650 /*
8651 * Examine quals to estimate number of search entries & partial matches
8652 */
8653 memset(&counts, 0, sizeof(counts));
8654 counts.arrayScans = 1;
8655 matchPossible = true;
8656
8657 foreach(lc, path->indexclauses)
8658 {
8659 IndexClause *iclause = lfirst_node(IndexClause, lc);
8660 ListCell *lc2;
8661
8662 foreach(lc2, iclause->indexquals)
8663 {
8664 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
8665 Expr *clause = rinfo->clause;
8666
8667 if (IsA(clause, OpExpr))
8668 {
8669 matchPossible = gincost_opexpr(root,
8670 index,
8671 iclause->indexcol,
8672 (OpExpr *) clause,
8673 &counts);
8674 if (!matchPossible)
8675 break;
8676 }
8677 else if (IsA(clause, ScalarArrayOpExpr))
8678 {
8679 matchPossible = gincost_scalararrayopexpr(root,
8680 index,
8681 iclause->indexcol,
8682 (ScalarArrayOpExpr *) clause,
8683 numEntries,
8684 &counts);
8685 if (!matchPossible)
8686 break;
8687 }
8688 else
8689 {
8690 /* shouldn't be anything else for a GIN index */
8691 elog(ERROR, "unsupported GIN indexqual type: %d",
8692 (int) nodeTag(clause));
8693 }
8694 }
8695 }
8696
8697 /* Fall out if there were any provably-unsatisfiable quals */
8698 if (!matchPossible)
8699 {
8700 *indexStartupCost = 0;
8701 *indexTotalCost = 0;
8702 *indexSelectivity = 0;
8703 return;
8704 }
8705
8706 /*
8707 * If attribute has a full scan and at the same time doesn't have normal
8708 * scan, then we'll have to scan all non-null entries of that attribute.
8709 * Currently, we don't have per-attribute statistics for GIN. Thus, we
8710 * must assume the whole GIN index has to be scanned in this case.
8711 */
8712 fullIndexScan = false;
8713 for (i = 0; i < index->nkeycolumns; i++)
8714 {
8715 if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i])
8716 {
8717 fullIndexScan = true;
8718 break;
8719 }
8720 }
8721
8722 if (fullIndexScan || indexQuals == NIL)
8723 {
8724 /*
8725 * Full index scan will be required. We treat this as if every key in
8726 * the index had been listed in the query; is that reasonable?
8727 */
8728 counts.partialEntries = 0;
8729 counts.exactEntries = numEntries;
8730 counts.searchEntries = numEntries;
8731 }
8732
8733 /* Will we have more than one iteration of a nestloop scan? */
8734 outer_scans = loop_count;
8735
8736 /*
8737 * Compute cost to begin scan, first of all, pay attention to pending
8738 * list.
8739 */
8740 entryPagesFetched = numPendingPages;
8741
8742 /*
8743 * Estimate number of entry pages read. We need to do
8744 * counts.searchEntries searches. Use a power function as it should be,
8745 * but tuples on leaf pages usually is much greater. Here we include all
8746 * searches in entry tree, including search of first entry in partial
8747 * match algorithm
8748 */
8749 entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
8750
8751 /*
8752 * Add an estimate of entry pages read by partial match algorithm. It's a
8753 * scan over leaf pages in entry tree. We haven't any useful stats here,
8754 * so estimate it as proportion. Because counts.partialEntries is really
8755 * pretty bogus (see code above), it's possible that it is more than
8756 * numEntries; clamp the proportion to ensure sanity.
8757 */
8758 partialScale = counts.partialEntries / numEntries;
8759 partialScale = Min(partialScale, 1.0);
8760
8761 entryPagesFetched += ceil(numEntryPages * partialScale);
8762
8763 /*
8764 * Partial match algorithm reads all data pages before doing actual scan,
8765 * so it's a startup cost. Again, we haven't any useful stats here, so
8766 * estimate it as proportion.
8767 */
8768 dataPagesFetched = ceil(numDataPages * partialScale);
8769
8770 *indexStartupCost = 0;
8771 *indexTotalCost = 0;
8772
8773 /*
8774 * Add a CPU-cost component to represent the costs of initial entry btree
8775 * descent. We don't charge any I/O cost for touching upper btree levels,
8776 * since they tend to stay in cache, but we still have to do about log2(N)
8777 * comparisons to descend a btree of N leaf tuples. We charge one
8778 * cpu_operator_cost per comparison.
8779 *
8780 * If there are ScalarArrayOpExprs, charge this once per SA scan. The
8781 * ones after the first one are not startup cost so far as the overall
8782 * plan is concerned, so add them only to "total" cost.
8783 */
8784 if (numEntries > 1) /* avoid computing log(0) */
8785 {
8786 descentCost = ceil(log(numEntries) / log(2.0)) * cpu_operator_cost;
8787 *indexStartupCost += descentCost * counts.searchEntries;
8788 *indexTotalCost += counts.arrayScans * descentCost * counts.searchEntries;
8789 }
8790
8791 /*
8792 * Add a cpu cost per entry-page fetched. This is not amortized over a
8793 * loop.
8794 */
8795 *indexStartupCost += entryPagesFetched * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8796 *indexTotalCost += entryPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8797
8798 /*
8799 * Add a cpu cost per data-page fetched. This is also not amortized over a
8800 * loop. Since those are the data pages from the partial match algorithm,
8801 * charge them as startup cost.
8802 */
8803 *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * dataPagesFetched;
8804
8805 /*
8806 * Since we add the startup cost to the total cost later on, remove the
8807 * initial arrayscan from the total.
8808 */
8809 *indexTotalCost += dataPagesFetched * (counts.arrayScans - 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8810
8811 /*
8812 * Calculate cache effects if more than one scan due to nestloops or array
8813 * quals. The result is pro-rated per nestloop scan, but the array qual
8814 * factor shouldn't be pro-rated (compare genericcostestimate).
8815 */
8816 if (outer_scans > 1 || counts.arrayScans > 1)
8817 {
8818 entryPagesFetched *= outer_scans * counts.arrayScans;
8819 entryPagesFetched = index_pages_fetched(entryPagesFetched,
8820 (BlockNumber) numEntryPages,
8821 numEntryPages, root);
8822 entryPagesFetched /= outer_scans;
8823 dataPagesFetched *= outer_scans * counts.arrayScans;
8824 dataPagesFetched = index_pages_fetched(dataPagesFetched,
8825 (BlockNumber) numDataPages,
8826 numDataPages, root);
8827 dataPagesFetched /= outer_scans;
8828 }
8829
8830 /*
8831 * Here we use random page cost because logically-close pages could be far
8832 * apart on disk.
8833 */
8834 *indexStartupCost += (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
8835
8836 /*
8837 * Now compute the number of data pages fetched during the scan.
8838 *
8839 * We assume every entry to have the same number of items, and that there
8840 * is no overlap between them. (XXX: tsvector and array opclasses collect
8841 * statistics on the frequency of individual keys; it would be nice to use
8842 * those here.)
8843 */
8844 dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
8845
8846 /*
8847 * If there is a lot of overlap among the entries, in particular if one of
8848 * the entries is very frequent, the above calculation can grossly
8849 * under-estimate. As a simple cross-check, calculate a lower bound based
8850 * on the overall selectivity of the quals. At a minimum, we must read
8851 * one item pointer for each matching entry.
8852 *
8853 * The width of each item pointer varies, based on the level of
8854 * compression. We don't have statistics on that, but an average of
8855 * around 3 bytes per item is fairly typical.
8856 */
8857 dataPagesFetchedBySel = ceil(*indexSelectivity *
8858 (numTuples / (BLCKSZ / 3)));
8859 if (dataPagesFetchedBySel > dataPagesFetched)
8860 dataPagesFetched = dataPagesFetchedBySel;
8861
8862 /* Add one page cpu-cost to the startup cost */
8863 *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * counts.searchEntries;
8864
8865 /*
8866 * Add once again a CPU-cost for those data pages, before amortizing for
8867 * cache.
8868 */
8869 *indexTotalCost += dataPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8870
8871 /* Account for cache effects, the same as above */
8872 if (outer_scans > 1 || counts.arrayScans > 1)
8873 {
8874 dataPagesFetched *= outer_scans * counts.arrayScans;
8875 dataPagesFetched = index_pages_fetched(dataPagesFetched,
8876 (BlockNumber) numDataPages,
8877 numDataPages, root);
8878 dataPagesFetched /= outer_scans;
8879 }
8880
8881 /* And apply random_page_cost as the cost per page */
8882 *indexTotalCost += *indexStartupCost +
8883 dataPagesFetched * spc_random_page_cost;
8884
8885 /*
8886 * Add on index qual eval costs, much as in genericcostestimate. We charge
8887 * cpu but we can disregard indexorderbys, since GIN doesn't support
8888 * those.
8889 */
8890 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8891 qual_op_cost = cpu_operator_cost * list_length(indexQuals);
8892
8893 *indexStartupCost += qual_arg_cost;
8894 *indexTotalCost += qual_arg_cost;
8895
8896 /*
8897 * Add a cpu cost per search entry, corresponding to the actual visited
8898 * entries.
8899 */
8900 *indexTotalCost += (counts.searchEntries * counts.arrayScans) * (qual_op_cost);
8901 /* Now add a cpu cost per tuple in the posting lists / trees */
8902 *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost);
8903 *indexPages = dataPagesFetched;
8904}
uint32 BlockNumber
Definition: block.h:31
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:882
double cpu_index_tuple_cost
Definition: costsize.c:133
void ginGetStats(Relation index, GinStatsData *stats)
Definition: ginutil.c:628
int i
Definition: isn.c:77
static int list_length(const List *l)
Definition: pg_list.h:152
static int scale
Definition: pgbench.c:182
static bool gincost_scalararrayopexpr(PlannerInfo *root, IndexOptInfo *index, int indexcol, ScalarArrayOpExpr *clause, double numIndexEntries, GinQualCounts *counts)
Definition: selfuncs.c:8404
static bool gincost_opexpr(PlannerInfo *root, IndexOptInfo *index, int indexcol, OpExpr *clause, GinQualCounts *counts)
Definition: selfuncs.c:8354
bool attHasNormalScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:8227
double exactEntries
Definition: selfuncs.c:8229
double arrayScans
Definition: selfuncs.c:8231
double partialEntries
Definition: selfuncs.c:8228
bool attHasFullScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:8226
double searchEntries
Definition: selfuncs.c:8230
BlockNumber nDataPages
Definition: gin.h:60
BlockNumber nPendingPages
Definition: gin.h:57
BlockNumber nEntryPages
Definition: gin.h:59
int64 nEntries
Definition: gin.h:61
BlockNumber nTotalPages
Definition: gin.h:58

References add_predicate_to_index_quals(), GinQualCounts::arrayScans, GinQualCounts::attHasFullScan, GinQualCounts::attHasNormalScan, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, elog, ERROR, GinQualCounts::exactEntries, get_quals_from_indexclauses(), get_tablespace_page_costs(), gincost_opexpr(), gincost_scalararrayopexpr(), ginGetStats(), i, index_close(), index_open(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, IndexClause::indexcol, IndexPath::indexinfo, IndexClause::indexquals, IsA, JOIN_INNER, lfirst_node, list_length(), Max, Min, GinStatsData::nDataPages, GinStatsData::nEntries, GinStatsData::nEntryPages, NIL, nodeTag, NoLock, GinStatsData::nPendingPages, GinStatsData::nTotalPages, GinQualCounts::partialEntries, root, scale, and GinQualCounts::searchEntries.

Referenced by ginhandler().
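
A standalone sketch (invented inputs) of the meta-page statistics handling above: the last-VACUUM numbers are scaled by numPages / nTotalPages as long as the index has grown less than 4X since then; otherwise the estimator falls back to the 90%-entry-pages, 100-entries-per-entry-page guess.

#include <math.h>
#include <stdio.h>

/* Invented inputs; mirrors only the stats-scaling decision of gincostestimate(). */
int
main(void)
{
    double  numPages = 12000.0;     /* current index size in pages */
    double  numPendingPages = 100.0;
    double  nTotalPages = 4000.0;   /* GinStatsData as of the last VACUUM */
    double  nEntryPages = 3000.0;
    double  nDataPages = 900.0;
    double  nEntries = 250000.0;
    double  numEntryPages,
            numDataPages,
            numEntries;

    if (numPages > 0 && nTotalPages <= numPages &&
        nTotalPages > numPages / 4 &&
        nEntryPages > 0 && nEntries > 0)
    {
        /* stats look sane: scale for growth since the last VACUUM */
        double  scale = numPages / nTotalPages;

        numEntryPages = ceil(nEntryPages * scale);
        numDataPages = ceil(nDataPages * scale);
        numEntries = ceil(nEntries * scale);
        /* ensure we didn't round up too much */
        numEntryPages = fmin(numEntryPages, numPages - numPendingPages);
        numDataPages = fmin(numDataPages,
                            numPages - numPendingPages - numEntryPages);
    }
    else
    {
        /* grown too much (or hypothetical index): invent plausible numbers */
        numPages = fmax(numPages, 10);
        numEntryPages = floor((numPages - numPendingPages) * 0.90);
        numDataPages = numPages - numPendingPages - numEntryPages;
        numEntries = floor(numEntryPages * 100);
    }

    printf("entryPages=%.0f dataPages=%.0f entries=%.0f\n",
           numEntryPages, numDataPages, numEntries);
    return 0;
}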

◆ gistcostestimate()

void gistcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8110 of file selfuncs.c.

8114{
8115 IndexOptInfo *index = path->indexinfo;
8116 GenericCosts costs = {0};
8117 Cost descentCost;
8118
8119 genericcostestimate(root, path, loop_count, &costs);
8120
8121 /*
8122 * We model index descent costs similarly to those for btree, but to do
8123 * that we first need an idea of the tree height. We somewhat arbitrarily
8124 * assume that the fanout is 100, meaning the tree height is at most
8125 * log100(index->pages).
8126 *
8127 * Although this computation isn't really expensive enough to require
8128 * caching, we might as well use index->tree_height to cache it.
8129 */
8130 if (index->tree_height < 0) /* unknown? */
8131 {
8132 if (index->pages > 1) /* avoid computing log(0) */
8133 index->tree_height = (int) (log(index->pages) / log(100.0));
8134 else
8135 index->tree_height = 0;
8136 }
8137
8138 /*
8139 * Add a CPU-cost component to represent the costs of initial descent. We
8140 * just use log(N) here not log2(N) since the branching factor isn't
8141 * necessarily two anyway. As for btree, charge once per SA scan.
8142 */
8143 if (index->tuples > 1) /* avoid computing log(0) */
8144 {
8145 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
8146 costs.indexStartupCost += descentCost;
8147 costs.indexTotalCost += costs.num_sa_scans * descentCost;
8148 }
8149
8150 /*
8151 * Likewise add a per-page charge, calculated the same as for btrees.
8152 */
8153 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8154 costs.indexStartupCost += descentCost;
8155 costs.indexTotalCost += costs.num_sa_scans * descentCost;
8156
8157 *indexStartupCost = costs.indexStartupCost;
8158 *indexTotalCost = costs.indexTotalCost;
8159 *indexSelectivity = costs.indexSelectivity;
8160 *indexCorrelation = costs.indexCorrelation;
8161 *indexPages = costs.numIndexPages;
8162}

References cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, and root.

Referenced by gisthandler().
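
gistcostestimate() and spgcostestimate() (below) share this descent model. A standalone sketch with invented inputs follows; the 50x per-page multiplier is the DEFAULT_PAGE_CPU_MULTIPLIER described in the btree comments above.

#include <math.h>
#include <stdio.h>

/* Invented inputs; mirrors only the descent model shared by GiST and SP-GiST. */
int
main(void)
{
    double  index_pages = 50000.0;
    double  index_tuples = 2000000.0;
    double  cpu_operator_cost = 0.0025; /* default GUC value */
    double  page_cpu_multiplier = 50.0; /* DEFAULT_PAGE_CPU_MULTIPLIER */
    int     tree_height;
    double  descent_cost = 0.0;

    /* assume a fanout of 100, so the height is about log100(pages) */
    tree_height = (index_pages > 1) ? (int) (log(index_pages) / log(100.0)) : 0;

    /* log(N) comparisons; natural log since the branching factor isn't two */
    if (index_tuples > 1)
        descent_cost += ceil(log(index_tuples)) * cpu_operator_cost;

    /* per-page CPU charge for the descent: tree height plus the leaf page */
    descent_cost += (tree_height + 1) * page_cpu_multiplier * cpu_operator_cost;

    printf("tree_height=%d descent_cost=%.4f\n", tree_height, descent_cost);
    return 0;
}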

◆ hashcostestimate()

void hashcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8068 of file selfuncs.c.

8072{
8073 GenericCosts costs = {0};
8074
8075 genericcostestimate(root, path, loop_count, &costs);
8076
8077 /*
8078 * A hash index has no descent costs as such, since the index AM can go
8079 * directly to the target bucket after computing the hash value. There
8080 * are a couple of other hash-specific costs that we could conceivably add
8081 * here, though:
8082 *
8083 * Ideally we'd charge spc_random_page_cost for each page in the target
8084 * bucket, not just the numIndexPages pages that genericcostestimate
8085 * thought we'd visit. However in most cases we don't know which bucket
8086 * that will be. There's no point in considering the average bucket size
8087 * because the hash AM makes sure that's always one page.
8088 *
8089 * Likewise, we could consider charging some CPU for each index tuple in
8090 * the bucket, if we knew how many there were. But the per-tuple cost is
8091 * just a hash value comparison, not a general datatype-dependent
8092 * comparison, so any such charge ought to be quite a bit less than
8093 * cpu_operator_cost; which makes it probably not worth worrying about.
8094 *
8095 * A bigger issue is that chance hash-value collisions will result in
8096 * wasted probes into the heap. We don't currently attempt to model this
8097 * cost on the grounds that it's rare, but maybe it's not rare enough.
8098 * (Any fix for this ought to consider the generic lossy-operator problem,
8099 * though; it's not entirely hash-specific.)
8100 */
8101
8102 *indexStartupCost = costs.indexStartupCost;
8103 *indexTotalCost = costs.indexTotalCost;
8104 *indexSelectivity = costs.indexSelectivity;
8105 *indexCorrelation = costs.indexCorrelation;
8106 *indexPages = costs.numIndexPages;
8107}

References genericcostestimate(), GenericCosts::indexCorrelation, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, GenericCosts::numIndexPages, and root.

Referenced by hashhandler().

◆ spgcostestimate()

void spgcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8165 of file selfuncs.c.

8169{
8170 IndexOptInfo *index = path->indexinfo;
8171 GenericCosts costs = {0};
8172 Cost descentCost;
8173
8174 genericcostestimate(root, path, loop_count, &costs);
8175
8176 /*
8177 * We model index descent costs similarly to those for btree, but to do
8178 * that we first need an idea of the tree height. We somewhat arbitrarily
8179 * assume that the fanout is 100, meaning the tree height is at most
8180 * log100(index->pages).
8181 *
8182 * Although this computation isn't really expensive enough to require
8183 * caching, we might as well use index->tree_height to cache it.
8184 */
8185 if (index->tree_height < 0) /* unknown? */
8186 {
8187 if (index->pages > 1) /* avoid computing log(0) */
8188 index->tree_height = (int) (log(index->pages) / log(100.0));
8189 else
8190 index->tree_height = 0;
8191 }
8192
8193 /*
8194 * Add a CPU-cost component to represent the costs of initial descent. We
8195 * just use log(N) here not log2(N) since the branching factor isn't
8196 * necessarily two anyway. As for btree, charge once per SA scan.
8197 */
8198 if (index->tuples > 1) /* avoid computing log(0) */
8199 {
8200 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
8201 costs.indexStartupCost += descentCost;
8202 costs.indexTotalCost += costs.num_sa_scans * descentCost;
8203 }
8204
8205 /*
8206 * Likewise add a per-page charge, calculated the same as for btrees.
8207 */
8208 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8209 costs.indexStartupCost += descentCost;
8210 costs.indexTotalCost += costs.num_sa_scans * descentCost;
8211
8212 *indexStartupCost = costs.indexStartupCost;
8213 *indexTotalCost = costs.indexTotalCost;
8214 *indexSelectivity = costs.indexSelectivity;
8215 *indexCorrelation = costs.indexCorrelation;
8216 *indexPages = costs.numIndexPages;
8217}

References cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, and root.

Referenced by spghandler().