PostgreSQL Source Code git master
index_selfuncs.h File Reference
#include "access/amapi.h"
Include dependency graph for index_selfuncs.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void brincostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void btcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void hashcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gistcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void spgcostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
 
void gincostestimate (struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
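
These estimators are exported so that index access method handlers can install them as the amcostestimate callback of their IndexAmRoutine (declared in access/amapi.h, which this header includes). Below is a condensed, illustrative sketch of such a handler, modeled on bthandler(); a real handler fills in many more fields.

#include "postgres.h"
#include "fmgr.h"
#include "access/amapi.h"
#include "utils/index_selfuncs.h"

/* Condensed sketch of an index AM handler: only the cost-estimate hookup
 * is shown; everything else a real handler must set is elided. */
Datum
bthandler(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* ... amstrategies, aminsert, ambeginscan, etc. omitted ... */

    amroutine->amcostestimate = btcostestimate; /* the planner calls this */

    PG_RETURN_POINTER(amroutine);
}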
 

Function Documentation

◆ brincostestimate()

void brincostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8598 of file selfuncs.c.

8602{
8603 IndexOptInfo *index = path->indexinfo;
8604 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8605 double numPages = index->pages;
8606 RelOptInfo *baserel = index->rel;
8607 RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
8608 Cost spc_seq_page_cost;
8609 Cost spc_random_page_cost;
8610 double qual_arg_cost;
8611 double qualSelectivity;
8612 BrinStatsData statsData;
8613 double indexRanges;
8614 double minimalRanges;
8615 double estimatedRanges;
8616 double selec;
8617 Relation indexRel;
8618 ListCell *l;
8619 VariableStatData vardata;
8620
8621 Assert(rte->rtekind == RTE_RELATION);
8622
8623 /* fetch estimated page cost for the tablespace containing the index */
8624 get_tablespace_page_costs(index->reltablespace,
8625 &spc_random_page_cost,
8626 &spc_seq_page_cost);
8627
8628 /*
8629 * Obtain some data from the index itself, if possible. Otherwise invent
8630 * some plausible internal statistics based on the relation page count.
8631 */
8632 if (!index->hypothetical)
8633 {
8634 /*
8635 * A lock should have already been obtained on the index in plancat.c.
8636 */
8637 indexRel = index_open(index->indexoid, NoLock);
8638 brinGetStats(indexRel, &statsData);
8639 index_close(indexRel, NoLock);
8640
8641 /* work out the actual number of ranges in the index */
8642 indexRanges = Max(ceil((double) baserel->pages /
8643 statsData.pagesPerRange), 1.0);
8644 }
8645 else
8646 {
8647 /*
8648 * Assume default number of pages per range, and estimate the number
8649 * of ranges based on that.
8650 */
8651 indexRanges = Max(ceil((double) baserel->pages /
8652 BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);
8653
8654 statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
8655 statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
8656 }
8657
8658 /*
8659 * Compute index correlation
8660 *
8661 * Because we can use all index quals equally when scanning, we can use
8662 * the largest correlation (in absolute value) among columns used by the
8663 * query. Start at zero, the worst possible case. If we cannot find any
8664 * correlation statistics, we will keep it as 0.
8665 */
8666 *indexCorrelation = 0;
8667
8668 foreach(l, path->indexclauses)
8669 {
8670 IndexClause *iclause = lfirst_node(IndexClause, l);
8671 AttrNumber attnum = index->indexkeys[iclause->indexcol];
8672
8673 /* attempt to lookup stats in relation for this index column */
8674 if (attnum != 0)
8675 {
8676 /* Simple variable -- look to stats for the underlying table */
8677 if (get_relation_stats_hook &&
8678 (*get_relation_stats_hook) (root, rte, attnum, &vardata))
8679 {
8680 /*
8681 * The hook took control of acquiring a stats tuple. If it
8682 * did supply a tuple, it'd better have supplied a freefunc.
8683 */
8684 if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
8685 elog(ERROR,
8686 "no function provided to release variable stats with");
8687 }
8688 else
8689 {
8690 vardata.statsTuple =
8691 SearchSysCache3(STATRELATTINH,
8692 ObjectIdGetDatum(rte->relid),
8693 Int16GetDatum(attnum),
8694 BoolGetDatum(false));
8695 vardata.freefunc = ReleaseSysCache;
8696 }
8697 }
8698 else
8699 {
8700 /*
8701 * Looks like we've found an expression column in the index. Let's
8702 * see if there's any stats for it.
8703 */
8704
8705 /* get the attnum from the 0-based index. */
8706 attnum = iclause->indexcol + 1;
8707
8708 if (get_index_stats_hook &&
8709 (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
8710 {
8711 /*
8712 * The hook took control of acquiring a stats tuple. If it
8713 * did supply a tuple, it'd better have supplied a freefunc.
8714 */
8715 if (HeapTupleIsValid(vardata.statsTuple) &&
8716 !vardata.freefunc)
8717 elog(ERROR, "no function provided to release variable stats with");
8718 }
8719 else
8720 {
8721 vardata.statsTuple = SearchSysCache3(STATRELATTINH,
8722 ObjectIdGetDatum(index->indexoid),
8723 Int16GetDatum(attnum),
8724 BoolGetDatum(false));
8725 vardata.freefunc = ReleaseSysCache;
8726 }
8727 }
8728
8729 if (HeapTupleIsValid(vardata.statsTuple))
8730 {
8731 AttStatsSlot sslot;
8732
8733 if (get_attstatsslot(&sslot, vardata.statsTuple,
8734 STATISTIC_KIND_CORRELATION, InvalidOid,
8735 ATTSTATSSLOT_NUMBERS))
8736 {
8737 double varCorrelation = 0.0;
8738
8739 if (sslot.nnumbers > 0)
8740 varCorrelation = fabs(sslot.numbers[0]);
8741
8742 if (varCorrelation > *indexCorrelation)
8743 *indexCorrelation = varCorrelation;
8744
8745 free_attstatsslot(&sslot);
8746 }
8747 }
8748
8749 ReleaseVariableStats(vardata);
8750 }
8751
8752 qualSelectivity = clauselist_selectivity(root, indexQuals,
8753 baserel->relid,
8754 JOIN_INNER, NULL);
8755
8756 /*
8757 * Now calculate the minimum possible ranges we could match with if all of
8758 * the rows were in the perfect order in the table's heap.
8759 */
8760 minimalRanges = ceil(indexRanges * qualSelectivity);
8761
8762 /*
8763 * Now estimate the number of ranges that we'll touch by using the
8764 * indexCorrelation from the stats. Careful not to divide by zero (note
8765 * we're using the absolute value of the correlation).
8766 */
8767 if (*indexCorrelation < 1.0e-10)
8768 estimatedRanges = indexRanges;
8769 else
8770 estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
8771
8772 /* we expect to visit this portion of the table */
8773 selec = estimatedRanges / indexRanges;
8774
8775 CLAMP_PROBABILITY(selec);
8776
8777 *indexSelectivity = selec;
8778
8779 /*
8780 * Compute the index qual costs, much as in genericcostestimate, to add to
8781 * the index costs. We can disregard indexorderbys, since BRIN doesn't
8782 * support those.
8783 */
8784 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8785
8786 /*
8787 * Compute the startup cost as the cost to read the whole revmap
8788 * sequentially, including the cost to execute the index quals.
8789 */
8790 *indexStartupCost =
8791 spc_seq_page_cost * statsData.revmapNumPages * loop_count;
8792 *indexStartupCost += qual_arg_cost;
8793
8794 /*
8795 * To read a BRIN index there might be a bit of back and forth over
8796 * regular pages, as revmap might point to them out of sequential order;
8797 * calculate the total cost as reading the whole index in random order.
8798 */
8799 *indexTotalCost = *indexStartupCost +
8800 spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
8801
8802 /*
8803 * Charge a small amount per range tuple which we expect to match to. This
8804 * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
8805 * will set a bit for each page in the range when we find a matching
8806 * range, so we must multiply the charge by the number of pages in the
8807 * range.
8808 */
8809 *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
8810 statsData.pagesPerRange;
8811
8812 *indexPages = index->pages;
8813}
int16 AttrNumber
Definition: attnum.h:21
void brinGetStats(Relation index, BrinStatsData *stats)
Definition: brin.c:1648
#define BRIN_DEFAULT_PAGES_PER_RANGE
Definition: brin.h:40
#define REVMAP_PAGE_MAXITEMS
Definition: brin_page.h:93
#define Min(x, y)
Definition: c.h:1007
#define Max(x, y)
Definition: c.h:1001
Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:100
double cpu_operator_cost
Definition: costsize.c:134
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
Assert(PointerIsAligned(start, uint64))
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
#define NoLock
Definition: lockdefs.h:34
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3511
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3401
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:44
double Cost
Definition: nodes.h:261
@ JOIN_INNER
Definition: nodes.h:303
@ RTE_RELATION
Definition: parsenodes.h:1043
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:610
int16 attnum
Definition: pg_attribute.h:74
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:182
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
#define InvalidOid
Definition: postgres_ext.h:37
tree ctl root
Definition: radixtree.h:1857
List * get_quals_from_indexclauses(List *indexclauses)
Definition: selfuncs.c:6918
get_index_stats_hook_type get_index_stats_hook
Definition: selfuncs.c:148
Cost index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
Definition: selfuncs.c:6948
get_relation_stats_hook_type get_relation_stats_hook
Definition: selfuncs.c:147
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:101
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
void get_tablespace_page_costs(Oid spcid, double *spc_random_page_cost, double *spc_seq_page_cost)
Definition: spccache.c:182
float4 * numbers
Definition: lsyscache.h:57
int nnumbers
Definition: lsyscache.h:58
BlockNumber revmapNumPages
Definition: brin.h:36
BlockNumber pagesPerRange
Definition: brin.h:35
AttrNumber indexcol
Definition: pathnodes.h:2008
List * indexclauses
Definition: pathnodes.h:1958
IndexOptInfo * indexinfo
Definition: pathnodes.h:1957
Definition: pg_list.h:54
RTEKind rtekind
Definition: parsenodes.h:1078
Index relid
Definition: pathnodes.h:973
BlockNumber pages
Definition: pathnodes.h:999
HeapTuple statsTuple
Definition: selfuncs.h:89
void(* freefunc)(HeapTuple tuple)
Definition: selfuncs.h:91
Definition: type.h:96
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
HeapTuple SearchSysCache3(int cacheId, Datum key1, Datum key2, Datum key3)
Definition: syscache.c:240

References Assert(), attnum, ATTSTATSSLOT_NUMBERS, BoolGetDatum(), BRIN_DEFAULT_PAGES_PER_RANGE, brinGetStats(), CLAMP_PROBABILITY, clauselist_selectivity(), cpu_operator_cost, elog, ERROR, free_attstatsslot(), VariableStatData::freefunc, get_attstatsslot(), get_index_stats_hook, get_quals_from_indexclauses(), get_relation_stats_hook, get_tablespace_page_costs(), HeapTupleIsValid, index_close(), index_open(), index_other_operands_eval_cost(), IndexPath::indexclauses, IndexClause::indexcol, IndexPath::indexinfo, Int16GetDatum(), InvalidOid, JOIN_INNER, lfirst_node, Max, Min, AttStatsSlot::nnumbers, NoLock, AttStatsSlot::numbers, ObjectIdGetDatum(), RelOptInfo::pages, BrinStatsData::pagesPerRange, planner_rt_fetch, ReleaseSysCache(), ReleaseVariableStats, RelOptInfo::relid, REVMAP_PAGE_MAXITEMS, BrinStatsData::revmapNumPages, root, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), and VariableStatData::statsTuple.

Referenced by brinhandler().
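
As a rough illustration of the arithmetic above (hypothetical numbers only, not PostgreSQL API): with the default 128 pages per range, a 100,000-page heap has about 782 ranges; a qual selectivity of 0.01 and a best column correlation of 0.5 then yield an estimated selectivity of roughly 2%.

#include <math.h>
#include <stdio.h>

/* Stand-alone sketch of the BRIN range/selectivity arithmetic shown
 * above, using made-up inputs. */
int
main(void)
{
    double heapPages       = 100000.0;
    double pagesPerRange   = 128.0;   /* BRIN_DEFAULT_PAGES_PER_RANGE */
    double qualSelectivity = 0.01;    /* from clauselist_selectivity() */
    double correlation     = 0.5;     /* best |correlation| among columns */

    double indexRanges     = ceil(heapPages / pagesPerRange);       /* 782 */
    double minimalRanges   = ceil(indexRanges * qualSelectivity);   /*   8 */
    double estimatedRanges = (correlation < 1.0e-10)
        ? indexRanges
        : fmin(minimalRanges / correlation, indexRanges);           /*  16 */
    double selec = estimatedRanges / indexRanges;                   /* ~0.0205 */

    printf("indexRanges=%.0f estimatedRanges=%.0f selectivity=%.4f\n",
           indexRanges, estimatedRanges, selec);
    return 0;
}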

◆ btcostestimate()

void btcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 7293 of file selfuncs.c.

7297{
7298 IndexOptInfo *index = path->indexinfo;
7299 GenericCosts costs = {0};
7300 VariableStatData vardata = {0};
7301 double numIndexTuples;
7302 Cost descentCost;
7303 List *indexBoundQuals;
7304 List *indexSkipQuals;
7305 int indexcol;
7306 bool eqQualHere;
7307 bool found_row_compare;
7308 bool found_array;
7309 bool found_is_null_op;
7310 bool have_correlation = false;
7311 double num_sa_scans;
7312 double correlation = 0.0;
7313 ListCell *lc;
7314
7315 /*
7316 * For a btree scan, only leading '=' quals plus inequality quals for the
7317 * immediately next attribute contribute to index selectivity (these are
7318 * the "boundary quals" that determine the starting and stopping points of
7319 * the index scan). Additional quals can suppress visits to the heap, so
7320 * it's OK to count them in indexSelectivity, but they should not count
7321 * for estimating numIndexTuples. So we must examine the given indexquals
7322 * to find out which ones count as boundary quals. We rely on the
7323 * knowledge that they are given in index column order. Note that nbtree
7324 * preprocessing can add skip arrays that act as leading '=' quals in the
7325 * absence of ordinary input '=' quals, so in practice _most_ input quals
7326 * are able to act as index bound quals (which we take into account here).
7327 *
7328 * For a RowCompareExpr, we consider only the first column, just as
7329 * rowcomparesel() does.
7330 *
7331 * If there's a SAOP or skip array in the quals, we'll actually perform up
7332 * to N index descents (not just one), but the underlying array key's
7333 * operator can be considered to act the same as it normally does.
7334 */
7335 indexBoundQuals = NIL;
7336 indexSkipQuals = NIL;
7337 indexcol = 0;
7338 eqQualHere = false;
7339 found_row_compare = false;
7340 found_array = false;
7341 found_is_null_op = false;
7342 num_sa_scans = 1;
7343 foreach(lc, path->indexclauses)
7344 {
7345 IndexClause *iclause = lfirst_node(IndexClause, lc);
7346 ListCell *lc2;
7347
7348 if (indexcol < iclause->indexcol)
7349 {
7350 double num_sa_scans_prev_cols = num_sa_scans;
7351
7352 /*
7353 * Beginning of a new column's quals.
7354 *
7355 * Skip scans use skip arrays, which are ScalarArrayOp style
7356 * arrays that generate their elements procedurally and on demand.
7357 * Given a multi-column index on "(a, b)", and an SQL WHERE clause
7358 * "WHERE b = 42", a skip scan will effectively use an indexqual
7359 * "WHERE a = ANY('{every col a value}') AND b = 42". (Obviously,
7360 * the array on "a" must also return "IS NULL" matches, since our
7361 * WHERE clause used no strict operator on "a").
7362 *
7363 * Here we consider how nbtree will backfill skip arrays for any
7364 * index columns that lacked an '=' qual. This maintains our
7365 * num_sa_scans estimate, and determines if this new column (the
7366 * "iclause->indexcol" column, not the prior "indexcol" column)
7367 * can have its RestrictInfos/quals added to indexBoundQuals.
7368 *
7369 * We'll need to handle columns that have inequality quals, where
7370 * the skip array generates values from a range constrained by the
7371 * quals (not every possible value). We've been maintaining
7372 * indexSkipQuals to help with this; it will now contain all of
7373 * the prior column's quals (that is, indexcol's quals) when they
7374 * might be used for this.
7375 */
7376 if (found_row_compare)
7377 {
7378 /*
7379 * Skip arrays can't be added after a RowCompare input qual
7380 * due to limitations in nbtree
7381 */
7382 break;
7383 }
7384 if (eqQualHere)
7385 {
7386 /*
7387 * Don't need to add a skip array for an indexcol that already
7388 * has an '=' qual/equality constraint
7389 */
7390 indexcol++;
7391 indexSkipQuals = NIL;
7392 }
7393 eqQualHere = false;
7394
7395 while (indexcol < iclause->indexcol)
7396 {
7397 double ndistinct;
7398 bool isdefault = true;
7399
7400 found_array = true;
7401
7402 /*
7403 * A skipped attribute's ndistinct forms the basis of our
7404 * estimate of the total number of "array elements" used by
7405 * its skip array at runtime. Look that up first.
7406 */
7407 examine_indexcol_variable(root, index, indexcol, &vardata);
7408 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
7409
7410 if (indexcol == 0)
7411 {
7412 /*
7413 * Get an estimate of the leading column's correlation in
7414 * passing (avoids rereading variable stats below)
7415 */
7416 if (HeapTupleIsValid(vardata.statsTuple))
7417 correlation = btcost_correlation(index, &vardata);
7418 have_correlation = true;
7419 }
7420
7421 ReleaseVariableStats(vardata);
7422
7423 /*
7424 * If ndistinct is a default estimate, conservatively assume
7425 * that no skipping will happen at runtime
7426 */
7427 if (isdefault)
7428 {
7429 num_sa_scans = num_sa_scans_prev_cols;
7430 break; /* done building indexBoundQuals */
7431 }
7432
7433 /*
7434 * Apply indexcol's indexSkipQuals selectivity to ndistinct
7435 */
7436 if (indexSkipQuals != NIL)
7437 {
7438 List *partialSkipQuals;
7439 Selectivity ndistinctfrac;
7440
7441 /*
7442 * If the index is partial, AND the index predicate with
7443 * the index-bound quals to produce a more accurate idea
7444 * of the number of distinct values for prior indexcol
7445 */
7446 partialSkipQuals = add_predicate_to_index_quals(index,
7447 indexSkipQuals);
7448
7449 ndistinctfrac = clauselist_selectivity(root, partialSkipQuals,
7450 index->rel->relid,
7451 JOIN_INNER,
7452 NULL);
7453
7454 /*
7455 * If ndistinctfrac is selective (on its own), the scan is
7456 * unlikely to benefit from repositioning itself using
7457 * later quals. Do not allow iclause->indexcol's quals to
7458 * be added to indexBoundQuals (it would increase descent
7459 * costs, without lowering numIndexTuples costs by much).
7460 */
7461 if (ndistinctfrac < DEFAULT_RANGE_INEQ_SEL)
7462 {
7463 num_sa_scans = num_sa_scans_prev_cols;
7464 break; /* done building indexBoundQuals */
7465 }
7466
7467 /* Adjust ndistinct downward */
7468 ndistinct = rint(ndistinct * ndistinctfrac);
7469 ndistinct = Max(ndistinct, 1);
7470 }
7471
7472 /*
7473 * When there's no inequality quals, account for the need to
7474 * find an initial value by counting -inf/+inf as a value.
7475 *
7476 * We don't charge anything extra for possible next/prior key
7477 * index probes, which are sometimes used to find the next
7478 * valid skip array element (ahead of using the located
7479 * element value to relocate the scan to the next position
7480 * that might contain matching tuples). It seems hard to do
7481 * better here. Use of the skip support infrastructure often
7482 * avoids most next/prior key probes. But even when it can't,
7483 * there's a decent chance that most individual next/prior key
7484 * probes will locate a leaf page whose key space overlaps all
7485 * of the scan's keys (even the lower-order keys) -- which
7486 * also avoids the need for a separate, extra index descent.
7487 * Note also that these probes are much cheaper than non-probe
7488 * primitive index scans: they're reliably very selective.
7489 */
7490 if (indexSkipQuals == NIL)
7491 ndistinct += 1;
7492
7493 /*
7494 * Update num_sa_scans estimate by multiplying by ndistinct.
7495 *
7496 * We make the pessimistic assumption that there is no
7497 * naturally occurring cross-column correlation. This is
7498 * often wrong, but it seems best to err on the side of not
7499 * expecting skipping to be helpful...
7500 */
7501 num_sa_scans *= ndistinct;
7502
7503 /*
7504 * ...but back out of adding this latest group of 1 or more
7505 * skip arrays when num_sa_scans exceeds the total number of
7506 * index pages (revert to num_sa_scans from before indexcol).
7507 * This causes a sharp discontinuity in cost (as a function of
7508 * the indexcol's ndistinct), but that is representative of
7509 * actual runtime costs.
7510 *
7511 * Note that skipping is helpful when each primitive index
7512 * scan only manages to skip over 1 or 2 irrelevant leaf pages
7513 * on average. Skip arrays bring savings in CPU costs due to
7514 * the scan not needing to evaluate indexquals against every
7515 * tuple, which can greatly exceed any savings in I/O costs.
7516 * This test is a test of whether num_sa_scans implies that
7517 * we're past the point where the ability to skip ceases to
7518 * lower the scan's costs (even qual evaluation CPU costs).
7519 */
7520 if (index->pages < num_sa_scans)
7521 {
7522 num_sa_scans = num_sa_scans_prev_cols;
7523 break; /* done building indexBoundQuals */
7524 }
7525
7526 indexcol++;
7527 indexSkipQuals = NIL;
7528 }
7529
7530 /*
7531 * Finished considering the need to add skip arrays to bridge an
7532 * initial eqQualHere gap between the old and new index columns
7533 * (or there was no initial eqQualHere gap in the first place).
7534 *
7535 * If an initial gap could not be bridged, then new column's quals
7536 * (i.e. iclause->indexcol's quals) won't go into indexBoundQuals,
7537 * and so won't affect our final numIndexTuples estimate.
7538 */
7539 if (indexcol != iclause->indexcol)
7540 break; /* done building indexBoundQuals */
7541 }
7542
7543 Assert(indexcol == iclause->indexcol);
7544
7545 /* Examine each indexqual associated with this index clause */
7546 foreach(lc2, iclause->indexquals)
7547 {
7548 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
7549 Expr *clause = rinfo->clause;
7550 Oid clause_op = InvalidOid;
7551 int op_strategy;
7552
7553 if (IsA(clause, OpExpr))
7554 {
7555 OpExpr *op = (OpExpr *) clause;
7556
7557 clause_op = op->opno;
7558 }
7559 else if (IsA(clause, RowCompareExpr))
7560 {
7561 RowCompareExpr *rc = (RowCompareExpr *) clause;
7562
7563 clause_op = linitial_oid(rc->opnos);
7564 found_row_compare = true;
7565 }
7566 else if (IsA(clause, ScalarArrayOpExpr))
7567 {
7568 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
7569 Node *other_operand = (Node *) lsecond(saop->args);
7570 double alength = estimate_array_length(root, other_operand);
7571
7572 clause_op = saop->opno;
7573 found_array = true;
7574 /* estimate SA descents by indexBoundQuals only */
7575 if (alength > 1)
7576 num_sa_scans *= alength;
7577 }
7578 else if (IsA(clause, NullTest))
7579 {
7580 NullTest *nt = (NullTest *) clause;
7581
7582 if (nt->nulltesttype == IS_NULL)
7583 {
7584 found_is_null_op = true;
7585 /* IS NULL is like = for selectivity/skip scan purposes */
7586 eqQualHere = true;
7587 }
7588 }
7589 else
7590 elog(ERROR, "unsupported indexqual type: %d",
7591 (int) nodeTag(clause));
7592
7593 /* check for equality operator */
7594 if (OidIsValid(clause_op))
7595 {
7596 op_strategy = get_op_opfamily_strategy(clause_op,
7597 index->opfamily[indexcol]);
7598 Assert(op_strategy != 0); /* not a member of opfamily?? */
7599 if (op_strategy == BTEqualStrategyNumber)
7600 eqQualHere = true;
7601 }
7602
7603 indexBoundQuals = lappend(indexBoundQuals, rinfo);
7604
7605 /*
7606 * We apply inequality selectivities to estimate index descent
7607 * costs with scans that use skip arrays. Save this indexcol's
7608 * RestrictInfos if it looks like they'll be needed for that.
7609 */
7610 if (!eqQualHere && !found_row_compare &&
7611 indexcol < index->nkeycolumns - 1)
7612 indexSkipQuals = lappend(indexSkipQuals, rinfo);
7613 }
7614 }
7615
7616 /*
7617 * If index is unique and we found an '=' clause for each column, we can
7618 * just assume numIndexTuples = 1 and skip the expensive
7619 * clauselist_selectivity calculations. However, an array or NullTest
7620 * always invalidates that theory (even when eqQualHere has been set).
7621 */
7622 if (index->unique &&
7623 indexcol == index->nkeycolumns - 1 &&
7624 eqQualHere &&
7625 !found_array &&
7626 !found_is_null_op)
7627 numIndexTuples = 1.0;
7628 else
7629 {
7630 List *selectivityQuals;
7631 Selectivity btreeSelectivity;
7632
7633 /*
7634 * If the index is partial, AND the index predicate with the
7635 * index-bound quals to produce a more accurate idea of the number of
7636 * rows covered by the bound conditions.
7637 */
7638 selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
7639
7640 btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
7641 index->rel->relid,
7642 JOIN_INNER,
7643 NULL);
7644 numIndexTuples = btreeSelectivity * index->rel->tuples;
7645
7646 /*
7647 * btree automatically combines individual array element primitive
7648 * index scans whenever the tuples covered by the next set of array
7649 * keys are close to tuples covered by the current set. That puts a
7650 * natural ceiling on the worst case number of descents -- there
7651 * cannot possibly be more than one descent per leaf page scanned.
7652 *
7653 * Clamp the number of descents to at most 1/3 the number of index
7654 * pages. This avoids implausibly high estimates with low selectivity
7655 * paths, where scans usually require only one or two descents. This
7656 * is most likely to help when there are several SAOP clauses, where
7657 * naively accepting the total number of distinct combinations of
7658 * array elements as the number of descents would frequently lead to
7659 * wild overestimates.
7660 *
7661 * We somewhat arbitrarily don't just make the cutoff the total number
7662 * of leaf pages (we make it 1/3 the total number of pages instead) to
7663 * give the btree code credit for its ability to continue on the leaf
7664 * level with low selectivity scans.
7665 *
7666 * Note: num_sa_scans includes both ScalarArrayOp array elements and
7667 * skip array elements whose qual affects our numIndexTuples estimate.
7668 */
7669 num_sa_scans = Min(num_sa_scans, ceil(index->pages * 0.3333333));
7670 num_sa_scans = Max(num_sa_scans, 1);
7671
7672 /*
7673 * As in genericcostestimate(), we have to adjust for any array quals
7674 * included in indexBoundQuals, and then round to integer.
7675 *
7676 * It is tempting to make genericcostestimate behave as if array
7677 * clauses work in almost the same way as scalar operators during
7678 * btree scans, making the top-level scan look like a continuous scan
7679 * (as opposed to num_sa_scans-many primitive index scans). After
7680 * all, btree scans mostly work like that at runtime. However, such a
7681 * scheme would badly bias genericcostestimate's simplistic approach
7682 * to calculating numIndexPages through prorating.
7683 *
7684 * Stick with the approach taken by non-native SAOP scans for now.
7685 * genericcostestimate will use the Mackert-Lohman formula to
7686 * compensate for repeat page fetches, even though that definitely
7687 * won't happen during btree scans (not for leaf pages, at least).
7688 * We're usually very pessimistic about the number of primitive index
7689 * scans that will be required, but it's not clear how to do better.
7690 */
7691 numIndexTuples = rint(numIndexTuples / num_sa_scans);
7692 }
7693
7694 /*
7695 * Now do generic index cost estimation.
7696 */
7697 costs.numIndexTuples = numIndexTuples;
7698 costs.num_sa_scans = num_sa_scans;
7699
7700 genericcostestimate(root, path, loop_count, &costs);
7701
7702 /*
7703 * Add a CPU-cost component to represent the costs of initial btree
7704 * descent. We don't charge any I/O cost for touching upper btree levels,
7705 * since they tend to stay in cache, but we still have to do about log2(N)
7706 * comparisons to descend a btree of N leaf tuples. We charge one
7707 * cpu_operator_cost per comparison.
7708 *
7709 * If there are SAOP or skip array keys, charge this once per estimated
7710 * index descent. The ones after the first one are not startup cost so
7711 * far as the overall plan goes, so just add them to "total" cost.
7712 */
7713 if (index->tuples > 1) /* avoid computing log(0) */
7714 {
7715 descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
7716 costs.indexStartupCost += descentCost;
7717 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7718 }
7719
7720 /*
7721 * Even though we're not charging I/O cost for touching upper btree pages,
7722 * it's still reasonable to charge some CPU cost per page descended
7723 * through. Moreover, if we had no such charge at all, bloated indexes
7724 * would appear to have the same search cost as unbloated ones, at least
7725 * in cases where only a single leaf page is expected to be visited. This
7726 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
7727 * touched. The number of such pages is btree tree height plus one (ie,
7728 * we charge for the leaf page too). As above, charge once per estimated
7729 * SAOP/skip array descent.
7730 */
7731 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7732 costs.indexStartupCost += descentCost;
7733 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7734
7735 if (!have_correlation)
7736 {
7737 examine_indexcol_variable(root, index, 0, &vardata);
7738 if (HeapTupleIsValid(vardata.statsTuple))
7739 costs.indexCorrelation = btcost_correlation(index, &vardata);
7740 ReleaseVariableStats(vardata);
7741 }
7742 else
7743 {
7744 /* btcost_correlation already called earlier on */
7745 costs.indexCorrelation = correlation;
7746 }
7747
7748 *indexStartupCost = costs.indexStartupCost;
7749 *indexTotalCost = costs.indexTotalCost;
7750 *indexSelectivity = costs.indexSelectivity;
7751 *indexCorrelation = costs.indexCorrelation;
7752 *indexPages = costs.numIndexPages;
7753}
#define OidIsValid(objectId)
Definition: c.h:778
List * lappend(List *list, void *datum)
Definition: list.c:339
int get_op_opfamily_strategy(Oid opno, Oid opfamily)
Definition: lsyscache.c:85
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
#define nodeTag(nodeptr)
Definition: nodes.h:139
double Selectivity
Definition: nodes.h:260
#define NIL
Definition: pg_list.h:68
#define lsecond(l)
Definition: pg_list.h:183
#define linitial_oid(l)
Definition: pg_list.h:180
unsigned int Oid
Definition: postgres_ext.h:32
@ IS_NULL
Definition: primnodes.h:1977
List * add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
Definition: selfuncs.c:7225
#define DEFAULT_PAGE_CPU_MULTIPLIER
Definition: selfuncs.c:144
double estimate_array_length(PlannerInfo *root, Node *arrayexpr)
Definition: selfuncs.c:2154
void genericcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)
Definition: selfuncs.c:7002
static void examine_indexcol_variable(PlannerInfo *root, IndexOptInfo *index, int indexcol, VariableStatData *vardata)
Definition: selfuncs.c:6106
static double btcost_correlation(IndexOptInfo *index, VariableStatData *vardata)
Definition: selfuncs.c:7256
double get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
Definition: selfuncs.c:6209
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define BTEqualStrategyNumber
Definition: stratnum.h:31
Selectivity indexSelectivity
Definition: selfuncs.h:129
Cost indexStartupCost
Definition: selfuncs.h:127
double indexCorrelation
Definition: selfuncs.h:130
double num_sa_scans
Definition: selfuncs.h:136
Cost indexTotalCost
Definition: selfuncs.h:128
double numIndexPages
Definition: selfuncs.h:133
double numIndexTuples
Definition: selfuncs.h:134
List * indexquals
Definition: pathnodes.h:2006
Definition: nodes.h:135
NullTestType nulltesttype
Definition: primnodes.h:1984
Oid opno
Definition: primnodes.h:850
Expr * clause
Definition: pathnodes.h:2791

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, Assert(), btcost_correlation(), BTEqualStrategyNumber, RestrictInfo::clause, clauselist_selectivity(), cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, DEFAULT_RANGE_INEQ_SEL, elog, ERROR, estimate_array_length(), examine_indexcol_variable(), genericcostestimate(), get_op_opfamily_strategy(), get_variable_numdistinct(), HeapTupleIsValid, IndexPath::indexclauses, IndexClause::indexcol, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexClause::indexquals, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, InvalidOid, IS_NULL, IsA, JOIN_INNER, lappend(), lfirst_node, linitial_oid, lsecond, Max, Min, NIL, nodeTag, NullTest::nulltesttype, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, OidIsValid, OpExpr::opno, ScalarArrayOpExpr::opno, ReleaseVariableStats, root, and VariableStatData::statsTuple.

Referenced by bthandler().
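
A back-of-the-envelope illustration of the descent charges above (hypothetical inputs; cpu_operator_cost at its default of 0.0025 and the per-page multiplier at 50): a million-tuple index costs about 20 comparisons per descent, and a height-2 tree adds a charge for three pages.

#include <math.h>
#include <stdio.h>

/* Sketch of the btree descent-cost arithmetic with made-up inputs. */
int
main(void)
{
    double cpu_operator_cost = 0.0025; /* default value of the GUC */
    double page_multiplier   = 50.0;   /* DEFAULT_PAGE_CPU_MULTIPLIER */
    double tuples       = 1000000.0;   /* index->tuples */
    int    tree_height  = 2;           /* index->tree_height */
    double num_sa_scans = 4.0;         /* estimated index descents */

    /* ~log2(N) comparisons to descend to a leaf page */
    double cmpCost  = ceil(log(tuples) / log(2.0)) * cpu_operator_cost;        /* 0.05  */
    /* per-page CPU charge: tree height plus the leaf page itself */
    double pageCost = (tree_height + 1) * page_multiplier * cpu_operator_cost; /* 0.375 */

    double startup = cmpCost + pageCost;                   /* charged once */
    double total   = num_sa_scans * (cmpCost + pageCost);  /* charged per descent */

    printf("descent startup=%.3f total=%.3f\n", startup, total);
    return 0;
}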

◆ gincostestimate()

void gincostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 8208 of file selfuncs.c.

8212{
8213 IndexOptInfo *index = path->indexinfo;
8214 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
8215 List *selectivityQuals;
8216 double numPages = index->pages,
8217 numTuples = index->tuples;
8218 double numEntryPages,
8219 numDataPages,
8220 numPendingPages,
8221 numEntries;
8222 GinQualCounts counts;
8223 bool matchPossible;
8224 bool fullIndexScan;
8225 double partialScale;
8226 double entryPagesFetched,
8227 dataPagesFetched,
8228 dataPagesFetchedBySel;
8229 double qual_op_cost,
8230 qual_arg_cost,
8231 spc_random_page_cost,
8232 outer_scans;
8233 Cost descentCost;
8234 Relation indexRel;
8235 GinStatsData ginStats;
8236 ListCell *lc;
8237 int i;
8238
8239 /*
8240 * Obtain statistical information from the meta page, if possible. Else
8241 * set ginStats to zeroes, and we'll cope below.
8242 */
8243 if (!index->hypothetical)
8244 {
8245 /* Lock should have already been obtained in plancat.c */
8246 indexRel = index_open(index->indexoid, NoLock);
8247 ginGetStats(indexRel, &ginStats);
8248 index_close(indexRel, NoLock);
8249 }
8250 else
8251 {
8252 memset(&ginStats, 0, sizeof(ginStats));
8253 }
8254
8255 /*
8256 * Assuming we got valid (nonzero) stats at all, nPendingPages can be
8257 * trusted, but the other fields are data as of the last VACUUM. We can
8258 * scale them up to account for growth since then, but that method only
8259 * goes so far; in the worst case, the stats might be for a completely
8260 * empty index, and scaling them will produce pretty bogus numbers.
8261 * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
8262 * it's grown more than that, fall back to estimating things only from the
8263 * assumed-accurate index size. But we'll trust nPendingPages in any case
8264 * so long as it's not clearly insane, ie, more than the index size.
8265 */
8266 if (ginStats.nPendingPages < numPages)
8267 numPendingPages = ginStats.nPendingPages;
8268 else
8269 numPendingPages = 0;
8270
8271 if (numPages > 0 && ginStats.nTotalPages <= numPages &&
8272 ginStats.nTotalPages > numPages / 4 &&
8273 ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
8274 {
8275 /*
8276 * OK, the stats seem close enough to sane to be trusted. But we
8277 * still need to scale them by the ratio numPages / nTotalPages to
8278 * account for growth since the last VACUUM.
8279 */
8280 double scale = numPages / ginStats.nTotalPages;
8281
8282 numEntryPages = ceil(ginStats.nEntryPages * scale);
8283 numDataPages = ceil(ginStats.nDataPages * scale);
8284 numEntries = ceil(ginStats.nEntries * scale);
8285 /* ensure we didn't round up too much */
8286 numEntryPages = Min(numEntryPages, numPages - numPendingPages);
8287 numDataPages = Min(numDataPages,
8288 numPages - numPendingPages - numEntryPages);
8289 }
8290 else
8291 {
8292 /*
8293 * We might get here because it's a hypothetical index, or an index
8294 * created pre-9.1 and never vacuumed since upgrading (in which case
8295 * its stats would read as zeroes), or just because it's grown too
8296 * much since the last VACUUM for us to put our faith in scaling.
8297 *
8298 * Invent some plausible internal statistics based on the index page
8299 * count (and clamp that to at least 10 pages, just in case). We
8300 * estimate that 90% of the index is entry pages, and the rest is data
8301 * pages. Estimate 100 entries per entry page; this is rather bogus
8302 * since it'll depend on the size of the keys, but it's more robust
8303 * than trying to predict the number of entries per heap tuple.
8304 */
8305 numPages = Max(numPages, 10);
8306 numEntryPages = floor((numPages - numPendingPages) * 0.90);
8307 numDataPages = numPages - numPendingPages - numEntryPages;
8308 numEntries = floor(numEntryPages * 100);
8309 }
8310
8311 /* In an empty index, numEntries could be zero. Avoid divide-by-zero */
8312 if (numEntries < 1)
8313 numEntries = 1;
8314
8315 /*
8316 * If the index is partial, AND the index predicate with the index-bound
8317 * quals to produce a more accurate idea of the number of rows covered by
8318 * the bound conditions.
8319 */
8320 selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
8321
8322 /* Estimate the fraction of main-table tuples that will be visited */
8323 *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
8324 index->rel->relid,
8325 JOIN_INNER,
8326 NULL);
8327
8328 /* fetch estimated page cost for tablespace containing index */
8329 get_tablespace_page_costs(index->reltablespace,
8330 &spc_random_page_cost,
8331 NULL);
8332
8333 /*
8334 * Generic assumption about index correlation: there isn't any.
8335 */
8336 *indexCorrelation = 0.0;
8337
8338 /*
8339 * Examine quals to estimate number of search entries & partial matches
8340 */
8341 memset(&counts, 0, sizeof(counts));
8342 counts.arrayScans = 1;
8343 matchPossible = true;
8344
8345 foreach(lc, path->indexclauses)
8346 {
8347 IndexClause *iclause = lfirst_node(IndexClause, lc);
8348 ListCell *lc2;
8349
8350 foreach(lc2, iclause->indexquals)
8351 {
8352 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
8353 Expr *clause = rinfo->clause;
8354
8355 if (IsA(clause, OpExpr))
8356 {
8357 matchPossible = gincost_opexpr(root,
8358 index,
8359 iclause->indexcol,
8360 (OpExpr *) clause,
8361 &counts);
8362 if (!matchPossible)
8363 break;
8364 }
8365 else if (IsA(clause, ScalarArrayOpExpr))
8366 {
8367 matchPossible = gincost_scalararrayopexpr(root,
8368 index,
8369 iclause->indexcol,
8370 (ScalarArrayOpExpr *) clause,
8371 numEntries,
8372 &counts);
8373 if (!matchPossible)
8374 break;
8375 }
8376 else
8377 {
8378 /* shouldn't be anything else for a GIN index */
8379 elog(ERROR, "unsupported GIN indexqual type: %d",
8380 (int) nodeTag(clause));
8381 }
8382 }
8383 }
8384
8385 /* Fall out if there were any provably-unsatisfiable quals */
8386 if (!matchPossible)
8387 {
8388 *indexStartupCost = 0;
8389 *indexTotalCost = 0;
8390 *indexSelectivity = 0;
8391 return;
8392 }
8393
8394 /*
8395 * If attribute has a full scan and at the same time doesn't have normal
8396 * scan, then we'll have to scan all non-null entries of that attribute.
8397 * Currently, we don't have per-attribute statistics for GIN. Thus, we
8398 * must assume the whole GIN index has to be scanned in this case.
8399 */
8400 fullIndexScan = false;
8401 for (i = 0; i < index->nkeycolumns; i++)
8402 {
8403 if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i])
8404 {
8405 fullIndexScan = true;
8406 break;
8407 }
8408 }
8409
8410 if (fullIndexScan || indexQuals == NIL)
8411 {
8412 /*
8413 * Full index scan will be required. We treat this as if every key in
8414 * the index had been listed in the query; is that reasonable?
8415 */
8416 counts.partialEntries = 0;
8417 counts.exactEntries = numEntries;
8418 counts.searchEntries = numEntries;
8419 }
8420
8421 /* Will we have more than one iteration of a nestloop scan? */
8422 outer_scans = loop_count;
8423
8424 /*
8425 * Compute cost to begin scan, first of all, pay attention to pending
8426 * list.
8427 */
8428 entryPagesFetched = numPendingPages;
8429
8430 /*
8431 * Estimate number of entry pages read. We need to do
8432 * counts.searchEntries searches. Use a power function as it should be,
8433 * but tuples on leaf pages usually is much greater. Here we include all
8434 * searches in entry tree, including search of first entry in partial
8435 * match algorithm
8436 */
8437 entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
8438
8439 /*
8440 * Add an estimate of entry pages read by partial match algorithm. It's a
8441 * scan over leaf pages in entry tree. We haven't any useful stats here,
8442 * so estimate it as proportion. Because counts.partialEntries is really
8443 * pretty bogus (see code above), it's possible that it is more than
8444 * numEntries; clamp the proportion to ensure sanity.
8445 */
8446 partialScale = counts.partialEntries / numEntries;
8447 partialScale = Min(partialScale, 1.0);
8448
8449 entryPagesFetched += ceil(numEntryPages * partialScale);
8450
8451 /*
8452 * Partial match algorithm reads all data pages before doing actual scan,
8453 * so it's a startup cost. Again, we haven't any useful stats here, so
8454 * estimate it as proportion.
8455 */
8456 dataPagesFetched = ceil(numDataPages * partialScale);
8457
8458 *indexStartupCost = 0;
8459 *indexTotalCost = 0;
8460
8461 /*
8462 * Add a CPU-cost component to represent the costs of initial entry btree
8463 * descent. We don't charge any I/O cost for touching upper btree levels,
8464 * since they tend to stay in cache, but we still have to do about log2(N)
8465 * comparisons to descend a btree of N leaf tuples. We charge one
8466 * cpu_operator_cost per comparison.
8467 *
8468 * If there are ScalarArrayOpExprs, charge this once per SA scan. The
8469 * ones after the first one are not startup cost so far as the overall
8470 * plan is concerned, so add them only to "total" cost.
8471 */
8472 if (numEntries > 1) /* avoid computing log(0) */
8473 {
8474 descentCost = ceil(log(numEntries) / log(2.0)) * cpu_operator_cost;
8475 *indexStartupCost += descentCost * counts.searchEntries;
8476 *indexTotalCost += counts.arrayScans * descentCost * counts.searchEntries;
8477 }
8478
8479 /*
8480 * Add a cpu cost per entry-page fetched. This is not amortized over a
8481 * loop.
8482 */
8483 *indexStartupCost += entryPagesFetched * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8484 *indexTotalCost += entryPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8485
8486 /*
8487 * Add a cpu cost per data-page fetched. This is also not amortized over a
8488 * loop. Since those are the data pages from the partial match algorithm,
8489 * charge them as startup cost.
8490 */
8491 *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * dataPagesFetched;
8492
8493 /*
8494 * Since we add the startup cost to the total cost later on, remove the
8495 * initial arrayscan from the total.
8496 */
8497 *indexTotalCost += dataPagesFetched * (counts.arrayScans - 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8498
8499 /*
8500 * Calculate cache effects if more than one scan due to nestloops or array
8501 * quals. The result is pro-rated per nestloop scan, but the array qual
8502 * factor shouldn't be pro-rated (compare genericcostestimate).
8503 */
8504 if (outer_scans > 1 || counts.arrayScans > 1)
8505 {
8506 entryPagesFetched *= outer_scans * counts.arrayScans;
8507 entryPagesFetched = index_pages_fetched(entryPagesFetched,
8508 (BlockNumber) numEntryPages,
8509 numEntryPages, root);
8510 entryPagesFetched /= outer_scans;
8511 dataPagesFetched *= outer_scans * counts.arrayScans;
8512 dataPagesFetched = index_pages_fetched(dataPagesFetched,
8513 (BlockNumber) numDataPages,
8514 numDataPages, root);
8515 dataPagesFetched /= outer_scans;
8516 }
8517
8518 /*
8519 * Here we use random page cost because logically-close pages could be far
8520 * apart on disk.
8521 */
8522 *indexStartupCost += (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
8523
8524 /*
8525 * Now compute the number of data pages fetched during the scan.
8526 *
8527 * We assume every entry to have the same number of items, and that there
8528 * is no overlap between them. (XXX: tsvector and array opclasses collect
8529 * statistics on the frequency of individual keys; it would be nice to use
8530 * those here.)
8531 */
8532 dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
8533
8534 /*
8535 * If there is a lot of overlap among the entries, in particular if one of
8536 * the entries is very frequent, the above calculation can grossly
8537 * under-estimate. As a simple cross-check, calculate a lower bound based
8538 * on the overall selectivity of the quals. At a minimum, we must read
8539 * one item pointer for each matching entry.
8540 *
8541 * The width of each item pointer varies, based on the level of
8542 * compression. We don't have statistics on that, but an average of
8543 * around 3 bytes per item is fairly typical.
8544 */
8545 dataPagesFetchedBySel = ceil(*indexSelectivity *
8546 (numTuples / (BLCKSZ / 3)));
8547 if (dataPagesFetchedBySel > dataPagesFetched)
8548 dataPagesFetched = dataPagesFetchedBySel;
8549
8550 /* Add one page cpu-cost to the startup cost */
8551 *indexStartupCost += DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost * counts.searchEntries;
8552
8553 /*
8554 * Add once again a CPU-cost for those data pages, before amortizing for
8555 * cache.
8556 */
8557 *indexTotalCost += dataPagesFetched * counts.arrayScans * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
8558
8559 /* Account for cache effects, the same as above */
8560 if (outer_scans > 1 || counts.arrayScans > 1)
8561 {
8562 dataPagesFetched *= outer_scans * counts.arrayScans;
8563 dataPagesFetched = index_pages_fetched(dataPagesFetched,
8564 (BlockNumber) numDataPages,
8565 numDataPages, root);
8566 dataPagesFetched /= outer_scans;
8567 }
8568
8569 /* And apply random_page_cost as the cost per page */
8570 *indexTotalCost += *indexStartupCost +
8571 dataPagesFetched * spc_random_page_cost;
8572
8573 /*
8574 * Add on index qual eval costs, much as in genericcostestimate. We charge
8575 * cpu but we can disregard indexorderbys, since GIN doesn't support
8576 * those.
8577 */
8578 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
8579 qual_op_cost = cpu_operator_cost * list_length(indexQuals);
8580
8581 *indexStartupCost += qual_arg_cost;
8582 *indexTotalCost += qual_arg_cost;
8583
8584 /*
8585 * Add a cpu cost per search entry, corresponding to the actual visited
8586 * entries.
8587 */
8588 *indexTotalCost += (counts.searchEntries * counts.arrayScans) * (qual_op_cost);
8589 /* Now add a cpu cost per tuple in the posting lists / trees */
8590 *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost);
8591 *indexPages = dataPagesFetched;
8592}
uint32 BlockNumber
Definition: block.h:31
double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root)
Definition: costsize.c:882
double cpu_index_tuple_cost
Definition: costsize.c:133
void ginGetStats(Relation index, GinStatsData *stats)
Definition: ginutil.c:628
int i
Definition: isn.c:77
static int list_length(const List *l)
Definition: pg_list.h:152
static int scale
Definition: pgbench.c:182
static bool gincost_scalararrayopexpr(PlannerInfo *root, IndexOptInfo *index, int indexcol, ScalarArrayOpExpr *clause, double numIndexEntries, GinQualCounts *counts)
Definition: selfuncs.c:8092
static bool gincost_opexpr(PlannerInfo *root, IndexOptInfo *index, int indexcol, OpExpr *clause, GinQualCounts *counts)
Definition: selfuncs.c:8042
bool attHasNormalScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:7915
double exactEntries
Definition: selfuncs.c:7917
double arrayScans
Definition: selfuncs.c:7919
double partialEntries
Definition: selfuncs.c:7916
bool attHasFullScan[INDEX_MAX_KEYS]
Definition: selfuncs.c:7914
double searchEntries
Definition: selfuncs.c:7918
BlockNumber nDataPages
Definition: gin.h:60
BlockNumber nPendingPages
Definition: gin.h:57
BlockNumber nEntryPages
Definition: gin.h:59
int64 nEntries
Definition: gin.h:61
BlockNumber nTotalPages
Definition: gin.h:58

References add_predicate_to_index_quals(), GinQualCounts::arrayScans, GinQualCounts::attHasFullScan, GinQualCounts::attHasNormalScan, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, elog, ERROR, GinQualCounts::exactEntries, get_quals_from_indexclauses(), get_tablespace_page_costs(), gincost_opexpr(), gincost_scalararrayopexpr(), ginGetStats(), i, index_close(), index_open(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, IndexClause::indexcol, IndexPath::indexinfo, IndexClause::indexquals, IsA, JOIN_INNER, lfirst_node, list_length(), Max, Min, GinStatsData::nDataPages, GinStatsData::nEntries, GinStatsData::nEntryPages, NIL, nodeTag, NoLock, GinStatsData::nPendingPages, GinStatsData::nTotalPages, GinQualCounts::partialEntries, root, scale, and GinQualCounts::searchEntries.

Referenced by ginhandler().
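
To make the entry-tree estimate above concrete (hypothetical inputs only): with 1,000 entry pages, pow(numEntryPages, 0.15) is about 2.8, so each of the searchEntries descents is assumed to touch roughly three entry pages, on top of the pending-list pages.

#include <math.h>
#include <stdio.h>

/* Sketch of the GIN entry-page estimate with made-up inputs. */
int
main(void)
{
    double numEntryPages   = 1000.0;
    double numPendingPages = 10.0;
    double numEntries      = 100000.0;
    double searchEntries   = 5.0;   /* counts.searchEntries */
    double partialEntries  = 0.0;   /* counts.partialEntries */

    double entryPagesFetched = numPendingPages;
    entryPagesFetched += ceil(searchEntries * rint(pow(numEntryPages, 0.15)));

    double partialScale = fmin(partialEntries / numEntries, 1.0);
    entryPagesFetched += ceil(numEntryPages * partialScale);

    printf("entryPagesFetched = %.0f\n", entryPagesFetched);  /* 10 + 15 + 0 */
    return 0;
}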

◆ gistcostestimate()

void gistcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 7798 of file selfuncs.c.

7802{
7803 IndexOptInfo *index = path->indexinfo;
7804 GenericCosts costs = {0};
7805 Cost descentCost;
7806
7807 genericcostestimate(root, path, loop_count, &costs);
7808
7809 /*
7810 * We model index descent costs similarly to those for btree, but to do
7811 * that we first need an idea of the tree height. We somewhat arbitrarily
7812 * assume that the fanout is 100, meaning the tree height is at most
7813 * log100(index->pages).
7814 *
7815 * Although this computation isn't really expensive enough to require
7816 * caching, we might as well use index->tree_height to cache it.
7817 */
7818 if (index->tree_height < 0) /* unknown? */
7819 {
7820 if (index->pages > 1) /* avoid computing log(0) */
7821 index->tree_height = (int) (log(index->pages) / log(100.0));
7822 else
7823 index->tree_height = 0;
7824 }
7825
7826 /*
7827 * Add a CPU-cost component to represent the costs of initial descent. We
7828 * just use log(N) here not log2(N) since the branching factor isn't
7829 * necessarily two anyway. As for btree, charge once per SA scan.
7830 */
7831 if (index->tuples > 1) /* avoid computing log(0) */
7832 {
7833 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7834 costs.indexStartupCost += descentCost;
7835 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7836 }
7837
7838 /*
7839 * Likewise add a per-page charge, calculated the same as for btrees.
7840 */
7841 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7842 costs.indexStartupCost += descentCost;
7843 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7844
7845 *indexStartupCost = costs.indexStartupCost;
7846 *indexTotalCost = costs.indexTotalCost;
7847 *indexSelectivity = costs.indexSelectivity;
7848 *indexCorrelation = costs.indexCorrelation;
7849 *indexPages = costs.numIndexPages;
7850}

References cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, and root.

Referenced by gisthandler().
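
The fanout-100 height heuristic above is easy to check by hand (hypothetical page counts): a 500-page index is assumed to be one level deep, a 20,000-page index two levels.

#include <math.h>
#include <stdio.h>

/* Sketch of the log100(pages) tree-height heuristic shared by the GiST
 * and SP-GiST estimators, with made-up page counts. */
int
main(void)
{
    double pages[] = {500.0, 20000.0};

    for (int i = 0; i < 2; i++)
    {
        int tree_height = (pages[i] > 1) ? (int) (log(pages[i]) / log(100.0)) : 0;
        printf("pages=%.0f -> assumed tree_height=%d\n", pages[i], tree_height);
    }
    return 0;
}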

◆ hashcostestimate()

void hashcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 7756 of file selfuncs.c.

7760{
7761 GenericCosts costs = {0};
7762
7763 genericcostestimate(root, path, loop_count, &costs);
7764
7765 /*
7766 * A hash index has no descent costs as such, since the index AM can go
7767 * directly to the target bucket after computing the hash value. There
7768 * are a couple of other hash-specific costs that we could conceivably add
7769 * here, though:
7770 *
7771 * Ideally we'd charge spc_random_page_cost for each page in the target
7772 * bucket, not just the numIndexPages pages that genericcostestimate
7773 * thought we'd visit. However in most cases we don't know which bucket
7774 * that will be. There's no point in considering the average bucket size
7775 * because the hash AM makes sure that's always one page.
7776 *
7777 * Likewise, we could consider charging some CPU for each index tuple in
7778 * the bucket, if we knew how many there were. But the per-tuple cost is
7779 * just a hash value comparison, not a general datatype-dependent
7780 * comparison, so any such charge ought to be quite a bit less than
7781 * cpu_operator_cost; which makes it probably not worth worrying about.
7782 *
7783 * A bigger issue is that chance hash-value collisions will result in
7784 * wasted probes into the heap. We don't currently attempt to model this
7785 * cost on the grounds that it's rare, but maybe it's not rare enough.
7786 * (Any fix for this ought to consider the generic lossy-operator problem,
7787 * though; it's not entirely hash-specific.)
7788 */
7789
7790 *indexStartupCost = costs.indexStartupCost;
7791 *indexTotalCost = costs.indexTotalCost;
7792 *indexSelectivity = costs.indexSelectivity;
7793 *indexCorrelation = costs.indexCorrelation;
7794 *indexPages = costs.numIndexPages;
7795}

References genericcostestimate(), GenericCosts::indexCorrelation, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, GenericCosts::numIndexPages, and root.

Referenced by hashhandler().

◆ spgcostestimate()

void spgcostestimate ( struct PlannerInfo *root,
struct IndexPath *path,
double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
double *indexCorrelation,
double *indexPages
)

Definition at line 7853 of file selfuncs.c.

7857{
7858 IndexOptInfo *index = path->indexinfo;
7859 GenericCosts costs = {0};
7860 Cost descentCost;
7861
7862 genericcostestimate(root, path, loop_count, &costs);
7863
7864 /*
7865 * We model index descent costs similarly to those for btree, but to do
7866 * that we first need an idea of the tree height. We somewhat arbitrarily
7867 * assume that the fanout is 100, meaning the tree height is at most
7868 * log100(index->pages).
7869 *
7870 * Although this computation isn't really expensive enough to require
7871 * caching, we might as well use index->tree_height to cache it.
7872 */
7873 if (index->tree_height < 0) /* unknown? */
7874 {
7875 if (index->pages > 1) /* avoid computing log(0) */
7876 index->tree_height = (int) (log(index->pages) / log(100.0));
7877 else
7878 index->tree_height = 0;
7879 }
7880
7881 /*
7882 * Add a CPU-cost component to represent the costs of initial descent. We
7883 * just use log(N) here not log2(N) since the branching factor isn't
7884 * necessarily two anyway. As for btree, charge once per SA scan.
7885 */
7886 if (index->tuples > 1) /* avoid computing log(0) */
7887 {
7888 descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7889 costs.indexStartupCost += descentCost;
7890 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7891 }
7892
7893 /*
7894 * Likewise add a per-page charge, calculated the same as for btrees.
7895 */
7896 descentCost = (index->tree_height + 1) * DEFAULT_PAGE_CPU_MULTIPLIER * cpu_operator_cost;
7897 costs.indexStartupCost += descentCost;
7898 costs.indexTotalCost += costs.num_sa_scans * descentCost;
7899
7900 *indexStartupCost = costs.indexStartupCost;
7901 *indexTotalCost = costs.indexTotalCost;
7902 *indexSelectivity = costs.indexSelectivity;
7903 *indexCorrelation = costs.indexCorrelation;
7904 *indexPages = costs.numIndexPages;
7905}

References cpu_operator_cost, DEFAULT_PAGE_CPU_MULTIPLIER, genericcostestimate(), GenericCosts::indexCorrelation, IndexPath::indexinfo, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, and root.

Referenced by spghandler().