#include "access/htup.h"
#include "fmgr.h"
#include "nodes/pathnodes.h"

Include dependency graph for selfuncs.h:

This graph shows which files directly or indirectly include this file:

Data Structures
struct	EstimationInfo

struct	VariableStatData

struct	GenericCosts

Macros
#define	DEFAULT_EQ_SEL 0.005

#define	DEFAULT_INEQ_SEL 0.3333333333333333

#define	DEFAULT_RANGE_INEQ_SEL 0.005

#define	DEFAULT_MULTIRANGE_INEQ_SEL 0.005

#define	DEFAULT_MATCH_SEL 0.005

#define	DEFAULT_MATCHING_SEL 0.010

#define	DEFAULT_NUM_DISTINCT 200

#define	DEFAULT_UNK_SEL 0.005

#define	DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)

#define	CLAMP_PROBABILITY(p)

#define	SELFLAG_USED_DEFAULT

#define	ReleaseVariableStats(vardata)

Typedefs
typedef struct EstimationInfo	EstimationInfo

typedef struct VariableStatData	VariableStatData

typedef bool(*	get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

typedef bool(*	get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Functions
void	examine_variable (PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)

bool	statistic_proc_security_check (VariableStatData *vardata, Oid func_oid)

bool	get_restriction_variable (PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)

void	get_join_variables (PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)

double	get_variable_numdistinct (VariableStatData *vardata, bool *isdefault)

double	mcv_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)

double	histogram_selectivity (VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size)

double	generic_restriction_selectivity (PlannerInfo *root, Oid oproid, Oid collation, List *args, int varRelid, double default_selectivity)

double	ineq_histogram_selectivity (PlannerInfo *root, VariableStatData *vardata, Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq, Oid collation, Datum constval, Oid consttype)

double	var_eq_const (VariableStatData *vardata, Oid oproid, Oid collation, Datum constval, bool constisnull, bool varonleft, bool negate)

double	var_eq_non_const (VariableStatData *vardata, Oid oproid, Oid collation, Node *other, bool varonleft, bool negate)

Selectivity	boolvarsel (PlannerInfo *root, Node *arg, int varRelid)

Selectivity	booltestsel (PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)

Selectivity	nulltestsel (PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)

Selectivity	scalararraysel (PlannerInfo *root, ScalarArrayOpExpr *clause, bool is_join_clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)

double	estimate_array_length (PlannerInfo *root, Node *arrayexpr)

Selectivity	rowcomparesel (PlannerInfo *root, RowCompareExpr *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)

void	mergejoinscansel (PlannerInfo *root, Node *clause, Oid opfamily, CompareType cmptype, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend)

double	estimate_num_groups (PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo)

List *	estimate_multivariate_bucketsize (PlannerInfo *root, RelOptInfo *inner, List *hashclauses, Selectivity *innerbucketsize)

void	estimate_hash_bucket_stats (PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, Selectivity *bucketsize_frac)

double	estimate_hashagg_tablesize (PlannerInfo *root, Path *path, const AggClauseCosts *agg_costs, double dNumGroups)

List *	get_quals_from_indexclauses (List *indexclauses)

Cost	index_other_operands_eval_cost (PlannerInfo *root, List *indexquals)

List *	add_predicate_to_index_quals (IndexOptInfo *index, List *indexQuals)

void	genericcostestimate (PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts *costs)

Selectivity	scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)

Variables
PGDLLIMPORT get_relation_stats_hook_type	get_relation_stats_hook

PGDLLIMPORT get_index_stats_hook_type	get_index_stats_hook

Macro Definition Documentation

◆ CLAMP_PROBABILITY

#define CLAMP_PROBABILITY ( p )

Value:

    do { \
        if (p < 0.0) \
            p = 0.0; \
        else if (p > 1.0) \
            p = 1.0; \
    } while (0)

Definition at line 63 of file selfuncs.h.

◆ DEFAULT_EQ_SEL

#define DEFAULT_EQ_SEL 0.005

Definition at line 34 of file selfuncs.h.

◆ DEFAULT_INEQ_SEL

#define DEFAULT_INEQ_SEL 0.3333333333333333

Definition at line 37 of file selfuncs.h.

◆ DEFAULT_MATCH_SEL

#define DEFAULT_MATCH_SEL 0.005

Definition at line 46 of file selfuncs.h.

◆ DEFAULT_MATCHING_SEL

#define DEFAULT_MATCHING_SEL 0.010

Definition at line 49 of file selfuncs.h.

◆ DEFAULT_MULTIRANGE_INEQ_SEL

#define DEFAULT_MULTIRANGE_INEQ_SEL 0.005

Definition at line 43 of file selfuncs.h.

◆ DEFAULT_NOT_UNK_SEL

#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)

Definition at line 56 of file selfuncs.h.

◆ DEFAULT_NUM_DISTINCT

#define DEFAULT_NUM_DISTINCT 200

Definition at line 52 of file selfuncs.h.

◆ DEFAULT_RANGE_INEQ_SEL

#define DEFAULT_RANGE_INEQ_SEL 0.005

Definition at line 40 of file selfuncs.h.

◆ DEFAULT_UNK_SEL

#define DEFAULT_UNK_SEL 0.005

Definition at line 55 of file selfuncs.h.

◆ ReleaseVariableStats

#define ReleaseVariableStats ( vardata )

Value:

    do { \
        if (HeapTupleIsValid((vardata).statsTuple)) \
            (vardata).freefunc((vardata).statsTuple); \
    } while(0)

Definition at line 100 of file selfuncs.h.

◆ SELFLAG_USED_DEFAULT

#define SELFLAG_USED_DEFAULT

Value:

                                                 (1 << 0)   /* Estimation fell back on one
                                                 * of the DEFAULTs as defined
                                                 * above. */

Definition at line 76 of file selfuncs.h.

Typedef Documentation

◆ EstimationInfo

typedef struct EstimationInfo EstimationInfo

◆ get_index_stats_hook_type

typedef bool(* get_index_stats_hook_type) (PlannerInfo *root, Oid indexOid, AttrNumber indexattnum, VariableStatData *vardata)

Definition at line 144 of file selfuncs.h.

◆ get_relation_stats_hook_type

typedef bool(* get_relation_stats_hook_type) (PlannerInfo *root, RangeTblEntry *rte, AttrNumber attnum, VariableStatData *vardata)

Definition at line 139 of file selfuncs.h.

◆ VariableStatData

typedef struct VariableStatData VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

List * add_predicate_to_index_quals	(	IndexOptInfo *	index,
		List *	indexQuals
	)

Definition at line 7158 of file selfuncs.c.

{
    /*
     * Build a qual list consisting of every clause of index->indpred that is
     * not already implied by indexQuals, followed by indexQuals itself.
     * When the index has no predicate, indexQuals is returned unchanged.
     */
    List       *extra = NIL;
    ListCell   *cell;

    /* No predicate clauses: nothing to add */
    if (index->indpred == NIL)
        return indexQuals;

    foreach(cell, index->indpred)
    {
        /* Test each predicate clause on its own, wrapped in a one-item list */
        List       *single = list_make1((Node *) lfirst(cell));

        /* Clauses the index quals already imply contribute nothing new */
        if (predicate_implied_by(single, indexQuals, false))
            continue;

        extra = list_concat(extra, single);
    }

    /* Non-implied predicate clauses come first, then the original quals */
    return list_concat(extra, indexQuals);
}

References lfirst, list_concat(), list_make1, NIL, and predicate_implied_by().

Referenced by btcostestimate(), genericcostestimate(), and gincostestimate().

◆ booltestsel()

Selectivity booltestsel	(	PlannerInfo *	root,
		BoolTestType	booltesttype,
		Node *	arg,
		int	varRelid,
		JoinType	jointype,
		SpecialJoinInfo *	sjinfo
	)

Definition at line 1545 of file selfuncs.c.

{
    /*
     * Estimate the selectivity of a boolean test (IS [NOT] TRUE / FALSE /
     * UNKNOWN) applied to arg.  Three situations are distinguished:
     * statistics with a usable MCV slot, statistics with only a null
     * fraction, and no statistics at all.  The result is clamped to [0, 1].
     */
    VariableStatData stat_data;
    double      result;

    examine_variable(root, arg, varRelid, &stat_data);

    if (!HeapTupleIsValid(stat_data.statsTuple))
    {
        /*
         * No variable statistics for the argument.  IS [NOT] UNKNOWN falls
         * back on the default constants; for the other tests, hand the
         * argument to clause_selectivity and treat it as strictly TRUE or
         * FALSE, disregarding the possibility of NULL.
         */
        switch (booltesttype)
        {
            case IS_UNKNOWN:
                result = DEFAULT_UNK_SEL;
                break;
            case IS_NOT_UNKNOWN:
                result = DEFAULT_NOT_UNK_SEL;
                break;
            case IS_TRUE:
            case IS_NOT_FALSE:
                result = (double) clause_selectivity(root, arg,
                                                     varRelid,
                                                     jointype, sjinfo);
                break;
            case IS_FALSE:
            case IS_NOT_TRUE:
                result = 1.0 - (double) clause_selectivity(root, arg,
                                                           varRelid,
                                                           jointype, sjinfo);
                break;
            default:
                elog(ERROR, "unrecognized booltesttype: %d",
                     (int) booltesttype);
                result = 0.0;   /* Keep compiler quiet */
                break;
        }
    }
    else
    {
        Form_pg_statistic stat_form = (Form_pg_statistic) GETSTRUCT(stat_data.statsTuple);
        double      null_frac = stat_form->stanullfrac;
        AttStatsSlot mcv_slot;
        bool        have_mcv;

        /* The MCV slot is only usable if it carries at least one frequency */
        have_mcv = get_attstatsslot(&mcv_slot, stat_data.statsTuple,
                                    STATISTIC_KIND_MCV, InvalidOid,
                                    ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
            && mcv_slot.nnumbers > 0;

        if (have_mcv)
        {
            double      true_frac;
            double      false_frac;

            /*
             * The first MCV entry is either TRUE or FALSE.  If it is TRUE
             * its frequency is the TRUE fraction directly; otherwise the
             * TRUE fraction is what remains after removing that frequency
             * and the NULLs.
             */
            true_frac = DatumGetBool(mcv_slot.values[0])
                ? mcv_slot.numbers[0]
                : 1.0 - mcv_slot.numbers[0] - null_frac;

            /* Whatever is neither TRUE nor NULL must be FALSE */
            false_frac = 1.0 - true_frac - null_frac;

            switch (booltesttype)
            {
                case IS_UNKNOWN:
                    /* NULLs only */
                    result = null_frac;
                    break;
                case IS_NOT_UNKNOWN:
                    /* everything except NULLs */
                    result = 1.0 - null_frac;
                    break;
                case IS_TRUE:
                    /* TRUE values only */
                    result = true_frac;
                    break;
                case IS_NOT_TRUE:
                    /* everything except TRUE values */
                    result = 1.0 - true_frac;
                    break;
                case IS_FALSE:
                    /* FALSE values only */
                    result = false_frac;
                    break;
                case IS_NOT_FALSE:
                    /* everything except FALSE values */
                    result = 1.0 - false_frac;
                    break;
                default:
                    elog(ERROR, "unrecognized booltesttype: %d",
                         (int) booltesttype);
                    result = 0.0;   /* Keep compiler quiet */
                    break;
            }

            free_attstatsslot(&mcv_slot);
        }
        else
        {
            /*
             * Only the null fraction is known.  Use it as-is for IS [NOT]
             * UNKNOWN; for the remaining tests split the non-NULL rows
             * evenly between TRUE and FALSE.
             */
            switch (booltesttype)
            {
                case IS_UNKNOWN:
                    /* NULLs only */
                    result = null_frac;
                    break;
                case IS_NOT_UNKNOWN:
                    /* everything except NULLs */
                    result = 1.0 - null_frac;
                    break;
                case IS_TRUE:
                case IS_FALSE:
                    /* half of the non-NULL rows */
                    result = (1.0 - null_frac) / 2.0;
                    break;
                case IS_NOT_TRUE:
                case IS_NOT_FALSE:
                    /*
                     * NULLs plus half of the non-NULL rows, i.e.
                     * null_frac + (1.0 - null_frac) / 2.0
                     */
                    result = (null_frac + 1.0) / 2.0;
                    break;
                default:
                    elog(ERROR, "unrecognized booltesttype: %d",
                         (int) booltesttype);
                    result = 0.0;   /* Keep compiler quiet */
                    break;
            }
        }
    }

    ReleaseVariableStats(stat_data);

    /* The arithmetic above should stay in range; enforce it regardless */
    CLAMP_PROBABILITY(result);

    return (Selectivity) result;
}

References arg, ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, clause_selectivity(), DatumGetBool(), DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), free_attstatsslot(), get_attstatsslot(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, IS_FALSE, IS_NOT_FALSE, IS_NOT_TRUE, IS_NOT_UNKNOWN, IS_TRUE, IS_UNKNOWN, AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, root, VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by clause_selectivity_ext().

◆ boolvarsel()

Selectivity boolvarsel	(	PlannerInfo *	root,
		Node *	arg,
		int	varRelid
	)

Definition at line 1517 of file selfuncs.c.

{
    /*
     * Estimate the selectivity of a bare boolean expression used as a
     * clause.
     */
    VariableStatData stat_data;
    double      result;

    examine_variable(root, arg, varRelid, &stat_data);

    /*
     * With statistics available, treat the boolean variable V exactly like
     * the clause V = 't' and let var_eq_const estimate it; without
     * statistics fall back on the default of 0.5.
     */
    result = HeapTupleIsValid(stat_data.statsTuple)
        ? var_eq_const(&stat_data, BooleanEqualOperator, InvalidOid,
                       BoolGetDatum(true), false, true, false)
        : 0.5;

    ReleaseVariableStats(stat_data);

    return result;
}

References arg, BoolGetDatum(), examine_variable(), HeapTupleIsValid, InvalidOid, ReleaseVariableStats, root, VariableStatData::statsTuple, and var_eq_const().

Referenced by clause_selectivity_ext().

◆ estimate_array_length()

double estimate_array_length	(	PlannerInfo *	root,
		Node *	arrayexpr
	)

Definition at line 2144 of file selfuncs.c.

{
    /*
     * Estimate how many elements the array produced by arrayexpr has.  An
     * exact count is returned for array constants and for one-dimensional
     * ArrayExpr nodes; otherwise statistics are consulted when a planner
     * root is supplied, and failing that a fixed guess of 10 is returned.
     */

    /* Peel off any binary-compatible relabeling before inspecting the node */
    arrayexpr = strip_array_coercion(arrayexpr);

    if (arrayexpr != NULL)
    {
        if (IsA(arrayexpr, Const))
        {
            Const      *con = (Const *) arrayexpr;
            ArrayType  *arr;

            /* A NULL array constant contributes no elements */
            if (con->constisnull)
                return 0;

            arr = DatumGetArrayTypeP(con->constvalue);
            return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
        }

        /* A non-multidimensional ARRAY[] construct: count its element list */
        if (IsA(arrayexpr, ArrayExpr) &&
            !((ArrayExpr *) arrayexpr)->multidims)
            return list_length(((ArrayExpr *) arrayexpr)->elements);

        if (root)
        {
            /* Try to find statistics describing the expression */
            VariableStatData stat_data;
            AttStatsSlot dechist;
            double      avg_count = 0;

            examine_variable(root, arrayexpr, 0, &stat_data);

            /*
             * The last stanumbers entry of the DECHIST slot holds the
             * average element count.  Strictly speaking that is the average
             * number of *distinct* elements, so it may be on the low side.
             */
            if (HeapTupleIsValid(stat_data.statsTuple) &&
                get_attstatsslot(&dechist, stat_data.statsTuple,
                                 STATISTIC_KIND_DECHIST, InvalidOid,
                                 ATTSTATSSLOT_NUMBERS))
            {
                if (dechist.nnumbers > 0)
                    avg_count = clamp_row_est(dechist.numbers[dechist.nnumbers - 1]);
                free_attstatsslot(&dechist);
            }
            ReleaseVariableStats(stat_data);

            if (avg_count > 0)
                return avg_count;
        }
    }

    /* Nothing better available; this default should match scalararraysel */
    return 10;
}

References ARR_DIMS, ARR_NDIM, ArrayGetNItems(), ATTSTATSSLOT_NUMBERS, clamp_row_est(), DatumGetArrayTypeP, examine_variable(), free_attstatsslot(), get_attstatsslot(), HeapTupleIsValid, InvalidOid, IsA, list_length(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, ReleaseVariableStats, root, VariableStatData::statsTuple, and strip_array_coercion().

Referenced by array_unnest_support(), btcostestimate(), cost_qual_eval_walker(), cost_tidscan(), genericcostestimate(), and gincost_scalararrayopexpr().

◆ estimate_hash_bucket_stats()

void estimate_hash_bucket_stats	(	PlannerInfo *	root,
		Node *	hashkey,
		double	nbuckets,
		Selectivity *	mcv_freq,
		Selectivity *	bucketsize_frac
	)

Definition at line 4057 of file selfuncs.c.

{
    /*
     * Produce two outputs for the given hash key: *mcv_freq, the frequency
     * of its most common value (0.0 when unknown), and *bucketsize_frac, an
     * estimate of the fraction of rows landing in one hash bucket when
     * nbuckets buckets are used.
     */
    VariableStatData stat_data;
    AttStatsSlot mcv_slot;
    bool        ndistinct_is_default;
    double      null_frac;
    double      distinct;
    double      mean_freq;
    double      frac;

    examine_variable(root, hashkey, 0, &stat_data);

    /* Assume no MCV information until the statistics say otherwise */
    *mcv_freq = 0.0;

    if (HeapTupleIsValid(stat_data.statsTuple) &&
        get_attstatsslot(&mcv_slot, stat_data.statsTuple,
                         STATISTIC_KIND_MCV, InvalidOid,
                         ATTSTATSSLOT_NUMBERS))
    {
        /* The first MCV entry belongs to the most common value */
        if (mcv_slot.nnumbers > 0)
            *mcv_freq = mcv_slot.numbers[0];
        free_attstatsslot(&mcv_slot);
    }

    /* How many distinct values does the key have? */
    distinct = get_variable_numdistinct(&stat_data, &ndistinct_is_default);

    /*
     * A defaulted ndistinct is not worth computing with.  Report 0.1, or
     * the MCV frequency if that is known to be larger, and stop here.
     */
    if (ndistinct_is_default)
    {
        *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
        ReleaseVariableStats(stat_data);
        return;
    }

    /* Fraction of NULLs, taken as zero when there is no stats tuple */
    null_frac = 0.0;
    if (HeapTupleIsValid(stat_data.statsTuple))
        null_frac = ((Form_pg_statistic) GETSTRUCT(stat_data.statsTuple))->stanullfrac;

    /* Average frequency of one distinct value in the raw relation */
    mean_freq = (1.0 - null_frac) / distinct;

    /*
     * Scale ndistinct by the relation's rows/tuples ratio to account for
     * restriction clauses.  This assumes the restrictions thin out the data
     * distribution uniformly.
     *
     * XXX A possibly better but much more expensive approach would be to
     * multiply by the selectivity of the rel's restriction clauses that
     * mention the target Var.
     */
    if (stat_data.rel && stat_data.rel->tuples > 0)
        distinct = clamp_row_est(distinct *
                                 (stat_data.rel->rows / stat_data.rel->tuples));

    /*
     * Start from 1/nbuckets when there are more distinct values than
     * buckets, and from 1/ndistinct otherwise.
     */
    frac = 1.0 / ((distinct > nbuckets) ? nbuckets : distinct);

    /* Inflate the estimate when the most common value is over-represented */
    if (mean_freq > 0.0 && *mcv_freq > mean_freq)
        frac = frac * (*mcv_freq / mean_freq);

    /*
     * The skew adjustment can easily overshoot, so clamp to a sane range.
     * The floor is slightly above zero because zero is not a sane result.
     */
    if (frac > 1.0)
        frac = 1.0;
    else if (frac < 1.0e-6)
        frac = 1.0e-6;

    *bucketsize_frac = (Selectivity) frac;

    ReleaseVariableStats(stat_data);
}

References ATTSTATSSLOT_NUMBERS, clamp_row_est(), examine_variable(), free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, Max, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, ReleaseVariableStats, root, RelOptInfo::rows, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by final_cost_hashjoin().

◆ estimate_hashagg_tablesize()

double estimate_hashagg_tablesize	(	PlannerInfo *	root,
		Path *	path,
		const AggClauseCosts *	agg_costs,
		double	dNumGroups
	)

Definition at line 4176 of file selfuncs.c.

{
    /*
     * Estimate the hash table size for a hashed aggregate as the per-entry
     * size reported by hash_agg_entry_size times the number of groups.
     */
    int         num_trans = list_length(root->aggtransinfos);
    Size        entry_size;

    entry_size = hash_agg_entry_size(num_trans,
                                     path->pathtarget->width,
                                     agg_costs->transitionSpace);

    /*
     * Fill-factor and the table's growth policy are deliberately left out
     * of this figure.  That is probably acceptable since the default
     * fill-factor is relatively high, and "double-in-size" growth would be
     * hard to account for meaningfully.
     */
    return entry_size * dNumGroups;
}

References hash_agg_entry_size(), list_length(), root, and AggClauseCosts::transitionSpace.

Referenced by consider_groupingsets_paths().

◆ estimate_multivariate_bucketsize()

List * estimate_multivariate_bucketsize	(	PlannerInfo *	root,
		RelOptInfo *	inner,
		List *	hashclauses,
		Selectivity *	innerbucketsize
	)

Definition at line 3798 of file selfuncs.c.

{
    /*
     * Try to estimate the inner-side bucket size for a set of hash clauses
     * using multivariate (extended) ndistinct statistics.
     *
     * The clauses are processed in passes.  Each pass collects the
     * inner-side expressions that reference one and the same single base
     * relation into a group and asks estimate_multivariate_ndistinct() for
     * an estimate on that group.  The largest estimate seen over all passes
     * is kept in ndistinct (starting from 1.0).
     *
     * On the normal exit path *innerbucketsize is set to 1.0 / ndistinct and
     * the returned list holds the RestrictInfos that were not covered by a
     * multivariate estimate.
     */
    List       *clauses = list_copy(hashclauses);
    List       *otherclauses = NIL;
    double      ndistinct = 1.0;

    /*
     * NOTE(review): this early exit returns the caller's list itself, does
     * not write *innerbucketsize, and leaves the copy made above unused.
     */
    if (list_length(hashclauses) <= 1)

        /*
         * Nothing to do for a single clause.  Could we employ univariate
         * extended stat here?
         */
        return hashclauses;

    while (clauses != NIL)
    {
        ListCell   *lc;
        int         relid = -1;
        List       *varinfos = NIL;
        List       *origin_rinfos = NIL;
        double      mvndistinct;
        List       *origin_varinfos;
        int         group_relid = -1;
        RelOptInfo *group_rel = NULL;
        ListCell   *lc1,
                   *lc2;

        /*
         * Find clauses, referencing the same single base relation and try to
         * estimate such a group with extended statistics.  Create varinfo for
         * an approved clause, push it to otherclauses, if it can't be
         * estimated here or ignore to process at the next iteration.
         */
        foreach(lc, clauses)
        {
            RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
            Node       *expr;
            Relids      relids;
            GroupVarInfo *varinfo;

            /*
             * Find the inner side of the join, which we need to estimate the
             * number of buckets.  Use outer_is_left because the
             * clause_sides_match_join routine has called on hash clauses.
             */
            relids = rinfo->outer_is_left ?
                rinfo->right_relids : rinfo->left_relids;
            expr = rinfo->outer_is_left ?
                get_rightop(rinfo->clause) : get_leftop(rinfo->clause);

            if (bms_get_singleton_member(relids, &relid) &&
                root->simple_rel_array[relid]->statlist != NIL)
            {
                bool        is_duplicate = false;

                /*
                 * This inner-side expression references only one relation.
                 * Extended statistics on this clause can exist.
                 */
                if (group_relid < 0)
                {
                    RangeTblEntry *rte = root->simple_rte_array[relid];

                    if (!rte || (rte->relkind != RELKIND_RELATION &&
                                 rte->relkind != RELKIND_MATVIEW &&
                                 rte->relkind != RELKIND_FOREIGN_TABLE &&
                                 rte->relkind != RELKIND_PARTITIONED_TABLE))
                    {
                        /* Extended statistics can't exist in principle */
                        otherclauses = lappend(otherclauses, rinfo);
                        clauses = foreach_delete_current(clauses, lc);
                        continue;
                    }

                    /* This clause's relation defines the group for this pass */
                    group_relid = relid;
                    group_rel = root->simple_rel_array[relid];
                }
                else if (group_relid != relid)

                    /*
                     * Being in the group forming state we don't need other
                     * clauses.
                     */
                    continue;

                /*
                 * We're going to add the new clause to the varinfos list.  We
                 * might re-use add_unique_group_var(), but we don't do so for
                 * two reasons.
                 *
                 * 1) We must keep the origin_rinfos list ordered exactly the
                 * same way as varinfos.
                 *
                 * 2) add_unique_group_var() is designed for
                 * estimate_num_groups(), where a larger number of groups is
                 * worse.   While estimating the number of hash buckets, we
                 * have the opposite: a lesser number of groups is worse.
                 * Therefore, we don't have to remove "known equal" vars: the
                 * removed var may valuably contribute to the multivariate
                 * statistics to grow the number of groups.
                 */

                /*
                 * Clear nullingrels to correctly match hash keys.  See
                 * add_unique_group_var()'s comment for details.
                 */
                expr = remove_nulling_relids(expr, root->outer_join_rels, NULL);

                /*
                 * Detect and exclude exact duplicates from the list of hash
                 * keys (like add_unique_group_var does).
                 */
                foreach(lc1, varinfos)
                {
                    varinfo = (GroupVarInfo *) lfirst(lc1);

                    if (!equal(expr, varinfo->var))
                        continue;

                    is_duplicate = true;
                    break;
                }

                if (is_duplicate)
                {
                    /*
                     * Skip exact duplicates. Adding them to the otherclauses
                     * list also doesn't make sense.
                     *
                     * NOTE(review): this continue bypasses the
                     * foreach_delete_current() call at the bottom of the
                     * loop, so the duplicate clause stays in "clauses" and
                     * is examined again on a later pass of the outer while
                     * loop.  Confirm that this is intended.
                     */
                    continue;
                }

                /*
                 * Initialize GroupVarInfo.  We only use it to call
                 * estimate_multivariate_ndistinct(), which doesn't care about
                 * ndistinct and isdefault fields.  Thus, skip these fields.
                 */
                varinfo = (GroupVarInfo *) palloc0(sizeof(GroupVarInfo));
                varinfo->var = expr;
                varinfo->rel = root->simple_rel_array[relid];
                varinfos = lappend(varinfos, varinfo);

                /*
                 * Remember the link to RestrictInfo for the case the clause
                 * is failed to be estimated.
                 */
                origin_rinfos = lappend(origin_rinfos, rinfo);
            }
            else
            {
                /* This clause can't be estimated with extended statistics */
                otherclauses = lappend(otherclauses, rinfo);
            }

            /* The clause has been consumed by this pass; drop it from the work list */
            clauses = foreach_delete_current(clauses, lc);
        }

        if (list_length(varinfos) < 2)
        {
            /*
             * Multivariate statistics doesn't apply to single columns except
             * for expressions, but it has not been implemented yet.
             */
            otherclauses = list_concat(otherclauses, origin_rinfos);
            list_free_deep(varinfos);
            list_free(origin_rinfos);
            continue;
        }

        Assert(group_rel != NULL);

        /* Employ the extended statistics. */
        origin_varinfos = varinfos;

        /*
         * Keep asking for estimates until none is produced.  varinfos is
         * passed by address; presumably each successful call removes the
         * entries it matched (the membership test further down relies on
         * that) -- TODO confirm against estimate_multivariate_ndistinct().
         */
        for (;;)
        {
            bool        estimated = estimate_multivariate_ndistinct(root,
                                                                    group_rel,
                                                                    &varinfos,
                                                                    &mvndistinct);

            if (!estimated)
                break;

            /*
             * We've got an estimation.  Use ndistinct value in a consistent
             * way - according to the caller's logic (see
             * final_cost_hashjoin).
             */
            if (ndistinct < mvndistinct)
                ndistinct = mvndistinct;
            Assert(ndistinct >= 1.0);
        }

        Assert(list_length(origin_varinfos) == list_length(origin_rinfos));

        /* Collect unmatched clauses as otherclauses. */
        forboth(lc1, origin_varinfos, lc2, origin_rinfos)
        {
            GroupVarInfo *vinfo = lfirst(lc1);

            if (!list_member_ptr(varinfos, vinfo))
                /* Already estimated */
                continue;

            /* Can't be estimated here - push to the returning list */
            otherclauses = lappend(otherclauses, lfirst(lc2));
        }
    }

    /* The bucket size fraction is the reciprocal of the best ndistinct found */
    *innerbucketsize = 1.0 / ndistinct;
    return otherclauses;
}

References Assert(), bms_get_singleton_member(), RestrictInfo::clause, equal(), estimate_multivariate_ndistinct(), forboth, foreach_delete_current, get_leftop(), get_rightop(), lappend(), lfirst, lfirst_node, list_concat(), list_copy(), list_free(), list_free_deep(), list_length(), list_member_ptr(), NIL, palloc0(), GroupVarInfo::rel, remove_nulling_relids(), root, and GroupVarInfo::var.

Referenced by final_cost_hashjoin().

◆ estimate_num_groups()

double estimate_num_groups	(	PlannerInfo *	root,
		List *	groupExprs,
		double	input_rows,
		List **	pgset,
		EstimationInfo *	estinfo
	)

Definition at line 3446 of file selfuncs.c.

{
    /*
     * estimate_num_groups body: estimate how many distinct groups result
     * from grouping input_rows rows by the expressions in groupExprs.
     *
     * Inputs as used below:
     *  root       - planner info, passed through to the helper routines
     *  groupExprs - list of expressions to group by
     *  input_rows - number of rows being grouped (clamped via clamp_row_est)
     *  pgset      - if non-NULL, points to an integer list; only expressions
     *               whose zero-based position in groupExprs is a member of
     *               *pgset are considered
     *  estinfo    - if non-NULL, zeroed on entry; SELFLAG_USED_DEFAULT is
     *               set in its flags when a per-variable ndistinct marked
     *               isdefault was multiplied into the result
     *
     * Returns 1.0 when there is nothing to group by, input_rows when a
     * variable-free volatile expression is found, and otherwise the product
     * of the per-relation estimates scaled by the largest SRF row estimate,
     * rounded up with ceil(), capped at input_rows and then raised to at
     * least 1.0.
     */
    List       *varinfos = NIL;
    double      srf_multiplier = 1.0;
    double      numdistinct;
    ListCell   *l;
    int         i;
 
    /* Zero the estinfo output parameter, if non-NULL */
    if (estinfo != NULL)
        memset(estinfo, 0, sizeof(EstimationInfo));
 
    /*
     * We don't ever want to return an estimate of zero groups, as that tends
     * to lead to division-by-zero and other unpleasantness.  The input_rows
     * estimate is usually already at least 1, but clamp it just in case it
     * isn't.
     */
    input_rows = clamp_row_est(input_rows);
 
    /*
     * If no grouping columns, there's exactly one group.  (This can't happen
     * for normal cases with GROUP BY or DISTINCT, but it is possible for
     * corner cases with set operations.)
     */
    if (groupExprs == NIL || (pgset && *pgset == NIL))
        return 1.0;
 
    /*
     * Count groups derived from boolean grouping expressions.  For other
     * expressions, find the unique Vars used, treating an expression as a Var
     * if we can find stats for it.  For each one, record the statistical
     * estimate of number of distinct values (total in its table, without
     * regard for filtering).
     */
    numdistinct = 1.0;
 
    /*
     * i is the zero-based position of the current expression in groupExprs.
     * It is advanced only inside the pgset test below, so it is meaningful
     * (and consulted) only when pgset is supplied.
     */
    i = 0;
    foreach(l, groupExprs)
    {
        Node       *groupexpr = (Node *) lfirst(l);
        double      this_srf_multiplier;
        VariableStatData vardata;
        List       *varshere;
        ListCell   *l2;
 
        /* is expression in this grouping set? */
        if (pgset && !list_member_int(*pgset, i++))
            continue;
 
        /*
         * Set-returning functions in grouping columns are a bit problematic.
         * The code below will effectively ignore their SRF nature and come up
         * with a numdistinct estimate as though they were scalar functions.
         * We compensate by scaling up the end result by the largest SRF
         * rowcount estimate.  (This will be an overestimate if the SRF
         * produces multiple copies of any output value, but it seems best to
         * assume the SRF's outputs are distinct.  In any case, it's probably
         * pointless to worry too much about this without much better
         * estimates for SRF output rowcounts than we have today.)
         */
        this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
        if (srf_multiplier < this_srf_multiplier)
            srf_multiplier = this_srf_multiplier;
 
        /*
         * Short-circuit for expressions returning boolean: each one simply
         * doubles the running estimate and contributes nothing to varinfos.
         */
        if (exprType(groupexpr) == BOOLOID)
        {
            numdistinct *= 2.0;
            continue;
        }
 
        /*
         * If examine_variable is able to deduce anything about the GROUP BY
         * expression, treat it as a single variable even if it's really more
         * complicated.
         *
         * XXX This has the consequence that if there's a statistics object on
         * the expression, we don't split it into individual Vars. This
         * affects our selection of statistics in
         * estimate_multivariate_ndistinct, because it's probably better to
         * use more accurate estimate for each expression and treat them as
         * independent, than to combine estimates for the extracted variables
         * when we don't know how that relates to the expressions.
         */
        examine_variable(root, groupexpr, 0, &vardata);
        if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
        {
            varinfos = add_unique_group_var(root, varinfos,
                                            groupexpr, &vardata);
            ReleaseVariableStats(vardata);
            continue;
        }
        /* Nothing useful found for the whole expression; release and go on */
        ReleaseVariableStats(vardata);
 
        /*
         * Else pull out the component Vars.  Handle PlaceHolderVars by
         * recursing into their arguments (effectively assuming that the
         * PlaceHolderVar doesn't change the number of groups, which boils
         * down to ignoring the possible addition of nulls to the result set).
         */
        varshere = pull_var_clause(groupexpr,
                                   PVC_RECURSE_AGGREGATES |
                                   PVC_RECURSE_WINDOWFUNCS |
                                   PVC_RECURSE_PLACEHOLDERS);
 
        /*
         * If we find any variable-free GROUP BY item, then either it is a
         * constant (and we can ignore it) or it contains a volatile function;
         * in the latter case we punt and assume that each input row will
         * yield a distinct group.
         */
        if (varshere == NIL)
        {
            if (contain_volatile_functions(groupexpr))
                return input_rows;
            continue;
        }
 
        /*
         * Else add variables to varinfos list
         */
        foreach(l2, varshere)
        {
            Node       *var = (Node *) lfirst(l2);
 
            examine_variable(root, var, 0, &vardata);
            varinfos = add_unique_group_var(root, varinfos, var, &vardata);
            ReleaseVariableStats(vardata);
        }
    }
 
    /*
     * If now no Vars, we must have an all-constant or all-boolean GROUP BY
     * list.
     */
    if (varinfos == NIL)
    {
        /* Apply SRF multiplier as we would do in the long path */
        numdistinct *= srf_multiplier;
        /* Round off */
        numdistinct = ceil(numdistinct);
        /* Guard against out-of-range answers */
        if (numdistinct > input_rows)
            numdistinct = input_rows;
        if (numdistinct < 1.0)
            numdistinct = 1.0;
        return numdistinct;
    }
 
    /*
     * Group Vars by relation and estimate total numdistinct.
     *
     * For each iteration of the outer loop, we process the frontmost Var in
     * varinfos, plus all other Vars in the same relation.  Vars belonging to
     * other relations are collected into newvarinfos, which becomes varinfos
     * for the next iteration. This is the easiest way to group Vars of same
     * rel together.
     *
     * varinfos is known to be non-empty on entry (the NIL case returned
     * above), so the linitial() in the first iteration is safe.
     */
    do
    {
        GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
        RelOptInfo *rel = varinfo1->rel;
        double      reldistinct = 1;
        double      relmaxndistinct = reldistinct;
        int         relvarcount = 0;
        List       *newvarinfos = NIL;
        List       *relvarinfos = NIL;
 
        /*
         * Split the list of varinfos in two - one for the current rel, one
         * for remaining Vars on other rels.
         */
        relvarinfos = lappend(relvarinfos, varinfo1);
        for_each_from(l, varinfos, 1)
        {
            GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 
            if (varinfo2->rel == varinfo1->rel)
            {
                /* varinfos on current rel */
                relvarinfos = lappend(relvarinfos, varinfo2);
            }
            else
            {
                /* not time to process varinfo2 yet */
                newvarinfos = lappend(newvarinfos, varinfo2);
            }
        }
 
        /*
         * Get the numdistinct estimate for the Vars of this rel.  We
         * iteratively search for multivariate n-distinct with maximum number
         * of vars; assuming that each var group is independent of the others,
         * we multiply them together.  Any remaining relvarinfos after no more
         * multivariate matches are found are assumed independent too, so
         * their individual ndistinct estimates are multiplied also.
         *
         * While iterating, count how many separate numdistinct values we
         * apply.  We apply a fudge factor below, but only if we multiplied
         * more than one such values.
         *
         * The loop ends either when a successful multivariate match leaves
         * relvarinfos empty, or when the fallback branch sets it to NIL.
         */
        while (relvarinfos)
        {
            double      mvndistinct;
 
            if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
                                                &mvndistinct))
            {
                reldistinct *= mvndistinct;
                if (relmaxndistinct < mvndistinct)
                    relmaxndistinct = mvndistinct;
                relvarcount++;
            }
            else
            {
                foreach(l, relvarinfos)
                {
                    GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 
                    reldistinct *= varinfo2->ndistinct;
                    if (relmaxndistinct < varinfo2->ndistinct)
                        relmaxndistinct = varinfo2->ndistinct;
                    relvarcount++;
 
                    /*
                     * When varinfo2's isdefault is set then we'd better set
                     * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
                     */
                    if (estinfo != NULL && varinfo2->isdefault)
                        estinfo->flags |= SELFLAG_USED_DEFAULT;
                }
 
                /* we're done with this relation */
                relvarinfos = NIL;
            }
        }
 
        /*
         * Sanity check --- don't divide by zero if empty relation.  When
         * rel->tuples is not positive this rel contributes nothing, i.e.
         * numdistinct is left unchanged.
         */
        Assert(IS_SIMPLE_REL(rel));
        if (rel->tuples > 0)
        {
            /*
             * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
             * fudge factor is because the Vars are probably correlated but we
             * don't know by how much.  We should never clamp to less than the
             * largest ndistinct value for any of the Vars, though, since
             * there will surely be at least that many groups.
             */
            double      clamp = rel->tuples;
 
            if (relvarcount > 1)
            {
                clamp *= 0.1;
                if (clamp < relmaxndistinct)
                {
                    clamp = relmaxndistinct;
                    /* for sanity in case some ndistinct is too large: */
                    if (clamp > rel->tuples)
                        clamp = rel->tuples;
                }
            }
            if (reldistinct > clamp)
                reldistinct = clamp;
 
            /*
             * Update the estimate based on the restriction selectivity,
             * guarding against division by zero when reldistinct is zero.
             * Also skip this if we know that we are returning all rows.
             */
            if (reldistinct > 0 && rel->rows < rel->tuples)
            {
                /*
                 * Given a table containing N rows with n distinct values in a
                 * uniform distribution, if we select p rows at random then
                 * the expected number of distinct values selected is
                 *
                 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
                 *
                 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
                 *
                 * See "Approximating block accesses in database
                 * organizations", S. B. Yao, Communications of the ACM,
                 * Volume 20 Issue 4, April 1977 Pages 260-261.
                 *
                 * Alternatively, re-arranging the terms from the factorials,
                 * this may be written as
                 *
                 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
                 *
                 * This form of the formula is more efficient to compute in
                 * the common case where p is larger than N/n.  Additionally,
                 * as pointed out by Dell'Era, if i << N for all terms in the
                 * product, it can be approximated by
                 *
                 * n * (1 - ((N-p)/N)^(N/n))
                 *
                 * See "Expected distinct values when selecting from a bag
                 * without replacement", Alberto Dell'Era,
                 * http://www.adellera.it/investigations/distinct_balls/.
                 *
                 * The condition i << N is equivalent to n >> 1, so this is a
                 * good approximation when the number of distinct values in
                 * the table is large.  It turns out that this formula also
                 * works well even when n is small.
                 *
                 * In the code below N is rel->tuples, p is rel->rows and n is
                 * reldistinct.
                 */
                reldistinct *=
                    (1 - pow((rel->tuples - rel->rows) / rel->tuples,
                             rel->tuples / reldistinct));
            }
            reldistinct = clamp_row_est(reldistinct);
 
            /*
             * Update estimate of total distinct groups.
             */
            numdistinct *= reldistinct;
        }
 
        /* Continue with the Vars of the relations not yet processed */
        varinfos = newvarinfos;
    } while (varinfos != NIL);
 
    /* Now we can account for the effects of any SRFs */
    numdistinct *= srf_multiplier;
 
    /* Round off */
    numdistinct = ceil(numdistinct);
 
    /* Guard against out-of-range answers */
    if (numdistinct > input_rows)
        numdistinct = input_rows;
    if (numdistinct < 1.0)
        numdistinct = 1.0;
 
    return numdistinct;
}

References add_unique_group_var(), Assert(), clamp_row_est(), contain_volatile_functions(), estimate_multivariate_ndistinct(), examine_variable(), expression_returns_set_rows(), exprType(), EstimationInfo::flags, for_each_from, HeapTupleIsValid, i, IS_SIMPLE_REL, GroupVarInfo::isdefault, VariableStatData::isunique, lappend(), lfirst, linitial, list_member_int(), GroupVarInfo::ndistinct, NIL, pull_var_clause(), PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS, PVC_RECURSE_WINDOWFUNCS, GroupVarInfo::rel, ReleaseVariableStats, root, RelOptInfo::rows, SELFLAG_USED_DEFAULT, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by adjust_rowcount_for_semijoins(), build_setop_child_paths(), cost_incremental_sort(), cost_memoize_rescan(), create_final_distinct_paths(), create_partial_distinct_paths(), create_unique_path(), estimate_path_cost_size(), get_number_of_groups(), and get_windowclause_startup_tuples().

◆ examine_variable()

void examine_variable	(	PlannerInfo *	root,
		Node *	node,
		int	varRelid,
		VariableStatData *	vardata
	)

Definition at line 5289 of file selfuncs.c.

{
    /*
     * examine_variable body: fill *vardata with whatever can be determined
     * about the expression "node".
     *
     * Inputs as used below:
     *  root     - planner info
     *  node     - the expression to examine
     *  varRelid - if zero, Vars of any relation count as variables; if
     *             nonzero, only Vars of that relation do and everything else
     *             is treated as constant
     *  vardata  - output struct; it is zeroed first, so fields not set
     *             explicitly below stay zero/NULL
     *
     * Fields set here: vartype (exposed type of the original node), var,
     * rel, atttype, atttypmod, isunique, and, when statistics are located,
     * statsTuple, freefunc and acl_ok.  For the simple-Var case the stats
     * lookup is delegated to examine_simple_variable().
     */
    Node       *basenode;
    Relids      varnos;
    Relids      basevarnos;
    RelOptInfo *onerel;
 
    /* Make sure we don't return dangling pointers in vardata */
    MemSet(vardata, 0, sizeof(VariableStatData));
 
    /* Save the exposed type of the expression */
    vardata->vartype = exprType(node);
 
    /* Look inside any binary-compatible relabeling */
 
    if (IsA(node, RelabelType))
        basenode = (Node *) ((RelabelType *) node)->arg;
    else
        basenode = node;
 
    /* Fast path for a simple Var */
 
    if (IsA(basenode, Var) &&
        (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
    {
        Var        *var = (Var *) basenode;
 
        /* Set up result fields other than the stats tuple */
        vardata->var = basenode;    /* return Var without relabeling */
        vardata->rel = find_base_rel(root, var->varno);
        vardata->atttype = var->vartype;
        vardata->atttypmod = var->vartypmod;
        vardata->isunique = has_unique_index(vardata->rel, var->varattno);
 
        /* Try to locate some stats */
        examine_simple_variable(root, var, vardata);
 
        /* varnos/basevarnos have not been computed yet; nothing to free */
        return;
    }
 
    /*
     * Okay, it's a more complicated expression.  Determine variable
     * membership.  Note that when varRelid isn't zero, only vars of that
     * relation are considered "real" vars.
     *
     * basevarnos is varnos with the members of root->outer_join_rels
     * removed.
     */
    varnos = pull_varnos(root, basenode);
    basevarnos = bms_difference(varnos, root->outer_join_rels);
 
    /* onerel stays NULL unless exactly one usable base relation is found */
    onerel = NULL;
 
    if (bms_is_empty(basevarnos))
    {
        /* No Vars at all ... must be pseudo-constant clause */
    }
    else
    {
        int         relid;
 
        /* Check if the expression is in vars of a single base relation */
        if (bms_get_singleton_member(basevarnos, &relid))
        {
            if (varRelid == 0 || varRelid == relid)
            {
                onerel = find_base_rel(root, relid);
                vardata->rel = onerel;
                node = basenode;    /* strip any relabeling */
            }
            /* else treat it as a constant */
        }
        else
        {
            /* varnos has multiple relids */
            if (varRelid == 0)
            {
                /* treat it as a variable of a join relation */
                vardata->rel = find_join_rel(root, varnos);
                node = basenode;    /* strip any relabeling */
            }
            else if (bms_is_member(varRelid, varnos))
            {
                /* ignore the vars belonging to other relations */
                vardata->rel = find_base_rel(root, varRelid);
                node = basenode;    /* strip any relabeling */
                /* note: no point in expressional-index search here */
            }
            /* else treat it as a constant */
        }
    }
 
    bms_free(basevarnos);
 
    /*
     * node is the relabel-stripped expression if a relation was identified
     * above; otherwise it is still the expression as passed in.
     */
    vardata->var = node;
    vardata->atttype = exprType(node);
    vardata->atttypmod = exprTypmod(node);
 
    if (onerel)
    {
        /*
         * We have an expression in vars of a single relation.  Try to match
         * it to expressional index columns, in hopes of finding some
         * statistics.
         *
         * Note that we consider all index columns including INCLUDE columns,
         * since there could be stats for such columns.  But the test for
         * uniqueness needs to be warier.
         *
         * XXX it's conceivable that there are multiple matches with different
         * index opfamilies; if so, we need to pick one that matches the
         * operator we are estimating for.  FIXME later.
         */
        ListCell   *ilist;
        ListCell   *slist;
        Oid         userid;
 
        /*
         * The nullingrels bits within the expression could prevent us from
         * matching it to expressional index columns or to the expressions in
         * extended statistics.  So strip them out first.
         *
         * Note this happens after vardata->var was assigned, so only the
         * local copy used for matching is affected.
         */
        if (bms_overlap(varnos, root->outer_join_rels))
            node = remove_nulling_relids(node, root->outer_join_rels, NULL);
 
        /*
         * Determine the user ID to use for privilege checks: either
         * onerel->userid if it's set (e.g., in case we're accessing the table
         * via a view), or the current user otherwise.
         *
         * If we drill down to child relations, we keep using the same userid:
         * it's going to be the same anyway, due to how we set up the relation
         * tree (q.v. build_simple_rel).
         */
        userid = OidIsValid(onerel->userid) ? onerel->userid : GetUserId();
 
        foreach(ilist, onerel->indexlist)
        {
            IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
            ListCell   *indexpr_item;
            int         pos;
 
            indexpr_item = list_head(index->indexprs);
            if (indexpr_item == NULL)
                continue;       /* no expressions here... */
 
            for (pos = 0; pos < index->ncolumns; pos++)
            {
                /*
                 * A zero indexkeys entry is handled as an expression column:
                 * its expression is the current indexprs entry, and
                 * indexpr_item is advanced once per such column.
                 */
                if (index->indexkeys[pos] == 0)
                {
                    Node       *indexkey;
 
                    if (indexpr_item == NULL)
                        elog(ERROR, "too few entries in indexprs list");
                    indexkey = (Node *) lfirst(indexpr_item);
                    if (indexkey && IsA(indexkey, RelabelType))
                        indexkey = (Node *) ((RelabelType *) indexkey)->arg;
                    if (equal(node, indexkey))
                    {
                        /*
                         * Found a match ... is it a unique index? Tests here
                         * should match has_unique_index().
                         */
                        if (index->unique &&
                            index->nkeycolumns == 1 &&
                            pos == 0 &&
                            (index->indpred == NIL || index->predOK))
                            vardata->isunique = true;
 
                        /*
                         * Has it got stats?  We only consider stats for
                         * non-partial indexes, since partial indexes probably
                         * don't reflect whole-relation statistics; the above
                         * check for uniqueness is the only info we take from
                         * a partial index.
                         *
                         * An index stats hook, however, must make its own
                         * decisions about what to do with partial indexes.
                         */
                        if (get_index_stats_hook &&
                            (*get_index_stats_hook) (root, index->indexoid,
                                                     pos + 1, vardata))
                        {
                            /*
                             * The hook took control of acquiring a stats
                             * tuple.  If it did supply a tuple, it'd better
                             * have supplied a freefunc.
                             */
                            if (HeapTupleIsValid(vardata->statsTuple) &&
                                !vardata->freefunc)
                                elog(ERROR, "no function provided to release variable stats with");
                        }
                        else if (index->indpred == NIL)
                        {
                            /*
                             * Look up the stats entry keyed by the index's
                             * OID and the 1-based column number.  freefunc is
                             * assigned whether or not the lookup succeeds.
                             */
                            vardata->statsTuple =
                                SearchSysCache3(STATRELATTINH,
                                                ObjectIdGetDatum(index->indexoid),
                                                Int16GetDatum(pos + 1),
                                                BoolGetDatum(false));
                            vardata->freefunc = ReleaseSysCache;
 
                            if (HeapTupleIsValid(vardata->statsTuple))
                            {
                                /* Get index's table for permission check */
                                RangeTblEntry *rte;
 
                                rte = planner_rt_fetch(index->rel->relid, root);
                                Assert(rte->rtekind == RTE_RELATION);
 
                                /*
                                 * For simplicity, we insist on the whole
                                 * table being selectable, rather than trying
                                 * to identify which column(s) the index
                                 * depends on.  Also require all rows to be
                                 * selectable --- there must be no
                                 * securityQuals from security barrier views
                                 * or RLS policies.
                                 */
                                vardata->acl_ok =
                                    rte->securityQuals == NIL &&
                                    (pg_class_aclcheck(rte->relid, userid,
                                                       ACL_SELECT) == ACLCHECK_OK);
 
                                /*
                                 * If the user doesn't have permissions to
                                 * access an inheritance child relation, check
                                 * the permissions of the table actually
                                 * mentioned in the query, since most likely
                                 * the user does have that permission.  Note
                                 * that whole-table select privilege on the
                                 * parent doesn't quite guarantee that the
                                 * user could read all columns of the child.
                                 * But in practice it's unlikely that any
                                 * interesting security violation could result
                                 * from allowing access to the expression
                                 * index's stats, so we allow it anyway.  See
                                 * similar code in examine_simple_variable()
                                 * for additional comments.
                                 */
                                if (!vardata->acl_ok &&
                                    root->append_rel_array != NULL)
                                {
                                    AppendRelInfo *appinfo;
                                    Index       varno = index->rel->relid;
 
                                    /*
                                     * Climb the append_rel_array parent
                                     * links for as long as the parent is an
                                     * RTE_RELATION entry.
                                     */
                                    appinfo = root->append_rel_array[varno];
                                    while (appinfo &&
                                           planner_rt_fetch(appinfo->parent_relid,
                                                            root)->rtekind == RTE_RELATION)
                                    {
                                        varno = appinfo->parent_relid;
                                        appinfo = root->append_rel_array[varno];
                                    }
                                    if (varno != index->rel->relid)
                                    {
                                        /* Repeat access check on this rel */
                                        rte = planner_rt_fetch(varno, root);
                                        Assert(rte->rtekind == RTE_RELATION);
 
                                        vardata->acl_ok =
                                            rte->securityQuals == NIL &&
                                            (pg_class_aclcheck(rte->relid,
                                                               userid,
                                                               ACL_SELECT) == ACLCHECK_OK);
                                    }
                                }
                            }
                            else
                            {
                                /* suppress leakproofness checks later */
                                vardata->acl_ok = true;
                            }
                        }
                        /* Got stats: stop scanning this index's columns */
                        if (vardata->statsTuple)
                            break;
                    }
                    indexpr_item = lnext(index->indexprs, indexpr_item);
                }
            }
            /* Got stats: no need to look at any further indexes */
            if (vardata->statsTuple)
                break;
        }
 
        /*
         * Search extended statistics for one with a matching expression.
         * There might be multiple ones, so just grab the first one. In the
         * future, we might consider the statistics target (and pick the most
         * accurate statistics) and maybe some other parameters.
         */
        foreach(slist, onerel->statlist)
        {
            StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
            RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
            ListCell   *expr_item;
            int         pos;
 
            /*
             * Stop once we've found statistics for the expression (either
             * from extended stats, or for an index in the preceding loop).
             */
            if (vardata->statsTuple)
                break;
 
            /* skip stats without per-expression stats */
            if (info->kind != STATS_EXT_EXPRESSIONS)
                continue;
 
            /* skip stats with mismatching stxdinherit value */
            if (info->inherit != rte->inh)
                continue;
 
            /* pos is the zero-based position of expr within info->exprs */
            pos = 0;
            foreach(expr_item, info->exprs)
            {
                Node       *expr = (Node *) lfirst(expr_item);
 
                Assert(expr);
 
                /* strip RelabelType before comparing it */
                if (expr && IsA(expr, RelabelType))
                    expr = (Node *) ((RelabelType *) expr)->arg;
 
                /* found a match, see if we can extract pg_statistic row */
                if (equal(node, expr))
                {
                    /*
                     * XXX Not sure if we should cache the tuple somewhere.
                     * Now we just create a new copy every time.
                     */
                    vardata->statsTuple =
                        statext_expressions_load(info->statOid, rte->inh, pos);
 
                    vardata->freefunc = ReleaseDummy;
 
                    /*
                     * For simplicity, we insist on the whole table being
                     * selectable, rather than trying to identify which
                     * column(s) the statistics object depends on.  Also
                     * require all rows to be selectable --- there must be no
                     * securityQuals from security barrier views or RLS
                     * policies.
                     */
                    vardata->acl_ok =
                        rte->securityQuals == NIL &&
                        (pg_class_aclcheck(rte->relid, userid,
                                           ACL_SELECT) == ACLCHECK_OK);
 
                    /*
                     * If the user doesn't have permissions to access an
                     * inheritance child relation, check the permissions of
                     * the table actually mentioned in the query, since most
                     * likely the user does have that permission.  Note that
                     * whole-table select privilege on the parent doesn't
                     * quite guarantee that the user could read all columns of
                     * the child. But in practice it's unlikely that any
                     * interesting security violation could result from
                     * allowing access to the expression stats, so we allow it
                     * anyway.  See similar code in examine_simple_variable()
                     * for additional comments.
                     */
                    if (!vardata->acl_ok &&
                        root->append_rel_array != NULL)
                    {
                        AppendRelInfo *appinfo;
                        Index       varno = onerel->relid;
 
                        /* Same parent-climbing walk as in the index case */
                        appinfo = root->append_rel_array[varno];
                        while (appinfo &&
                               planner_rt_fetch(appinfo->parent_relid,
                                                root)->rtekind == RTE_RELATION)
                        {
                            varno = appinfo->parent_relid;
                            appinfo = root->append_rel_array[varno];
                        }
                        if (varno != onerel->relid)
                        {
                            /* Repeat access check on this rel */
                            rte = planner_rt_fetch(varno, root);
                            Assert(rte->rtekind == RTE_RELATION);
 
                            vardata->acl_ok =
                                rte->securityQuals == NIL &&
                                (pg_class_aclcheck(rte->relid,
                                                   userid,
                                                   ACL_SELECT) == ACLCHECK_OK);
                        }
                    }
 
                    /*
                     * Only the first matching expression of this statistics
                     * object is used; the outer loop re-checks statsTuple at
                     * the top of its next iteration.
                     */
                    break;
                }
 
                pos++;
            }
        }
    }
 
    bms_free(varnos);
}

References VariableStatData::acl_ok, ACL_SELECT, ACLCHECK_OK, arg, Assert(), VariableStatData::atttype, VariableStatData::atttypmod, bms_difference(), bms_free(), bms_get_singleton_member(), bms_is_empty, bms_is_member(), bms_overlap(), BoolGetDatum(), elog, equal(), ERROR, examine_simple_variable(), StatisticExtInfo::exprs, exprType(), exprTypmod(), find_base_rel(), find_join_rel(), VariableStatData::freefunc, get_index_stats_hook, GetUserId(), has_unique_index(), HeapTupleIsValid, if(), RelOptInfo::indexlist, RangeTblEntry::inh, StatisticExtInfo::inherit, Int16GetDatum(), IsA, VariableStatData::isunique, StatisticExtInfo::kind, lfirst, list_head(), lnext(), MemSet, NIL, ObjectIdGetDatum(), OidIsValid, AppendRelInfo::parent_relid, pg_class_aclcheck(), planner_rt_fetch, pull_varnos(), VariableStatData::rel, ReleaseDummy(), ReleaseSysCache(), RelOptInfo::relid, remove_nulling_relids(), root, RTE_RELATION, RangeTblEntry::rtekind, SearchSysCache3(), statext_expressions_load(), RelOptInfo::statlist, StatisticExtInfo::statOid, VariableStatData::statsTuple, RelOptInfo::userid, VariableStatData::var, Var::varattno, Var::varno, and VariableStatData::vartype.

Referenced by booltestsel(), boolvarsel(), estimate_array_length(), estimate_hash_bucket_stats(), estimate_num_groups(), get_join_variables(), get_restriction_variable(), mergejoinscansel(), nulltestsel(), and scalararraysel_containment().

◆ generic_restriction_selectivity()

double generic_restriction_selectivity	(	PlannerInfo *	root,
		Oid	oproid,
		Oid	collation,
		List *	args,
		int	varRelid,
		double	default_selectivity
	)

Definition at line 919 of file selfuncs.c.

{
    VariableStatData vardata;
    Node       *other;
    bool        varonleft;
    double      selec;

    /*
     * We can only work with clauses shaped like "variable OP something" or
     * "something OP variable".  For anything else, hand back the caller's
     * default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        return default_selectivity;

    if (!IsA(other, Const))
    {
        /* The comparison value is not a constant, so statistics can't help. */
        selec = default_selectivity;
    }
    else if (((Const *) other)->constisnull)
    {
        /*
         * Comparison against a NULL constant: assume the operator is strict,
         * so it can never return TRUE.
         */
        ReleaseVariableStats(vardata);
        return 0.0;
    }
    else
    {
        /* The variable is being compared to a known non-null constant. */
        Datum       constval = ((Const *) other)->constvalue;
        FmgrInfo    opproc;
        double      mcvsel;
        double      mcvsum;
        double      nullfrac = 0.0;
        int         hist_size;

        fmgr_info(get_opcode(oproid), &opproc);

        /* Selectivity contributed by the column's most common values. */
        mcvsel = mcv_selectivity(&vardata, &opproc, collation,
                                 constval, varonleft,
                                 &mcvsum);

        /*
         * Ask what fraction of the histogram matches, treating that as
         * representative of the non-MCV population.  A negative result
         * means no usable histogram, in which case the default selectivity
         * stands in for the non-MCV population.
         */
        selec = histogram_selectivity(&vardata, &opproc, collation,
                                      constval, varonleft,
                                      10, 1, &hist_size);
        if (selec < 0)
            selec = default_selectivity;
        else if (hist_size < 100)
        {
            /*
             * Histograms of 10 to 100 entries are blended with the default,
             * trusting the histogram more the larger it is.
             */
            double      trust = hist_size / 100.0;

            selec = selec * trust +
                default_selectivity * (1.0 - trust);
        }

        /* Never believe extremely small or extremely large estimates. */
        if (selec < 0.0001)
            selec = 0.0001;
        else if (selec > 0.9999)
            selec = 0.9999;

        /* Pick up the null fraction when stats exist; otherwise assume 0. */
        if (HeapTupleIsValid(vardata.statsTuple))
            nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;

        /*
         * Combine both parts: the histogram estimate applies only to the
         * non-null values that are not in the MCV list, and the MCV
         * selectivity is added on top.
         */
        selec *= 1.0 - nullfrac - mcvsum;
        selec += mcvsel;
    }

    ReleaseVariableStats(vardata);

    /* The result should already be a probability, but make sure. */
    CLAMP_PROBABILITY(selec);

    return selec;
}

References CLAMP_PROBABILITY, fmgr_info(), get_opcode(), get_restriction_variable(), GETSTRUCT(), HeapTupleIsValid, histogram_selectivity(), IsA, mcv_selectivity(), ReleaseVariableStats, root, and VariableStatData::statsTuple.

Referenced by ltreeparentsel(), and matchingsel().

◆ genericcostestimate()

void genericcostestimate	(	PlannerInfo *	root,
		IndexPath *	path,
		double	loop_count,
		GenericCosts *	costs
	)

Definition at line 6935 of file selfuncs.c.

{
    /*
     * Generic index access cost estimation.
     *
     * Inputs read from *costs: num_sa_scans (values below 1 mean "not
     * supplied, estimate it here") and numIndexTuples (values <= 0 mean
     * "not supplied, estimate it here").
     *
     * Outputs written to *costs at the end: indexStartupCost,
     * indexTotalCost, indexSelectivity, indexCorrelation, numIndexPages,
     * numIndexTuples, spc_random_page_cost and num_sa_scans.
     */
    IndexOptInfo *index = path->indexinfo;
    List       *indexQuals = get_quals_from_indexclauses(path->indexclauses);
    List       *indexOrderBys = path->indexorderbys;
    Cost        indexStartupCost;
    Cost        indexTotalCost;
    Selectivity indexSelectivity;
    double      indexCorrelation;
    double      numIndexPages;
    double      numIndexTuples;
    double      spc_random_page_cost;
    double      num_sa_scans;
    double      num_outer_scans;
    double      num_scans;
    double      qual_op_cost;
    double      qual_arg_cost;
    List       *selectivityQuals;
    ListCell   *l;
 
    /*
     * If the index is partial, AND the index predicate with the explicitly
     * given indexquals to produce a more accurate idea of the index
     * selectivity.
     */
    selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
 
    /*
     * If caller didn't give us an estimate for ScalarArrayOpExpr index scans,
     * just assume that the number of index descents is the number of distinct
     * combinations of array elements from all of the scan's SAOP clauses.
     */
    num_sa_scans = costs->num_sa_scans;
    if (num_sa_scans < 1)
    {
        num_sa_scans = 1;
        foreach(l, indexQuals)
        {
            RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
 
            if (IsA(rinfo->clause, ScalarArrayOpExpr))
            {
                ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
                double      alength = estimate_array_length(root, lsecond(saop->args));
 
                /* array length estimates of 1 or less leave the count alone */
                if (alength > 1)
                    num_sa_scans *= alength;
            }
        }
    }
 
    /* Estimate the fraction of main-table tuples that will be visited */
    indexSelectivity = clauselist_selectivity(root, selectivityQuals,
                                              index->rel->relid,
                                              JOIN_INNER,
                                              NULL);
 
    /*
     * If caller didn't give us an estimate, estimate the number of index
     * tuples that will be visited.  We do it in this rather peculiar-looking
     * way in order to get the right answer for partial indexes.
     */
    numIndexTuples = costs->numIndexTuples;
    if (numIndexTuples <= 0.0)
    {
        numIndexTuples = indexSelectivity * index->rel->tuples;
 
        /*
         * The above calculation counts all the tuples visited across all
         * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
         * average per-indexscan number, so adjust.  This is a handy place to
         * round to integer, too.  (If caller supplied tuple estimate, it's
         * responsible for handling these considerations.)
         */
        numIndexTuples = rint(numIndexTuples / num_sa_scans);
    }
 
    /*
     * We can bound the number of tuples by the index size in any case. Also,
     * always estimate at least one tuple is touched, even when
     * indexSelectivity estimate is tiny.
     */
    if (numIndexTuples > index->tuples)
        numIndexTuples = index->tuples;
    if (numIndexTuples < 1.0)
        numIndexTuples = 1.0;
 
    /*
     * Estimate the number of index pages that will be retrieved.
     *
     * We use the simplistic method of taking a pro-rata fraction of the total
     * number of index pages.  In effect, this counts only leaf pages and not
     * any overhead such as index metapage or upper tree levels.
     *
     * In practice access to upper index levels is often nearly free because
     * those tend to stay in cache under load; moreover, the cost involved is
     * highly dependent on index type.  We therefore ignore such costs here
     * and leave it to the caller to add a suitable charge if needed.
     */
    if (index->pages > 1 && index->tuples > 1)
        numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
    else
        numIndexPages = 1.0;    /* tiny index: charge exactly one page */
 
    /* fetch estimated page cost for tablespace containing index */
    get_tablespace_page_costs(index->reltablespace,
                              &spc_random_page_cost,
                              NULL);
 
    /*
     * Now compute the disk access costs.
     *
     * The above calculations are all per-index-scan.  However, if we are in a
     * nestloop inner scan, we can expect the scan to be repeated (with
     * different search keys) for each row of the outer relation.  Likewise,
     * ScalarArrayOpExpr quals result in multiple index scans.  This creates
     * the potential for cache effects to reduce the number of disk page
     * fetches needed.  We want to estimate the average per-scan I/O cost in
     * the presence of caching.
     *
     * We use the Mackert-Lohman formula (see costsize.c for details) to
     * estimate the total number of page fetches that occur.  While this
     * wasn't what it was designed for, it seems a reasonable model anyway.
     * Note that we are counting pages not tuples anymore, so we take N = T =
     * index size, as if there were one "tuple" per page.
     */
    num_outer_scans = loop_count;
    num_scans = num_sa_scans * num_outer_scans;
 
    if (num_scans > 1)
    {
        double      pages_fetched;
 
        /* total page fetches ignoring cache effects */
        pages_fetched = numIndexPages * num_scans;
 
        /*
         * use Mackert and Lohman formula to adjust for cache effects;
         * index->pages is passed for both size arguments, per the
         * "N = T = index size" note above
         */
        pages_fetched = index_pages_fetched(pages_fetched,
                                            index->pages,
                                            (double) index->pages,
                                            root);
 
        /*
         * Now compute the total disk access cost, and then report a pro-rated
         * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
         * since that's internal to the indexscan.)
         */
        indexTotalCost = (pages_fetched * spc_random_page_cost)
            / num_outer_scans;
    }
    else
    {
        /*
         * For a single index scan, we just charge spc_random_page_cost per
         * page touched.
         */
        indexTotalCost = numIndexPages * spc_random_page_cost;
    }
 
    /*
     * CPU cost: any complex expressions in the indexquals will need to be
     * evaluated once at the start of the scan to reduce them to runtime keys
     * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
     * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
     * indexqual operator.  Because we have numIndexTuples as a per-scan
     * number, we have to multiply by num_sa_scans to get the correct result
     * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
     * ORDER BY expressions.
     *
     * Note: this neglects the possible costs of rechecking lossy operators.
     * Detecting that that might be needed seems more expensive than it's
     * worth, though, considering all the other inaccuracies here ...
     */
    qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
        index_other_operands_eval_cost(root, indexOrderBys);
    qual_op_cost = cpu_operator_cost *
        (list_length(indexQuals) + list_length(indexOrderBys));
 
    /*
     * The one-time operand evaluation is the only startup charge; it is
     * also folded into the total, on top of the disk cost computed above.
     */
    indexStartupCost = qual_arg_cost;
    indexTotalCost += qual_arg_cost;
    indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
 
    /*
     * Generic assumption about index correlation: there isn't any.
     */
    indexCorrelation = 0.0;
 
    /*
     * Return everything to caller.
     */
    costs->indexStartupCost = indexStartupCost;
    costs->indexTotalCost = indexTotalCost;
    costs->indexSelectivity = indexSelectivity;
    costs->indexCorrelation = indexCorrelation;
    costs->numIndexPages = numIndexPages;
    costs->numIndexTuples = numIndexTuples;
    costs->spc_random_page_cost = spc_random_page_cost;
    costs->num_sa_scans = num_sa_scans;
}

References add_predicate_to_index_quals(), ScalarArrayOpExpr::args, RestrictInfo::clause, clauselist_selectivity(), cpu_index_tuple_cost, cpu_operator_cost, estimate_array_length(), get_quals_from_indexclauses(), get_tablespace_page_costs(), index_other_operands_eval_cost(), index_pages_fetched(), IndexPath::indexclauses, GenericCosts::indexCorrelation, IndexPath::indexinfo, IndexPath::indexorderbys, GenericCosts::indexSelectivity, GenericCosts::indexStartupCost, GenericCosts::indexTotalCost, IsA, JOIN_INNER, lfirst, list_length(), lsecond, GenericCosts::num_sa_scans, GenericCosts::numIndexPages, GenericCosts::numIndexTuples, root, and GenericCosts::spc_random_page_cost.

Referenced by blcostestimate(), btcostestimate(), gistcostestimate(), hashcostestimate(), and spgcostestimate().

◆ get_join_variables()

void get_join_variables	(	PlannerInfo *	root,
		List *	args,
		SpecialJoinInfo *	sjinfo,
		VariableStatData *	vardata1,
		VariableStatData *	vardata2,
		bool *	join_is_reversed
	)

Definition at line 5220 of file selfuncs.c.

{
    Node       *lhs;
    Node       *rhs;

    /* A join operator clause must have exactly two operands. */
    if (list_length(args) != 2)
        elog(ERROR, "join operator should take two arguments");

    lhs = (Node *) linitial(args);
    rhs = (Node *) lsecond(args);

    /* Fill in the caller's stats structs, first operand first. */
    examine_variable(root, lhs, 0, vardata1);
    examine_variable(root, rhs, 0, vardata2);

    /*
     * The join counts as reversed when the first variable's relation lies
     * entirely within the join's syntactic right-hand side, or failing that,
     * when the second variable's relation lies entirely within the syntactic
     * left-hand side.  A variable with no associated relation never
     * triggers either test.
     */
    *join_is_reversed =
        (vardata1->rel &&
         bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand)) ||
        (vardata2->rel &&
         bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand));
}

References bms_is_subset(), elog, ERROR, examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, RelOptInfo::relids, root, SpecialJoinInfo::syn_lefthand, and SpecialJoinInfo::syn_righthand.

Referenced by eqjoinsel(), neqjoinsel(), and networkjoinsel().

◆ get_quals_from_indexclauses()

List * get_quals_from_indexclauses ( List * indexclauses )

Definition at line 6851 of file selfuncs.c.

{
    List       *quals = NIL;
    ListCell   *outer;

    /*
     * Flatten the list of IndexClauses: append each clause's indexquals
     * (RestrictInfo nodes) to a single result list, preserving order.
     */
    foreach(outer, indexclauses)
    {
        IndexClause *ic = lfirst_node(IndexClause, outer);
        ListCell   *inner;

        foreach(inner, ic->indexquals)
            quals = lappend(quals, lfirst_node(RestrictInfo, inner));
    }

    return quals;
}

References IndexClause::indexquals, lappend(), lfirst_node, and NIL.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

◆ get_restriction_variable()

bool get_restriction_variable	(	PlannerInfo *	root,
		List *	args,
		int	varRelid,
		VariableStatData *	vardata,
		Node **	other,
		bool *	varonleft
	)

Definition at line 5160 of file selfuncs.c.

{
    VariableStatData rdata;
    Node       *lhs;
    Node       *rhs;
    bool        left_is_var;
    bool        right_is_var;

    /* Not a binary opclause?  Give up (probably shouldn't happen). */
    if (list_length(args) != 2)
        return false;

    lhs = (Node *) linitial(args);
    rhs = (Node *) lsecond(args);

    /*
     * Look at both operands.  With a nonzero varRelid, Vars belonging to
     * other relations are treated as pseudoconstants.
     */
    examine_variable(root, lhs, varRelid, vardata);
    examine_variable(root, rhs, varRelid, &rdata);

    left_is_var = (vardata->rel != NULL);
    right_is_var = (rdata.rel != NULL);

    /* Success requires exactly one side to be a variable. */
    if (left_is_var != right_is_var)
    {
        if (left_is_var)
        {
            *varonleft = true;
            *other = estimate_expression_value(root, rdata.var);
            /* Assume no ReleaseVariableStats(rdata) is needed here */
        }
        else
        {
            *varonleft = false;
            /* read the left expression before *vardata is overwritten */
            *other = estimate_expression_value(root, vardata->var);
            /* Assume no ReleaseVariableStats(*vardata) is needed here */
            *vardata = rdata;
        }
        return true;
    }

    /* Wrong clause structure (probably var op var): clean up and fail. */
    ReleaseVariableStats(*vardata);
    ReleaseVariableStats(rdata);

    return false;
}

References estimate_expression_value(), examine_variable(), linitial, list_length(), lsecond, VariableStatData::rel, ReleaseVariableStats, root, and VariableStatData::var.

Referenced by _int_matchsel(), arraycontsel(), eqsel_internal(), generic_restriction_selectivity(), multirangesel(), networksel(), patternsel_common(), rangesel(), scalarineqsel_wrapper(), and tsmatchsel().

◆ get_variable_numdistinct()

double get_variable_numdistinct	(	VariableStatData *	vardata,
		bool *	isdefault
	)

Definition at line 6149 of file selfuncs.c.

{
    double      stadistinct = 0.0;  /* 0 means "unknown" until proven otherwise */
    double      stanullfrac = 0.0;  /* assume no nulls unless stats say so */
    double      ntuples;

    *isdefault = false;

    /*
     * Choose a stadistinct value.  Sometimes we can produce an estimate
     * without a pg_statistic entry, or a better one than pg_statistic has.
     * The encoding used below: positive = absolute count, negative =
     * fraction of the relation's tuples (negated), zero = unknown.
     */
    if (HeapTupleIsValid(vardata->statsTuple))
    {
        /* Take both values straight from the pg_statistic entry. */
        Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

        stadistinct = stats->stadistinct;
        stanullfrac = stats->stanullfrac;
    }
    else if (vardata->vartype == BOOLOID)
    {
        /*
         * Boolean columns presumably have two distinct values.  (Are there
         * other datatypes that deserve a wired-in estimate?)
         */
        stadistinct = 2.0;
    }
    else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
    {
        /*
         * Column of a VALUES RTE: assume unique and non-null.  This can be
         * very wrong, but tends to hold for well-written queries.  Examining
         * the VALUES contents would only work for all-constant entries and
         * would be pretty expensive anyway.
         */
        stadistinct = -1.0;
    }
    else if (vardata->var && IsA(vardata->var, Var))
    {
        /*
         * No statistics are kept for system columns, but distinctness can
         * still be inferred for some of them.
         */
        switch (((Var *) vardata->var)->varattno)
        {
            case SelfItemPointerAttributeNumber:
                stadistinct = -1.0; /* unique (and all non null) */
                break;
            case TableOidAttributeNumber:
                stadistinct = 1.0;  /* only 1 value */
                break;
            default:
                break;          /* stays "unknown" */
        }
    }

    /*
     * XXX consider using estimate_num_groups on expressions?
     */

    /*
     * A variable known to be unique (unique index, DISTINCT or GROUP-BY
     * clause) is treated as unique whatever pg_statistic says: the stats
     * could be stale, or a partial unique index may prove uniqueness for
     * this query.  The null fraction is still believed.
     */
    if (vardata->isunique)
        stadistinct = -1.0 * (1.0 - stanullfrac);

    /* An absolute estimate can be returned directly. */
    if (stadistinct > 0.0)
        return clamp_row_est(stadistinct);

    /* Everything else needs the relation size; punt if it is unavailable. */
    if (vardata->rel == NULL || vardata->rel->tuples <= 0.0)
    {
        *isdefault = true;
        return DEFAULT_NUM_DISTINCT;
    }
    ntuples = vardata->rel->tuples;

    /* A relative estimate scales with the relation size. */
    if (stadistinct < 0.0)
        return clamp_row_est(-stadistinct * ntuples);

    /*
     * No data at all: for a small table estimate ndistinct = ntuples, else
     * fall back to the default.  DEFAULT_NUM_DISTINCT doubles as the
     * "small" cutoff so that the behavior isn't discontinuous.
     */
    if (ntuples < DEFAULT_NUM_DISTINCT)
        return clamp_row_est(ntuples);

    *isdefault = true;
    return DEFAULT_NUM_DISTINCT;
}

References clamp_row_est(), DEFAULT_NUM_DISTINCT, GETSTRUCT(), HeapTupleIsValid, IsA, VariableStatData::isunique, VariableStatData::rel, RTE_VALUES, RelOptInfo::rtekind, SelfItemPointerAttributeNumber, VariableStatData::statsTuple, TableOidAttributeNumber, RelOptInfo::tuples, VariableStatData::var, and VariableStatData::vartype.

Referenced by add_unique_group_var(), btcostestimate(), eqjoinsel(), estimate_hash_bucket_stats(), ineq_histogram_selectivity(), var_eq_const(), and var_eq_non_const().

◆ histogram_selectivity()

double histogram_selectivity	(	VariableStatData *	vardata,
		FmgrInfo *	opproc,
		Oid	collation,
		Datum	constval,
		bool	varonleft,
		int	min_hist_size,
		int	n_skip,
		int *	hist_size
	)

Definition at line 828 of file selfuncs.c.

{
    AttStatsSlot sslot;
    double      result = -1;    /* -1 signals "no usable histogram" */

    /* check sanity of parameters */
    Assert(n_skip >= 0);
    Assert(min_hist_size > 2 * n_skip);

    /*
     * Bail out unless we have a stats tuple, the operator's function passes
     * the security check, and a histogram slot can be fetched.
     */
    if (!HeapTupleIsValid(vardata->statsTuple) ||
        !statistic_proc_security_check(vardata, opproc->fn_oid) ||
        !get_attstatsslot(&sslot, vardata->statsTuple,
                          STATISTIC_KIND_HISTOGRAM, InvalidOid,
                          ATTSTATSSLOT_VALUES))
    {
        *hist_size = 0;
        return result;
    }

    /* The actual histogram size is reported even if it proves too small. */
    *hist_size = sslot.nvalues;

    if (sslot.nvalues >= min_hist_size)
    {
        LOCAL_FCINFO(fcinfo, 2);
        /* be careful to apply operator right way 'round */
        int         varslot = varonleft ? 0 : 1;
        int         constslot = 1 - varslot;
        int         stop = sslot.nvalues - n_skip;
        int         nmatch = 0;
        int         i;

        /*
         * The opproc is invoked "by hand" so that NULL results don't cause
         * a failure.  Normal comparison functions never return NULL, but
         * generic_restriction_selectivity could perhaps be used with
         * operators that do.  As a small bonus, the fcinfo struct is set up
         * only once rather than for every call.
         */
        InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
                                 NULL, NULL);
        fcinfo->args[0].isnull = false;
        fcinfo->args[1].isnull = false;
        fcinfo->args[constslot].value = constval;

        /* Test each histogram entry, skipping n_skip entries at both ends. */
        for (i = n_skip; i < stop; i++)
        {
            Datum       fresult;

            fcinfo->args[varslot].value = sslot.values[i];
            fcinfo->isnull = false;
            fresult = FunctionCallInvoke(fcinfo);
            if (!fcinfo->isnull && DatumGetBool(fresult))
                nmatch++;
        }

        /* Fraction of the examined entries that satisfied the operator. */
        result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
    }

    free_attstatsslot(&sslot);

    return result;
}

References Assert(), ATTSTATSSLOT_VALUES, DatumGetBool(), FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nvalues, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), and patternsel_common().

◆ index_other_operands_eval_cost()

Cost index_other_operands_eval_cost	(	PlannerInfo *	root,
		List *	indexquals
	)

Definition at line 6881 of file selfuncs.c.

{
    Cost        total = 0;
    ListCell   *cell;

    foreach(cell, indexquals)
    {
        Expr       *qual = (Expr *) lfirst(cell);
        Node       *operand = NULL; /* stays NULL for NullTest */
        QualCost    operand_cost;

        /*
         * Index quals arrive wrapped in RestrictInfos, indexorderbys don't.
         * Unwrap a RestrictInfo when one is present.
         */
        if (IsA(qual, RestrictInfo))
            qual = ((RestrictInfo *) qual)->clause;

        /* Pick out the non-index operand for each supported clause type. */
        if (IsA(qual, OpExpr))
            operand = (Node *) lsecond(((OpExpr *) qual)->args);
        else if (IsA(qual, RowCompareExpr))
            operand = (Node *) ((RowCompareExpr *) qual)->rargs;
        else if (IsA(qual, ScalarArrayOpExpr))
            operand = (Node *) lsecond(((ScalarArrayOpExpr *) qual)->args);
        else if (!IsA(qual, NullTest))
            elog(ERROR, "unsupported indexqual type: %d",
                 (int) nodeTag(qual));

        /* Charge both the startup and per-tuple cost of the operand. */
        cost_qual_eval_node(&operand_cost, operand, root);
        total += operand_cost.startup + operand_cost.per_tuple;
    }

    return total;
}

References OpExpr::args, ScalarArrayOpExpr::args, cost_qual_eval_node(), elog, ERROR, IsA, lfirst, lsecond, nodeTag, QualCost::per_tuple, RowCompareExpr::rargs, root, and QualCost::startup.

Referenced by brincostestimate(), genericcostestimate(), and gincostestimate().

◆ ineq_histogram_selectivity()

double ineq_histogram_selectivity	(	PlannerInfo *	root,
		VariableStatData *	vardata,
		Oid	opoid,
		FmgrInfo *	opproc,
		bool	isgt,
		bool	iseq,
		Oid	collation,
		Datum	constval,
		Oid	consttype
	)

Definition at line 1046 of file selfuncs.c.

{
    /*
     * ineq_histogram_selectivity
     *
     * Estimate the selectivity of an inequality "var OP constval" using the
     * histogram stored in vardata's statistics tuple.
     *
     * root       - planner context, handed to get_actual_variable_range()
     * vardata    - statistics for the variable being compared
     * opoid      - OID of the inequality operator; checked for compatibility
     *              with the histogram's sort operator
     * opproc     - lookup info used to invoke the operator on histogram
     *              entries; its fn_oid is also passed to
     *              statistic_proc_security_check()
     * isgt       - true when estimating for ">" or ">="
     * iseq       - true when the operator also accepts equality ("<=", ">=")
     * collation  - collation to apply; must equal the histogram's stacoll
     *              for the binary-search path to be used
     * constval   - the comparison constant
     * consttype  - type OID of constval, passed to convert_to_scalar()
     *
     * Returns the estimated fraction of the histogram population satisfying
     * the clause, or -1.0 if no estimate could be made (no stats tuple, the
     * security check failed, no histogram slot, or fewer than two histogram
     * entries).
     */
    double      hist_selec;
    AttStatsSlot sslot;

    /* Stays at -1.0 unless one of the histogram paths below overwrites it */
    hist_selec = -1.0;

    /*
     * Someday, ANALYZE might store more than one histogram per rel/att,
     * corresponding to more than one possible sort ordering defined for the
     * column type.  Right now, we know there is only one, so just grab it and
     * see if it matches the query.
     *
     * Note that we can't use opoid as search argument; the staop appearing in
     * pg_statistic will be for the relevant '<' operator, but what we have
     * might be some other inequality operator such as '>='.  (Even if opoid
     * is a '<' operator, it could be cross-type.)  Hence we must use
     * comparison_ops_are_compatible() to see if the operators match.
     */
    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&sslot, vardata->statsTuple,
                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
                         ATTSTATSSLOT_VALUES))
    {
        if (sslot.nvalues > 1 &&
            sslot.stacoll == collation &&
            comparison_ops_are_compatible(sslot.staop, opoid))
        {
            /*
             * Use binary search to find the desired location, namely the
             * right end of the histogram bin containing the comparison value,
             * which is the leftmost entry for which the comparison operator
             * succeeds (if isgt) or fails (if !isgt).
             *
             * In this loop, we pay no attention to whether the operator iseq
             * or not; that detail will be mopped up below.  (We cannot tell,
             * anyway, whether the operator thinks the values are equal.)
             *
             * If the binary search accesses the first or last histogram
             * entry, we try to replace that endpoint with the true column min
             * or max as found by get_actual_variable_range().  This
             * ameliorates misestimates when the min or max is moving as a
             * result of changes since the last ANALYZE.  Note that this could
             * result in effectively including MCVs into the histogram that
             * weren't there before, but we don't try to correct for that.
             */
            double      histfrac;
            int         lobound = 0;    /* first possible slot to search */
            int         hibound = sslot.nvalues;    /* last+1 slot to search */
            bool        have_end = false;

            /*
             * If there are only two histogram entries, we'll want up-to-date
             * values for both.  (If there are more than two, we need at most
             * one of them to be updated, so we deal with that within the
             * loop.)
             */
            if (sslot.nvalues == 2)
                have_end = get_actual_variable_range(root,
                                                     vardata,
                                                     sslot.staop,
                                                     collation,
                                                     &sslot.values[0],
                                                     &sslot.values[1]);

            while (lobound < hibound)
            {
                int         probe = (lobound + hibound) / 2;
                bool        ltcmp;

                /*
                 * If we find ourselves about to compare to the first or last
                 * histogram entry, first try to replace it with the actual
                 * current min or max (unless we already did so above).
                 */
                if (probe == 0 && sslot.nvalues > 2)
                    have_end = get_actual_variable_range(root,
                                                         vardata,
                                                         sslot.staop,
                                                         collation,
                                                         &sslot.values[0],
                                                         NULL);
                else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
                    have_end = get_actual_variable_range(root,
                                                         vardata,
                                                         sslot.staop,
                                                         collation,
                                                         NULL,
                                                         &sslot.values[probe]);

                /* Apply the operator as "histogram entry OP constval" */
                ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
                                                       collation,
                                                       sslot.values[probe],
                                                       constval));
                /* For ">" / ">=", invert so the search direction is uniform */
                if (isgt)
                    ltcmp = !ltcmp;
                if (ltcmp)
                    lobound = probe + 1;
                else
                    hibound = probe;
            }

            if (lobound <= 0)
            {
                /*
                 * Constant is below lower histogram boundary.  More
                 * precisely, we have found that no entry in the histogram
                 * satisfies the inequality clause (if !isgt) or they all do
                 * (if isgt).  We estimate that that's true of the entire
                 * table, so set histfrac to 0.0 (which we'll flip to 1.0
                 * below, if isgt).
                 */
                histfrac = 0.0;
            }
            else if (lobound >= sslot.nvalues)
            {
                /*
                 * Inverse case: constant is above upper histogram boundary.
                 */
                histfrac = 1.0;
            }
            else
            {
                /* We have values[i-1] <= constant <= values[i]. */
                int         i = lobound;
                double      eq_selec = 0;
                double      val,
                            high,
                            low;
                double      binfrac;

                /*
                 * In the cases where we'll need it below, obtain an estimate
                 * of the selectivity of "x = constval".  We use a calculation
                 * similar to what var_eq_const() does for a non-MCV constant,
                 * ie, estimate that all distinct non-MCV values occur equally
                 * often.  But multiplication by "1.0 - sumcommon - nullfrac"
                 * will be done by our caller, so we shouldn't do that here.
                 * Therefore we can't try to clamp the estimate by reference
                 * to the least common MCV; the result would be too small.
                 *
                 * Note: since this is effectively assuming that constval
                 * isn't an MCV, it's logically dubious if constval in fact is
                 * one.  But we have to apply *some* correction for equality,
                 * and anyway we cannot tell if constval is an MCV, since we
                 * don't have a suitable equality operator at hand.
                 */
                if (i == 1 || isgt == iseq)
                {
                    double      otherdistinct;
                    bool        isdefault;
                    AttStatsSlot mcvslot;

                    /* Get estimated number of distinct values */
                    otherdistinct = get_variable_numdistinct(vardata,
                                                             &isdefault);

                    /* Subtract off the number of known MCVs */
                    if (get_attstatsslot(&mcvslot, vardata->statsTuple,
                                         STATISTIC_KIND_MCV, InvalidOid,
                                         ATTSTATSSLOT_NUMBERS))
                    {
                        otherdistinct -= mcvslot.nnumbers;
                        free_attstatsslot(&mcvslot);
                    }

                    /* If result doesn't seem sane, leave eq_selec at 0 */
                    if (otherdistinct > 1)
                        eq_selec = 1.0 / otherdistinct;
                }

                /*
                 * Convert the constant and the two nearest bin boundary
                 * values to a uniform comparison scale, and do a linear
                 * interpolation within this bin.
                 */
                if (convert_to_scalar(constval, consttype, collation,
                                      &val,
                                      sslot.values[i - 1], sslot.values[i],
                                      vardata->vartype,
                                      &low, &high))
                {
                    if (high <= low)
                    {
                        /* cope if bin boundaries appear identical */
                        binfrac = 0.5;
                    }
                    else if (val <= low)
                        binfrac = 0.0;
                    else if (val >= high)
                        binfrac = 1.0;
                    else
                    {
                        binfrac = (val - low) / (high - low);

                        /*
                         * Watch out for the possibility that we got a NaN or
                         * Infinity from the division.  This can happen
                         * despite the previous checks, if for example "low"
                         * is -Infinity.
                         */
                        if (isnan(binfrac) ||
                            binfrac < 0.0 || binfrac > 1.0)
                            binfrac = 0.5;
                    }
                }
                else
                {
                    /*
                     * Ideally we'd produce an error here, on the grounds that
                     * the given operator shouldn't have scalarXXsel
                     * registered as its selectivity func unless we can deal
                     * with its operand types.  But currently, all manner of
                     * stuff is invoking scalarXXsel, so give a default
                     * estimate until that can be fixed.
                     */
                    binfrac = 0.5;
                }

                /*
                 * Now, compute the overall selectivity across the values
                 * represented by the histogram.  We have i-1 full bins and
                 * binfrac partial bin below the constant.
                 */
                histfrac = (double) (i - 1) + binfrac;
                histfrac /= (double) (sslot.nvalues - 1);

                /*
                 * At this point, histfrac is an estimate of the fraction of
                 * the population represented by the histogram that satisfies
                 * "x <= constval".  Somewhat remarkably, this statement is
                 * true regardless of which operator we were doing the probes
                 * with, so long as convert_to_scalar() delivers reasonable
                 * results.  If the probe constant is equal to some histogram
                 * entry, we would have considered the bin to the left of that
                 * entry if probing with "<" or ">=", or the bin to the right
                 * if probing with "<=" or ">"; but binfrac would have come
                 * out as 1.0 in the first case and 0.0 in the second, leading
                 * to the same histfrac in either case.  For probe constants
                 * between histogram entries, we find the same bin and get the
                 * same estimate with any operator.
                 *
                 * The fact that the estimate corresponds to "x <= constval"
                 * and not "x < constval" is because of the way that ANALYZE
                 * constructs the histogram: each entry is, effectively, the
                 * rightmost value in its sample bucket.  So selectivity
                 * values that are exact multiples of 1/(histogram_size-1)
                 * should be understood as estimates including a histogram
                 * entry plus everything to its left.
                 *
                 * However, that breaks down for the first histogram entry,
                 * which necessarily is the leftmost value in its sample
                 * bucket.  That means the first histogram bin is slightly
                 * narrower than the rest, by an amount equal to eq_selec.
                 * Another way to say that is that we want "x <= leftmost" to
                 * be estimated as eq_selec not zero.  So, if we're dealing
                 * with the first bin (i==1), rescale to make that true while
                 * adjusting the rest of that bin linearly.
                 */
                if (i == 1)
                    histfrac += eq_selec * (1.0 - binfrac);

                /*
                 * "x <= constval" is good if we want an estimate for "<=" or
                 * ">", but if we are estimating for "<" or ">=", we now need
                 * to decrease the estimate by eq_selec.
                 */
                if (isgt == iseq)
                    histfrac -= eq_selec;
            }

            /*
             * Now the estimate is finished for "<" and "<=" cases.  If we are
             * estimating for ">" or ">=", flip it.
             */
            hist_selec = isgt ? (1.0 - histfrac) : histfrac;

            /*
             * The histogram boundaries are only approximate to begin with,
             * and may well be out of date anyway.  Therefore, don't believe
             * extremely small or large selectivity estimates --- unless we
             * got actual current endpoint values from the table, in which
             * case just do the usual sanity clamp.  Somewhat arbitrarily, we
             * set the cutoff for other cases at a hundredth of the histogram
             * resolution.
             */
            if (have_end)
                CLAMP_PROBABILITY(hist_selec);
            else
            {
                double      cutoff = 0.01 / (double) (sslot.nvalues - 1);

                if (hist_selec < cutoff)
                    hist_selec = cutoff;
                else if (hist_selec > 1.0 - cutoff)
                    hist_selec = 1.0 - cutoff;
            }
        }
        else if (sslot.nvalues > 1)
        {
            /*
             * If we get here, we have a histogram but it's not sorted the way
             * we want.  Do a brute-force search to see how many of the
             * entries satisfy the comparison condition, and take that
             * fraction as our estimate.  (This is identical to the inner loop
             * of histogram_selectivity; maybe share code?)
             */
            LOCAL_FCINFO(fcinfo, 2);
            int         nmatch = 0;

            /*
             * Set up the call info once; only the first argument (the
             * histogram entry) changes per iteration.  The operator is always
             * applied as "histogram entry OP constval".
             */
            InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
                                     NULL, NULL);
            fcinfo->args[0].isnull = false;
            fcinfo->args[1].isnull = false;
            fcinfo->args[1].value = constval;
            for (int i = 0; i < sslot.nvalues; i++)
            {
                Datum       fresult;

                fcinfo->args[0].value = sslot.values[i];
                fcinfo->isnull = false;
                fresult = FunctionCallInvoke(fcinfo);
                /* A NULL result is counted as a non-match */
                if (!fcinfo->isnull && DatumGetBool(fresult))
                    nmatch++;
            }
            hist_selec = ((double) nmatch) / ((double) sslot.nvalues);

            /*
             * As above, clamp to a hundredth of the histogram resolution.
             * This case is surely even less trustworthy than the normal one,
             * so we shouldn't believe exact 0 or 1 selectivity.  (Maybe the
             * clamp should be more restrictive in this case?)
             */
            {
                double      cutoff = 0.01 / (double) (sslot.nvalues - 1);

                if (hist_selec < cutoff)
                    hist_selec = cutoff;
                else if (hist_selec > 1.0 - cutoff)
                    hist_selec = 1.0 - cutoff;
            }
        }

        /* Release the histogram slot whether or not it was usable */
        free_attstatsslot(&sslot);
    }

    return hist_selec;
}

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, comparison_ops_are_compatible(), convert_to_scalar(), DatumGetBool(), FmgrInfo::fn_oid, free_attstatsslot(), FunctionCall2Coll(), FunctionCallInvoke, get_actual_variable_range(), get_attstatsslot(), get_variable_numdistinct(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::nvalues, root, AttStatsSlot::stacoll, AttStatsSlot::staop, statistic_proc_security_check(), VariableStatData::statsTuple, val, AttStatsSlot::values, and VariableStatData::vartype.

Referenced by prefix_selectivity() and scalarineqsel().

◆ mcv_selectivity()

double mcv_selectivity	(	VariableStatData *	vardata,
		FmgrInfo *	opproc,
		Oid	collation,
		Datum	constval,
		bool	varonleft,
		double *	sumcommonp
	)

Definition at line 737 of file selfuncs.c.

{
    /*
     * Scan the most-common-values list for the variable and apply the
     * operator to each entry against constval.
     *
     * Returns the summed frequency of the MCV entries for which the operator
     * yields true; *sumcommonp receives the summed frequency of all MCV
     * entries.  Both are 0.0 when there is no stats tuple, the security
     * check fails, or no MCV slot is available.
     */
    double      matched_freq = 0.0;
    double      total_freq = 0.0;
    AttStatsSlot mcv;

    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&mcv, vardata->statsTuple,
                         STATISTIC_KIND_MCV, InvalidOid,
                         ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
    {
        LOCAL_FCINFO(fcinfo, 2);

        /*
         * The operator must see its operands in the right order: the
         * variable's value goes in the slot matching its side of the clause,
         * the constant in the other one.
         */
        const int   var_slot = varonleft ? 0 : 1;
        const int   const_slot = varonleft ? 1 : 0;

        /*
         * The function is called through a hand-built fcinfo rather than a
         * FunctionCall wrapper so that a NULL result can be tolerated
         * (ordinary comparison functions never return NULL, but
         * generic_restriction_selectivity might be used with operators that
         * can).  It also means the struct is initialized only once.
         */
        InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
                                 NULL, NULL);
        fcinfo->args[0].isnull = false;
        fcinfo->args[1].isnull = false;
        fcinfo->args[const_slot].value = constval;

        for (int k = 0; k < mcv.nvalues; k++)
        {
            Datum       verdict;

            fcinfo->args[var_slot].value = mcv.values[k];
            fcinfo->isnull = false;
            verdict = FunctionCallInvoke(fcinfo);

            /* a NULL result counts as "no match" */
            if (!fcinfo->isnull && DatumGetBool(verdict))
                matched_freq += mcv.numbers[k];
            total_freq += mcv.numbers[k];
        }

        free_attstatsslot(&mcv);
    }

    *sumcommonp = total_freq;
    return matched_freq;
}

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, DatumGetBool(), FmgrInfo::fn_oid, free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, LOCAL_FCINFO, AttStatsSlot::numbers, AttStatsSlot::nvalues, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by generic_restriction_selectivity(), networksel(), patternsel_common(), and scalarineqsel().

◆ mergejoinscansel()

void mergejoinscansel	(	PlannerInfo *	root,
		Node *	clause,
		Oid	opfamily,
		CompareType	cmptype,
		bool	nulls_first,
		Selectivity *	leftstart,
		Selectivity *	leftend,
		Selectivity *	rightstart,
		Selectivity *	rightend
	)

Definition at line 2960 of file selfuncs.c.

{
    /*
     * mergejoinscansel
     *
     * Estimate, for a mergejoin clause, what fraction of each input will be
     * scanned.
     *
     * root        - planner context
     * clause      - the merge clause; expected to be a binary operator clause
     * opfamily    - operator family used to look up the comparison operators
     * cmptype     - COMPARE_LT or COMPARE_GT (the latter being the
     *               descending-order case); anything else leaves the default
     *               outputs in place
     * nulls_first - if true, each input's null fraction is added to its
     *               start and end fractions
     *
     * Outputs:
     * *leftstart / *rightstart - fraction of that input scanned before the
     *               first join pair is found (default 0.0)
     * *leftend / *rightend     - fraction of that input scanned in total
     *               (default 1.0)
     *
     * The defaults are returned whenever the clause cannot be analyzed, a
     * needed operator is missing from the opfamily, or no value range is
     * available from statistics.
     */
    Node       *left,
               *right;
    VariableStatData leftvar,
                rightvar;
    Oid         opmethod;
    int         op_strategy;
    Oid         op_lefttype;
    Oid         op_righttype;
    Oid         opno,
                collation,
                lsortop,
                rsortop,
                lstatop,
                rstatop,
                ltop,
                leop,
                revltop,
                revleop;
    StrategyNumber ltstrat,
                lestrat,
                gtstrat,
                gestrat;
    bool        isgt;
    Datum       leftmin,
                leftmax,
                rightmin,
                rightmax;
    double      selec;

    /* Set default results if we can't figure anything out. */
    /* XXX should default "start" fraction be a bit more than 0? */
    *leftstart = *rightstart = 0.0;
    *leftend = *rightend = 1.0;

    /*
     * Deconstruct the merge clause.  The early returns here happen before
     * examine_variable() is called, so there is nothing to release yet.
     */
    if (!is_opclause(clause))
        return;                 /* shouldn't happen */
    opno = ((OpExpr *) clause)->opno;
    collation = ((OpExpr *) clause)->inputcollid;
    left = get_leftop((Expr *) clause);
    right = get_rightop((Expr *) clause);
    if (!right)
        return;                 /* shouldn't happen */

    /*
     * Look for stats for the inputs.  From here on, every exit must go
     * through the "fail" label so that both are released.
     */
    examine_variable(root, left, 0, &leftvar);
    examine_variable(root, right, 0, &rightvar);

    opmethod = get_opfamily_method(opfamily);

    /* Extract the operator's declared left/right datatypes */
    get_op_opfamily_properties(opno, opfamily, false,
                               &op_strategy,
                               &op_lefttype,
                               &op_righttype);
    /* The clause's operator is expected to be the opfamily's equality member */
    Assert(IndexAmTranslateStrategy(op_strategy, opmethod, opfamily, true) == COMPARE_EQ);

    /*
     * Look up the various operators we need.  If we don't find them all, it
     * probably means the opfamily is broken, but we just fail silently.
     *
     * Note: we expect that pg_statistic histograms will be sorted by the '<'
     * operator, regardless of which sort direction we are considering.
     *
     * In both cases below, ltop/leop take (lefttype, righttype) operands,
     * revltop/revleop take (righttype, lefttype) operands, lsortop/rsortop
     * are the single-type operators for each side, and lstatop/rstatop are
     * the single-type '<' operators later passed to get_variable_range().
     */
    switch (cmptype)
    {
        case COMPARE_LT:
            isgt = false;
            ltstrat = IndexAmTranslateCompareType(COMPARE_LT, opmethod, opfamily, true);
            lestrat = IndexAmTranslateCompareType(COMPARE_LE, opmethod, opfamily, true);
            if (op_lefttype == op_righttype)
            {
                /* easy case */
                ltop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           ltstrat);
                leop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           lestrat);
                lsortop = ltop;
                rsortop = ltop;
                lstatop = lsortop;
                rstatop = rsortop;
                revltop = ltop;
                revleop = leop;
            }
            else
            {
                /* cross-type case: each operand-type pairing needs a lookup */
                ltop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           ltstrat);
                leop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           lestrat);
                lsortop = get_opfamily_member(opfamily,
                                              op_lefttype, op_lefttype,
                                              ltstrat);
                rsortop = get_opfamily_member(opfamily,
                                              op_righttype, op_righttype,
                                              ltstrat);
                lstatop = lsortop;
                rstatop = rsortop;
                revltop = get_opfamily_member(opfamily,
                                              op_righttype, op_lefttype,
                                              ltstrat);
                revleop = get_opfamily_member(opfamily,
                                              op_righttype, op_lefttype,
                                              lestrat);
            }
            break;
        case COMPARE_GT:
            /* descending-order case */
            isgt = true;
            ltstrat = IndexAmTranslateCompareType(COMPARE_LT, opmethod, opfamily, true);
            gtstrat = IndexAmTranslateCompareType(COMPARE_GT, opmethod, opfamily, true);
            gestrat = IndexAmTranslateCompareType(COMPARE_GE, opmethod, opfamily, true);
            if (op_lefttype == op_righttype)
            {
                /* easy case */
                ltop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           gtstrat);
                leop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           gestrat);
                lsortop = ltop;
                rsortop = ltop;
                /* the stats operator is still '<', even though we sort by '>' */
                lstatop = get_opfamily_member(opfamily,
                                              op_lefttype, op_lefttype,
                                              ltstrat);
                rstatop = lstatop;
                revltop = ltop;
                revleop = leop;
            }
            else
            {
                /* cross-type case: each operand-type pairing needs a lookup */
                ltop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           gtstrat);
                leop = get_opfamily_member(opfamily,
                                           op_lefttype, op_righttype,
                                           gestrat);
                lsortop = get_opfamily_member(opfamily,
                                              op_lefttype, op_lefttype,
                                              gtstrat);
                rsortop = get_opfamily_member(opfamily,
                                              op_righttype, op_righttype,
                                              gtstrat);
                lstatop = get_opfamily_member(opfamily,
                                              op_lefttype, op_lefttype,
                                              ltstrat);
                rstatop = get_opfamily_member(opfamily,
                                              op_righttype, op_righttype,
                                              ltstrat);
                revltop = get_opfamily_member(opfamily,
                                              op_righttype, op_lefttype,
                                              gtstrat);
                revleop = get_opfamily_member(opfamily,
                                              op_righttype, op_lefttype,
                                              gestrat);
            }
            break;
        default:
            goto fail;          /* shouldn't get here */
    }

    if (!OidIsValid(lsortop) ||
        !OidIsValid(rsortop) ||
        !OidIsValid(lstatop) ||
        !OidIsValid(rstatop) ||
        !OidIsValid(ltop) ||
        !OidIsValid(leop) ||
        !OidIsValid(revltop) ||
        !OidIsValid(revleop))
        goto fail;              /* insufficient info in catalogs */

    /* Try to get ranges of both inputs */
    if (!isgt)
    {
        if (!get_variable_range(root, &leftvar, lstatop, collation,
                                &leftmin, &leftmax))
            goto fail;          /* no range available from stats */
        if (!get_variable_range(root, &rightvar, rstatop, collation,
                                &rightmin, &rightmax))
            goto fail;          /* no range available from stats */
    }
    else
    {
        /* need to swap the max and min */
        if (!get_variable_range(root, &leftvar, lstatop, collation,
                                &leftmax, &leftmin))
            goto fail;          /* no range available from stats */
        if (!get_variable_range(root, &rightvar, rstatop, collation,
                                &rightmax, &rightmin))
            goto fail;          /* no range available from stats */
    }

    /*
     * Now, the fraction of the left variable that will be scanned is the
     * fraction that's <= the right-side maximum value.  But only believe
     * non-default estimates, else stick with our 1.0.
     */
    selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
                          rightmax, op_righttype);
    if (selec != DEFAULT_INEQ_SEL)
        *leftend = selec;

    /* And similarly for the right variable. */
    selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
                          leftmax, op_lefttype);
    if (selec != DEFAULT_INEQ_SEL)
        *rightend = selec;

    /*
     * Only one of the two "end" fractions can really be less than 1.0;
     * believe the smaller estimate and reset the other one to exactly 1.0. If
     * we get exactly equal estimates (as can easily happen with self-joins),
     * believe neither.
     */
    if (*leftend > *rightend)
        *leftend = 1.0;
    else if (*leftend < *rightend)
        *rightend = 1.0;
    else
        *leftend = *rightend = 1.0;

    /*
     * Also, the fraction of the left variable that will be scanned before the
     * first join pair is found is the fraction that's < the right-side
     * minimum value.  But only believe non-default estimates, else stick with
     * our own default.
     */
    selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
                          rightmin, op_righttype);
    if (selec != DEFAULT_INEQ_SEL)
        *leftstart = selec;

    /* And similarly for the right variable. */
    selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
                          leftmin, op_lefttype);
    if (selec != DEFAULT_INEQ_SEL)
        *rightstart = selec;

    /*
     * Only one of the two "start" fractions can really be more than zero;
     * believe the larger estimate and reset the other one to exactly 0.0. If
     * we get exactly equal estimates (as can easily happen with self-joins),
     * believe neither.
     */
    if (*leftstart < *rightstart)
        *leftstart = 0.0;
    else if (*leftstart > *rightstart)
        *rightstart = 0.0;
    else
        *leftstart = *rightstart = 0.0;

    /*
     * If the sort order is nulls-first, we're going to have to skip over any
     * nulls too.  These would not have been counted by scalarineqsel, and we
     * can safely add in this fraction regardless of whether we believe
     * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
     */
    if (nulls_first)
    {
        Form_pg_statistic stats;

        if (HeapTupleIsValid(leftvar.statsTuple))
        {
            stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
            *leftstart += stats->stanullfrac;
            CLAMP_PROBABILITY(*leftstart);
            *leftend += stats->stanullfrac;
            CLAMP_PROBABILITY(*leftend);
        }
        if (HeapTupleIsValid(rightvar.statsTuple))
        {
            stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
            *rightstart += stats->stanullfrac;
            CLAMP_PROBABILITY(*rightstart);
            *rightend += stats->stanullfrac;
            CLAMP_PROBABILITY(*rightend);
        }
    }

    /* Disbelieve start >= end, just in case that can happen */
    if (*leftstart >= *leftend)
    {
        *leftstart = 0.0;
        *leftend = 1.0;
    }
    if (*rightstart >= *rightend)
    {
        *rightstart = 0.0;
        *rightend = 1.0;
    }

    /*
     * Common exit, reached on success as well as via "goto fail".  Every
     * "goto fail" above occurs before any output is changed from its
     * default, so a failure returns the defaults.
     */
fail:
    ReleaseVariableStats(leftvar);
    ReleaseVariableStats(rightvar);
}

References Assert(), CLAMP_PROBABILITY, COMPARE_EQ, COMPARE_GE, COMPARE_GT, COMPARE_LE, COMPARE_LT, DEFAULT_INEQ_SEL, examine_variable(), get_leftop(), get_op_opfamily_properties(), get_opfamily_member(), get_opfamily_method(), get_rightop(), get_variable_range(), GETSTRUCT(), HeapTupleIsValid, IndexAmTranslateCompareType(), IndexAmTranslateStrategy(), is_opclause(), OidIsValid, ReleaseVariableStats, root, scalarineqsel(), and VariableStatData::statsTuple.

Referenced by cached_scansel().

◆ nulltestsel()

Selectivity nulltestsel	(	PlannerInfo *	root,
		NullTestType	nulltesttype,
		Node *	arg,
		int	varRelid,
		JoinType	jointype,
		SpecialJoinInfo *	sjinfo
	)

Definition at line 1703 of file selfuncs.c.

{
    /*
     * Estimate the selectivity of "arg IS NULL" / "arg IS NOT NULL".
     *
     * The estimate comes from, in order of preference: the null fraction in
     * the argument's statistics tuple, the knowledge that system columns are
     * never NULL, or the DEFAULT_UNK_SEL / DEFAULT_NOT_UNK_SEL guesses.
     * jointype and sjinfo are not consulted here.
     */
    VariableStatData vardata;
    double      selec;

    examine_variable(root, arg, varRelid, &vardata);

    if (HeapTupleIsValid(vardata.statsTuple))
    {
        /* Statistics exist: the stored null fraction answers the question. */
        Form_pg_statistic pgstat = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
        double      nullfrac = pgstat->stanullfrac;

        if (nulltesttype == IS_NULL)
            selec = nullfrac;
        else if (nulltesttype == IS_NOT_NULL)
            selec = 1.0 - nullfrac;
        else
        {
            elog(ERROR, "unrecognized nulltesttype: %d",
                 (int) nulltesttype);
            return (Selectivity) 0; /* keep compiler quiet */
        }
    }
    else if (vardata.var && IsA(vardata.var, Var) &&
             ((Var *) vardata.var)->varattno < 0)
    {
        /*
         * A plain Var with a negative attribute number is a system column.
         * Those have no stats, but they are never NULL.
         */
        if (nulltesttype == IS_NULL)
            selec = 0.0;
        else
            selec = 1.0;
    }
    else
    {
        /* Nothing is known about the argument: use the default guesses. */
        if (nulltesttype == IS_NULL)
            selec = DEFAULT_UNK_SEL;
        else if (nulltesttype == IS_NOT_NULL)
            selec = DEFAULT_NOT_UNK_SEL;
        else
        {
            elog(ERROR, "unrecognized nulltesttype: %d",
                 (int) nulltesttype);
            return (Selectivity) 0; /* keep compiler quiet */
        }
    }

    ReleaseVariableStats(vardata);

    /* The value ought to be a valid probability already; enforce it anyway. */
    CLAMP_PROBABILITY(selec);

    return (Selectivity) selec;
}

References arg, CLAMP_PROBABILITY, DEFAULT_NOT_UNK_SEL, DEFAULT_UNK_SEL, elog, ERROR, examine_variable(), GETSTRUCT(), HeapTupleIsValid, IS_NOT_NULL, IS_NULL, IsA, ReleaseVariableStats, root, VariableStatData::statsTuple, and VariableStatData::var.

Referenced by clause_selectivity_ext(), and clauselist_selectivity_ext().

◆ rowcomparesel()

Selectivity rowcomparesel	(	PlannerInfo *	root,
		RowCompareExpr *	clause,
		int	varRelid,
		JoinType	jointype,
		SpecialJoinInfo *	sjinfo
	)

Definition at line 2210 of file selfuncs.c.

{
    /*
     * Estimate the selectivity of a row comparison by looking only at its
     * leading column pair: the first operator, first input collation, and
     * first left/right arguments are handed to the ordinary single-operator
     * estimators.
     */
    Oid         leadop = linitial_oid(clause->opnos);
    Oid         leadcollid = linitial_oid(clause->inputcollids);
    List       *leadargs;
    bool        treat_as_join;

    /* Two-element argument list equivalent to "larg1 op rarg1" */
    leadargs = list_make2(linitial(clause->largs), linitial(clause->rargs));

    /*
     * Join-vs-restriction decision.  This should match clausesel.c's
     * treat_as_join_clause(), except that only the leading columns are
     * examined.  It is a restriction clause if the caller forces restriction
     * mode (varRelid != 0, eg for an inner indexscan qual) or if there is no
     * SpecialJoinInfo (evaluation at a scan node); otherwise it is a join
     * clause exactly when more than one base relation is referenced.  The
     * short-circuit evaluation skips NumRelids() in the first two cases.
     */
    treat_as_join = (varRelid == 0 &&
                     sjinfo != NULL &&
                     NumRelids(root, (Node *) leadargs) > 1);

    if (treat_as_join)
        return join_selectivity(root, leadop,
                                leadargs,
                                leadcollid,
                                jointype,
                                sjinfo);

    return restriction_selectivity(root, leadop,
                                   leadargs,
                                   leadcollid,
                                   varRelid);
}

References join_selectivity(), RowCompareExpr::largs, linitial, linitial_oid, list_make2, NumRelids(), RowCompareExpr::rargs, restriction_selectivity(), root, and s1.

Referenced by clause_selectivity_ext().

◆ scalararraysel()

Selectivity scalararraysel	(	PlannerInfo *	root,
		ScalarArrayOpExpr *	clause,
		bool	is_join_clause,
		int	varRelid,
		JoinType	jointype,
		SpecialJoinInfo *	sjinfo
	)

Definition at line 1821 of file selfuncs.c.

{
    /*
     * Estimate the selectivity of "scalar op ANY/ALL (array)".
     *
     * The approach is: (1) for the element type's default equality operator
     * or its negator in a non-join clause, try the array-containment
     * estimator; (2) otherwise look up the operator's own restriction or
     * join estimator (returning 0.5 if it has none) and combine per-element
     * estimates the way OR (ANY) or AND (ALL) clauses are combined, using a
     * disjoint-probability sum for equality-like operators when that sum
     * stays within [0, 1].
     */
    Oid         operator = clause->opno;
    bool        useOr = clause->useOr;
    bool        isEquality = false;
    bool        isInequality = false;
    Node       *leftop;
    Node       *rightop;
    Oid         nominal_element_type;
    Oid         nominal_element_collation;
    TypeCacheEntry *typentry;
    RegProcedure oprsel;
    FmgrInfo    oprselproc;
    Selectivity s1;
    Selectivity s1disjoint;

    /* First, deconstruct the expression */
    Assert(list_length(clause->args) == 2);
    leftop = (Node *) linitial(clause->args);
    rightop = (Node *) lsecond(clause->args);

    /* aggressively reduce both sides to constants */
    leftop = estimate_expression_value(root, leftop);
    rightop = estimate_expression_value(root, rightop);

    /* get nominal (after relabeling) element type of rightop */
    nominal_element_type = get_base_element_type(exprType(rightop));
    if (!OidIsValid(nominal_element_type))
        return (Selectivity) 0.5;   /* probably shouldn't happen */
    /* get nominal collation, too, for generating constants */
    nominal_element_collation = exprCollation(rightop);

    /* look through any binary-compatible relabeling of rightop */
    rightop = strip_array_coercion(rightop);

    /*
     * Detect whether the operator is the default equality or inequality
     * operator of the array element type.
     */
    typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
    if (OidIsValid(typentry->eq_opr))
    {
        if (operator == typentry->eq_opr)
            isEquality = true;
        else if (get_negator(operator) == typentry->eq_opr)
            isInequality = true;
    }

    /*
     * If it is equality or inequality, we might be able to estimate this as a
     * form of array containment; for instance "const = ANY(column)" can be
     * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
     * that, and returns the selectivity estimate if successful, or -1 if not.
     */
    if ((isEquality || isInequality) && !is_join_clause)
    {
        s1 = scalararraysel_containment(root, leftop, rightop,
                                        nominal_element_type,
                                        isEquality, useOr, varRelid);
        if (s1 >= 0.0)
            return s1;
    }

    /*
     * Look up the underlying operator's selectivity estimator. Punt if it
     * hasn't got one.
     */
    if (is_join_clause)
        oprsel = get_oprjoin(operator);
    else
        oprsel = get_oprrest(operator);
    if (!oprsel)
        return (Selectivity) 0.5;
    fmgr_info(oprsel, &oprselproc);

    /*
     * In the array-containment check above, we must only believe that an
     * operator is equality or inequality if it is the default btree equality
     * operator (or its negator) for the element type, since those are the
     * operators that array containment will use.  But in what follows, we can
     * be a little laxer, and also believe that any operators using eqsel() or
     * neqsel() as selectivity estimator act like equality or inequality.
     */
    if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
        isEquality = true;
    else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
        isInequality = true;

    /*
     * We consider three cases:
     *
     * 1. rightop is an Array constant: deconstruct the array, apply the
     * operator's selectivity function for each array element, and merge the
     * results in the same way that clausesel.c does for AND/OR combinations.
     *
     * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
     * function for each element of the ARRAY[] construct, and merge.
     *
     * 3. otherwise, make a guess ...
     */
    if (rightop && IsA(rightop, Const))
    {
        Datum       arraydatum = ((Const *) rightop)->constvalue;
        bool        arrayisnull = ((Const *) rightop)->constisnull;
        ArrayType  *arrayval;
        int16       elmlen;
        bool        elmbyval;
        char        elmalign;
        int         num_elems;
        Datum      *elem_values;
        bool       *elem_nulls;
        int         i;

        if (arrayisnull)        /* qual can't succeed if null array */
            return (Selectivity) 0.0;
        arrayval = DatumGetArrayTypeP(arraydatum);
        get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
                             &elmlen, &elmbyval, &elmalign);
        deconstruct_array(arrayval,
                          ARR_ELEMTYPE(arrayval),
                          elmlen, elmbyval, elmalign,
                          &elem_values, &elem_nulls, &num_elems);

        /*
         * For generic operators, we assume the probability of success is
         * independent for each array element.  But for "= ANY" or "<> ALL",
         * if the array elements are distinct (which'd typically be the case)
         * then the probabilities are disjoint, and we should just sum them.
         *
         * If we were being really tense we would try to confirm that the
         * elements are all distinct, but that would be expensive and it
         * doesn't seem to be worth the cycles; it would amount to penalizing
         * well-written queries in favor of poorly-written ones.  However, we
         * do protect ourselves a little bit by checking whether the
         * disjointness assumption leads to an impossible (out of range)
         * probability; if so, we fall back to the normal calculation.
         */
        s1 = s1disjoint = (useOr ? 0.0 : 1.0);

        for (i = 0; i < num_elems; i++)
        {
            List       *args;
            Selectivity s2;

            /* Wrap the element as a Const so the estimator sees "leftop op const" */
            args = list_make2(leftop,
                              makeConst(nominal_element_type,
                                        -1,
                                        nominal_element_collation,
                                        elmlen,
                                        elem_values[i],
                                        elem_nulls[i],
                                        elmbyval));
            if (is_join_clause)
                s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
                                                      clause->inputcollid,
                                                      PointerGetDatum(root),
                                                      ObjectIdGetDatum(operator),
                                                      PointerGetDatum(args),
                                                      Int16GetDatum(jointype),
                                                      PointerGetDatum(sjinfo)));
            else
                s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
                                                      clause->inputcollid,
                                                      PointerGetDatum(root),
                                                      ObjectIdGetDatum(operator),
                                                      PointerGetDatum(args),
                                                      Int32GetDatum(varRelid)));

            if (useOr)
            {
                s1 = s1 + s2 - s1 * s2;
                if (isEquality)
                    s1disjoint += s2;
            }
            else
            {
                s1 = s1 * s2;
                if (isInequality)
                    s1disjoint += s2 - 1.0;
            }
        }

        /* accept disjoint-probability estimate if in range */
        if ((useOr ? isEquality : isInequality) &&
            s1disjoint >= 0.0 && s1disjoint <= 1.0)
            s1 = s1disjoint;
    }
    else if (rightop && IsA(rightop, ArrayExpr) &&
             !((ArrayExpr *) rightop)->multidims)
    {
        ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
        ListCell   *l;

        /*
         * The element expressions are handed to the estimator as-is, so no
         * per-element type length/by-value information is needed here.
         *
         * We use the assumption of disjoint probabilities here too, although
         * the odds of equal array elements are rather higher if the elements
         * are not all constants (which they won't be, else constant folding
         * would have reduced the ArrayExpr to a Const).  In this path it's
         * critical to have the sanity check on the s1disjoint estimate.
         */
        s1 = s1disjoint = (useOr ? 0.0 : 1.0);

        foreach(l, arrayexpr->elements)
        {
            Node       *elem = (Node *) lfirst(l);
            List       *args;
            Selectivity s2;

            /*
             * Theoretically, if elem isn't of nominal_element_type we should
             * insert a RelabelType, but it seems unlikely that any operator
             * estimation function would really care ...
             */
            args = list_make2(leftop, elem);
            if (is_join_clause)
                s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
                                                      clause->inputcollid,
                                                      PointerGetDatum(root),
                                                      ObjectIdGetDatum(operator),
                                                      PointerGetDatum(args),
                                                      Int16GetDatum(jointype),
                                                      PointerGetDatum(sjinfo)));
            else
                s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
                                                      clause->inputcollid,
                                                      PointerGetDatum(root),
                                                      ObjectIdGetDatum(operator),
                                                      PointerGetDatum(args),
                                                      Int32GetDatum(varRelid)));

            if (useOr)
            {
                s1 = s1 + s2 - s1 * s2;
                if (isEquality)
                    s1disjoint += s2;
            }
            else
            {
                s1 = s1 * s2;
                if (isInequality)
                    s1disjoint += s2 - 1.0;
            }
        }

        /* accept disjoint-probability estimate if in range */
        if ((useOr ? isEquality : isInequality) &&
            s1disjoint >= 0.0 && s1disjoint <= 1.0)
            s1 = s1disjoint;
    }
    else
    {
        CaseTestExpr *dummyexpr;
        List       *args;
        Selectivity s2;
        int         i;

        /*
         * We need a dummy rightop to pass to the operator selectivity
         * routine.  It can be pretty much anything that doesn't look like a
         * constant; CaseTestExpr is a convenient choice.
         */
        dummyexpr = makeNode(CaseTestExpr);
        dummyexpr->typeId = nominal_element_type;
        dummyexpr->typeMod = -1;
        dummyexpr->collation = clause->inputcollid;
        args = list_make2(leftop, dummyexpr);
        if (is_join_clause)
            s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
                                                  clause->inputcollid,
                                                  PointerGetDatum(root),
                                                  ObjectIdGetDatum(operator),
                                                  PointerGetDatum(args),
                                                  Int16GetDatum(jointype),
                                                  PointerGetDatum(sjinfo)));
        else
            s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
                                                  clause->inputcollid,
                                                  PointerGetDatum(root),
                                                  ObjectIdGetDatum(operator),
                                                  PointerGetDatum(args),
                                                  Int32GetDatum(varRelid)));
        s1 = useOr ? 0.0 : 1.0;

        /*
         * Arbitrarily assume 10 elements in the eventual array value (see
         * also estimate_array_length).  We don't risk an assumption of
         * disjoint probabilities here.
         */
        for (i = 0; i < 10; i++)
        {
            if (useOr)
                s1 = s1 + s2 - s1 * s2;
            else
                s1 = s1 * s2;
        }
    }

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(s1);

    return s1;
}

References generate_unaccent_rules::args, ScalarArrayOpExpr::args, ARR_ELEMTYPE, Assert(), CLAMP_PROBABILITY, DatumGetArrayTypeP, DatumGetFloat8(), deconstruct_array(), TypeCacheEntry::eq_opr, estimate_expression_value(), exprCollation(), exprType(), fmgr_info(), FunctionCall4Coll(), FunctionCall5Coll(), get_base_element_type(), get_negator(), get_oprjoin(), get_oprrest(), get_typlenbyval(), get_typlenbyvalalign(), i, Int16GetDatum(), Int32GetDatum(), IsA, lfirst, linitial, list_length(), list_make2, lookup_type_cache(), lsecond, makeConst(), makeNode, ObjectIdGetDatum(), OidIsValid, ScalarArrayOpExpr::opno, PointerGetDatum(), root, s1, s2, scalararraysel_containment(), strip_array_coercion(), TYPECACHE_EQ_OPR, CaseTestExpr::typeId, and ScalarArrayOpExpr::useOr.

Referenced by clause_selectivity_ext().

◆ scalararraysel_containment()

Selectivity scalararraysel_containment	(	PlannerInfo *	root,
		Node *	leftop,
		Node *	rightop,
		Oid	elemtype,
		bool	isEquality,
		bool	useOr,
		int	varRelid
	)

Definition at line 81 of file array_selfuncs.c.

{
    /*
     * Try to estimate "const = ANY/ALL (arrayvar)" (or the <> forms) as an
     * array containment problem.
     *
     * leftop is the scalar side and must be a Const; rightop is the array
     * side and must resolve to a variable of some relation.  elemtype is the
     * array element type whose default comparison function is used.
     * isEquality is false for the <> form; useOr distinguishes ANY (true)
     * from ALL (false).
     *
     * Returns -1.0 when this approach is not applicable (caller must fall
     * back to another method), 0.0 when the constant is NULL, and otherwise
     * a selectivity clamped by CLAMP_PROBABILITY.
     */
    Selectivity selec;
    VariableStatData vardata;
    Datum       constval;
    TypeCacheEntry *typentry;
    FmgrInfo   *cmpfunc;

    /*
     * rightop must be a variable, else punt.
     */
    examine_variable(root, rightop, varRelid, &vardata);
    if (!vardata.rel)
    {
        ReleaseVariableStats(vardata);
        return -1.0;
    }

    /*
     * leftop must be a constant, else punt.
     */
    if (!IsA(leftop, Const))
    {
        ReleaseVariableStats(vardata);
        return -1.0;
    }
    if (((Const *) leftop)->constisnull)
    {
        /* qual can't succeed if null on left */
        ReleaseVariableStats(vardata);
        return (Selectivity) 0.0;
    }
    constval = ((Const *) leftop)->constvalue;

    /* Get element type's default comparison function */
    typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
    if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
    {
        /* no comparison function for the element type, so punt */
        ReleaseVariableStats(vardata);
        return -1.0;
    }
    cmpfunc = &typentry->cmp_proc_finfo;

    /*
     * If the operator is <>, swap ANY/ALL, then invert the result later.
     */
    if (!isEquality)
        useOr = !useOr;

    /*
     * Get array element stats for var, if available.  The stats are used
     * only if statistic_proc_security_check() approves the comparison
     * function.
     */
    if (HeapTupleIsValid(vardata.statsTuple) &&
        statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
    {
        Form_pg_statistic stats;
        AttStatsSlot sslot;
        AttStatsSlot hslot;

        stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);

        /* MCELEM will be an array of same type as element */
        if (get_attstatsslot(&sslot, vardata.statsTuple,
                             STATISTIC_KIND_MCELEM, InvalidOid,
                             ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
        {
            /*
             * For ALL case, also get histogram of distinct-element counts.
             * In the ANY case, or if no such histogram can be fetched, hslot
             * is zeroed so that the numbers/nnumbers passed below are
             * NULL/0 and the unconditional free_attstatsslot(&hslot) works
             * on a well-defined struct.
             */
            if (useOr ||
                !get_attstatsslot(&hslot, vardata.statsTuple,
                                  STATISTIC_KIND_DECHIST, InvalidOid,
                                  ATTSTATSSLOT_NUMBERS))
                memset(&hslot, 0, sizeof(hslot));

            /*
             * For = ANY, estimate as var @> ARRAY[const].
             *
             * For = ALL, estimate as var <@ ARRAY[const].
             */
            if (useOr)
                selec = mcelem_array_contain_overlap_selec(sslot.values,
                                                           sslot.nvalues,
                                                           sslot.numbers,
                                                           sslot.nnumbers,
                                                           &constval, 1,
                                                           OID_ARRAY_CONTAINS_OP,
                                                           typentry);
            else
                selec = mcelem_array_contained_selec(sslot.values,
                                                     sslot.nvalues,
                                                     sslot.numbers,
                                                     sslot.nnumbers,
                                                     &constval, 1,
                                                     hslot.numbers,
                                                     hslot.nnumbers,
                                                     OID_ARRAY_CONTAINED_OP,
                                                     typentry);

            free_attstatsslot(&hslot);
            free_attstatsslot(&sslot);
        }
        else
        {
            /* No most-common-elements info, so do without */
            if (useOr)
                selec = mcelem_array_contain_overlap_selec(NULL, 0,
                                                           NULL, 0,
                                                           &constval, 1,
                                                           OID_ARRAY_CONTAINS_OP,
                                                           typentry);
            else
                selec = mcelem_array_contained_selec(NULL, 0,
                                                     NULL, 0,
                                                     &constval, 1,
                                                     NULL, 0,
                                                     OID_ARRAY_CONTAINED_OP,
                                                     typentry);
        }

        /*
         * MCE stats count only non-null rows, so adjust for null rows.
         */
        selec *= (1.0 - stats->stanullfrac);
    }
    else
    {
        /* No stats at all, so do without */
        if (useOr)
            selec = mcelem_array_contain_overlap_selec(NULL, 0,
                                                       NULL, 0,
                                                       &constval, 1,
                                                       OID_ARRAY_CONTAINS_OP,
                                                       typentry);
        else
            selec = mcelem_array_contained_selec(NULL, 0,
                                                 NULL, 0,
                                                 &constval, 1,
                                                 NULL, 0,
                                                 OID_ARRAY_CONTAINED_OP,
                                                 typentry);
        /* we assume no nulls here, so no stanullfrac correction */
    }

    ReleaseVariableStats(vardata);

    /*
     * If the operator is <>, invert the results.  (This pairs with the
     * ANY/ALL swap done above.)
     */
    if (!isEquality)
        selec = 1.0 - selec;

    CLAMP_PROBABILITY(selec);

    return selec;
}

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, root, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

◆ statistic_proc_security_check()

bool statistic_proc_security_check	(	VariableStatData *	vardata,
		Oid	func_oid
	)

Definition at line 6120 of file selfuncs.c.

{
    /*
     * Decide whether it is acceptable to apply the function func_oid to the
     * statistics described by vardata.
     *
     * When vardata->acl_ok is set the answer is always yes.  Otherwise the
     * function must be a valid OID and must be marked leakproof; a DEBUG2
     * message reports a rejection caused by a non-leakproof function.
     */
    if (!vardata->acl_ok)
    {
        /* Without a function to vet there is nothing to approve. */
        if (!OidIsValid(func_oid))
            return false;

        if (!get_func_leakproof(func_oid))
        {
            ereport(DEBUG2,
                    (errmsg_internal("not using statistics because function \"%s\" is not leakproof",
                                     get_func_name(func_oid))));
            return false;
        }
    }

    return true;
}

References VariableStatData::acl_ok, DEBUG2, ereport, errmsg_internal(), get_func_leakproof(), get_func_name(), and OidIsValid.

Referenced by calc_arraycontsel(), calc_hist_selectivity(), eqjoinsel(), get_variable_range(), histogram_selectivity(), ineq_histogram_selectivity(), mcv_selectivity(), scalararraysel_containment(), and var_eq_const().

◆ var_eq_const()

double var_eq_const	(	VariableStatData *	vardata,
		Oid	oproid,
		Oid	collation,
		Datum	constval,
		bool	constisnull,
		bool	varonleft,
		bool	negate
	)

Definition at line 300 of file selfuncs.c.

{
    /*
     * Estimate the selectivity of "var = const" (or "var <> const" when
     * negate is true).
     *
     * vardata describes the variable, oproid is the comparison operator,
     * collation is passed to the operator's function, constval/constisnull
     * give the constant, and varonleft says on which side of the operator
     * the variable appears.  The result is clamped by CLAMP_PROBABILITY.
     */
    double      selec;
    double      nullfrac = 0.0;
    bool        isdefault;      /* output of get_variable_numdistinct; not examined here */
    Oid         opfuncoid;

    /*
     * If the constant is NULL, assume operator is strict and return zero, ie,
     * operator will never return TRUE.  (It's zero even for a negator op.)
     */
    if (constisnull)
        return 0.0;

    /*
     * Grab the nullfrac for use below.  Note we allow use of nullfrac
     * regardless of security check.
     */
    if (HeapTupleIsValid(vardata->statsTuple))
    {
        Form_pg_statistic stats;

        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        nullfrac = stats->stanullfrac;
    }

    /*
     * If we matched the var to a unique index, DISTINCT or GROUP-BY clause,
     * assume there is exactly one match regardless of anything else.  (This
     * is slightly bogus, since the index or clause's equality operator might
     * be different from ours, but it's much more likely to be right than
     * ignoring the information.)
     */
    if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
    {
        selec = 1.0 / vardata->rel->tuples;
    }
    else if (HeapTupleIsValid(vardata->statsTuple) &&
             statistic_proc_security_check(vardata,
                                           (opfuncoid = get_opcode(oproid))))
    {
        /*
         * opfuncoid is assigned inside the condition above; it is only
         * evaluated (and only used) when a stats tuple exists.
         */
        AttStatsSlot sslot;
        bool        match = false;
        int         i;

        /*
         * Is the constant "=" to any of the column's most common values?
         * (Although the given operator may not really be "=", we will assume
         * that seeing whether it returns TRUE is an appropriate test.  If you
         * don't like this, maybe you shouldn't be using eqsel for your
         * operator...)
         */
        if (get_attstatsslot(&sslot, vardata->statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
        {
            LOCAL_FCINFO(fcinfo, 2);
            FmgrInfo    eqproc;

            fmgr_info(opfuncoid, &eqproc);

            /*
             * Save a few cycles by setting up the fcinfo struct just once.
             * Using FunctionCallInvoke directly also avoids failure if the
             * eqproc returns NULL, though really equality functions should
             * never do that.
             */
            InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
                                     NULL, NULL);
            fcinfo->args[0].isnull = false;
            fcinfo->args[1].isnull = false;
            /* be careful to apply operator right way 'round */
            if (varonleft)
                fcinfo->args[1].value = constval;
            else
                fcinfo->args[0].value = constval;

            for (i = 0; i < sslot.nvalues; i++)
            {
                Datum       fresult;

                /* the MCV goes into whichever argument slot the var occupies */
                if (varonleft)
                    fcinfo->args[0].value = sslot.values[i];
                else
                    fcinfo->args[1].value = sslot.values[i];
                /* reset the result-null flag before each call */
                fcinfo->isnull = false;
                fresult = FunctionCallInvoke(fcinfo);
                if (!fcinfo->isnull && DatumGetBool(fresult))
                {
                    /* leave i pointing at the matching entry */
                    match = true;
                    break;
                }
            }
        }
        else
        {
            /* no most-common-value info available */
            i = 0;              /* keep compiler quiet */
        }

        if (match)
        {
            /*
             * Constant is "=" to this common value.  We know selectivity
             * exactly (or as exactly as ANALYZE could calculate it, anyway).
             *
             * NOTE(review): indexes numbers[] with the position found in
             * values[], so this presumes the two arrays are parallel --
             * confirm.
             */
            selec = sslot.numbers[i];
        }
        else
        {
            /*
             * Comparison is against a constant that is neither NULL nor any
             * of the common values.  Its selectivity cannot be more than
             * this:
             *
             * NOTE(review): this branch is also reached when the MCV slot
             * could not be fetched, so it presumably relies on
             * get_attstatsslot() leaving sslot zeroed (nnumbers == 0) on
             * failure -- confirm.
             */
            double      sumcommon = 0.0;
            double      otherdistinct;

            for (i = 0; i < sslot.nnumbers; i++)
                sumcommon += sslot.numbers[i];
            selec = 1.0 - sumcommon - nullfrac;
            CLAMP_PROBABILITY(selec);

            /*
             * and in fact it's probably a good deal less. We approximate that
             * all the not-common values share this remaining fraction
             * equally, so we divide by the number of other distinct values.
             */
            otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
                sslot.nnumbers;
            if (otherdistinct > 1)
                selec /= otherdistinct;

            /*
             * Another cross-check: selectivity shouldn't be estimated as more
             * than the least common "most common value".
             */
            if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
                selec = sslot.numbers[sslot.nnumbers - 1];
        }

        free_attstatsslot(&sslot);
    }
    else
    {
        /*
         * No ANALYZE stats available, so make a guess using estimated number
         * of distinct values and assuming they are equally common. (The guess
         * is unlikely to be very good, but we do know a few special cases.)
         */
        selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
    }

    /*
     * now adjust if we wanted <> rather than =; the null fraction is
     * subtracted as well as the "=" selectivity
     */
    if (negate)
        selec = 1.0 - selec - nullfrac;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, DatumGetBool(), fmgr_info(), free_attstatsslot(), FunctionCallInvoke, get_attstatsslot(), get_opcode(), get_variable_numdistinct(), GETSTRUCT(), HeapTupleIsValid, i, InitFunctionCallInfoData, InvalidOid, VariableStatData::isunique, LOCAL_FCINFO, AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, VariableStatData::rel, statistic_proc_security_check(), VariableStatData::statsTuple, RelOptInfo::tuples, and AttStatsSlot::values.

Referenced by boolvarsel(), eqsel_internal(), patternsel_common(), and prefix_selectivity().

◆ var_eq_non_const()

double var_eq_non_const	(	VariableStatData *	vardata,
		Oid	oproid,
		Oid	collation,
		Node *	other,
		bool	varonleft,
		bool	negate
	)

Definition at line 471 of file selfuncs.c.

{
    const bool  have_stats = HeapTupleIsValid(vardata->statsTuple);
    double      null_fraction = 0.0;
    double      result;
    bool        used_default;

    /*
     * Estimate the selectivity of "var = something" (or "var <> something"
     * when negate is set) for a comparison value that is not a constant.
     *
     * Start by remembering the column's NULL fraction, if we have a stats
     * tuple to read it from; it is needed both for the main estimate and for
     * the negation step at the bottom.
     */
    if (have_stats)
    {
        Form_pg_statistic stats_form;

        stats_form = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        null_fraction = stats_form->stanullfrac;
    }

    if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
    {
        /*
         * The variable was matched to a unique index, DISTINCT or GROUP-BY
         * clause, so assume exactly one row matches no matter what else we
         * know.  (Not strictly correct, because that index or clause may use
         * a different equality operator than ours, but trusting it is far
         * more likely to be right than throwing the information away.)
         */
        result = 1.0 / vardata->rel->tuples;
    }
    else if (!have_stats)
    {
        /*
         * Without ANALYZE statistics, fall back on the estimated number of
         * distinct values and treat them all as equally frequent.  (Not a
         * great guess in general, though a few special cases are known.)
         */
        result = 1.0 / get_variable_numdistinct(vardata, &used_default);
    }
    else
    {
        AttStatsSlot mcv_slot;
        double      distinct_count;

        distinct_count = get_variable_numdistinct(vardata, &used_default);

        /*
         * The comparison value is unknown in advance, but we assume it is
         * not NULL.  Spread the non-null fraction evenly over the distinct
         * values, giving an average over every possible value, common or
         * not.  (In effect this treats the not-yet-known value as equally
         * likely to be any of the possible values, independent of how often
         * each occurs in the table.  Whether that is wise is debatable.)
         */
        result = 1.0 - null_fraction;
        if (distinct_count > 1)
            result /= distinct_count;

        /*
         * Sanity cap: the estimate must not exceed the frequency of the most
         * common value.
         */
        if (get_attstatsslot(&mcv_slot, vardata->statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_NUMBERS))
        {
            if (mcv_slot.nnumbers > 0 && result > mcv_slot.numbers[0])
                result = mcv_slot.numbers[0];
            free_attstatsslot(&mcv_slot);
        }
    }

    /* For <> rather than =, take what is left after matches and NULLs. */
    if (negate)
        result = 1.0 - result - null_fraction;

    /* The value ought to be a valid probability already; enforce it anyway. */
    CLAMP_PROBABILITY(result);

    return result;
}

References ATTSTATSSLOT_NUMBERS, CLAMP_PROBABILITY, free_attstatsslot(), get_attstatsslot(), get_variable_numdistinct(), GETSTRUCT(), HeapTupleIsValid, InvalidOid, VariableStatData::isunique, AttStatsSlot::nnumbers, AttStatsSlot::numbers, VariableStatData::rel, VariableStatData::statsTuple, and RelOptInfo::tuples.

Referenced by eqsel_internal().

Variable Documentation

◆ get_index_stats_hook

PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook

extern

Definition at line 149 of file selfuncs.c.

Referenced by brincostestimate(), examine_indexcol_variable(), and examine_variable().

◆ get_relation_stats_hook

PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook

extern

Definition at line 148 of file selfuncs.c.

Referenced by brincostestimate(), examine_indexcol_variable(), and examine_simple_variable().

Data Structures

Macros

Typedefs

Functions

Variables

Macro Definition Documentation

◆ CLAMP_PROBABILITY

◆ DEFAULT_EQ_SEL

◆ DEFAULT_INEQ_SEL

◆ DEFAULT_MATCH_SEL

◆ DEFAULT_MATCHING_SEL

◆ DEFAULT_MULTIRANGE_INEQ_SEL

◆ DEFAULT_NOT_UNK_SEL

◆ DEFAULT_NUM_DISTINCT

◆ DEFAULT_RANGE_INEQ_SEL

◆ DEFAULT_UNK_SEL

◆ ReleaseVariableStats

◆ SELFLAG_USED_DEFAULT

Typedef Documentation

◆ EstimationInfo

◆ get_index_stats_hook_type

◆ get_relation_stats_hook_type

◆ VariableStatData

Function Documentation

◆ add_predicate_to_index_quals()

◆ booltestsel()

◆ boolvarsel()

◆ estimate_array_length()

◆ estimate_hash_bucket_stats()

◆ estimate_hashagg_tablesize()

◆ estimate_multivariate_bucketsize()

◆ estimate_num_groups()

◆ examine_variable()

◆ generic_restriction_selectivity()

◆ genericcostestimate()

◆ get_join_variables()

◆ get_quals_from_indexclauses()

◆ get_restriction_variable()

◆ get_variable_numdistinct()

◆ histogram_selectivity()

◆ index_other_operands_eval_cost()

◆ ineq_histogram_selectivity()

◆ mcv_selectivity()

◆ mergejoinscansel()

◆ nulltestsel()

◆ rowcomparesel()

◆ scalararraysel()

◆ scalararraysel_containment()

◆ statistic_proc_security_check()

◆ var_eq_const()

◆ var_eq_non_const()

Variable Documentation

◆ get_index_stats_hook

◆ get_relation_stats_hook