#include "statistics/statistics.h"
#include "utils/sortsupport.h"

Include dependency graph for extended_stats_internal.h:

This graph shows which files directly or indirectly include this file:

Data Structures
struct	StdAnalyzeData

struct	ScalarItem

struct	DimensionInfo

struct	MultiSortSupportData

struct	SortItem

struct	StatsBuildData

Typedefs
typedef struct DimensionInfo	DimensionInfo

typedef struct MultiSortSupportData	MultiSortSupportData

typedef MultiSortSupportData *	MultiSortSupport

typedef struct SortItem	SortItem

typedef struct StatsBuildData	StatsBuildData

Functions
MVNDistinct *	statext_ndistinct_build (double totalrows, StatsBuildData *data)

bytea *	statext_ndistinct_serialize (MVNDistinct *ndistinct)

MVNDistinct *	statext_ndistinct_deserialize (bytea *data)

MVDependencies *	statext_dependencies_build (StatsBuildData *data)

bytea *	statext_dependencies_serialize (MVDependencies *dependencies)

MVDependencies *	statext_dependencies_deserialize (bytea *data)

MCVList *	statext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)

bytea *	statext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)

MCVList *	statext_mcv_deserialize (bytea *data)

MultiSortSupport	multi_sort_init (int ndims)

void	multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)

int	multi_sort_compare (const void *a, const void *b, void *arg)

int	multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)

int	multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)

int	compare_scalars_simple (const void *a, const void *b, void *arg)

int	compare_datums_simple (Datum a, Datum b, SortSupport ssup)

AttrNumber *	build_attnums_array (Bitmapset *attrs, int nexprs, int *numattrs)

SortItem *	build_sorted_items (StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)

bool	examine_opclause_args (List *args, Node **exprp, Const **cstp, bool *expronleftp)

Selectivity	mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)

Selectivity	mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)

Selectivity	mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

typedef MultiSortSupportData* MultiSortSupport

Definition at line 51 of file extended_stats_internal.h.

◆ MultiSortSupportData

typedef struct MultiSortSupportData MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

◆ StatsBuildData

typedef struct StatsBuildData StatsBuildData

Function Documentation

◆ build_attnums_array()

AttrNumber * build_attnums_array	(	Bitmapset *	attrs,
		int	nexprs,
		int *	numattrs
	)

Definition at line 938 of file extended_stats.c.

{
    /*
     * Convert the members of the "attrs" bitmapset into a newly palloc'd
     * array of attribute numbers, subtracting "nexprs" from every member.
     * When "numattrs" is not NULL, the number of array entries is stored
     * there.
     */
    int         nmembers = bms_num_members(attrs);
    AttrNumber *result;
    int         member;
    int         pos = 0;

    /* hand the array length back to the caller, if requested */
    if (numattrs != NULL)
        *numattrs = nmembers;

    result = (AttrNumber *) palloc(sizeof(AttrNumber) * nmembers);

    /* walk the bitmapset members in the order bms_next_member yields them */
    for (member = bms_next_member(attrs, -1);
         member >= 0;
         member = bms_next_member(attrs, member))
    {
        int         attnum = member - nexprs;

        /*
         * Sanity checks on the shifted value: it must be a valid attribute
         * number (i.e. not 0), must not exceed MaxAttrNumber, and must not
         * be lower than -nexprs.
         */
        Assert(AttributeNumberIsValid(attnum));
        Assert(attnum <= MaxAttrNumber);
        Assert(attnum >= (-nexprs));

        result[pos] = (AttrNumber) attnum;
        pos++;

        /* we must never write past the end of the array */
        Assert(pos <= nmembers);
    }

    return result;
}

References Assert(), attnum, AttributeNumberIsValid, bms_next_member(), bms_num_members(), i, j, MaxAttrNumber, and palloc().

◆ build_sorted_items()

SortItem * build_sorted_items	(	StatsBuildData *	data,
		int *	nitems,
		MultiSortSupport	mss,
		int	numattrs,
		AttrNumber *	attnums
	)

Definition at line 983 of file extended_stats.c.

{
    /*
     * Build an array of SortItems from the sampled rows in "data", using only
     * the "numattrs" attributes listed in "attnums", and sort the array with
     * the multi-column comparator described by "mss".
     *
     * Rows that contain a varlena value wider than WIDTH_THRESHOLD are left
     * out.  The number of rows actually kept is stored in *nitems.  Returns
     * the sorted array, or NULL (after freeing the buffer) if no row was kept.
     */
    int         i,
                j,
                len,
                nrows;
    int         nvalues = data->numrows * numattrs;
 
    SortItem   *items;
    Datum      *values;
    bool       *isnull;
    char       *ptr;
    int        *typlen;
 
    /* Compute the total amount of memory we need (both items and values). */
    len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
 
    /*
     * Allocate the memory as one zeroed chunk and split it into three
     * consecutive pieces: SortItems, Datum values, and null flags.
     */
    ptr = palloc0(len);
 
    /* items to sort */
    items = (SortItem *) ptr;
    ptr += data->numrows * sizeof(SortItem);
 
    /* values and null flags */
    values = (Datum *) ptr;
    ptr += nvalues * sizeof(Datum);
 
    isnull = (bool *) ptr;
    ptr += nvalues * sizeof(bool);
 
    /* make sure we consumed the whole buffer exactly */
    Assert((ptr - (char *) items) == len);
 
    /*
     * Point each item at its own slice (numattrs entries) of the shared
     * Datum and bool arrays.
     */
    nrows = 0;
    for (i = 0; i < data->numrows; i++)
    {
        items[nrows].values = &values[nrows * numattrs];
        items[nrows].isnull = &isnull[nrows * numattrs];
 
        nrows++;
    }
 
    /*
     * build a local cache of typlen for all attributes
     *
     * NOTE(review): this array is not pfree'd in this function; presumably
     * it is released together with the surrounding memory context - confirm.
     */
    typlen = (int *) palloc(sizeof(int) * data->nattnums);
    for (i = 0; i < data->nattnums; i++)
        typlen[i] = get_typlen(data->stats[i]->attrtypid);
 
    /*
     * Fill the items.  "i" walks the sample rows, "nrows" is the next output
     * slot; it only advances for rows that are kept, so a skipped row's slot
     * is overwritten by the next row.
     */
    nrows = 0;
    for (i = 0; i < data->numrows; i++)
    {
        bool        toowide = false;
 
        /* load the values/null flags from sample rows */
        for (j = 0; j < numattrs; j++)
        {
            Datum       value;
            bool        isnull;     /* shadows the outer bool array */
            int         attlen;
            AttrNumber  attnum = attnums[j];
 
            int         idx;
 
            /* match attnum to the pre-calculated data */
            for (idx = 0; idx < data->nattnums; idx++)
            {
                if (attnum == data->attnums[idx])
                    break;
            }
 
            /* the requested attnum must exist in the build data */
            Assert(idx < data->nattnums);
 
            value = data->values[idx][i];
            isnull = data->nulls[idx][i];
            attlen = typlen[idx];
 
            /*
             * If this is a varlena value, check if it's too wide and if yes
             * then skip the whole item. Otherwise detoast the value.
             *
             * XXX It may happen that we've already detoasted some preceding
             * values for the current item. We don't bother to cleanup those
             * on the assumption that those are small (below WIDTH_THRESHOLD)
             * and will be discarded at the end of analyze.
             */
            if ((!isnull) && (attlen == -1))
            {
                if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
                {
                    toowide = true;
                    break;
                }
 
                value = PointerGetDatum(PG_DETOAST_DATUM(value));
            }
 
            items[nrows].values[j] = value;
            items[nrows].isnull[j] = isnull;
        }
 
        /* drop the row: leave nrows alone so the slot gets reused */
        if (toowide)
            continue;
 
        nrows++;
    }
 
    /* store the actual number of items (ignoring the too-wide ones) */
    *nitems = nrows;
 
    /* all items were too wide */
    if (nrows == 0)
    {
        /* everything is allocated as a single chunk */
        pfree(items);
        return NULL;
    }
 
    /* do the sort, using the multi-sort */
    qsort_interruptible(items, nrows, sizeof(SortItem),
                        multi_sort_compare, mss);
 
    return items;
}

References Assert(), attlen, attnum, data, get_typlen(), i, idx(), items, j, len, multi_sort_compare(), nitems, palloc(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum(), qsort_interruptible(), toast_raw_datum_size(), value, values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

◆ compare_datums_simple()

int compare_datums_simple	(	Datum	a,
		Datum	b,
		SortSupport	ssup
	)

Definition at line 924 of file extended_stats.c.

{
    /*
     * Compare two Datums using the given sort support; both values are
     * passed to the comparator with their null flags set to false.
     */
    int         cmp = ApplySortComparator(a, false, b, false, ssup);

    return cmp;
}

References a, ApplySortComparator(), and b.

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

◆ compare_scalars_simple()

int compare_scalars_simple	(	const void *	a,
		const void *	b,
		void *	arg
	)

Definition at line 916 of file extended_stats.c.

{
    /*
     * Comparator working on pointers to Datums; the SortSupport to use is
     * passed through the opaque "arg" pointer.
     */
    Datum       left = *(Datum *) a;
    Datum       right = *(Datum *) b;
    SortSupport ssup = (SortSupport) arg;

    return compare_datums_simple(left, right, ssup);
}

References a, arg, b, and compare_datums_simple().

Referenced by statext_mcv_serialize().

◆ examine_opclause_args()

bool examine_opclause_args	(	List *	args,
		Node **	exprp,
		Const **	cstp,
		bool *	expronleftp
	)

Definition at line 2052 of file extended_stats.c.

{
    /*
     * Split a two-argument operator clause into its expression and Const
     * parts.  Returns false when neither argument is a Const (after removing
     * one RelabelType from each side); otherwise returns true and fills in
     * whichever of the output pointers are not NULL.
     */
    Node       *lhs;
    Node       *rhs;
    Node       *expr;
    Const      *cst;
    bool        expronleft;

    /* enforced by statext_is_compatible_clause_internal */
    Assert(list_length(args) == 2);

    lhs = linitial(args);
    rhs = lsecond(args);

    /* look through a RelabelType on either side */
    if (IsA(lhs, RelabelType))
        lhs = (Node *) ((RelabelType *) lhs)->arg;

    if (IsA(rhs, RelabelType))
        rhs = (Node *) ((RelabelType *) rhs)->arg;

    /*
     * A Const on the right is checked first, so for (Const op Const) the
     * left argument is reported as the expression.
     */
    expronleft = IsA(rhs, Const);

    if (!expronleft && !IsA(lhs, Const))
        return false;

    if (expronleft)
    {
        expr = (Node *) lhs;
        cst = (Const *) rhs;
    }
    else
    {
        expr = (Node *) rhs;
        cst = (Const *) lhs;
    }

    /* hand back only the pieces the caller asked for */
    if (exprp != NULL)
        *exprp = expr;

    if (cstp != NULL)
        *cstp = cst;

    if (expronleftp != NULL)
        *expronleftp = expronleft;

    return true;
}

References arg, generate_unaccent_rules::args, Assert(), IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or	(	PlannerInfo *	root,
		StatisticExtInfo *	stat,
		MCVList *	mcv,
		Node *	clause,
		bool **	or_matches,
		Selectivity *	basesel,
		Selectivity *	overlap_mcvsel,
		Selectivity *	overlap_basesel,
		Selectivity *	totalsel
	)

Definition at line 2126 of file mcv.c.

{
    /*
     * Evaluate one clause of an OR list against the MCV list.
     *
     * Returns the summed frequency of the MCV items matching "clause".  The
     * output parameters receive: the summed base frequency of those items
     * (*basesel), the frequency and base frequency of the items that also
     * matched an earlier clause according to *or_matches (*overlap_mcvsel,
     * *overlap_basesel), and the total frequency of all MCV items
     * (*totalsel).  *or_matches is allocated on first use and updated to
     * include the items matched by this clause.
     */
    Selectivity clause_freq = 0.0;
    bool       *clause_matches;
    bool       *seen;
    int         itemno;

    /* first clause of the OR list: start with an all-false bitmap */
    if (*or_matches == NULL)
        *or_matches = palloc0(sizeof(bool) * mcv->nitems);

    seen = *or_matches;

    /* match bitmap for just this clause */
    clause_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
                                          stat->exprs, mcv, false);

    *basesel = 0.0;
    *overlap_mcvsel = 0.0;
    *overlap_basesel = 0.0;
    *totalsel = 0.0;

    for (itemno = 0; itemno < mcv->nitems; itemno++)
    {
        MCVItem    *item = &mcv->items[itemno];

        /* every item counts towards the total, matching or not */
        *totalsel += item->frequency;

        if (clause_matches[itemno])
        {
            clause_freq += item->frequency;
            *basesel += item->base_frequency;

            /* also matched by one of the preceding clauses? */
            if (seen[itemno])
            {
                *overlap_mcvsel += item->frequency;
                *overlap_basesel += item->base_frequency;
            }
        }

        /* remember the match for the clauses that follow */
        seen[itemno] = seen[itemno] || clause_matches[itemno];
    }

    pfree(clause_matches);

    return clause_freq;
}

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), pfree(), and root.

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity	(	PlannerInfo *	root,
		StatisticExtInfo *	stat,
		List *	clauses,
		int	varRelid,
		JoinType	jointype,
		SpecialJoinInfo *	sjinfo,
		RelOptInfo *	rel,
		Selectivity *	basesel,
		Selectivity *	totalsel
	)

Definition at line 2048 of file mcv.c.

{
    /*
     * Estimate the selectivity of "clauses" from the MCV list of statistics
     * object "stat".
     *
     * Returns the summed frequency of the matching MCV items; *basesel gets
     * the summed base frequency of those items and *totalsel the summed
     * frequency of all items in the list.
     */
    RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    MCVList    *mcv;
    bool       *matches;
    Selectivity matched_freq = 0.0;
    int         itemno;

    /* fetch the MCV list belonging to the statistics object */
    mcv = statext_mcv_load(stat->statOid, rte->inh);

    /* one flag per MCV item telling whether it satisfies the clauses */
    matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
                                   mcv, false);

    *basesel = 0.0;
    *totalsel = 0.0;

    for (itemno = 0; itemno < mcv->nitems; itemno++)
    {
        MCVItem    *item = &mcv->items[itemno];

        *totalsel += item->frequency;

        /* non-matching items only contribute to the total */
        if (!matches[itemno])
            continue;

        *basesel += item->base_frequency;
        matched_freq += item->frequency;
    }

    return matched_freq;
}

References MCVItem::base_frequency, MCVItem::frequency, i, RangeTblEntry::inh, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, RelOptInfo::relid, root, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities	(	Selectivity	simple_sel,
		Selectivity	mcv_sel,
		Selectivity	mcv_basesel,
		Selectivity	mcv_totalsel
	)

Definition at line 2006 of file mcv.c.

{
    /*
     * Combine a simple selectivity estimate with MCV-based estimates: the
     * MCV selectivity is used for the part covered by MCV matches, and the
     * remainder of the simple estimate (limited to the fraction not covered
     * by the MCV list at all) is added on top.
     */
    Selectivity nonmcv_sel = simple_sel - mcv_basesel;
    Selectivity nonmcv_cap = 1.0 - mcv_totalsel;
    Selectivity result;

    /* selectivity attributed to values outside the MCV matches */
    CLAMP_PROBABILITY(nonmcv_sel);

    /* ... which cannot be larger than the non-MCV fraction */
    if (nonmcv_sel > nonmcv_cap)
        nonmcv_sel = nonmcv_cap;

    /* add the MCV and non-MCV parts together */
    result = mcv_sel + nonmcv_sel;
    CLAMP_PROBABILITY(result);

    return result;
}

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

◆ multi_sort_add_dimension()

void multi_sort_add_dimension	(	MultiSortSupport	mss,
		int	sortdim,
		Oid	oper,
		Oid	collation
	)

Definition at line 848 of file extended_stats.c.

{
    /*
     * Set up the sort support for dimension "sortdim" of "mss" from the
     * ordering operator "oper", using the given collation.
     */
    SortSupport dim_ssup = mss->ssup + sortdim;

    dim_ssup->ssup_nulls_first = false;
    dim_ssup->ssup_collation = collation;
    dim_ssup->ssup_cxt = CurrentMemoryContext;

    /* the fields above must be filled in before this call */
    PrepareSortSupportFromOrderingOp(oper, dim_ssup);
}

References CurrentMemoryContext, oper(), PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ multi_sort_compare()

int multi_sort_compare	(	const void *	a,
		const void *	b,
		void *	arg
	)

Definition at line 862 of file extended_stats.c.

{
    /*
     * Compare two SortItems over all mss->ndims dimensions, in order.  The
     * result of the first dimension that differs is returned; 0 means the
     * items are equal in every dimension.
     */
    MultiSortSupport mss = (MultiSortSupport) arg;
    SortItem   *lhs = (SortItem *) a;
    SortItem   *rhs = (SortItem *) b;
    int         result = 0;
    int         dim = 0;

    /* stop as soon as one dimension decides the ordering */
    while (result == 0 && dim < mss->ndims)
    {
        result = ApplySortComparator(lhs->values[dim], lhs->isnull[dim],
                                     rhs->values[dim], rhs->isnull[dim],
                                     &mss->ssup[dim]);
        dim++;
    }

    return result;
}

References a, ApplySortComparator(), arg, b, compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

◆ multi_sort_compare_dim()

int multi_sort_compare_dim	(	int	dim,
		const SortItem *	a,
		const SortItem *	b,
		MultiSortSupport	mss
	)

Definition at line 887 of file extended_stats.c.

{
    /* Compare two SortItems on the single dimension "dim" only. */
    SortSupport dim_ssup = &mss->ssup[dim];
    int         cmp;

    cmp = ApplySortComparator(a->values[dim], a->isnull[dim],
                              b->values[dim], b->isnull[dim],
                              dim_ssup);

    return cmp;
}

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_compare_dims()

int multi_sort_compare_dims	(	int	start,
		int	end,
		const SortItem *	a,
		const SortItem *	b,
		MultiSortSupport	mss
	)

Definition at line 896 of file extended_stats.c.

{
    /*
     * Compare two SortItems on dimensions "start" through "end" (both
     * inclusive), returning the result of the first dimension that differs,
     * or 0 if none does.
     */
    int         result = 0;
    int         dim;

    for (dim = start; result == 0 && dim <= end; dim++)
        result = ApplySortComparator(a->values[dim], a->isnull[dim],
                                     b->values[dim], b->isnull[dim],
                                     &mss->ssup[dim]);

    return result;
}

References a, ApplySortComparator(), b, MultiSortSupportData::ssup, and start.

Referenced by dependency_degree().

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int ndims )

Definition at line 829 of file extended_stats.c.

{
    /*
     * Allocate a zeroed MultiSortSupportData with room for "ndims" sort
     * support entries and record the number of dimensions in it.
     */
    MultiSortSupport result;
    Size        alloc_size;

    Assert(ndims >= 2);

    /* fixed header plus one SortSupportData per dimension */
    alloc_size = offsetof(MultiSortSupportData, ssup)
        + sizeof(SortSupportData) * ndims;

    result = (MultiSortSupport) palloc0(alloc_size);
    result->ndims = ndims;

    return result;
}

References Assert(), MultiSortSupportData::ndims, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ statext_dependencies_build()

MVDependencies * statext_dependencies_build ( StatsBuildData * data )

Definition at line 348 of file dependencies.c.

{
    /*
     * Build the functional dependencies for the attributes described by
     * "data".
     *
     * Every variation of k attributes (k = 2 .. data->nattnums) produced by
     * the DependencyGenerator is passed to dependency_degree(); variations
     * whose degree is not 0.0 are appended to the result.  Returns NULL if
     * no dependency was stored.
     */
    int         i,
                k;
 
    /* result */
    MVDependencies *dependencies = NULL;
    MemoryContext cxt;
 
    Assert(data->nattnums >= 2);
 
    /* tracks memory allocated by dependency_degree calls */
    cxt = AllocSetContextCreate(CurrentMemoryContext,
                                "dependency_degree cxt",
                                ALLOCSET_DEFAULT_SIZES);
 
    /*
     * We'll try build functional dependencies starting from the smallest ones
     * covering just 2 columns, to the largest ones, covering all columns
     * included in the statistics object.  We start from the smallest ones
     * because we want to be able to skip already implied ones.
     */
    for (k = 2; k <= data->nattnums; k++)
    {
        AttrNumber *dependency; /* array with k elements */
 
        /*
         * prepare a DependencyGenerator of variation (note that the local
         * variable has the same name as its type)
         */
        DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
 
        /* generate all possible variations of k values (out of n) */
        while ((dependency = DependencyGenerator_next(DependencyGenerator)))
        {
            double      degree;
            MVDependency *d;
            MemoryContext oldcxt;
 
            /*
             * run the degree calculation in the temporary context, so that
             * whatever it allocates can be released by the reset below
             */
            oldcxt = MemoryContextSwitchTo(cxt);
 
            /* compute how valid the dependency seems */
            degree = dependency_degree(data, k, dependency);
 
            /* back to the caller's context, then drop the scratch memory */
            MemoryContextSwitchTo(oldcxt);
            MemoryContextReset(cxt);
 
            /*
             * if the dependency seems entirely invalid, don't store it
             */
            if (degree == 0.0)
                continue;
 
            /* header plus k attribute numbers */
            d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
                                         + k * sizeof(AttrNumber));
 
            /*
             * copy the dependency, mapping each index produced by the
             * generator to an attribute number through data->attnums
             */
            d->degree = degree;
            d->nattributes = k;
            for (i = 0; i < k; i++)
                d->attributes[i] = data->attnums[dependency[i]];
 
            /* initialize the list of dependencies */
            if (dependencies == NULL)
            {
                dependencies
                    = (MVDependencies *) palloc0(sizeof(MVDependencies));
 
                dependencies->magic = STATS_DEPS_MAGIC;
                dependencies->type = STATS_DEPS_TYPE_BASIC;
                dependencies->ndeps = 0;
            }
 
            /* grow the pointer array by one entry and append the new item */
            dependencies->ndeps++;
            dependencies = (MVDependencies *) repalloc(dependencies,
                                                       offsetof(MVDependencies, deps)
                                                       + dependencies->ndeps * sizeof(MVDependency *));
 
            dependencies->deps[dependencies->ndeps - 1] = d;
        }
 
        /*
         * we're done with variations of k elements, so free the
         * DependencyGenerator
         */
        DependencyGenerator_free(DependencyGenerator);
    }
 
    /* the scratch context is no longer needed */
    MemoryContextDelete(cxt);
 
    return dependencies;
}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, if(), MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies * statext_dependencies_deserialize ( bytea * data )

Definition at line 499 of file dependencies.c.

{
    /*
     * Rebuild an MVDependencies struct from its serialized bytea form.
     *
     * Returns NULL when "data" is NULL.  Raises an error (elog ERROR) when
     * the bytea is too short, the magic or type does not match, or the
     * dependency count is zero.  The per-dependency attribute count and the
     * final read position are checked only through Assert.
     */
    int         i;
    Size        min_expected_size;
    MVDependencies *dependencies;
    char       *tmp;
 
    if (data == NULL)
        return NULL;
 
    /* the payload must at least hold the fixed-size header */
    if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
        elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
             VARSIZE_ANY_EXHDR(data), SizeOfHeader);
 
    /* read the MVDependencies header */
    dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
 
    /* initialize pointer to the data part (skip the varlena header) */
    tmp = VARDATA_ANY(data);
 
    /* read the header fields and perform basic sanity checks */
    memcpy(&dependencies->magic, tmp, sizeof(uint32));
    tmp += sizeof(uint32);
    memcpy(&dependencies->type, tmp, sizeof(uint32));
    tmp += sizeof(uint32);
    memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
    tmp += sizeof(uint32);
 
    if (dependencies->magic != STATS_DEPS_MAGIC)
        elog(ERROR, "invalid dependency magic %d (expected %d)",
             dependencies->magic, STATS_DEPS_MAGIC);
 
    if (dependencies->type != STATS_DEPS_TYPE_BASIC)
        elog(ERROR, "invalid dependency type %d (expected %d)",
             dependencies->type, STATS_DEPS_TYPE_BASIC);
 
    if (dependencies->ndeps == 0)
        elog(ERROR, "invalid zero-length item array in MVDependencies");
 
    /*
     * what minimum bytea size do we expect for those parameters
     *
     * NOTE(review): SizeOfItem is given ndeps here, while the serializer
     * passes it a per-dependency attribute count - confirm that this is the
     * intended lower bound.
     */
    min_expected_size = SizeOfItem(dependencies->ndeps);
 
    if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
        elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
             VARSIZE_ANY_EXHDR(data), min_expected_size);
 
    /* enlarge the struct to hold one MVDependency pointer per dependency */
    dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
                            + (dependencies->ndeps * sizeof(MVDependency *)));
 
    /* each item is: degree (double), attribute count, attribute numbers */
    for (i = 0; i < dependencies->ndeps; i++)
    {
        double      degree;
        AttrNumber  k;
        MVDependency *d;
 
        /* degree of validity */
        memcpy(&degree, tmp, sizeof(double));
        tmp += sizeof(double);
 
        /* number of attributes */
        memcpy(&k, tmp, sizeof(AttrNumber));
        tmp += sizeof(AttrNumber);
 
        /* is the number of attributes valid? */
        Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
 
        /* now that we know the number of attributes, allocate the dependency */
        d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
                                     + (k * sizeof(AttrNumber)));
 
        d->degree = degree;
        d->nattributes = k;
 
        /* copy attribute numbers */
        memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
        tmp += sizeof(AttrNumber) * d->nattributes;
 
        dependencies->deps[i] = d;
 
        /* still within the bytea */
        Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
    }
 
    /* we should have consumed the whole bytea exactly */
    Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
 
    return dependencies;
}

References Assert(), MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog, ERROR, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_serialize()

bytea * statext_dependencies_serialize ( MVDependencies * dependencies )

Definition at line 444 of file dependencies.c.

{
    /*
     * Serialize an MVDependencies struct into a newly allocated bytea.
     *
     * Layout after the varlena header: magic, type and ndeps (uint32 each),
     * followed for every dependency by its degree (double), its attribute
     * count (AttrNumber) and the attribute numbers themselves.
     */
    Size        total = VARHDRSZ + SizeOfHeader;
    bytea      *result;
    char       *cursor;
    int         depno;

    /* add up the space needed by the individual dependencies */
    for (depno = 0; depno < dependencies->ndeps; depno++)
        total += SizeOfItem(dependencies->deps[depno]->nattributes);

    result = (bytea *) palloc0(total);
    SET_VARSIZE(result, total);

    cursor = VARDATA(result);

    /* fixed header: magic, type, ndeps */
    memcpy(cursor, &dependencies->magic, sizeof(uint32));
    cursor += sizeof(uint32);
    memcpy(cursor, &dependencies->type, sizeof(uint32));
    cursor += sizeof(uint32);
    memcpy(cursor, &dependencies->ndeps, sizeof(uint32));
    cursor += sizeof(uint32);

    /* then the dependencies, one after another */
    for (depno = 0; depno < dependencies->ndeps; depno++)
    {
        MVDependency *dep = dependencies->deps[depno];
        Size        attrs_size = sizeof(AttrNumber) * dep->nattributes;

        memcpy(cursor, &dep->degree, sizeof(double));
        cursor += sizeof(double);

        memcpy(cursor, &dep->nattributes, sizeof(AttrNumber));
        cursor += sizeof(AttrNumber);

        memcpy(cursor, dep->attributes, attrs_size);
        cursor += attrs_size;

        /* we must still be inside the allocated buffer */
        Assert(cursor <= ((char *) result + total));
    }

    /* the buffer must have been filled exactly */
    Assert(cursor == ((char *) result + total));

    return result;
}

References Assert(), MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output, palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

◆ statext_mcv_build()

MCVList * statext_mcv_build	(	StatsBuildData *	data,
		double	totalrows,
		int	stattarget
	)

Definition at line 180 of file mcv.c.

{
    /*
     * Build a multi-column MCV list from the sampled rows in "data".
     *
     * The rows are sorted, collapsed into distinct groups, and the leading
     * groups (at most "stattarget" of them, and only those whose count is
     * not below the threshold from get_mincount_for_mcv_list) become MCV
     * items.  Returns NULL when build_sorted_items() yields no rows or when
     * no group qualifies.
     */
    int         i,
                numattrs,
                numrows,
                ngroups,
                nitems;
    double      mincount;
    SortItem   *items;
    SortItem   *groups;
    MCVList    *mcvlist = NULL;
    MultiSortSupport mss;
 
    /* comparator for all the columns */
    mss = build_mss(data);
 
    /* sort the rows; nitems receives the number of rows that were kept */
    items = build_sorted_items(data, &nitems, mss,
                               data->nattnums, data->attnums);
 
    /* nothing to build the list from */
    if (!items)
        return NULL;
 
    /* for convenience */
    numattrs = data->nattnums;
    numrows = data->numrows;
 
    /* transform the sorted rows into groups (sorted by frequency) */
    groups = build_distinct_groups(nitems, items, mss, &ngroups);
 
    /*
     * The maximum number of MCV items to store, based on the statistics
     * target we computed for the statistics object (from the target set for
     * the object itself, attributes and the system default). In any case, we
     * can't keep more groups than we have available.
     *
     * Note that nitems is reused here: from this point on it is the number
     * of MCV items to keep, no longer the number of sorted rows.
     */
    nitems = stattarget;
    if (nitems > ngroups)
        nitems = ngroups;
 
    /*
     * Decide how many items to keep in the MCV list. We can't use the same
     * algorithm as per-column MCV lists, because that only considers the
     * actual group frequency - but we're primarily interested in how the
     * actual frequency differs from the base frequency (product of simple
     * per-column frequencies, as if the columns were independent).
     *
     * Using the same algorithm might exclude items that are close to the
     * "average" frequency of the sample. But that does not say whether the
     * observed frequency is close to the base frequency or not. We also need
     * to consider unexpectedly uncommon items (again, compared to the base
     * frequency), and the single-column algorithm does not have to.
     *
     * We simply decide how many items to keep by computing the minimum count
     * using get_mincount_for_mcv_list() and then keep all items that seem to
     * be more common than that.
     */
    mincount = get_mincount_for_mcv_list(numrows, totalrows);
 
    /*
     * Walk the groups until we find the first group with a count below the
     * mincount threshold (the index of that group is the number of groups we
     * want to keep).
     */
    for (i = 0; i < nitems; i++)
    {
        if (groups[i].count < mincount)
        {
            nitems = i;
            break;
        }
    }
 
    /*
     * At this point, we know the number of items for the MCV list. There
     * might be none (for uniform distribution with many groups), and in that
     * case, there will be no MCV list. Otherwise, construct the MCV list.
     */
    if (nitems > 0)
    {
        int         j;
        SortItem    key;
        MultiSortSupport tmp;
 
        /* frequencies for values in each attribute */
        SortItem  **freqs;
        int        *nfreqs;
 
        /*
         * used to search values: a MultiSortSupport with room for a single
         * dimension (not pfree'd in this function)
         */
        tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
                                        + sizeof(SortSupportData));
 
        /* compute frequencies for values in each column */
        nfreqs = (int *) palloc0(sizeof(int) * numattrs);
        freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
 
        /*
         * Allocate the MCV list structure, set the global parameters.
         */
        mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
                                      sizeof(MCVItem) * nitems);
 
        mcvlist->magic = STATS_MCV_MAGIC;
        mcvlist->type = STATS_MCV_TYPE_BASIC;
        mcvlist->ndimensions = numattrs;
        mcvlist->nitems = nitems;
 
        /* store info about data type OIDs */
        for (i = 0; i < numattrs; i++)
            mcvlist->types[i] = data->stats[i]->attrtypid;
 
        /* Copy the first chunk of groups into the result. */
        for (i = 0; i < nitems; i++)
        {
            /* just point to the proper place in the list */
            MCVItem    *item = &mcvlist->items[i];
 
            item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
            item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
 
            /* copy values for the group */
            memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
            memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
 
            /* groups should be sorted by frequency in descending order */
            Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
 
            /* group frequency, as a fraction of the sampled rows */
            item->frequency = (double) groups[i].count / numrows;
 
            /*
             * base frequency, if the attributes were independent: the
             * product of the per-column frequencies of the group's values
             */
            item->base_frequency = 1.0;
            for (j = 0; j < numattrs; j++)
            {
                SortItem   *freq;
 
                /* single dimension: reuse column j's sort support */
                tmp->ndims = 1;
                tmp->ssup[0] = mss->ssup[j];
 
                /* fill search key */
                key.values = &groups[i].values[j];
                key.isnull = &groups[i].isnull[j];
 
                freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
                                                sizeof(SortItem),
                                                multi_sort_compare, tmp);
 
                /*
                 * NOTE(review): freq is dereferenced without a NULL check;
                 * presumably every group value is present in freqs[j] -
                 * confirm against build_column_frequencies.
                 */
                item->base_frequency *= ((double) freq->count) / numrows;
            }
        }
 
        pfree(nfreqs);
        pfree(freqs);
    }
 
    /* the sorted rows and the groups are no longer needed */
    pfree(items);
    pfree(groups);
 
    return mcvlist;
}

References Assert(), MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, data, for(), MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, items, j, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, nitems, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList * statext_mcv_deserialize ( bytea * data )

Definition at line 996 of file mcv.c.

/*
 * statext_mcv_deserialize
 *      Rebuild an in-memory MCVList from its serialized bytea form.
 *
 * Returns NULL when 'data' is NULL.  Raises ERROR (via elog) when the
 * header fields are out of range or the varlena size does not match the
 * size implied by the header and the per-dimension info records.
 *
 * The result is one allocation: the MCVList header is allocated first and
 * then enlarged with repalloc so that the items array, the per-item
 * values/isnull arrays and the copies of all by-reference values live in
 * the same chunk.  All scratch buffers allocated here (the per-dimension
 * info array and the index => Datum maps) are released before returning.
 */
{
    int         dim,
                i;
    Size        expected_size;
    MCVList    *mcvlist;
    char       *raw;
    char       *ptr;
    char       *endptr PG_USED_FOR_ASSERTS_ONLY;

    int         ndims,
                nitems;
    DimensionInfo *info = NULL;

    /* local allocation buffer (used only for deserialization) */
    Datum     **map = NULL;

    /* MCV list */
    Size        mcvlen;

    /* buffer used for the result */
    Size        datalen;
    char       *dataptr;
    char       *valuesptr;
    char       *isnullptr;

    if (data == NULL)
        return NULL;

    /*
     * We can't possibly deserialize a MCV list if there's not even a complete
     * header. We need an explicit formula here, because we serialize the
     * header fields one by one, so we need to ignore struct alignment.
     */
    if (VARSIZE_ANY(data) < MinSizeOfMCVList)
        elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
             VARSIZE_ANY(data), MinSizeOfMCVList);

    /* read the MCV list header */
    mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));

    /* pointer to the data part (skip the varlena header) */
    raw = (char *) data;
    ptr = VARDATA_ANY(raw);
    endptr = (char *) raw + VARSIZE_ANY(data);

    /*
     * Get the header and perform further sanity checks.  The fields are read
     * with memcpy, one at a time, in the order magic, type, nitems,
     * ndimensions.
     */
    memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    ptr += sizeof(uint32);

    memcpy(&mcvlist->type, ptr, sizeof(uint32));
    ptr += sizeof(uint32);

    memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    ptr += sizeof(uint32);

    memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    ptr += sizeof(AttrNumber);

    if (mcvlist->magic != STATS_MCV_MAGIC)
        elog(ERROR, "invalid MCV magic %u (expected %u)",
             mcvlist->magic, STATS_MCV_MAGIC);

    if (mcvlist->type != STATS_MCV_TYPE_BASIC)
        elog(ERROR, "invalid MCV type %u (expected %u)",
             mcvlist->type, STATS_MCV_TYPE_BASIC);

    if (mcvlist->ndimensions == 0)
        elog(ERROR, "invalid zero-length dimension array in MCVList");
    else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
             (mcvlist->ndimensions < 0))
        elog(ERROR, "invalid length (%d) dimension array in MCVList",
             mcvlist->ndimensions);

    if (mcvlist->nitems == 0)
        elog(ERROR, "invalid zero-length item array in MCVList");
    else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
        elog(ERROR, "invalid length (%u) item array in MCVList",
             mcvlist->nitems);

    nitems = mcvlist->nitems;
    ndims = mcvlist->ndimensions;

    /*
     * Check amount of data including DimensionInfo for all dimensions and
     * also the serialized items (including uint16 indexes). Also, walk
     * through the dimension information and add it to the sum.
     */
    expected_size = SizeOfMCVList(ndims, nitems);

    /*
     * Check that we have at least the dimension and info records, along with
     * the items. We don't know the size of the serialized values yet. We need
     * to do this check first, before accessing the dimension info.
     */
    if (VARSIZE_ANY(data) < expected_size)
        elog(ERROR, "invalid MCV size %zu (expected %zu)",
             VARSIZE_ANY(data), expected_size);

    /* Now copy the array of type Oids. */
    memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    ptr += (sizeof(Oid) * ndims);

    /* Now it's safe to access the dimension info. */
    info = palloc(ndims * sizeof(DimensionInfo));

    memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    ptr += (ndims * sizeof(DimensionInfo));

    /* account for the value arrays */
    for (dim = 0; dim < ndims; dim++)
    {
        /*
         * XXX I wonder if we can/should rely on asserts here. Maybe those
         * checks should be done every time?
         */
        Assert(info[dim].nvalues >= 0);
        Assert(info[dim].nbytes >= 0);

        expected_size += info[dim].nbytes;
    }

    /*
     * Now we know the total expected MCV size, including all the pieces
     * (header, dimension info, items and deduplicated data). So do the final
     * check on size.
     */
    if (VARSIZE_ANY(data) != expected_size)
        elog(ERROR, "invalid MCV size %zu (expected %zu)",
             VARSIZE_ANY(data), expected_size);

    /*
     * We need an array of Datum values for each dimension, so that we can
     * easily translate the uint16 indexes later. We also need a top-level
     * array of pointers to those per-dimension arrays.
     *
     * While allocating the arrays for dimensions, compute how much space we
     * need for a copy of the by-ref data, as we can't simply point to the
     * original values (it might go away).
     */
    datalen = 0;                /* space for by-ref data */
    map = (Datum **) palloc(ndims * sizeof(Datum *));

    for (dim = 0; dim < ndims; dim++)
    {
        map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);

        /* space needed for a copy of data for by-ref types */
        datalen += info[dim].nbytes_aligned;
    }

    /*
     * Now resize the MCV list so that the allocation includes all the data.
     *
     * Allocate space for a copy of the data, as we can't simply reference the
     * serialized data - it's not aligned properly, and it may disappear while
     * we're still using the MCV list, e.g. due to catcache release.
     *
     * We do care about alignment here, because we will allocate all the
     * pieces at once, but then use pointers to different parts.
     */
    mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));

    /* arrays of values and isnull flags for all MCV items */
    mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);

    /* we don't quite need to align this, but it makes some asserts easier */
    mcvlen += MAXALIGN(datalen);

    /* now resize the deserialized MCV list, and compute pointers to parts */
    mcvlist = repalloc(mcvlist, mcvlen);

    /* pointer to the beginning of values/isnull arrays */
    valuesptr = (char *) mcvlist
        + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));

    isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));

    dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));

    /*
     * Build mapping (index => value) for translating the serialized data into
     * the in-memory representation.
     */
    for (dim = 0; dim < ndims; dim++)
    {
        /* remember start position in the input array */
        char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;

        if (info[dim].typbyval)
        {
            /* for by-val types we simply copy data into the mapping */
            for (i = 0; i < info[dim].nvalues; i++)
            {
                Datum       v = 0;

                /* only typlen bytes were stored for each by-val value */
                memcpy(&v, ptr, info[dim].typlen);
                ptr += info[dim].typlen;

                map[dim][i] = fetch_att(&v, true, info[dim].typlen);

                /* no under/overflow of input array */
                Assert(ptr <= (start + info[dim].nbytes));
            }
        }
        else
        {
            /* for by-ref types we need to also make a copy of the data */

            /* passed by reference, but fixed length (name, tid, ...) */
            if (info[dim].typlen > 0)
            {
                for (i = 0; i < info[dim].nvalues; i++)
                {
                    memcpy(dataptr, ptr, info[dim].typlen);
                    ptr += info[dim].typlen;

                    /* just point into the array */
                    map[dim][i] = PointerGetDatum(dataptr);
                    dataptr += MAXALIGN(info[dim].typlen);
                }
            }
            else if (info[dim].typlen == -1)
            {
                /* varlena */
                for (i = 0; i < info[dim].nvalues; i++)
                {
                    uint32      len;

                    /* read the uint32 length */
                    memcpy(&len, ptr, sizeof(uint32));
                    ptr += sizeof(uint32);

                    /* the length is data-only */
                    SET_VARSIZE(dataptr, len + VARHDRSZ);
                    memcpy(VARDATA(dataptr), ptr, len);
                    ptr += len;

                    /* just point into the array */
                    map[dim][i] = PointerGetDatum(dataptr);

                    /* skip to place of the next deserialized value */
                    dataptr += MAXALIGN(len + VARHDRSZ);
                }
            }
            else if (info[dim].typlen == -2)
            {
                /* cstring */
                for (i = 0; i < info[dim].nvalues; i++)
                {
                    uint32      len;

                    /* read the uint32 length, then copy that many bytes */
                    memcpy(&len, ptr, sizeof(uint32));
                    ptr += sizeof(uint32);

                    memcpy(dataptr, ptr, len);
                    ptr += len;

                    /* just point into the array */
                    map[dim][i] = PointerGetDatum(dataptr);
                    dataptr += MAXALIGN(len);
                }
            }

            /* no under/overflow of input array */
            Assert(ptr <= (start + info[dim].nbytes));

            /* no overflow of the output mcv value */
            Assert(dataptr <= ((char *) mcvlist + mcvlen));
        }

        /* check we consumed input data for this dimension exactly */
        Assert(ptr == (start + info[dim].nbytes));
    }

    /* we should have also filled the MCV list exactly */
    Assert(dataptr == ((char *) mcvlist + mcvlen));

    /* deserialize the MCV items and translate the indexes to Datums */
    for (i = 0; i < nitems; i++)
    {
        MCVItem    *item = &mcvlist->items[i];

        /* carve this item's values/isnull arrays out of the big allocation */
        item->values = (Datum *) valuesptr;
        valuesptr += MAXALIGN(sizeof(Datum) * ndims);

        item->isnull = (bool *) isnullptr;
        isnullptr += MAXALIGN(sizeof(bool) * ndims);

        memcpy(item->isnull, ptr, sizeof(bool) * ndims);
        ptr += sizeof(bool) * ndims;

        memcpy(&item->frequency, ptr, sizeof(double));
        ptr += sizeof(double);

        memcpy(&item->base_frequency, ptr, sizeof(double));
        ptr += sizeof(double);

        /* finally translate the indexes (for non-NULL only) */
        for (dim = 0; dim < ndims; dim++)
        {
            uint16      index;

            /* the index is always consumed, even for NULL values */
            memcpy(&index, ptr, sizeof(uint16));
            ptr += sizeof(uint16);

            if (item->isnull[dim])
                continue;

            item->values[dim] = map[dim][index];
        }

        /* check we're not overflowing the input */
        Assert(ptr <= endptr);
    }

    /* check that we processed all the data */
    Assert(ptr == endptr);

    /* release the buffers used for mapping */
    for (dim = 0; dim < ndims; dim++)
        pfree(map[dim]);

    pfree(map);

    /* the per-dimension info is scratch data too, so don't leak it */
    pfree(info);

    return mcvlist;
}

References Assert(), MCVItem::base_frequency, data, elog, ERROR, fetch_att(), MCVItem::frequency, i, MCVItem::isnull, MCVList::items, items, len, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, nitems, DimensionInfo::nvalues, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), repalloc(), SET_VARSIZE, SizeOfMCVList, start, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_serialize()

bytea * statext_mcv_serialize	(	MCVList *	mcvlist,
		VacAttrStats **	stats
	)

Definition at line 621 of file mcv.c.

/*
 * statext_mcv_serialize
 *      Serialize an MCVList into a bytea value.
 *
 * 'mcvlist' is the list to serialize; 'stats' supplies, for each dimension,
 * the type OID, the type length / by-value flag and the collation used to
 * sort and deduplicate the values.
 *
 * Layout of the output, in order: varlena header, magic, type, nitems,
 * ndimensions, the array of type OIDs, one DimensionInfo per dimension, the
 * deduplicated values of every dimension, and finally the items (isnull
 * flags, frequency, base frequency and one uint16 index per dimension).
 *
 * Returns a newly allocated bytea.  The scratch buffers allocated here
 * (value arrays, counts, dimension info and sort support array) are freed
 * before returning.
 */
{
    int         i;
    int         dim;
    int         ndims = mcvlist->ndimensions;

    SortSupport ssup;
    DimensionInfo *info;

    Size        total_length;

    /* serialized items (indexes into arrays, etc.) */
    bytea      *raw;
    char       *ptr;
    char       *endptr PG_USED_FOR_ASSERTS_ONLY;

    /* values per dimension (and number of non-NULL values) */
    Datum     **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
    int        *counts = (int *) palloc0(sizeof(int) * ndims);

    /*
     * We'll include some rudimentary information about the attribute types
     * (length, by-val flag), so that we don't have to look them up while
     * deserializing the MCV list (we already have the type OID in the
     * header).  This is safe because when changing the type of the attribute
     * the statistics gets dropped automatically.  We need to store the info
     * about the arrays of deduplicated values anyway.
     */
    info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);

    /* sort support data for all attributes included in the MCV list */
    ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);

    /* collect and deduplicate values for each dimension (attribute) */
    for (dim = 0; dim < ndims; dim++)
    {
        int         ndistinct;
        TypeCacheEntry *typentry;

        /*
         * Lookup the LT operator (can't get it from stats extra_data, as we
         * don't know how to interpret that - scalar vs. array etc.).
         */
        typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);

        /* copy important info about the data type (length, by-value) */
        info[dim].typlen = stats[dim]->attrtype->typlen;
        info[dim].typbyval = stats[dim]->attrtype->typbyval;

        /* allocate space for values in the attribute and collect them */
        values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);

        for (i = 0; i < mcvlist->nitems; i++)
        {
            /* skip NULL values - we don't need to deduplicate those */
            if (mcvlist->items[i].isnull[dim])
                continue;

            /* append the value at the end */
            values[dim][counts[dim]] = mcvlist->items[i].values[dim];
            counts[dim] += 1;
        }

        /*
         * If there are just NULL values in this dimension, we're done.  The
         * info entry keeps its zeroed nvalues/nbytes from palloc0.
         */
        if (counts[dim] == 0)
            continue;

        /* sort and deduplicate the data */
        ssup[dim].ssup_cxt = CurrentMemoryContext;
        ssup[dim].ssup_collation = stats[dim]->attrcollid;
        ssup[dim].ssup_nulls_first = false;

        PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);

        qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
                            compare_scalars_simple, &ssup[dim]);

        /*
         * Walk through the array and eliminate duplicate values, but keep the
         * ordering (so that we can do a binary search later). We know there's
         * at least one item as (counts[dim] != 0), so we can skip the first
         * element.
         */
        ndistinct = 1;          /* number of distinct values */
        for (i = 1; i < counts[dim]; i++)
        {
            /* expect sorted array */
            Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);

            /* if the value is the same as the previous one, we can skip it */
            if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
                continue;

            values[dim][ndistinct] = values[dim][i];
            ndistinct += 1;
        }

        /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
        Assert(ndistinct <= PG_UINT16_MAX);

        /*
         * Store additional info about the attribute - number of deduplicated
         * values, and also size of the serialized data. For fixed-length data
         * types this is trivial to compute, for varwidth types we need to
         * actually walk the array and sum the sizes.
         */
        info[dim].nvalues = ndistinct;

        if (info[dim].typbyval) /* by-value data types */
        {
            info[dim].nbytes = info[dim].nvalues * info[dim].typlen;

            /*
             * We copy the data into the MCV item during deserialization, so
             * we don't need to allocate any extra space.
             */
            info[dim].nbytes_aligned = 0;
        }
        else if (info[dim].typlen > 0)  /* fixed-length by-ref */
        {
            /*
             * We don't care about alignment in the serialized data, so we
             * pack the data as much as possible. But we also track how much
             * data will be needed after deserialization, and in that case we
             * need to account for alignment of each item.
             *
             * Note: As the items are fixed-length, we could easily compute
             * this during deserialization, but we do it here anyway.
             */
            info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
            info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
        }
        else if (info[dim].typlen == -1)    /* varlena */
        {
            info[dim].nbytes = 0;
            info[dim].nbytes_aligned = 0;
            for (i = 0; i < info[dim].nvalues; i++)
            {
                Size        len;

                /*
                 * For varlena values, we detoast the values and store the
                 * length and data separately. We don't bother with alignment
                 * here, which means that during deserialization we need to
                 * copy the fields and only access the copies.
                 */
                values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));

                /* serialized length (uint32 length + data) */
                len = VARSIZE_ANY_EXHDR(values[dim][i]);
                info[dim].nbytes += sizeof(uint32); /* length */
                info[dim].nbytes += len;    /* value (no header) */

                /*
                 * During deserialization we'll build regular varlena values
                 * with full headers, and we need to align them properly.
                 */
                info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
            }
        }
        else if (info[dim].typlen == -2)    /* cstring */
        {
            info[dim].nbytes = 0;
            info[dim].nbytes_aligned = 0;
            for (i = 0; i < info[dim].nvalues; i++)
            {
                Size        len;

                /*
                 * cstring is handled similar to varlena - first we store the
                 * length as uint32 and then the data. We don't care about
                 * alignment, which means that during deserialization we need
                 * to copy the fields and only access the copies.
                 */

                /* c-strings include terminator, so +1 byte */
                len = strlen(DatumGetCString(values[dim][i])) + 1;
                info[dim].nbytes += sizeof(uint32); /* length */
                info[dim].nbytes += len;    /* value */

                /* space needed for properly aligned deserialized copies */
                info[dim].nbytes_aligned += MAXALIGN(len);
            }
        }

        /* we know (count>0) so there must be some data */
        Assert(info[dim].nbytes > 0);
    }

    /*
     * Now we can finally compute how much space we'll actually need for the
     * whole serialized MCV list (varlena header, MCV header, dimension info
     * for each attribute, deduplicated values and items).
     */
    total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
        + sizeof(AttrNumber)    /* ndimensions */
        + (ndims * sizeof(Oid));    /* attribute types */

    /* dimension info */
    total_length += ndims * sizeof(DimensionInfo);

    /* add space for the arrays of deduplicated values */
    for (i = 0; i < ndims; i++)
        total_length += info[i].nbytes;

    /*
     * And finally account for the items (those are fixed-length, thanks to
     * replacing values with uint16 indexes into the deduplicated arrays).
     *
     * The item size depends on the number of dimensions, so pass ndims
     * explicitly rather than relying on the leftover value of a loop
     * counter.
     */
    total_length += mcvlist->nitems * ITEM_SIZE(ndims);

    /*
     * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     * we set them to zero to make the result more compressible).
     */
    raw = (bytea *) palloc0(VARHDRSZ + total_length);
    SET_VARSIZE(raw, VARHDRSZ + total_length);

    ptr = VARDATA(raw);
    endptr = ptr + total_length;

    /* copy the MCV list header fields, one by one */
    memcpy(ptr, &mcvlist->magic, sizeof(uint32));
    ptr += sizeof(uint32);

    memcpy(ptr, &mcvlist->type, sizeof(uint32));
    ptr += sizeof(uint32);

    memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
    ptr += sizeof(uint32);

    memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
    ptr += sizeof(AttrNumber);

    memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
    ptr += (sizeof(Oid) * ndims);

    /* store information about the attributes (data amounts, ...) */
    memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
    ptr += sizeof(DimensionInfo) * ndims;

    /* Copy the deduplicated values for all attributes to the output. */
    for (dim = 0; dim < ndims; dim++)
    {
        /* remember the starting point for Asserts later */
        char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;

        for (i = 0; i < info[dim].nvalues; i++)
        {
            Datum       value = values[dim][i];

            if (info[dim].typbyval) /* passed by value */
            {
                Datum       tmp;

                /*
                 * For byval types, we need to copy just the significant bytes
                 * - we can't use memcpy directly, as that assumes
                 * little-endian behavior.  store_att_byval does almost what
                 * we need, but it requires a properly aligned buffer - the
                 * output buffer does not guarantee that. So we simply use a
                 * local Datum variable (which guarantees proper alignment),
                 * and then copy the value from it.
                 */
                store_att_byval(&tmp, value, info[dim].typlen);

                memcpy(ptr, &tmp, info[dim].typlen);
                ptr += info[dim].typlen;
            }
            else if (info[dim].typlen > 0)  /* passed by reference */
            {
                /* no special alignment needed, treated as char array */
                memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
                ptr += info[dim].typlen;
            }
            else if (info[dim].typlen == -1)    /* varlena */
            {
                uint32      len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));

                /* copy the length */
                memcpy(ptr, &len, sizeof(uint32));
                ptr += sizeof(uint32);

                /* data from the varlena value (without the header) */
                memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
                ptr += len;
            }
            else if (info[dim].typlen == -2)    /* cstring */
            {
                uint32      len = (uint32) strlen(DatumGetCString(value)) + 1;

                /* copy the length */
                memcpy(ptr, &len, sizeof(uint32));
                ptr += sizeof(uint32);

                /* value */
                memcpy(ptr, DatumGetCString(value), len);
                ptr += len;
            }

            /* no underflows or overflows */
            Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
        }

        /* we should get exactly nbytes of data for this dimension */
        Assert((ptr - start) == info[dim].nbytes);
    }

    /* Serialize the items, with uint16 indexes instead of the values. */
    for (i = 0; i < mcvlist->nitems; i++)
    {
        MCVItem    *mcvitem = &mcvlist->items[i];

        /* don't write beyond the allocated space */
        Assert(ptr <= (endptr - ITEM_SIZE(ndims)));

        /* copy NULL and frequency flags into the serialized MCV */
        memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
        ptr += sizeof(bool) * ndims;

        memcpy(ptr, &mcvitem->frequency, sizeof(double));
        ptr += sizeof(double);

        memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
        ptr += sizeof(double);

        /* store the indexes last */
        for (dim = 0; dim < ndims; dim++)
        {
            uint16      index = 0;
            Datum      *value;

            /* do the lookup only for non-NULL values */
            if (!mcvitem->isnull[dim])
            {
                value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
                                              info[dim].nvalues, sizeof(Datum),
                                              compare_scalars_simple, &ssup[dim]);

                Assert(value != NULL);  /* serialization or deduplication
                                         * error */

                /* compute index within the deduplicated array */
                index = (uint16) (value - values[dim]);

                /* check the index is within expected bounds */
                Assert(index < info[dim].nvalues);
            }

            /* copy the index into the serialized MCV (0 for NULL values) */
            memcpy(ptr, &index, sizeof(uint16));
            ptr += sizeof(uint16);
        }

        /* make sure we don't overflow the allocated value */
        Assert(ptr <= endptr);
    }

    /* at this point we expect to match the total_length exactly */
    Assert(ptr == endptr);

    /*
     * Release the scratch buffers.  Everything needed was copied into 'raw'
     * above, so none of these is referenced by the result.
     */
    for (dim = 0; dim < ndims; dim++)
        pfree(values[dim]);

    pfree(values);
    pfree(counts);
    pfree(info);
    pfree(ssup);

    return raw;
}

References Assert(), VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString(), DatumGetPointer(), MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), PrepareSortSupportFromOrderingOp(), qsort_interruptible(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, start, store_att_byval(), DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, values, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

◆ statext_ndistinct_build()

MVNDistinct * statext_ndistinct_build	(	double	totalrows,
		StatsBuildData *	data
	)

Definition at line 88 of file mvdistinct.c.

/*
 * statext_ndistinct_build
 *      Compute an ndistinct estimate for every combination of two or more
 *      attributes listed in 'data'.
 *
 * The result holds num_combinations(nattnums) items.  Each item records the
 * attribute numbers of one combination together with the value returned by
 * ndistinct_for_combination() for it.
 */
{
    int         nattrs = data->nattnums;
    int         ncombos = num_combinations(nattrs);
    int         filled = 0;     /* number of items written so far */
    int         width;          /* size of the combinations being generated */
    MVNDistinct *ndist;

    ndist = palloc(offsetof(MVNDistinct, items) +
                   ncombos * sizeof(MVNDistinctItem));
    ndist->magic = STATS_NDISTINCT_MAGIC;
    ndist->type = STATS_NDISTINCT_TYPE_BASIC;
    ndist->nitems = ncombos;

    /* walk combination sizes from pairs up to the full attribute set */
    for (width = 2; width <= nattrs; width++)
    {
        CombinationGenerator *gen = generator_init(nattrs, width);

        for (;;)
        {
            MVNDistinctItem *entry;
            int        *combo = generator_next(gen);
            int         pos;

            /* generator exhausted for this width */
            if (!combo)
                break;

            entry = &ndist->items[filled];

            entry->attributes = palloc(sizeof(AttrNumber) * width);
            entry->nattributes = width;

            /* the generator yields indexes; map them to attribute numbers */
            for (pos = 0; pos < width; pos++)
            {
                entry->attributes[pos] = data->attnums[combo[pos]];

                Assert(AttributeNumberIsValid(entry->attributes[pos]));
            }

            entry->ndistinct =
                ndistinct_for_combination(totalrows, data, width, combo);

            filled++;
            Assert(filled <= ndist->nitems);
        }

        generator_free(gen);
    }

    /* every slot of the output array must have been used */
    Assert(filled == ndist->nitems);

    return ndist;
}

References Assert(), AttributeNumberIsValid, MVNDistinctItem::attributes, data, generator_free(), generator_init(), generator_next(), MVNDistinct::items, items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct * statext_ndistinct_deserialize ( bytea * data )

Definition at line 250 of file mvdistinct.c.

/*
 * statext_ndistinct_deserialize
 *      Turn a serialized bytea back into an MVNDistinct structure.
 *
 * Returns NULL when 'data' is NULL.  Raises ERROR (via elog) if the value is
 * too short for the header, carries an unexpected magic number or type, has
 * no items, or is shorter than the minimum size for its item count.
 */
{
    uint32      magic;
    uint32      type;
    uint32      nitems;
    Size        required;
    MVNDistinct *ndistinct;
    char       *cursor;
    int         idx;

    if (data == NULL)
        return NULL;

    /* the value has to hold at least the fixed header fields */
    if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
        elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
             VARSIZE_ANY_EXHDR(data), SizeOfHeader);

    /* start reading right after the varlena header */
    cursor = VARDATA_ANY(data);

    /* header fields are stored as three consecutive uint32 values */
    memcpy(&magic, cursor, sizeof(uint32));
    cursor += sizeof(uint32);
    memcpy(&type, cursor, sizeof(uint32));
    cursor += sizeof(uint32);
    memcpy(&nitems, cursor, sizeof(uint32));
    cursor += sizeof(uint32);

    /* basic sanity checks on the header */
    if (magic != STATS_NDISTINCT_MAGIC)
        elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
             magic, STATS_NDISTINCT_MAGIC);
    if (type != STATS_NDISTINCT_TYPE_BASIC)
        elog(ERROR, "invalid ndistinct type %d (expected %d)",
             type, STATS_NDISTINCT_TYPE_BASIC);
    if (nitems == 0)
        elog(ERROR, "invalid zero-length item array in MVNDistinct");

    /* smallest bytea that could possibly hold this many items */
    required = MinSizeOfItems(nitems);
    if (VARSIZE_ANY_EXHDR(data) < required)
        elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
             VARSIZE_ANY_EXHDR(data), required);

    /*
     * Allocate the header plus the array of items.  The attribute-number
     * array of each item is not part of this allocation; it is palloc'd
     * separately while reading the item below.
     */
    ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
                        (nitems * sizeof(MVNDistinctItem)));
    ndistinct->magic = magic;
    ndistinct->type = type;
    ndistinct->nitems = nitems;

    for (idx = 0; idx < ndistinct->nitems; idx++)
    {
        MVNDistinctItem *entry = ndistinct->items + idx;

        /* the estimate itself */
        memcpy(&entry->ndistinct, cursor, sizeof(double));
        cursor += sizeof(double);

        /* how many attributes this item covers */
        memcpy(&entry->nattributes, cursor, sizeof(int));
        cursor += sizeof(int);
        Assert((entry->nattributes >= 2) && (entry->nattributes <= STATS_MAX_DIMENSIONS));

        /* followed by that many attribute numbers */
        entry->attributes
            = (AttrNumber *) palloc(entry->nattributes * sizeof(AttrNumber));

        memcpy(entry->attributes, cursor, sizeof(AttrNumber) * entry->nattributes);
        cursor += sizeof(AttrNumber) * entry->nattributes;

        /* the read position must not have run past the end of the bytea */
        Assert(cursor <= ((char *) data + VARSIZE_ANY(data)));
    }

    /* all of the input must have been consumed, no more and no less */
    Assert(cursor == ((char *) data + VARSIZE_ANY(data)));

    return ndistinct;
}

References Assert(), MVNDistinctItem::attributes, data, elog, ERROR, i, MVNDistinct::items, items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_serialize()

bytea * statext_ndistinct_serialize ( MVNDistinct * ndistinct )

Definition at line 179 of file mvdistinct.c.

{
    int         idx;
    bytea      *result;
    char       *ptr;
    Size        total;

    /* only the basic ndistinct format is handled here */
    Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
    Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);

    /*
     * Work out the size of the output: the varlena header and the fixed
     * header part first, then the space needed by every item (which depends
     * on how many attributes the item has).
     */
    total = VARHDRSZ + SizeOfHeader;

    for (idx = 0; idx < ndistinct->nitems; idx++)
    {
        int         natts = ndistinct->items[idx].nattributes;

        Assert(natts >= 2);

        total += SizeOfItem(natts);
    }

    result = (bytea *) palloc(total);
    SET_VARSIZE(result, total);

    /* start writing right after the varlena header */
    ptr = VARDATA(result);

    /* header fields, in order: magic, type, nitems */
    memcpy(ptr, &ndistinct->magic, sizeof(uint32));
    ptr += sizeof(uint32);
    memcpy(ptr, &ndistinct->type, sizeof(uint32));
    ptr += sizeof(uint32);
    memcpy(ptr, &ndistinct->nitems, sizeof(uint32));
    ptr += sizeof(uint32);

    /*
     * Each item is written as its ndistinct estimate, followed by the number
     * of attributes and then the attribute numbers themselves.
     */
    for (idx = 0; idx < ndistinct->nitems; idx++)
    {
        const MVNDistinctItem *entry = &ndistinct->items[idx];
        int         natts = entry->nattributes;

        memcpy(ptr, &entry->ndistinct, sizeof(double));
        ptr += sizeof(double);

        memcpy(ptr, &natts, sizeof(int));
        ptr += sizeof(int);

        memcpy(ptr, entry->attributes, sizeof(AttrNumber) * natts);
        ptr += natts * sizeof(AttrNumber);

        /* we must never write past the allocated buffer */
        Assert(ptr <= ((char *) result + total));
    }

    /* the precomputed length has to match what was written */
    Assert(ptr == ((char *) result + total));

    return result;
}

References Assert(), MVNDistinctItem::attributes, i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

Data Structures

Typedefs

Functions

Typedef Documentation

◆ DimensionInfo

◆ MultiSortSupport

◆ MultiSortSupportData

◆ SortItem

◆ StatsBuildData

Function Documentation

◆ build_attnums_array()

◆ build_sorted_items()

◆ compare_datums_simple()

◆ compare_scalars_simple()

◆ examine_opclause_args()

◆ mcv_clause_selectivity_or()

◆ mcv_clauselist_selectivity()

◆ mcv_combine_selectivities()

◆ multi_sort_add_dimension()

◆ multi_sort_compare()

◆ multi_sort_compare_dim()

◆ multi_sort_compare_dims()

◆ multi_sort_init()

◆ statext_dependencies_build()

◆ statext_dependencies_deserialize()

◆ statext_dependencies_serialize()

◆ statext_mcv_build()

◆ statext_mcv_deserialize()

◆ statext_mcv_serialize()

◆ statext_ndistinct_build()

◆ statext_ndistinct_deserialize()

◆ statext_ndistinct_serialize()