PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
mcv.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "fmgr.h"
#include "funcapi.h"
#include "nodes/nodeFuncs.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
Include dependency graph for mcv.c:

Go to the source code of this file.

Macros

#define ITEM_SIZE(ndims)    ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
 
#define MinSizeOfMCVList    (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
 
#define SizeOfMCVList(ndims, nitems)
 
#define RESULT_MERGE(value, is_or, match)    ((is_or) ? ((value) || (match)) : ((value) && (match)))
 
#define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
 

Functions

static MultiSortSupport build_mss (StatsBuildData *data)
 
static SortItembuild_distinct_groups (int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
 
static SortItem ** build_column_frequencies (SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
 
static int count_distinct_groups (int numrows, SortItem *items, MultiSortSupport mss)
 
static double get_mincount_for_mcv_list (int samplerows, double totalrows)
 
MCVListstatext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
static int compare_sort_item_count (const void *a, const void *b, void *arg)
 
static int sort_item_compare (const void *a, const void *b, void *arg)
 
MCVListstatext_mcv_load (Oid mvoid, bool inh)
 
byteastatext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVListstatext_mcv_deserialize (bytea *data)
 
Datum pg_stats_ext_mcvlist_items (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_in (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_out (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_recv (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_send (PG_FUNCTION_ARGS)
 
static int mcv_match_expression (Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
 
static bool * mcv_get_match_bitmap (PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Macro Definition Documentation

◆ ITEM_SIZE

#define ITEM_SIZE (   ndims)     ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))

Definition at line 53 of file mcv.c.

◆ MinSizeOfMCVList

#define MinSizeOfMCVList    (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))

Definition at line 59 of file mcv.c.

◆ RESULT_IS_FINAL

#define RESULT_IS_FINAL (   value,
  is_or 
)    ((is_or) ? (value) : (!(value)))

Definition at line 100 of file mcv.c.

◆ RESULT_MERGE

#define RESULT_MERGE (   value,
  is_or,
  match 
)     ((is_or) ? ((value) || (match)) : ((value) && (match)))

Definition at line 88 of file mcv.c.

◆ SizeOfMCVList

#define SizeOfMCVList (   ndims,
  nitems 
)
Value:
((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
((ndims) * sizeof(DimensionInfo)) + \
((nitems) * ITEM_SIZE(ndims)))
struct DimensionInfo DimensionInfo
#define nitems(x)
Definition: indent.h:31
#define ITEM_SIZE(ndims)
Definition: mcv.c:53
#define MinSizeOfMCVList
Definition: mcv.c:59
unsigned int Oid
Definition: postgres_ext.h:31

Definition at line 68 of file mcv.c.

Function Documentation

◆ build_column_frequencies()

static SortItem ** build_column_frequencies ( SortItem groups,
int  ngroups,
MultiSortSupport  mss,
int *  ncounts 
)
static

Definition at line 490 of file mcv.c.

492{
493 int i,
494 dim;
495 SortItem **result;
496 char *ptr;
497
498 Assert(groups);
499 Assert(ncounts);
500
501 /* allocate arrays for all columns as a single chunk */
502 ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
503 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
504
505 /* initial array of pointers */
506 result = (SortItem **) ptr;
507 ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
508
509 for (dim = 0; dim < mss->ndims; dim++)
510 {
511 SortSupport ssup = &mss->ssup[dim];
512
513 /* array of values for a single column */
514 result[dim] = (SortItem *) ptr;
515 ptr += MAXALIGN(sizeof(SortItem) * ngroups);
516
517 /* extract data for the dimension */
518 for (i = 0; i < ngroups; i++)
519 {
520 /* point into the input groups */
521 result[dim][i].values = &groups[i].values[dim];
522 result[dim][i].isnull = &groups[i].isnull[dim];
523 result[dim][i].count = groups[i].count;
524 }
525
526 /* sort the values, deduplicate */
527 qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
528 sort_item_compare, ssup);
529
530 /*
531 * Identify distinct values, compute frequency (there might be
532 * multiple MCV items containing this value, so we need to sum counts
533 * from all of them.
534 */
535 ncounts[dim] = 1;
536 for (i = 1; i < ngroups; i++)
537 {
538 if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
539 {
540 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
541 continue;
542 }
543
544 result[dim][ncounts[dim]] = result[dim][i];
545
546 ncounts[dim]++;
547 }
548 }
549
550 return result;
551}
#define MAXALIGN(LEN)
Definition: c.h:765
#define Assert(condition)
Definition: c.h:812
int i
Definition: isn.c:72
static int sort_item_compare(const void *a, const void *b, void *arg)
Definition: mcv.c:465
void * palloc(Size size)
Definition: mcxt.c:1317
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]

References Assert, SortItem::count, i, SortItem::isnull, MAXALIGN, MultiSortSupportData::ndims, palloc(), qsort_interruptible(), sort_item_compare(), MultiSortSupportData::ssup, and SortItem::values.

Referenced by statext_mcv_build().

◆ build_distinct_groups()

static SortItem * build_distinct_groups ( int  numrows,
SortItem items,
MultiSortSupport  mss,
int *  ndistinct 
)
static

Definition at line 424 of file mcv.c.

426{
427 int i,
428 j;
429 int ngroups = count_distinct_groups(numrows, items, mss);
430
431 SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
432
433 j = 0;
434 groups[0] = items[0];
435 groups[0].count = 1;
436
437 for (i = 1; i < numrows; i++)
438 {
439 /* Assume sorted in ascending order. */
440 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
441
442 /* New distinct group detected. */
443 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
444 {
445 groups[++j] = items[i];
446 groups[j].count = 0;
447 }
448
449 groups[j].count++;
450 }
451
452 /* ensure we filled the expected number of distinct groups */
453 Assert(j + 1 == ngroups);
454
455 /* Sort the distinct groups by frequency (in descending order). */
456 qsort_interruptible(groups, ngroups, sizeof(SortItem),
458
459 *ndistinct = ngroups;
460 return groups;
461}
int multi_sort_compare(const void *a, const void *b, void *arg)
int j
Definition: isn.c:73
static int compare_sort_item_count(const void *a, const void *b, void *arg)
Definition: mcv.c:403
static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
Definition: mcv.c:379
static ItemArray items
Definition: test_tidstore.c:48

References Assert, compare_sort_item_count(), SortItem::count, count_distinct_groups(), i, items, j, multi_sort_compare(), palloc(), and qsort_interruptible().

Referenced by statext_mcv_build().

◆ build_mss()

static MultiSortSupport build_mss ( StatsBuildData data)
static

Definition at line 347 of file mcv.c.

348{
349 int i;
350 int numattrs = data->nattnums;
351
352 /* Sort by multiple columns (using array of SortSupport) */
353 MultiSortSupport mss = multi_sort_init(numattrs);
354
355 /* prepare the sort functions for all the attributes */
356 for (i = 0; i < numattrs; i++)
357 {
358 VacAttrStats *colstat = data->stats[i];
360
362 if (type->lt_opr == InvalidOid) /* shouldn't happen */
363 elog(ERROR, "cache lookup failed for ordering operator for type %u",
364 colstat->attrtypid);
365
366 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
367 }
368
369 return mss;
370}
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
const void * data
#define InvalidOid
Definition: postgres_ext.h:36
Oid attrtypid
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:129
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_LT_OPR
Definition: typcache.h:138
const char * type

References VacAttrStats::attrcollid, VacAttrStats::attrtypid, data, elog, ERROR, i, InvalidOid, lookup_type_cache(), multi_sort_add_dimension(), multi_sort_init(), type, and TYPECACHE_LT_OPR.

Referenced by statext_mcv_build().

◆ compare_sort_item_count()

static int compare_sort_item_count ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 403 of file mcv.c.

404{
405 SortItem *ia = (SortItem *) a;
406 SortItem *ib = (SortItem *) b;
407
408 if (ia->count == ib->count)
409 return 0;
410 else if (ia->count > ib->count)
411 return -1;
412
413 return 1;
414}
int b
Definition: isn.c:69
int a
Definition: isn.c:68

References a, b, and SortItem::count.

Referenced by build_distinct_groups().

◆ count_distinct_groups()

static int count_distinct_groups ( int  numrows,
SortItem items,
MultiSortSupport  mss 
)
static

Definition at line 379 of file mcv.c.

380{
381 int i;
382 int ndistinct;
383
384 ndistinct = 1;
385 for (i = 1; i < numrows; i++)
386 {
387 /* make sure the array really is sorted */
388 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
389
390 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
391 ndistinct += 1;
392 }
393
394 return ndistinct;
395}

References Assert, i, items, and multi_sort_compare().

Referenced by build_distinct_groups().

◆ get_mincount_for_mcv_list()

static double get_mincount_for_mcv_list ( int  samplerows,
double  totalrows 
)
static

Definition at line 148 of file mcv.c.

149{
150 double n = samplerows;
151 double N = totalrows;
152 double numer,
153 denom;
154
155 numer = n * (N - n);
156 denom = N - n + 0.04 * n * (N - 1);
157
158 /* Guard against division by zero (possible if n = N = 1) */
159 if (denom == 0.0)
160 return 0.0;
161
162 return numer / denom;
163}

Referenced by statext_mcv_build().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo root,
StatisticExtInfo stat,
MCVList mcv,
Node clause,
bool **  or_matches,
Selectivity basesel,
Selectivity overlap_mcvsel,
Selectivity overlap_basesel,
Selectivity totalsel 
)

Definition at line 2126 of file mcv.c.

2130{
2131 Selectivity s = 0.0;
2132 bool *new_matches;
2133 int i;
2134
2135 /* build the OR-matches bitmap, if not built already */
2136 if (*or_matches == NULL)
2137 *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2138
2139 /* build the match bitmap for the new clause */
2140 new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2141 stat->exprs, mcv, false);
2142
2143 /*
2144 * Sum the frequencies for all the MCV items matching this clause and also
2145 * those matching the overlap between this clause and any of the preceding
2146 * clauses as described above.
2147 */
2148 *basesel = 0.0;
2149 *overlap_mcvsel = 0.0;
2150 *overlap_basesel = 0.0;
2151 *totalsel = 0.0;
2152 for (i = 0; i < mcv->nitems; i++)
2153 {
2154 *totalsel += mcv->items[i].frequency;
2155
2156 if (new_matches[i])
2157 {
2158 s += mcv->items[i].frequency;
2159 *basesel += mcv->items[i].base_frequency;
2160
2161 if ((*or_matches)[i])
2162 {
2163 *overlap_mcvsel += mcv->items[i].frequency;
2164 *overlap_basesel += mcv->items[i].base_frequency;
2165 }
2166 }
2167
2168 /* update the OR-matches bitmap for the next clause */
2169 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2170 }
2171
2172 pfree(new_matches);
2173
2174 return s;
2175}
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1599
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
double Selectivity
Definition: nodes.h:250
#define list_make1(x1)
Definition: pg_list.h:212
tree ctl root
Definition: radixtree.h:1857
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), pfree(), and root.

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo root,
StatisticExtInfo stat,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Selectivity basesel,
Selectivity totalsel 
)

Definition at line 2048 of file mcv.c.

2053{
2054 int i;
2055 MCVList *mcv;
2056 Selectivity s = 0.0;
2057 RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2058
2059 /* match/mismatch bitmap for each MCV item */
2060 bool *matches = NULL;
2061
2062 /* load the MCV list stored in the statistics object */
2063 mcv = statext_mcv_load(stat->statOid, rte->inh);
2064
2065 /* build a match bitmap for the clauses */
2066 matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2067 mcv, false);
2068
2069 /* sum frequencies for all the matching MCV items */
2070 *basesel = 0.0;
2071 *totalsel = 0.0;
2072 for (i = 0; i < mcv->nitems; i++)
2073 {
2074 *totalsel += mcv->items[i].frequency;
2075
2076 if (matches[i] != false)
2077 {
2078 *basesel += mcv->items[i].base_frequency;
2079 s += mcv->items[i].frequency;
2080 }
2081 }
2082
2083 return s;
2084}
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition: mcv.c:558
Index relid
Definition: pathnodes.h:918

References MCVItem::base_frequency, MCVItem::frequency, i, RangeTblEntry::inh, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, RelOptInfo::relid, root, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)

Definition at line 2006 of file mcv.c.

2010{
2011 Selectivity other_sel;
2012 Selectivity sel;
2013
2014 /* estimated selectivity of values not covered by MCV matches */
2015 other_sel = simple_sel - mcv_basesel;
2016 CLAMP_PROBABILITY(other_sel);
2017
2018 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2019 if (other_sel > 1.0 - mcv_totalsel)
2020 other_sel = 1.0 - mcv_totalsel;
2021
2022 /* overall selectivity is the sum of the MCV and non-MCV parts */
2023 sel = mcv_sel + other_sel;
2024 CLAMP_PROBABILITY(sel);
2025
2026 return sel;
2027}
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_get_match_bitmap()

static bool * mcv_get_match_bitmap ( PlannerInfo root,
List clauses,
Bitmapset keys,
List exprs,
MCVList mcvlist,
bool  is_or 
)
static

Definition at line 1599 of file mcv.c.

1602{
1603 ListCell *l;
1604 bool *matches;
1605
1606 /* The bitmap may be partially built. */
1607 Assert(clauses != NIL);
1608 Assert(mcvlist != NULL);
1609 Assert(mcvlist->nitems > 0);
1611
1612 matches = palloc(sizeof(bool) * mcvlist->nitems);
1613 memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
1614
1615 /*
1616 * Loop through the list of clauses, and for each of them evaluate all the
1617 * MCV items not yet eliminated by the preceding clauses.
1618 */
1619 foreach(l, clauses)
1620 {
1621 Node *clause = (Node *) lfirst(l);
1622
1623 /* if it's a RestrictInfo, then extract the clause */
1624 if (IsA(clause, RestrictInfo))
1625 clause = (Node *) ((RestrictInfo *) clause)->clause;
1626
1627 /*
1628 * Handle the various types of clauses - OpClause, NullTest and
1629 * AND/OR/NOT
1630 */
1631 if (is_opclause(clause))
1632 {
1633 OpExpr *expr = (OpExpr *) clause;
1634 FmgrInfo opproc;
1635
1636 /* valid only after examine_opclause_args returns true */
1637 Node *clause_expr;
1638 Const *cst;
1639 bool expronleft;
1640 int idx;
1641 Oid collid;
1642
1643 fmgr_info(get_opcode(expr->opno), &opproc);
1644
1645 /* extract the var/expr and const from the expression */
1646 if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1647 elog(ERROR, "incompatible clause");
1648
1649 /* match the attribute/expression to a dimension of the statistic */
1650 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1651
1652 /*
1653 * Walk through the MCV items and evaluate the current clause. We
1654 * can skip items that were already ruled out, and terminate if
1655 * there are no remaining MCV items that might possibly match.
1656 */
1657 for (int i = 0; i < mcvlist->nitems; i++)
1658 {
1659 bool match = true;
1660 MCVItem *item = &mcvlist->items[i];
1661
1662 Assert(idx >= 0);
1663
1664 /*
1665 * When the MCV item or the Const value is NULL we can treat
1666 * this as a mismatch. We must not call the operator because
1667 * of strictness.
1668 */
1669 if (item->isnull[idx] || cst->constisnull)
1670 {
1671 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1672 continue;
1673 }
1674
1675 /*
1676 * Skip MCV items that can't change result in the bitmap. Once
1677 * the value gets false for AND-lists, or true for OR-lists,
1678 * we don't need to look at more clauses.
1679 */
1680 if (RESULT_IS_FINAL(matches[i], is_or))
1681 continue;
1682
1683 /*
1684 * First check whether the constant is below the lower
1685 * boundary (in that case we can skip the bucket, because
1686 * there's no overlap).
1687 *
1688 * We don't store collations used to build the statistics, but
1689 * we can use the collation for the attribute itself, as
1690 * stored in varcollid. We do reset the statistics after a
1691 * type change (including collation change), so this is OK.
1692 * For expressions, we use the collation extracted from the
1693 * expression itself.
1694 */
1695 if (expronleft)
1696 match = DatumGetBool(FunctionCall2Coll(&opproc,
1697 collid,
1698 item->values[idx],
1699 cst->constvalue));
1700 else
1701 match = DatumGetBool(FunctionCall2Coll(&opproc,
1702 collid,
1703 cst->constvalue,
1704 item->values[idx]));
1705
1706 /* update the match bitmap with the result */
1707 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1708 }
1709 }
1710 else if (IsA(clause, ScalarArrayOpExpr))
1711 {
1712 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1713 FmgrInfo opproc;
1714
1715 /* valid only after examine_opclause_args returns true */
1716 Node *clause_expr;
1717 Const *cst;
1718 bool expronleft;
1719 Oid collid;
1720 int idx;
1721
1722 /* array evaluation */
1723 ArrayType *arrayval;
1724 int16 elmlen;
1725 bool elmbyval;
1726 char elmalign;
1727 int num_elems;
1728 Datum *elem_values;
1729 bool *elem_nulls;
1730
1731 fmgr_info(get_opcode(expr->opno), &opproc);
1732
1733 /* extract the var/expr and const from the expression */
1734 if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1735 elog(ERROR, "incompatible clause");
1736
1737 /* We expect Var on left */
1738 if (!expronleft)
1739 elog(ERROR, "incompatible clause");
1740
1741 /*
1742 * Deconstruct the array constant, unless it's NULL (we'll cover
1743 * that case below)
1744 */
1745 if (!cst->constisnull)
1746 {
1747 arrayval = DatumGetArrayTypeP(cst->constvalue);
1749 &elmlen, &elmbyval, &elmalign);
1750 deconstruct_array(arrayval,
1751 ARR_ELEMTYPE(arrayval),
1752 elmlen, elmbyval, elmalign,
1753 &elem_values, &elem_nulls, &num_elems);
1754 }
1755
1756 /* match the attribute/expression to a dimension of the statistic */
1757 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1758
1759 /*
1760 * Walk through the MCV items and evaluate the current clause. We
1761 * can skip items that were already ruled out, and terminate if
1762 * there are no remaining MCV items that might possibly match.
1763 */
1764 for (int i = 0; i < mcvlist->nitems; i++)
1765 {
1766 int j;
1767 bool match = !expr->useOr;
1768 MCVItem *item = &mcvlist->items[i];
1769
1770 /*
1771 * When the MCV item or the Const value is NULL we can treat
1772 * this as a mismatch. We must not call the operator because
1773 * of strictness.
1774 */
1775 if (item->isnull[idx] || cst->constisnull)
1776 {
1777 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1778 continue;
1779 }
1780
1781 /*
1782 * Skip MCV items that can't change result in the bitmap. Once
1783 * the value gets false for AND-lists, or true for OR-lists,
1784 * we don't need to look at more clauses.
1785 */
1786 if (RESULT_IS_FINAL(matches[i], is_or))
1787 continue;
1788
1789 for (j = 0; j < num_elems; j++)
1790 {
1791 Datum elem_value = elem_values[j];
1792 bool elem_isnull = elem_nulls[j];
1793 bool elem_match;
1794
1795 /* NULL values always evaluate as not matching. */
1796 if (elem_isnull)
1797 {
1798 match = RESULT_MERGE(match, expr->useOr, false);
1799 continue;
1800 }
1801
1802 /*
1803 * Stop evaluating the array elements once we reach a
1804 * matching value that can't change - ALL() is the same as
1805 * AND-list, ANY() is the same as OR-list.
1806 */
1807 if (RESULT_IS_FINAL(match, expr->useOr))
1808 break;
1809
1810 elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1811 collid,
1812 item->values[idx],
1813 elem_value));
1814
1815 match = RESULT_MERGE(match, expr->useOr, elem_match);
1816 }
1817
1818 /* update the match bitmap with the result */
1819 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1820 }
1821 }
1822 else if (IsA(clause, NullTest))
1823 {
1824 NullTest *expr = (NullTest *) clause;
1825 Node *clause_expr = (Node *) (expr->arg);
1826
1827 /* match the attribute/expression to a dimension of the statistic */
1828 int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1829
1830 /*
1831 * Walk through the MCV items and evaluate the current clause. We
1832 * can skip items that were already ruled out, and terminate if
1833 * there are no remaining MCV items that might possibly match.
1834 */
1835 for (int i = 0; i < mcvlist->nitems; i++)
1836 {
1837 bool match = false; /* assume mismatch */
1838 MCVItem *item = &mcvlist->items[i];
1839
1840 /* if the clause mismatches the MCV item, update the bitmap */
1841 switch (expr->nulltesttype)
1842 {
1843 case IS_NULL:
1844 match = (item->isnull[idx]) ? true : match;
1845 break;
1846
1847 case IS_NOT_NULL:
1848 match = (!item->isnull[idx]) ? true : match;
1849 break;
1850 }
1851
1852 /* now, update the match bitmap, depending on OR/AND type */
1853 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1854 }
1855 }
1856 else if (is_orclause(clause) || is_andclause(clause))
1857 {
1858 /* AND/OR clause, with all subclauses being compatible */
1859
1860 int i;
1861 BoolExpr *bool_clause = ((BoolExpr *) clause);
1862 List *bool_clauses = bool_clause->args;
1863
1864 /* match/mismatch bitmap for each MCV item */
1865 bool *bool_matches = NULL;
1866
1867 Assert(bool_clauses != NIL);
1868 Assert(list_length(bool_clauses) >= 2);
1869
1870 /* build the match bitmap for the OR-clauses */
1871 bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1872 mcvlist, is_orclause(clause));
1873
1874 /*
1875 * Merge the bitmap produced by mcv_get_match_bitmap into the
1876 * current one. We need to consider if we're evaluating AND or OR
1877 * condition when merging the results.
1878 */
1879 for (i = 0; i < mcvlist->nitems; i++)
1880 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1881
1882 pfree(bool_matches);
1883 }
1884 else if (is_notclause(clause))
1885 {
1886 /* NOT clause, with all subclauses compatible */
1887
1888 int i;
1889 BoolExpr *not_clause = ((BoolExpr *) clause);
1890 List *not_args = not_clause->args;
1891
1892 /* match/mismatch bitmap for each MCV item */
1893 bool *not_matches = NULL;
1894
1895 Assert(not_args != NIL);
1896 Assert(list_length(not_args) == 1);
1897
1898 /* build the match bitmap for the NOT-clause */
1899 not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1900 mcvlist, false);
1901
1902 /*
1903 * Merge the bitmap produced by mcv_get_match_bitmap into the
1904 * current one. We're handling a NOT clause, so invert the result
1905 * before merging it into the global bitmap.
1906 */
1907 for (i = 0; i < mcvlist->nitems; i++)
1908 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1909
1910 pfree(not_matches);
1911 }
1912 else if (IsA(clause, Var))
1913 {
1914 /* Var (has to be a boolean Var, possibly from below NOT) */
1915
1916 Var *var = (Var *) (clause);
1917
1918 /* match the attribute to a dimension of the statistic */
1919 int idx = bms_member_index(keys, var->varattno);
1920
1921 Assert(var->vartype == BOOLOID);
1922
1923 /*
1924 * Walk through the MCV items and evaluate the current clause. We
1925 * can skip items that were already ruled out, and terminate if
1926 * there are no remaining MCV items that might possibly match.
1927 */
1928 for (int i = 0; i < mcvlist->nitems; i++)
1929 {
1930 MCVItem *item = &mcvlist->items[i];
1931 bool match = false;
1932
1933 /* if the item is NULL, it's a mismatch */
1934 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1935 match = true;
1936
1937 /* update the result bitmap */
1938 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1939 }
1940 }
1941 else
1942 {
1943 /* Otherwise, it must be a bare boolean-returning expression */
1944 int idx;
1945
1946 /* match the expression to a dimension of the statistic */
1947 idx = mcv_match_expression(clause, keys, exprs, NULL);
1948
1949 /*
1950 * Walk through the MCV items and evaluate the current clause. We
1951 * can skip items that were already ruled out, and terminate if
1952 * there are no remaining MCV items that might possibly match.
1953 */
1954 for (int i = 0; i < mcvlist->nitems; i++)
1955 {
1956 bool match;
1957 MCVItem *item = &mcvlist->items[i];
1958
1959 /* "match" just means it's bool TRUE */
1960 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1961
1962 /* now, update the match bitmap, depending on OR/AND type */
1963 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1964 }
1965 }
1966 }
1967
1968 return matches;
1969}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define ARR_ELEMTYPE(a)
Definition: array.h:292
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3631
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:539
int16_t int16
Definition: c.h:480
Oid collid
bool examine_opclause_args(List *args, Node **exprp, Const **cstp, bool *expronleftp)
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2271
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1285
#define RESULT_MERGE(value, is_or, match)
Definition: mcv.c:88
#define RESULT_IS_FINAL(value, is_or)
Definition: mcv.c:100
static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
Definition: mcv.c:1535
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:107
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
uintptr_t Datum
Definition: postgres.h:64
@ IS_NULL
Definition: primnodes.h:1952
@ IS_NOT_NULL
Definition: primnodes.h:1952
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
List * args
Definition: primnodes.h:940
Definition: fmgr.h:57
Definition: pg_list.h:54
bool * isnull
Definition: statistics.h:82
Datum * values
Definition: statistics.h:83
Definition: nodes.h:129
NullTestType nulltesttype
Definition: primnodes.h:1959
Expr * arg
Definition: primnodes.h:1958
Oid opno
Definition: primnodes.h:818
List * args
Definition: primnodes.h:836
Definition: primnodes.h:248
AttrNumber varattno
Definition: primnodes.h:260

References NullTest::arg, OpExpr::args, ScalarArrayOpExpr::args, BoolExpr::args, ARR_ELEMTYPE, Assert, bms_member_index(), collid, DatumGetArrayTypeP, DatumGetBool(), deconstruct_array(), elog, ERROR, examine_opclause_args(), fmgr_info(), FunctionCall2Coll(), get_opcode(), get_typlenbyvalalign(), i, idx(), is_andclause(), IS_NOT_NULL, is_notclause(), IS_NULL, is_opclause(), is_orclause(), IsA, MCVItem::isnull, MCVList::items, j, lfirst, list_length(), mcv_get_match_bitmap(), mcv_match_expression(), NIL, MCVList::nitems, NullTest::nulltesttype, OpExpr::opno, ScalarArrayOpExpr::opno, palloc(), pfree(), RESULT_IS_FINAL, RESULT_MERGE, root, STATS_MCVLIST_MAX_ITEMS, ScalarArrayOpExpr::useOr, MCVItem::values, and Var::varattno.

Referenced by mcv_clause_selectivity_or(), mcv_clauselist_selectivity(), and mcv_get_match_bitmap().

◆ mcv_match_expression()

static int mcv_match_expression ( Node expr,
Bitmapset keys,
List exprs,
Oid collid 
)
static

Definition at line 1535 of file mcv.c.

1536{
1537 int idx;
1538
1539 if (IsA(expr, Var))
1540 {
1541 /* simple Var, so just lookup using varattno */
1542 Var *var = (Var *) expr;
1543
1544 if (collid)
1545 *collid = var->varcollid;
1546
1547 idx = bms_member_index(keys, var->varattno);
1548
1549 if (idx < 0)
1550 elog(ERROR, "variable not found in statistics object");
1551 }
1552 else
1553 {
1554 /* expression - lookup in stats expressions */
1555 ListCell *lc;
1556
1557 if (collid)
1558 *collid = exprCollation(expr);
1559
1560 /* expressions are stored after the simple columns */
1561 idx = bms_num_members(keys);
1562 foreach(lc, exprs)
1563 {
1564 Node *stat_expr = (Node *) lfirst(lc);
1565
1566 if (equal(expr, stat_expr))
1567 break;
1568
1569 idx++;
1570 }
1571
1572 if (lc == NULL)
1573 elog(ERROR, "expression not found in statistics object");
1574 }
1575
1576 return idx;
1577}
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816

References bms_member_index(), bms_num_members(), collid, elog, equal(), ERROR, exprCollation(), idx(), IsA, lfirst, and Var::varattno.

Referenced by mcv_get_match_bitmap().

◆ pg_mcv_list_in()

Datum pg_mcv_list_in ( PG_FUNCTION_ARGS  )

Definition at line 1472 of file mcv.c.

1473{
1474 /*
1475 * pg_mcv_list stores the data in binary form and parsing text input is
1476 * not needed, so disallow this.
1477 */
1478 ereport(ERROR,
1479 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1480 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1481
1482 PG_RETURN_VOID(); /* keep compiler quiet */
1483}
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349

References ereport, errcode(), errmsg(), ERROR, and PG_RETURN_VOID.

◆ pg_mcv_list_out()

Datum pg_mcv_list_out ( PG_FUNCTION_ARGS  )

Definition at line 1498 of file mcv.c.

1499{
1500 return byteaout(fcinfo);
1501}
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:388

References byteaout().

◆ pg_mcv_list_recv()

Datum pg_mcv_list_recv ( PG_FUNCTION_ARGS  )

Definition at line 1507 of file mcv.c.

1508{
1509 ereport(ERROR,
1510 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1511 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1512
1513 PG_RETURN_VOID(); /* keep compiler quiet */
1514}

References ereport, errcode(), errmsg(), ERROR, and PG_RETURN_VOID.

◆ pg_mcv_list_send()

Datum pg_mcv_list_send ( PG_FUNCTION_ARGS  )

Definition at line 1523 of file mcv.c.

1524{
1525 return byteasend(fcinfo);
1526}
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:490

References byteasend().

◆ pg_stats_ext_mcvlist_items()

Datum pg_stats_ext_mcvlist_items ( PG_FUNCTION_ARGS  )

Definition at line 1338 of file mcv.c.

1339{
1340 FuncCallContext *funcctx;
1341
1342 /* stuff done only on the first call of the function */
1343 if (SRF_IS_FIRSTCALL())
1344 {
1345 MemoryContext oldcontext;
1346 MCVList *mcvlist;
1347 TupleDesc tupdesc;
1348
1349 /* create a function context for cross-call persistence */
1350 funcctx = SRF_FIRSTCALL_INIT();
1351
1352 /* switch to memory context appropriate for multiple function calls */
1353 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1354
1356
1357 funcctx->user_fctx = mcvlist;
1358
1359 /* total number of tuples to be returned */
1360 funcctx->max_calls = 0;
1361 if (funcctx->user_fctx != NULL)
1362 funcctx->max_calls = mcvlist->nitems;
1363
1364 /* Build a tuple descriptor for our result type */
1365 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1366 ereport(ERROR,
1367 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1368 errmsg("function returning record called in context "
1369 "that cannot accept type record")));
1370 tupdesc = BlessTupleDesc(tupdesc);
1371
1372 /*
1373 * generate attribute metadata needed later to produce tuples from raw
1374 * C strings
1375 */
1376 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1377
1378 MemoryContextSwitchTo(oldcontext);
1379 }
1380
1381 /* stuff done on every call of the function */
1382 funcctx = SRF_PERCALL_SETUP();
1383
1384 if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1385 * left to send */
1386 {
1387 Datum values[5];
1388 bool nulls[5];
1389 HeapTuple tuple;
1390 Datum result;
1391 ArrayBuildState *astate_values = NULL;
1392 ArrayBuildState *astate_nulls = NULL;
1393
1394 int i;
1395 MCVList *mcvlist;
1396 MCVItem *item;
1397
1398 mcvlist = (MCVList *) funcctx->user_fctx;
1399
1400 Assert(funcctx->call_cntr < mcvlist->nitems);
1401
1402 item = &mcvlist->items[funcctx->call_cntr];
1403
1404 for (i = 0; i < mcvlist->ndimensions; i++)
1405 {
1406
1407 astate_nulls = accumArrayResult(astate_nulls,
1408 BoolGetDatum(item->isnull[i]),
1409 false,
1410 BOOLOID,
1412
1413 if (!item->isnull[i])
1414 {
1415 bool isvarlena;
1416 Oid outfunc;
1417 FmgrInfo fmgrinfo;
1418 Datum val;
1419 text *txt;
1420
1421 /* lookup output func for the type */
1422 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1423 fmgr_info(outfunc, &fmgrinfo);
1424
1425 val = FunctionCall1(&fmgrinfo, item->values[i]);
1427
1428 astate_values = accumArrayResult(astate_values,
1429 PointerGetDatum(txt),
1430 false,
1431 TEXTOID,
1433 }
1434 else
1435 astate_values = accumArrayResult(astate_values,
1436 (Datum) 0,
1437 true,
1438 TEXTOID,
1440 }
1441
1442 values[0] = Int32GetDatum(funcctx->call_cntr);
1443 values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
1444 values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
1445 values[3] = Float8GetDatum(item->frequency);
1447
1448 /* no NULLs in the tuple */
1449 memset(nulls, 0, sizeof(nulls));
1450
1451 /* build a tuple */
1452 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1453
1454 /* make the tuple into a datum */
1455 result = HeapTupleGetDatum(tuple);
1456
1457 SRF_RETURN_NEXT(funcctx, result);
1458 }
1459 else /* do when there is no more left */
1460 {
1461 SRF_RETURN_DONE(funcctx);
1462 }
1463}
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5350
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5420
static Datum values[MAXATTR]
Definition: bootstrap.c:151
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2258
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2273
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1816
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:659
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:335
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
long val
Definition: informix.c:689
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2907
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:996
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
TupleDesc tupdesc
Definition: funcapi.h:38
void * user_fctx
Definition: funcapi.h:82
uint64 max_calls
Definition: funcapi.h:74
uint64 call_cntr
Definition: funcapi.h:65
AttInMetadata * attinmeta
Definition: funcapi.h:91
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
Definition: c.h:641
text * cstring_to_text(const char *s)
Definition: varlena.c:184

References accumArrayResult(), Assert, FuncCallContext::attinmeta, MCVItem::base_frequency, BlessTupleDesc(), BoolGetDatum(), FuncCallContext::call_cntr, cstring_to_text(), CurrentMemoryContext, DatumGetPointer(), ereport, errcode(), errmsg(), ERROR, Float8GetDatum(), fmgr_info(), MCVItem::frequency, FunctionCall1, get_call_result_type(), getTypeOutputInfo(), heap_form_tuple(), HeapTupleGetDatum(), i, Int32GetDatum(), MCVItem::isnull, MCVList::items, makeArrayResult(), FuncCallContext::max_calls, MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, MCVList::ndimensions, MCVList::nitems, PG_GETARG_BYTEA_P, PointerGetDatum(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, statext_mcv_deserialize(), AttInMetadata::tupdesc, TupleDescGetAttInMetadata(), TYPEFUNC_COMPOSITE, MCVList::types, FuncCallContext::user_fctx, val, values, and MCVItem::values.

◆ sort_item_compare()

static int sort_item_compare ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 465 of file mcv.c.

466{
467 SortSupport ssup = (SortSupport) arg;
468 SortItem *ia = (SortItem *) a;
469 SortItem *ib = (SortItem *) b;
470
471 return ApplySortComparator(ia->values[0], ia->isnull[0],
472 ib->values[0], ib->isnull[0],
473 ssup);
474}
void * arg
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

References a, ApplySortComparator(), arg, b, SortItem::isnull, and SortItem::values.

Referenced by build_column_frequencies().

◆ statext_mcv_build()

MCVList * statext_mcv_build ( StatsBuildData data,
double  totalrows,
int  stattarget 
)

Definition at line 180 of file mcv.c.

181{
182 int i,
183 numattrs,
184 numrows,
185 ngroups,
186 nitems;
187 double mincount;
189 SortItem *groups;
190 MCVList *mcvlist = NULL;
192
193 /* comparator for all the columns */
194 mss = build_mss(data);
195
196 /* sort the rows */
198 data->nattnums, data->attnums);
199
200 if (!items)
201 return NULL;
202
203 /* for convenience */
204 numattrs = data->nattnums;
205 numrows = data->numrows;
206
207 /* transform the sorted rows into groups (sorted by frequency) */
208 groups = build_distinct_groups(nitems, items, mss, &ngroups);
209
210 /*
211 * The maximum number of MCV items to store, based on the statistics
212 * target we computed for the statistics object (from the target set for
213 * the object itself, attributes and the system default). In any case, we
214 * can't keep more groups than we have available.
215 */
216 nitems = stattarget;
217 if (nitems > ngroups)
218 nitems = ngroups;
219
220 /*
221 * Decide how many items to keep in the MCV list. We can't use the same
222 * algorithm as per-column MCV lists, because that only considers the
223 * actual group frequency - but we're primarily interested in how the
224 * actual frequency differs from the base frequency (product of simple
225 * per-column frequencies, as if the columns were independent).
226 *
227 * Using the same algorithm might exclude items that are close to the
228 * "average" frequency of the sample. But that does not say whether the
229 * observed frequency is close to the base frequency or not. We also need
230 * to consider unexpectedly uncommon items (again, compared to the base
231 * frequency), and the single-column algorithm does not have to.
232 *
233 * We simply decide how many items to keep by computing the minimum count
234 * using get_mincount_for_mcv_list() and then keep all items that seem to
235 * be more common than that.
236 */
237 mincount = get_mincount_for_mcv_list(numrows, totalrows);
238
239 /*
240 * Walk the groups until we find the first group with a count below the
241 * mincount threshold (the index of that group is the number of groups we
242 * want to keep).
243 */
244 for (i = 0; i < nitems; i++)
245 {
246 if (groups[i].count < mincount)
247 {
248 nitems = i;
249 break;
250 }
251 }
252
253 /*
254 * At this point, we know the number of items for the MCV list. There
255 * might be none (for uniform distribution with many groups), and in that
256 * case, there will be no MCV list. Otherwise, construct the MCV list.
257 */
258 if (nitems > 0)
259 {
260 int j;
263
264 /* frequencies for values in each attribute */
265 SortItem **freqs;
266 int *nfreqs;
267
268 /* used to search values */
269 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
270 + sizeof(SortSupportData));
271
272 /* compute frequencies for values in each column */
273 nfreqs = (int *) palloc0(sizeof(int) * numattrs);
274 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
275
276 /*
277 * Allocate the MCV list structure, set the global parameters.
278 */
279 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
280 sizeof(MCVItem) * nitems);
281
282 mcvlist->magic = STATS_MCV_MAGIC;
283 mcvlist->type = STATS_MCV_TYPE_BASIC;
284 mcvlist->ndimensions = numattrs;
285 mcvlist->nitems = nitems;
286
287 /* store info about data type OIDs */
288 for (i = 0; i < numattrs; i++)
289 mcvlist->types[i] = data->stats[i]->attrtypid;
290
291 /* Copy the first chunk of groups into the result. */
292 for (i = 0; i < nitems; i++)
293 {
294 /* just point to the proper place in the list */
295 MCVItem *item = &mcvlist->items[i];
296
297 item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
298 item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
299
300 /* copy values for the group */
301 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
302 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
303
304 /* groups should be sorted by frequency in descending order */
305 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
306
307 /* group frequency */
308 item->frequency = (double) groups[i].count / numrows;
309
310 /* base frequency, if the attributes were independent */
311 item->base_frequency = 1.0;
312 for (j = 0; j < numattrs; j++)
313 {
314 SortItem *freq;
315
316 /* single dimension */
317 tmp->ndims = 1;
318 tmp->ssup[0] = mss->ssup[j];
319
320 /* fill search key */
321 key.values = &groups[i].values[j];
322 key.isnull = &groups[i].isnull[j];
323
324 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
325 sizeof(SortItem),
326 multi_sort_compare, tmp);
327
328 item->base_frequency *= ((double) freq->count) / numrows;
329 }
330 }
331
332 pfree(nfreqs);
333 pfree(freqs);
334 }
335
336 pfree(items);
337 pfree(groups);
338
339 return mcvlist;
340}
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupportData * MultiSortSupport
for(;;)
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:347
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:148
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:490
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:424
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
struct SortSupportData SortSupportData
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
struct MCVItem MCVItem
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89

References Assert, MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, data, for(), MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, items, j, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, nitems, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList * statext_mcv_deserialize ( bytea data)

Definition at line 996 of file mcv.c.

997{
998 int dim,
999 i;
1000 Size expected_size;
1001 MCVList *mcvlist;
1002 char *raw;
1003 char *ptr;
1004 char *endptr PG_USED_FOR_ASSERTS_ONLY;
1005
1006 int ndims,
1007 nitems;
1008 DimensionInfo *info = NULL;
1009
1010 /* local allocation buffer (used only for deserialization) */
1011 Datum **map = NULL;
1012
1013 /* MCV list */
1014 Size mcvlen;
1015
1016 /* buffer used for the result */
1017 Size datalen;
1018 char *dataptr;
1019 char *valuesptr;
1020 char *isnullptr;
1021
1022 if (data == NULL)
1023 return NULL;
1024
1025 /*
1026 * We can't possibly deserialize a MCV list if there's not even a complete
1027 * header. We need an explicit formula here, because we serialize the
1028 * header fields one by one, so we need to ignore struct alignment.
1029 */
1031 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1033
1034 /* read the MCV list header */
1035 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1036
1037 /* pointer to the data part (skip the varlena header) */
1038 raw = (char *) data;
1039 ptr = VARDATA_ANY(raw);
1040 endptr = (char *) raw + VARSIZE_ANY(data);
1041
1042 /* get the header and perform further sanity checks */
1043 memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1044 ptr += sizeof(uint32);
1045
1046 memcpy(&mcvlist->type, ptr, sizeof(uint32));
1047 ptr += sizeof(uint32);
1048
1049 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1050 ptr += sizeof(uint32);
1051
1052 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1053 ptr += sizeof(AttrNumber);
1054
1055 if (mcvlist->magic != STATS_MCV_MAGIC)
1056 elog(ERROR, "invalid MCV magic %u (expected %u)",
1057 mcvlist->magic, STATS_MCV_MAGIC);
1058
1059 if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1060 elog(ERROR, "invalid MCV type %u (expected %u)",
1061 mcvlist->type, STATS_MCV_TYPE_BASIC);
1062
1063 if (mcvlist->ndimensions == 0)
1064 elog(ERROR, "invalid zero-length dimension array in MCVList");
1065 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1066 (mcvlist->ndimensions < 0))
1067 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1068 mcvlist->ndimensions);
1069
1070 if (mcvlist->nitems == 0)
1071 elog(ERROR, "invalid zero-length item array in MCVList");
1072 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1073 elog(ERROR, "invalid length (%u) item array in MCVList",
1074 mcvlist->nitems);
1075
1076 nitems = mcvlist->nitems;
1077 ndims = mcvlist->ndimensions;
1078
1079 /*
1080 * Check amount of data including DimensionInfo for all dimensions and
1081 * also the serialized items (including uint16 indexes). Also, walk
1082 * through the dimension information and add it to the sum.
1083 */
1084 expected_size = SizeOfMCVList(ndims, nitems);
1085
1086 /*
1087 * Check that we have at least the dimension and info records, along with
1088 * the items. We don't know the size of the serialized values yet. We need
1089 * to do this check first, before accessing the dimension info.
1090 */
1091 if (VARSIZE_ANY(data) < expected_size)
1092 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1093 VARSIZE_ANY(data), expected_size);
1094
1095 /* Now copy the array of type Oids. */
1096 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1097 ptr += (sizeof(Oid) * ndims);
1098
1099 /* Now it's safe to access the dimension info. */
1100 info = palloc(ndims * sizeof(DimensionInfo));
1101
1102 memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1103 ptr += (ndims * sizeof(DimensionInfo));
1104
1105 /* account for the value arrays */
1106 for (dim = 0; dim < ndims; dim++)
1107 {
1108 /*
1109 * XXX I wonder if we can/should rely on asserts here. Maybe those
1110 * checks should be done every time?
1111 */
1112 Assert(info[dim].nvalues >= 0);
1113 Assert(info[dim].nbytes >= 0);
1114
1115 expected_size += info[dim].nbytes;
1116 }
1117
1118 /*
1119 * Now we know the total expected MCV size, including all the pieces
1120 * (header, dimension info. items and deduplicated data). So do the final
1121 * check on size.
1122 */
1123 if (VARSIZE_ANY(data) != expected_size)
1124 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1125 VARSIZE_ANY(data), expected_size);
1126
1127 /*
1128 * We need an array of Datum values for each dimension, so that we can
1129 * easily translate the uint16 indexes later. We also need a top-level
1130 * array of pointers to those per-dimension arrays.
1131 *
1132 * While allocating the arrays for dimensions, compute how much space we
1133 * need for a copy of the by-ref data, as we can't simply point to the
1134 * original values (it might go away).
1135 */
1136 datalen = 0; /* space for by-ref data */
1137 map = (Datum **) palloc(ndims * sizeof(Datum *));
1138
1139 for (dim = 0; dim < ndims; dim++)
1140 {
1141 map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1142
1143 /* space needed for a copy of data for by-ref types */
1144 datalen += info[dim].nbytes_aligned;
1145 }
1146
1147 /*
1148 * Now resize the MCV list so that the allocation includes all the data.
1149 *
1150 * Allocate space for a copy of the data, as we can't simply reference the
1151 * serialized data - it's not aligned properly, and it may disappear while
1152 * we're still using the MCV list, e.g. due to catcache release.
1153 *
1154 * We do care about alignment here, because we will allocate all the
1155 * pieces at once, but then use pointers to different parts.
1156 */
1157 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1158
1159 /* arrays of values and isnull flags for all MCV items */
1160 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1161 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1162
1163 /* we don't quite need to align this, but it makes some asserts easier */
1164 mcvlen += MAXALIGN(datalen);
1165
1166 /* now resize the deserialized MCV list, and compute pointers to parts */
1167 mcvlist = repalloc(mcvlist, mcvlen);
1168
1169 /* pointer to the beginning of values/isnull arrays */
1170 valuesptr = (char *) mcvlist
1171 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1172
1173 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1174
1175 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1176
1177 /*
1178 * Build mapping (index => value) for translating the serialized data into
1179 * the in-memory representation.
1180 */
1181 for (dim = 0; dim < ndims; dim++)
1182 {
1183 /* remember start position in the input array */
1184 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1185
1186 if (info[dim].typbyval)
1187 {
1188 /* for by-val types we simply copy data into the mapping */
1189 for (i = 0; i < info[dim].nvalues; i++)
1190 {
1191 Datum v = 0;
1192
1193 memcpy(&v, ptr, info[dim].typlen);
1194 ptr += info[dim].typlen;
1195
1196 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1197
1198 /* no under/overflow of input array */
1199 Assert(ptr <= (start + info[dim].nbytes));
1200 }
1201 }
1202 else
1203 {
1204 /* for by-ref types we need to also make a copy of the data */
1205
1206 /* passed by reference, but fixed length (name, tid, ...) */
1207 if (info[dim].typlen > 0)
1208 {
1209 for (i = 0; i < info[dim].nvalues; i++)
1210 {
1211 memcpy(dataptr, ptr, info[dim].typlen);
1212 ptr += info[dim].typlen;
1213
1214 /* just point into the array */
1215 map[dim][i] = PointerGetDatum(dataptr);
1216 dataptr += MAXALIGN(info[dim].typlen);
1217 }
1218 }
1219 else if (info[dim].typlen == -1)
1220 {
1221 /* varlena */
1222 for (i = 0; i < info[dim].nvalues; i++)
1223 {
1224 uint32 len;
1225
1226 /* read the uint32 length */
1227 memcpy(&len, ptr, sizeof(uint32));
1228 ptr += sizeof(uint32);
1229
1230 /* the length is data-only */
1231 SET_VARSIZE(dataptr, len + VARHDRSZ);
1232 memcpy(VARDATA(dataptr), ptr, len);
1233 ptr += len;
1234
1235 /* just point into the array */
1236 map[dim][i] = PointerGetDatum(dataptr);
1237
1238 /* skip to place of the next deserialized value */
1239 dataptr += MAXALIGN(len + VARHDRSZ);
1240 }
1241 }
1242 else if (info[dim].typlen == -2)
1243 {
1244 /* cstring */
1245 for (i = 0; i < info[dim].nvalues; i++)
1246 {
1247 uint32 len;
1248
1249 memcpy(&len, ptr, sizeof(uint32));
1250 ptr += sizeof(uint32);
1251
1252 memcpy(dataptr, ptr, len);
1253 ptr += len;
1254
1255 /* just point into the array */
1256 map[dim][i] = PointerGetDatum(dataptr);
1257 dataptr += MAXALIGN(len);
1258 }
1259 }
1260
1261 /* no under/overflow of input array */
1262 Assert(ptr <= (start + info[dim].nbytes));
1263
1264 /* no overflow of the output mcv value */
1265 Assert(dataptr <= ((char *) mcvlist + mcvlen));
1266 }
1267
1268 /* check we consumed input data for this dimension exactly */
1269 Assert(ptr == (start + info[dim].nbytes));
1270 }
1271
1272 /* we should have also filled the MCV list exactly */
1273 Assert(dataptr == ((char *) mcvlist + mcvlen));
1274
1275 /* deserialize the MCV items and translate the indexes to Datums */
1276 for (i = 0; i < nitems; i++)
1277 {
1278 MCVItem *item = &mcvlist->items[i];
1279
1280 item->values = (Datum *) valuesptr;
1281 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1282
1283 item->isnull = (bool *) isnullptr;
1284 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1285
1286 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1287 ptr += sizeof(bool) * ndims;
1288
1289 memcpy(&item->frequency, ptr, sizeof(double));
1290 ptr += sizeof(double);
1291
1292 memcpy(&item->base_frequency, ptr, sizeof(double));
1293 ptr += sizeof(double);
1294
1295 /* finally translate the indexes (for non-NULL only) */
1296 for (dim = 0; dim < ndims; dim++)
1297 {
1298 uint16 index;
1299
1300 memcpy(&index, ptr, sizeof(uint16));
1301 ptr += sizeof(uint16);
1302
1303 if (item->isnull[dim])
1304 continue;
1305
1306 item->values[dim] = map[dim][index];
1307 }
1308
1309 /* check we're not overflowing the input */
1310 Assert(ptr <= endptr);
1311 }
1312
1313 /* check that we processed all the data */
1314 Assert(ptr == endptr);
1315
1316 /* release the buffers used for mapping */
1317 for (dim = 0; dim < ndims; dim++)
1318 pfree(map[dim]);
1319
1320 pfree(map);
1321
1322 return mcvlist;
1323}
int16 AttrNumber
Definition: attnum.h:21
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:201
#define VARHDRSZ
Definition: c.h:646
uint16_t uint16
Definition: c.h:484
uint32_t uint32
Definition: c.h:485
size_t Size
Definition: c.h:559
return str start
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:68
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
const void size_t len
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
Definition: type.h:96
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:53
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References Assert, MCVItem::base_frequency, data, elog, ERROR, fetch_att(), MCVItem::frequency, i, MCVItem::isnull, MCVList::items, items, len, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, nitems, DimensionInfo::nvalues, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), repalloc(), SET_VARSIZE, SizeOfMCVList, start, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_load()

MCVList * statext_mcv_load ( Oid  mvoid,
bool  inh 
)

Definition at line 558 of file mcv.c.

559{
560 MCVList *result;
561 bool isnull;
562 Datum mcvlist;
563 HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID,
564 ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
565
566 if (!HeapTupleIsValid(htup))
567 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
568
569 mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
570 Anum_pg_statistic_ext_data_stxdmcv, &isnull);
571
572 if (isnull)
573 elog(ERROR,
574 "requested statistics kind \"%c\" is not yet built for statistics object %u",
575 STATS_EXT_MCV, mvoid);
576
577 result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
578
579 ReleaseSysCache(htup);
580
581 return result;
582}
#define DatumGetByteaP(X)
Definition: fmgr.h:331
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:600
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:232

References BoolGetDatum(), DatumGetByteaP, elog, ERROR, HeapTupleIsValid, ObjectIdGetDatum(), ReleaseSysCache(), SearchSysCache2(), statext_mcv_deserialize(), and SysCacheGetAttr().

Referenced by mcv_clauselist_selectivity(), and statext_mcv_clauselist_selectivity().

◆ statext_mcv_serialize()

bytea * statext_mcv_serialize ( MCVList mcvlist,
VacAttrStats **  stats 
)

Definition at line 621 of file mcv.c.

622{
623 int i;
624 int dim;
625 int ndims = mcvlist->ndimensions;
626
627 SortSupport ssup;
628 DimensionInfo *info;
629
630 Size total_length;
631
632 /* serialized items (indexes into arrays, etc.) */
633 bytea *raw;
634 char *ptr;
635 char *endptr PG_USED_FOR_ASSERTS_ONLY;
636
637 /* values per dimension (and number of non-NULL values) */
638 Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
639 int *counts = (int *) palloc0(sizeof(int) * ndims);
640
641 /*
642 * We'll include some rudimentary information about the attribute types
643 * (length, by-val flag), so that we don't have to look them up while
644 * deserializing the MCV list (we already have the type OID in the
645 * header). This is safe because when changing the type of the attribute
646 * the statistics gets dropped automatically. We need to store the info
647 * about the arrays of deduplicated values anyway.
648 */
649 info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
650
651 /* sort support data for all attributes included in the MCV list */
652 ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
653
654 /* collect and deduplicate values for each dimension (attribute) */
655 for (dim = 0; dim < ndims; dim++)
656 {
657 int ndistinct;
658 TypeCacheEntry *typentry;
659
660 /*
661 * Lookup the LT operator (can't get it from stats extra_data, as we
662 * don't know how to interpret that - scalar vs. array etc.).
663 */
664 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
665
666 /* copy important info about the data type (length, by-value) */
667 info[dim].typlen = stats[dim]->attrtype->typlen;
668 info[dim].typbyval = stats[dim]->attrtype->typbyval;
669
670 /* allocate space for values in the attribute and collect them */
671 values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
672
673 for (i = 0; i < mcvlist->nitems; i++)
674 {
675 /* skip NULL values - we don't need to deduplicate those */
676 if (mcvlist->items[i].isnull[dim])
677 continue;
678
679 /* append the value at the end */
680 values[dim][counts[dim]] = mcvlist->items[i].values[dim];
681 counts[dim] += 1;
682 }
683
684 /* if there are just NULL values in this dimension, we're done */
685 if (counts[dim] == 0)
686 continue;
687
688 /* sort and deduplicate the data */
689 ssup[dim].ssup_cxt = CurrentMemoryContext;
690 ssup[dim].ssup_collation = stats[dim]->attrcollid;
691 ssup[dim].ssup_nulls_first = false;
692
693 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
694
695 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
696 compare_scalars_simple, &ssup[dim]);
697
698 /*
699 * Walk through the array and eliminate duplicate values, but keep the
700 * ordering (so that we can do a binary search later). We know there's
701 * at least one item as (counts[dim] != 0), so we can skip the first
702 * element.
703 */
704 ndistinct = 1; /* number of distinct values */
705 for (i = 1; i < counts[dim]; i++)
706 {
707 /* expect sorted array */
708 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
709
710 /* if the value is the same as the previous one, we can skip it */
711 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
712 continue;
713
714 values[dim][ndistinct] = values[dim][i];
715 ndistinct += 1;
716 }
717
718 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
719 Assert(ndistinct <= PG_UINT16_MAX);
720
721 /*
722 * Store additional info about the attribute - number of deduplicated
723 * values, and also size of the serialized data. For fixed-length data
724 * types this is trivial to compute, for varwidth types we need to
725 * actually walk the array and sum the sizes.
726 */
727 info[dim].nvalues = ndistinct;
728
729 if (info[dim].typbyval) /* by-value data types */
730 {
731 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
732
733 /*
734 * We copy the data into the MCV item during deserialization, so
735 * we don't need to allocate any extra space.
736 */
737 info[dim].nbytes_aligned = 0;
738 }
739 else if (info[dim].typlen > 0) /* fixed-length by-ref */
740 {
741 /*
742 * We don't care about alignment in the serialized data, so we
743 * pack the data as much as possible. But we also track how much
744 * data will be needed after deserialization, and in that case we
745 * need to account for alignment of each item.
746 *
747 * Note: As the items are fixed-length, we could easily compute
748 * this during deserialization, but we do it here anyway.
749 */
750 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
751 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
752 }
753 else if (info[dim].typlen == -1) /* varlena */
754 {
755 info[dim].nbytes = 0;
756 info[dim].nbytes_aligned = 0;
757 for (i = 0; i < info[dim].nvalues; i++)
758 {
759 Size len;
760
761 /*
762 * For varlena values, we detoast the values and store the
763 * length and data separately. We don't bother with alignment
764 * here, which means that during deserialization we need to
765 * copy the fields and only access the copies.
766 */
768
769 /* serialized length (uint32 length + data) */
770 len = VARSIZE_ANY_EXHDR(values[dim][i]);
771 info[dim].nbytes += sizeof(uint32); /* length */
772 info[dim].nbytes += len; /* value (no header) */
773
774 /*
775 * During deserialization we'll build regular varlena values
776 * with full headers, and we need to align them properly.
777 */
778 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
779 }
780 }
781 else if (info[dim].typlen == -2) /* cstring */
782 {
783 info[dim].nbytes = 0;
784 info[dim].nbytes_aligned = 0;
785 for (i = 0; i < info[dim].nvalues; i++)
786 {
787 Size len;
788
789 /*
790 * cstring is handled similar to varlena - first we store the
791 * length as uint32 and then the data. We don't care about
792 * alignment, which means that during deserialization we need
793 * to copy the fields and only access the copies.
794 */
795
796 /* c-strings include terminator, so +1 byte */
797 len = strlen(DatumGetCString(values[dim][i])) + 1;
798 info[dim].nbytes += sizeof(uint32); /* length */
799 info[dim].nbytes += len; /* value */
800
801 /* space needed for properly aligned deserialized copies */
802 info[dim].nbytes_aligned += MAXALIGN(len);
803 }
804 }
805
806 /* we know (count>0) so there must be some data */
807 Assert(info[dim].nbytes > 0);
808 }
809
810 /*
811 * Now we can finally compute how much space we'll actually need for the
812 * whole serialized MCV list (varlena header, MCV header, dimension info
813 * for each attribute, deduplicated values and items).
814 */
815 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
816 + sizeof(AttrNumber) /* ndimensions */
817 + (ndims * sizeof(Oid)); /* attribute types */
818
819 /* dimension info */
820 total_length += ndims * sizeof(DimensionInfo);
821
822 /* add space for the arrays of deduplicated values */
823 for (i = 0; i < ndims; i++)
824 total_length += info[i].nbytes;
825
826 /*
827 * And finally account for the items (those are fixed-length, thanks to
828 * replacing values with uint16 indexes into the deduplicated arrays).
829 */
830 total_length += mcvlist->nitems * ITEM_SIZE(dim);
831
832 /*
833 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
834 * we set them to zero to make the result more compressible).
835 */
836 raw = (bytea *) palloc0(VARHDRSZ + total_length);
837 SET_VARSIZE(raw, VARHDRSZ + total_length);
838
839 ptr = VARDATA(raw);
840 endptr = ptr + total_length;
841
842 /* copy the MCV list header fields, one by one */
843 memcpy(ptr, &mcvlist->magic, sizeof(uint32));
844 ptr += sizeof(uint32);
845
846 memcpy(ptr, &mcvlist->type, sizeof(uint32));
847 ptr += sizeof(uint32);
848
849 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
850 ptr += sizeof(uint32);
851
852 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
853 ptr += sizeof(AttrNumber);
854
855 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
856 ptr += (sizeof(Oid) * ndims);
857
858 /* store information about the attributes (data amounts, ...) */
859 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
860 ptr += sizeof(DimensionInfo) * ndims;
861
862 /* Copy the deduplicated values for all attributes to the output. */
863 for (dim = 0; dim < ndims; dim++)
864 {
865 /* remember the starting point for Asserts later */
867
868 for (i = 0; i < info[dim].nvalues; i++)
869 {
870 Datum value = values[dim][i];
871
872 if (info[dim].typbyval) /* passed by value */
873 {
874 Datum tmp;
875
876 /*
877 * For byval types, we need to copy just the significant bytes
878 * - we can't use memcpy directly, as that assumes
879 * little-endian behavior. store_att_byval does almost what
880 * we need, but it requires a properly aligned buffer - the
881 * output buffer does not guarantee that. So we simply use a
882 * local Datum variable (which guarantees proper alignment),
883 * and then copy the value from it.
884 */
885 store_att_byval(&tmp, value, info[dim].typlen);
886
887 memcpy(ptr, &tmp, info[dim].typlen);
888 ptr += info[dim].typlen;
889 }
890 else if (info[dim].typlen > 0) /* passed by reference */
891 {
892 /* no special alignment needed, treated as char array */
893 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
894 ptr += info[dim].typlen;
895 }
896 else if (info[dim].typlen == -1) /* varlena */
897 {
899
900 /* copy the length */
901 memcpy(ptr, &len, sizeof(uint32));
902 ptr += sizeof(uint32);
903
904 /* data from the varlena value (without the header) */
905 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
906 ptr += len;
907 }
908 else if (info[dim].typlen == -2) /* cstring */
909 {
910 uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
911
912 /* copy the length */
913 memcpy(ptr, &len, sizeof(uint32));
914 ptr += sizeof(uint32);
915
916 /* value */
917 memcpy(ptr, DatumGetCString(value), len);
918 ptr += len;
919 }
920
921 /* no underflows or overflows */
922 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
923 }
924
925 /* we should get exactly nbytes of data for this dimension */
926 Assert((ptr - start) == info[dim].nbytes);
927 }
928
929 /* Serialize the items, with uint16 indexes instead of the values. */
930 for (i = 0; i < mcvlist->nitems; i++)
931 {
932 MCVItem *mcvitem = &mcvlist->items[i];
933
934 /* don't write beyond the allocated space */
935 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
936
937 /* copy NULL and frequency flags into the serialized MCV */
938 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
939 ptr += sizeof(bool) * ndims;
940
941 memcpy(ptr, &mcvitem->frequency, sizeof(double));
942 ptr += sizeof(double);
943
944 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
945 ptr += sizeof(double);
946
947 /* store the indexes last */
948 for (dim = 0; dim < ndims; dim++)
949 {
950 uint16 index = 0;
951 Datum *value;
952
953 /* do the lookup only for non-NULL values */
954 if (!mcvitem->isnull[dim])
955 {
956 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
957 info[dim].nvalues, sizeof(Datum),
958 compare_scalars_simple, &ssup[dim]);
959
960 Assert(value != NULL); /* serialization or deduplication
961 * error */
962
963 /* compute index within the deduplicated array */
964 index = (uint16) (value - values[dim]);
965
966 /* check the index is within expected bounds */
967 Assert(index < info[dim].nvalues);
968 }
969
970 /* copy the index into the serialized MCV */
971 memcpy(ptr, &index, sizeof(uint16));
972 ptr += sizeof(uint16);
973 }
974
975 /* make sure we don't overflow the allocated value */
976 Assert(ptr <= endptr);
977 }
978
979 /* at this point we expect to match the total_length exactly */
980 Assert(ptr == endptr);
981
982 pfree(values);
983 pfree(counts);
984
985 return raw;
986}
#define PG_UINT16_MAX
Definition: c.h:541
int compare_scalars_simple(const void *a, const void *b, void *arg)
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
static struct @161 value
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66
Form_pg_type attrtype
Definition: vacuum.h:128
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition: tupmacs.h:211
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString(), DatumGetPointer(), MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), PrepareSortSupportFromOrderingOp(), qsort_interruptible(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, start, store_att_byval(), DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, values, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().