PostgreSQL Source Code  git master
mcv.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "fmgr.h"
#include "funcapi.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/fmgroids.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
Include dependency graph for mcv.c:

Go to the source code of this file.

Macros

#define ITEM_SIZE(ndims)   ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
 
#define MinSizeOfMCVList   (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
 
#define SizeOfMCVList(ndims, nitems)
 
#define RESULT_MERGE(value, is_or, match)   ((is_or) ? ((value) || (match)) : ((value) && (match)))
 
#define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
 

Functions

static MultiSortSupport build_mss (VacAttrStats **stats, int numattrs)
 
static SortItembuild_distinct_groups (int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
 
static SortItem ** build_column_frequencies (SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
 
static int count_distinct_groups (int numrows, SortItem *items, MultiSortSupport mss)
 
static double get_mincount_for_mcv_list (int samplerows, double totalrows)
 
MCVListstatext_mcv_build (int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats, double totalrows, int stattarget)
 
static int compare_sort_item_count (const void *a, const void *b)
 
static int sort_item_compare (const void *a, const void *b, void *arg)
 
MCVListstatext_mcv_load (Oid mvoid)
 
byteastatext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVListstatext_mcv_deserialize (bytea *data)
 
Datum pg_stats_ext_mcvlist_items (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_in (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_out (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_recv (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_send (PG_FUNCTION_ARGS)
 
static boolmcv_get_match_bitmap (PlannerInfo *root, List *clauses, Bitmapset *keys, MCVList *mcvlist, bool is_or)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 

Macro Definition Documentation

◆ ITEM_SIZE

#define ITEM_SIZE (   ndims)    ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))

Definition at line 56 of file mcv.c.

Referenced by statext_mcv_serialize().

◆ MinSizeOfMCVList

#define MinSizeOfMCVList   (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))

Definition at line 62 of file mcv.c.

Referenced by statext_mcv_deserialize().

◆ RESULT_IS_FINAL

#define RESULT_IS_FINAL (   value,
  is_or 
)    ((is_or) ? (value) : (!(value)))

Definition at line 103 of file mcv.c.

Referenced by mcv_get_match_bitmap().

◆ RESULT_MERGE

#define RESULT_MERGE (   value,
  is_or,
  match 
)    ((is_or) ? ((value) || (match)) : ((value) && (match)))

Definition at line 91 of file mcv.c.

Referenced by mcv_get_match_bitmap().

◆ SizeOfMCVList

#define SizeOfMCVList (   ndims,
  nitems 
)
Value:
((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
((ndims) * sizeof(DimensionInfo)) + \
((nitems) * ITEM_SIZE(ndims)))
#define MinSizeOfMCVList
Definition: mcv.c:62
struct DimensionInfo DimensionInfo
unsigned int Oid
Definition: postgres_ext.h:31
#define ITEM_SIZE(ndims)
Definition: mcv.c:56

Definition at line 71 of file mcv.c.

Referenced by statext_mcv_deserialize().

Function Documentation

◆ build_column_frequencies()

static SortItem ** build_column_frequencies ( SortItem groups,
int  ngroups,
MultiSortSupport  mss,
int *  ncounts 
)
static

Definition at line 489 of file mcv.c.

References Assert, SortItem::count, i, SortItem::isnull, MAXALIGN, MultiSortSupportData::ndims, palloc(), qsort_arg(), sort_item_compare(), MultiSortSupportData::ssup, and SortItem::values.

Referenced by statext_mcv_build().

491 {
492  int i,
493  dim;
494  SortItem **result;
495  char *ptr;
496 
497  Assert(groups);
498  Assert(ncounts);
499 
500  /* allocate arrays for all columns as a single chunk */
501  ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
502  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
503 
504  /* initial array of pointers */
505  result = (SortItem **) ptr;
506  ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
507 
508  for (dim = 0; dim < mss->ndims; dim++)
509  {
510  SortSupport ssup = &mss->ssup[dim];
511 
512  /* array of values for a single column */
513  result[dim] = (SortItem *) ptr;
514  ptr += MAXALIGN(sizeof(SortItem) * ngroups);
515 
516  /* extract data for the dimension */
517  for (i = 0; i < ngroups; i++)
518  {
519  /* point into the input groups */
520  result[dim][i].values = &groups[i].values[dim];
521  result[dim][i].isnull = &groups[i].isnull[dim];
522  result[dim][i].count = groups[i].count;
523  }
524 
525  /* sort the values, deduplicate */
526  qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
527  sort_item_compare, ssup);
528 
529  /*
530  * Identify distinct values, compute frequency (there might be
531  * multiple MCV items containing this value, so we need to sum counts
532  * from all of them.
533  */
534  ncounts[dim] = 1;
535  for (i = 1; i < ngroups; i++)
536  {
537  if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
538  {
539  result[dim][ncounts[dim] - 1].count += result[dim][i].count;
540  continue;
541  }
542 
543  result[dim][ncounts[dim]] = result[dim][i];
544 
545  ncounts[dim]++;
546  }
547  }
548 
549  return result;
550 }
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
static int sort_item_compare(const void *a, const void *b, void *arg)
Definition: mcv.c:464
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
int result
Definition: header.h:19
#define Assert(condition)
Definition: c.h:746
#define MAXALIGN(LEN)
Definition: c.h:699
void * palloc(Size size)
Definition: mcxt.c:950
int i

◆ build_distinct_groups()

static SortItem * build_distinct_groups ( int  numrows,
SortItem items,
MultiSortSupport  mss,
int *  ndistinct 
)
static

Definition at line 423 of file mcv.c.

References Assert, compare_sort_item_count(), SortItem::count, count_distinct_groups(), i, multi_sort_compare(), palloc(), and pg_qsort().

Referenced by statext_mcv_build().

425 {
426  int i,
427  j;
428  int ngroups = count_distinct_groups(numrows, items, mss);
429 
430  SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
431 
432  j = 0;
433  groups[0] = items[0];
434  groups[0].count = 1;
435 
436  for (i = 1; i < numrows; i++)
437  {
438  /* Assume sorted in ascending order. */
439  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
440 
441  /* New distinct group detected. */
442  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
443  {
444  groups[++j] = items[i];
445  groups[j].count = 0;
446  }
447 
448  groups[j].count++;
449  }
450 
451  /* ensure we filled the expected number of distinct groups */
452  Assert(j + 1 == ngroups);
453 
454  /* Sort the distinct groups by frequency (in descending order). */
455  pg_qsort((void *) groups, ngroups, sizeof(SortItem),
457 
458  *ndistinct = ngroups;
459  return groups;
460 }
static int compare_sort_item_count(const void *a, const void *b)
Definition: mcv.c:403
#define Assert(condition)
Definition: c.h:746
int multi_sort_compare(const void *a, const void *b, void *arg)
void pg_qsort(void *base, size_t nel, size_t elsize, int(*cmp)(const void *, const void *))
Definition: qsort.c:113
void * palloc(Size size)
Definition: mcxt.c:950
int i
static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
Definition: mcv.c:380

◆ build_mss()

static MultiSortSupport build_mss ( VacAttrStats **  stats,
int  numattrs 
)
static

Definition at line 349 of file mcv.c.

References VacAttrStats::attrcollid, VacAttrStats::attrtypid, elog, ERROR, i, InvalidOid, lookup_type_cache(), TypeCacheEntry::lt_opr, multi_sort_add_dimension(), multi_sort_init(), generate_unaccent_rules::type, and TYPECACHE_LT_OPR.

Referenced by statext_mcv_build().

350 {
351  int i;
352 
353  /* Sort by multiple columns (using array of SortSupport) */
354  MultiSortSupport mss = multi_sort_init(numattrs);
355 
356  /* prepare the sort functions for all the attributes */
357  for (i = 0; i < numattrs; i++)
358  {
359  VacAttrStats *colstat = stats[i];
361 
362  type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
363  if (type->lt_opr == InvalidOid) /* shouldn't happen */
364  elog(ERROR, "cache lookup failed for ordering operator for type %u",
365  colstat->attrtypid);
366 
367  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
368  }
369 
370  return mss;
371 }
Oid attrtypid
Definition: vacuum.h:124
#define ERROR
Definition: elog.h:43
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
MultiSortSupport multi_sort_init(int ndims)
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define InvalidOid
Definition: postgres_ext.h:36
Oid attrcollid
Definition: vacuum.h:127
#define elog(elevel,...)
Definition: elog.h:214
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:131

◆ compare_sort_item_count()

static int compare_sort_item_count ( const void *  a,
const void *  b 
)
static

Definition at line 403 of file mcv.c.

References SortItem::count.

Referenced by build_distinct_groups().

404 {
405  SortItem *ia = (SortItem *) a;
406  SortItem *ib = (SortItem *) b;
407 
408  if (ia->count == ib->count)
409  return 0;
410  else if (ia->count > ib->count)
411  return -1;
412 
413  return 1;
414 }

◆ count_distinct_groups()

static int count_distinct_groups ( int  numrows,
SortItem items,
MultiSortSupport  mss 
)
static

Definition at line 380 of file mcv.c.

References Assert, i, and multi_sort_compare().

Referenced by build_distinct_groups().

381 {
382  int i;
383  int ndistinct;
384 
385  ndistinct = 1;
386  for (i = 1; i < numrows; i++)
387  {
388  /* make sure the array really is sorted */
389  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
390 
391  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
392  ndistinct += 1;
393  }
394 
395  return ndistinct;
396 }
#define Assert(condition)
Definition: c.h:746
int multi_sort_compare(const void *a, const void *b, void *arg)
int i

◆ get_mincount_for_mcv_list()

static double get_mincount_for_mcv_list ( int  samplerows,
double  totalrows 
)
static

Definition at line 151 of file mcv.c.

Referenced by statext_mcv_build().

152 {
153  double n = samplerows;
154  double N = totalrows;
155  double numer,
156  denom;
157 
158  numer = n * (N - n);
159  denom = N - n + 0.04 * n * (N - 1);
160 
161  /* Guard against division by zero (possible if n = N = 1) */
162  if (denom == 0.0)
163  return 0.0;
164 
165  return numer / denom;
166 }

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo root,
StatisticExtInfo stat,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Selectivity basesel,
Selectivity totalsel 
)

Definition at line 1903 of file mcv.c.

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, StatisticExtInfo::keys, mcv_get_match_bitmap(), MCVList::nitems, statext_mcv_load(), and StatisticExtInfo::statOid.

Referenced by statext_mcv_clauselist_selectivity().

1908 {
1909  int i;
1910  MCVList *mcv;
1911  Selectivity s = 0.0;
1912 
1913  /* match/mismatch bitmap for each MCV item */
1914  bool *matches = NULL;
1915 
1916  /* load the MCV list stored in the statistics object */
1917  mcv = statext_mcv_load(stat->statOid);
1918 
1919  /* build a match bitmap for the clauses */
1920  matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
1921 
1922  /* sum frequencies for all the matching MCV items */
1923  *basesel = 0.0;
1924  *totalsel = 0.0;
1925  for (i = 0; i < mcv->nitems; i++)
1926  {
1927  *totalsel += mcv->items[i].frequency;
1928 
1929  if (matches[i] != false)
1930  {
1931  /* XXX Shouldn't the basesel be outside the if condition? */
1932  *basesel += mcv->items[i].base_frequency;
1933  s += mcv->items[i].frequency;
1934  }
1935  }
1936 
1937  return s;
1938 }
uint32 nitems
Definition: statistics.h:90
double Selectivity
Definition: nodes.h:661
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1545
MCVList * statext_mcv_load(Oid mvoid)
Definition: mcv.c:557
const symbol * s
Definition: header.h:17
double base_frequency
Definition: statistics.h:80
Bitmapset * keys
Definition: pathnodes.h:917
int i
double frequency
Definition: statistics.h:79

◆ mcv_get_match_bitmap()

static bool* mcv_get_match_bitmap ( PlannerInfo root,
List clauses,
Bitmapset keys,
MCVList mcvlist,
bool  is_or 
)
static

Definition at line 1545 of file mcv.c.

References NullTest::arg, OpExpr::args, ScalarArrayOpExpr::args, BoolExpr::args, ARR_ELEMTYPE, Assert, bms_member_index(), Const::constisnull, Const::constvalue, DatumGetArrayTypeP, DatumGetBool, deconstruct_array(), elog, ERROR, examine_clause_args(), false, fmgr_info(), FunctionCall2Coll(), get_opcode(), get_typlenbyvalalign(), i, idx(), is_andclause(), IS_NOT_NULL, is_notclause(), IS_NULL, is_opclause(), is_orclause(), IsA, MCVItem::isnull, MCVList::items, lfirst, list_length(), NIL, MCVList::nitems, NullTest::nulltesttype, OpExpr::opno, ScalarArrayOpExpr::opno, palloc(), pfree(), RESULT_IS_FINAL, RESULT_MERGE, STATS_MCVLIST_MAX_ITEMS, Node::type, ScalarArrayOpExpr::useOr, MCVItem::values, Var::varattno, Var::varcollid, and Var::vartype.

Referenced by mcv_clauselist_selectivity().

1547 {
1548  int i;
1549  ListCell *l;
1550  bool *matches;
1551 
1552  /* The bitmap may be partially built. */
1553  Assert(clauses != NIL);
1554  Assert(list_length(clauses) >= 1);
1555  Assert(mcvlist != NULL);
1556  Assert(mcvlist->nitems > 0);
1557  Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1558 
1559  matches = palloc(sizeof(bool) * mcvlist->nitems);
1560  memset(matches, (is_or) ? false : true,
1561  sizeof(bool) * mcvlist->nitems);
1562 
1563  /*
1564  * Loop through the list of clauses, and for each of them evaluate all the
1565  * MCV items not yet eliminated by the preceding clauses.
1566  */
1567  foreach(l, clauses)
1568  {
1569  Node *clause = (Node *) lfirst(l);
1570 
1571  /* if it's a RestrictInfo, then extract the clause */
1572  if (IsA(clause, RestrictInfo))
1573  clause = (Node *) ((RestrictInfo *) clause)->clause;
1574 
1575  /*
1576  * Handle the various types of clauses - OpClause, NullTest and
1577  * AND/OR/NOT
1578  */
1579  if (is_opclause(clause))
1580  {
1581  OpExpr *expr = (OpExpr *) clause;
1582  FmgrInfo opproc;
1583 
1584  /* valid only after examine_clause_args returns true */
1585  Var *var;
1586  Const *cst;
1587  bool varonleft;
1588 
1589  fmgr_info(get_opcode(expr->opno), &opproc);
1590 
1591  /* extract the var and const from the expression */
1592  if (examine_clause_args(expr->args, &var, &cst, &varonleft))
1593  {
1594  int idx;
1595 
1596  /* match the attribute to a dimension of the statistic */
1597  idx = bms_member_index(keys, var->varattno);
1598 
1599  /*
1600  * Walk through the MCV items and evaluate the current clause.
1601  * We can skip items that were already ruled out, and
1602  * terminate if there are no remaining MCV items that might
1603  * possibly match.
1604  */
1605  for (i = 0; i < mcvlist->nitems; i++)
1606  {
1607  bool match = true;
1608  MCVItem *item = &mcvlist->items[i];
1609 
1610  /*
1611  * When the MCV item or the Const value is NULL we can
1612  * treat this as a mismatch. We must not call the operator
1613  * because of strictness.
1614  */
1615  if (item->isnull[idx] || cst->constisnull)
1616  {
1617  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1618  continue;
1619  }
1620 
1621  /*
1622  * Skip MCV items that can't change result in the bitmap.
1623  * Once the value gets false for AND-lists, or true for
1624  * OR-lists, we don't need to look at more clauses.
1625  */
1626  if (RESULT_IS_FINAL(matches[i], is_or))
1627  continue;
1628 
1629  /*
1630  * First check whether the constant is below the lower
1631  * boundary (in that case we can skip the bucket, because
1632  * there's no overlap).
1633  *
1634  * We don't store collations used to build the statistics,
1635  * but we can use the collation for the attribute itself,
1636  * as stored in varcollid. We do reset the statistics
1637  * after a type change (including collation change), so
1638  * this is OK. We may need to relax this after allowing
1639  * extended statistics on expressions.
1640  */
1641  if (varonleft)
1642  match = DatumGetBool(FunctionCall2Coll(&opproc,
1643  var->varcollid,
1644  item->values[idx],
1645  cst->constvalue));
1646  else
1647  match = DatumGetBool(FunctionCall2Coll(&opproc,
1648  var->varcollid,
1649  cst->constvalue,
1650  item->values[idx]));
1651 
1652  /* update the match bitmap with the result */
1653  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1654  }
1655  }
1656  }
1657  else if (IsA(clause, ScalarArrayOpExpr))
1658  {
1659  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1660  FmgrInfo opproc;
1661 
1662  /* valid only after examine_clause_args returns true */
1663  Var *var;
1664  Const *cst;
1665  bool varonleft;
1666 
1667  fmgr_info(get_opcode(expr->opno), &opproc);
1668 
1669  /* extract the var and const from the expression */
1670  if (examine_clause_args(expr->args, &var, &cst, &varonleft))
1671  {
1672  int idx;
1673 
1674  ArrayType *arrayval;
1675  int16 elmlen;
1676  bool elmbyval;
1677  char elmalign;
1678  int num_elems;
1679  Datum *elem_values;
1680  bool *elem_nulls;
1681 
1682  /* ScalarArrayOpExpr has the Var always on the left */
1683  Assert(varonleft);
1684 
1685  if (!cst->constisnull)
1686  {
1687  arrayval = DatumGetArrayTypeP(cst->constvalue);
1689  &elmlen, &elmbyval, &elmalign);
1690  deconstruct_array(arrayval,
1691  ARR_ELEMTYPE(arrayval),
1692  elmlen, elmbyval, elmalign,
1693  &elem_values, &elem_nulls, &num_elems);
1694  }
1695 
1696  /* match the attribute to a dimension of the statistic */
1697  idx = bms_member_index(keys, var->varattno);
1698 
1699  /*
1700  * Walk through the MCV items and evaluate the current clause.
1701  * We can skip items that were already ruled out, and
1702  * terminate if there are no remaining MCV items that might
1703  * possibly match.
1704  */
1705  for (i = 0; i < mcvlist->nitems; i++)
1706  {
1707  int j;
1708  bool match = (expr->useOr ? false : true);
1709  MCVItem *item = &mcvlist->items[i];
1710 
1711  /*
1712  * When the MCV item or the Const value is NULL we can
1713  * treat this as a mismatch. We must not call the operator
1714  * because of strictness.
1715  */
1716  if (item->isnull[idx] || cst->constisnull)
1717  {
1718  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1719  continue;
1720  }
1721 
1722  /*
1723  * Skip MCV items that can't change result in the bitmap.
1724  * Once the value gets false for AND-lists, or true for
1725  * OR-lists, we don't need to look at more clauses.
1726  */
1727  if (RESULT_IS_FINAL(matches[i], is_or))
1728  continue;
1729 
1730  for (j = 0; j < num_elems; j++)
1731  {
1732  Datum elem_value = elem_values[j];
1733  bool elem_isnull = elem_nulls[j];
1734  bool elem_match;
1735 
1736  /* NULL values always evaluate as not matching. */
1737  if (elem_isnull)
1738  {
1739  match = RESULT_MERGE(match, expr->useOr, false);
1740  continue;
1741  }
1742 
1743  /*
1744  * Stop evaluating the array elements once we reach
1745  * match value that can't change - ALL() is the same
1746  * as AND-list, ANY() is the same as OR-list.
1747  */
1748  if (RESULT_IS_FINAL(match, expr->useOr))
1749  break;
1750 
1751  elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1752  var->varcollid,
1753  item->values[idx],
1754  elem_value));
1755 
1756  match = RESULT_MERGE(match, expr->useOr, elem_match);
1757  }
1758 
1759  /* update the match bitmap with the result */
1760  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1761  }
1762  }
1763  }
1764  else if (IsA(clause, NullTest))
1765  {
1766  NullTest *expr = (NullTest *) clause;
1767  Var *var = (Var *) (expr->arg);
1768 
1769  /* match the attribute to a dimension of the statistic */
1770  int idx = bms_member_index(keys, var->varattno);
1771 
1772  /*
1773  * Walk through the MCV items and evaluate the current clause. We
1774  * can skip items that were already ruled out, and terminate if
1775  * there are no remaining MCV items that might possibly match.
1776  */
1777  for (i = 0; i < mcvlist->nitems; i++)
1778  {
1779  bool match = false; /* assume mismatch */
1780  MCVItem *item = &mcvlist->items[i];
1781 
1782  /* if the clause mismatches the MCV item, update the bitmap */
1783  switch (expr->nulltesttype)
1784  {
1785  case IS_NULL:
1786  match = (item->isnull[idx]) ? true : match;
1787  break;
1788 
1789  case IS_NOT_NULL:
1790  match = (!item->isnull[idx]) ? true : match;
1791  break;
1792  }
1793 
1794  /* now, update the match bitmap, depending on OR/AND type */
1795  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1796  }
1797  }
1798  else if (is_orclause(clause) || is_andclause(clause))
1799  {
1800  /* AND/OR clause, with all subclauses being compatible */
1801 
1802  int i;
1803  BoolExpr *bool_clause = ((BoolExpr *) clause);
1804  List *bool_clauses = bool_clause->args;
1805 
1806  /* match/mismatch bitmap for each MCV item */
1807  bool *bool_matches = NULL;
1808 
1809  Assert(bool_clauses != NIL);
1810  Assert(list_length(bool_clauses) >= 2);
1811 
1812  /* build the match bitmap for the OR-clauses */
1813  bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys,
1814  mcvlist, is_orclause(clause));
1815 
1816  /*
1817  * Merge the bitmap produced by mcv_get_match_bitmap into the
1818  * current one. We need to consider if we're evaluating AND or OR
1819  * condition when merging the results.
1820  */
1821  for (i = 0; i < mcvlist->nitems; i++)
1822  matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1823 
1824  pfree(bool_matches);
1825  }
1826  else if (is_notclause(clause))
1827  {
1828  /* NOT clause, with all subclauses compatible */
1829 
1830  int i;
1831  BoolExpr *not_clause = ((BoolExpr *) clause);
1832  List *not_args = not_clause->args;
1833 
1834  /* match/mismatch bitmap for each MCV item */
1835  bool *not_matches = NULL;
1836 
1837  Assert(not_args != NIL);
1838  Assert(list_length(not_args) == 1);
1839 
1840  /* build the match bitmap for the NOT-clause */
1841  not_matches = mcv_get_match_bitmap(root, not_args, keys,
1842  mcvlist, false);
1843 
1844  /*
1845  * Merge the bitmap produced by mcv_get_match_bitmap into the
1846  * current one. We're handling a NOT clause, so invert the result
1847  * before merging it into the global bitmap.
1848  */
1849  for (i = 0; i < mcvlist->nitems; i++)
1850  matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1851 
1852  pfree(not_matches);
1853  }
1854  else if (IsA(clause, Var))
1855  {
1856  /* Var (has to be a boolean Var, possibly from below NOT) */
1857 
1858  Var *var = (Var *) (clause);
1859 
1860  /* match the attribute to a dimension of the statistic */
1861  int idx = bms_member_index(keys, var->varattno);
1862 
1863  Assert(var->vartype == BOOLOID);
1864 
1865  /*
1866  * Walk through the MCV items and evaluate the current clause. We
1867  * can skip items that were already ruled out, and terminate if
1868  * there are no remaining MCV items that might possibly match.
1869  */
1870  for (i = 0; i < mcvlist->nitems; i++)
1871  {
1872  MCVItem *item = &mcvlist->items[i];
1873  bool match = false;
1874 
1875  /* if the item is NULL, it's a mismatch */
1876  if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1877  match = true;
1878 
1879  /* update the result bitmap */
1880  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1881  }
1882  }
1883  else
1884  elog(ERROR, "unknown clause type: %d", clause->type);
1885  }
1886 
1887  return matches;
1888 }
Datum constvalue
Definition: primnodes.h:214
uint32 nitems
Definition: statistics.h:90
signed short int16
Definition: c.h:362
#define NIL
Definition: pg_list.h:65
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:579
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:106
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2159
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:97
Definition: nodes.h:528
AttrNumber varattno
Definition: primnodes.h:186
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define false
Definition: c.h:333
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1152
Definition: primnodes.h:181
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1545
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ERROR
Definition: elog.h:43
Oid vartype
Definition: primnodes.h:188
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
Expr * arg
Definition: primnodes.h:1222
#define RESULT_MERGE(value, is_or, match)
Definition: mcv.c:91
NodeTag type
Definition: nodes.h:530
#define DatumGetBool(X)
Definition: postgres.h:393
bool examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:453
#define RESULT_IS_FINAL(value, is_or)
Definition: mcv.c:103
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:115
uintptr_t Datum
Definition: postgres.h:367
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:69
NullTestType nulltesttype
Definition: primnodes.h:1223
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1202
#define Assert(condition)
Definition: c.h:746
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
List * args
Definition: primnodes.h:583
bool * isnull
Definition: statistics.h:81
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3483
void * palloc(Size size)
Definition: mcxt.c:950
#define elog(elevel,...)
Definition: elog.h:214
int i
Oid varcollid
Definition: primnodes.h:190
Oid opno
Definition: primnodes.h:516
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:66
List * args
Definition: primnodes.h:522
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:280
bool constisnull
Definition: primnodes.h:215
#define DatumGetArrayTypeP(X)
Definition: array.h:249

◆ pg_mcv_list_in()

Datum pg_mcv_list_in ( PG_FUNCTION_ARGS  )

Definition at line 1469 of file mcv.c.

References ereport, errcode(), errmsg(), ERROR, and PG_RETURN_VOID.

1470 {
1471  /*
1472  * pg_mcv_list stores the data in binary form and parsing text input is
1473  * not needed, so disallow this.
1474  */
1475  ereport(ERROR,
1476  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1477  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1478 
1479  PG_RETURN_VOID(); /* keep compiler quiet */
1480 }
int errcode(int sqlerrcode)
Definition: elog.c:610
#define ERROR
Definition: elog.h:43
#define ereport(elevel,...)
Definition: elog.h:144
#define PG_RETURN_VOID()
Definition: fmgr.h:348
int errmsg(const char *fmt,...)
Definition: elog.c:821

◆ pg_mcv_list_out()

Datum pg_mcv_list_out ( PG_FUNCTION_ARGS  )

Definition at line 1495 of file mcv.c.

References byteaout().

1496 {
1497  return byteaout(fcinfo);
1498 }
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:390

◆ pg_mcv_list_recv()

Datum pg_mcv_list_recv ( PG_FUNCTION_ARGS  )

Definition at line 1504 of file mcv.c.

References ereport, errcode(), errmsg(), ERROR, and PG_RETURN_VOID.

1505 {
1506  ereport(ERROR,
1507  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1508  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1509 
1510  PG_RETURN_VOID(); /* keep compiler quiet */
1511 }
int errcode(int sqlerrcode)
Definition: elog.c:610
#define ERROR
Definition: elog.h:43
#define ereport(elevel,...)
Definition: elog.h:144
#define PG_RETURN_VOID()
Definition: fmgr.h:348
int errmsg(const char *fmt,...)
Definition: elog.c:821

◆ pg_mcv_list_send()

Datum pg_mcv_list_send ( PG_FUNCTION_ARGS  )

Definition at line 1520 of file mcv.c.

References byteasend().

1521 {
1522  return byteasend(fcinfo);
1523 }
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:492

◆ pg_stats_ext_mcvlist_items()

Datum pg_stats_ext_mcvlist_items ( PG_FUNCTION_ARGS  )

Definition at line 1335 of file mcv.c.

References accumArrayResult(), Assert, FuncCallContext::attinmeta, MCVItem::base_frequency, BlessTupleDesc(), BoolGetDatum, FuncCallContext::call_cntr, cstring_to_text(), CurrentMemoryContext, DatumGetPointer, ereport, errcode(), errmsg(), ERROR, Float8GetDatum(), fmgr_info(), MCVItem::frequency, FunctionCall1, get_call_result_type(), getTypeOutputInfo(), heap_form_tuple(), HeapTupleGetDatum, i, Int32GetDatum, MCVItem::isnull, MCVList::items, makeArrayResult(), FuncCallContext::max_calls, MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, MCVList::ndimensions, MCVList::nitems, PG_GETARG_BYTEA_P, PointerGetDatum, SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, statext_mcv_deserialize(), AttInMetadata::tupdesc, TupleDescGetAttInMetadata(), TYPEFUNC_COMPOSITE, MCVList::types, FuncCallContext::user_fctx, val, MCVItem::values, and values.

1336 {
1337  FuncCallContext *funcctx;
1338 
1339  /* stuff done only on the first call of the function */
1340  if (SRF_IS_FIRSTCALL())
1341  {
1342  MemoryContext oldcontext;
1343  MCVList *mcvlist;
1344  TupleDesc tupdesc;
1345 
1346  /* create a function context for cross-call persistence */
1347  funcctx = SRF_FIRSTCALL_INIT();
1348 
1349  /* switch to memory context appropriate for multiple function calls */
1350  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1351 
1353 
1354  funcctx->user_fctx = mcvlist;
1355 
1356  /* total number of tuples to be returned */
1357  funcctx->max_calls = 0;
1358  if (funcctx->user_fctx != NULL)
1359  funcctx->max_calls = mcvlist->nitems;
1360 
1361  /* Build a tuple descriptor for our result type */
1362  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1363  ereport(ERROR,
1364  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1365  errmsg("function returning record called in context "
1366  "that cannot accept type record")));
1367  tupdesc = BlessTupleDesc(tupdesc);
1368 
1369  /*
1370  * generate attribute metadata needed later to produce tuples from raw
1371  * C strings
1372  */
1373  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1374 
1375  MemoryContextSwitchTo(oldcontext);
1376  }
1377 
1378  /* stuff done on every call of the function */
1379  funcctx = SRF_PERCALL_SETUP();
1380 
1381  if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1382  * left to send */
1383  {
1384  Datum values[5];
1385  bool nulls[5];
1386  HeapTuple tuple;
1387  Datum result;
1388  ArrayBuildState *astate_values = NULL;
1389  ArrayBuildState *astate_nulls = NULL;
1390 
1391  int i;
1392  MCVList *mcvlist;
1393  MCVItem *item;
1394 
1395  mcvlist = (MCVList *) funcctx->user_fctx;
1396 
1397  Assert(funcctx->call_cntr < mcvlist->nitems);
1398 
1399  item = &mcvlist->items[funcctx->call_cntr];
1400 
1401  for (i = 0; i < mcvlist->ndimensions; i++)
1402  {
1403 
1404  astate_nulls = accumArrayResult(astate_nulls,
1405  BoolGetDatum(item->isnull[i]),
1406  false,
1407  BOOLOID,
1409 
1410  if (!item->isnull[i])
1411  {
1412  bool isvarlena;
1413  Oid outfunc;
1414  FmgrInfo fmgrinfo;
1415  Datum val;
1416  text *txt;
1417 
1418  /* lookup output func for the type */
1419  getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1420  fmgr_info(outfunc, &fmgrinfo);
1421 
1422  val = FunctionCall1(&fmgrinfo, item->values[i]);
1423  txt = cstring_to_text(DatumGetPointer(val));
1424 
1425  astate_values = accumArrayResult(astate_values,
1426  PointerGetDatum(txt),
1427  false,
1428  TEXTOID,
1430  }
1431  else
1432  astate_values = accumArrayResult(astate_values,
1433  (Datum) 0,
1434  true,
1435  TEXTOID,
1437  }
1438 
1439  values[0] = Int32GetDatum(funcctx->call_cntr);
1440  values[1] = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
1441  values[2] = PointerGetDatum(makeArrayResult(astate_nulls, CurrentMemoryContext));
1442  values[3] = Float8GetDatum(item->frequency);
1443  values[4] = Float8GetDatum(item->base_frequency);
1444 
1445  /* no NULLs in the tuple */
1446  memset(nulls, 0, sizeof(nulls));
1447 
1448  /* build a tuple */
1449  tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1450 
1451  /* make the tuple into a datum */
1452  result = HeapTupleGetDatum(tuple);
1453 
1454  SRF_RETURN_NEXT(funcctx, result);
1455  }
1456  else /* do when there is no more left */
1457  {
1458  SRF_RETURN_DONE(funcctx);
1459  }
1460 }
uint32 nitems
Definition: statistics.h:90
uint64 call_cntr
Definition: funcapi.h:65
Definition: fmgr.h:56
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:205
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2784
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:993
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:294
#define PointerGetDatum(X)
Definition: postgres.h:556
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
int errcode(int sqlerrcode)
Definition: elog.c:610
AttrNumber ndimensions
Definition: statistics.h:91
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
unsigned int Oid
Definition: postgres_ext.h:31
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:298
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1710
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:300
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:334
#define ERROR
Definition: elog.h:43
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2052
AttInMetadata * attinmeta
Definition: funcapi.h:91
TupleDesc tupdesc
Definition: funcapi.h:38
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5144
uintptr_t Datum
Definition: postgres.h:367
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2067
#define BoolGetDatum(X)
Definition: postgres.h:402
#define ereport(elevel,...)
Definition: elog.h:144
int result
Definition: header.h:19
text * cstring_to_text(const char *s)
Definition: varlena.c:188
#define Assert(condition)
Definition: c.h:746
double base_frequency
Definition: statistics.h:80
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:221
#define DatumGetPointer(X)
Definition: postgres.h:549
bool * isnull
Definition: statistics.h:81
static Datum values[MAXATTR]
Definition: bootstrap.c:165
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5080
#define Int32GetDatum(X)
Definition: postgres.h:479
void * user_fctx
Definition: funcapi.h:82
int errmsg(const char *fmt,...)
Definition: elog.c:821
int i
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:642
Definition: c.h:563
long val
Definition: informix.c:664
uint64 max_calls
Definition: funcapi.h:74
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:318
double frequency
Definition: statistics.h:79
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:296

◆ sort_item_compare()

static int sort_item_compare ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 464 of file mcv.c.

References ApplySortComparator(), SortItem::isnull, and SortItem::values.

Referenced by build_column_frequencies().

465 {
466  SortSupport ssup = (SortSupport) arg;
467  SortItem *ia = (SortItem *) a;
468  SortItem *ib = (SortItem *) b;
469 
470  return ApplySortComparator(ia->values[0], ia->isnull[0],
471  ib->values[0], ib->isnull[0],
472  ssup);
473 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
void * arg
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ statext_mcv_build()

MCVList* statext_mcv_build ( int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats,
double  totalrows,
int  stattarget 
)

Definition at line 183 of file mcv.c.

References Assert, VacAttrStats::attrtypid, MCVItem::base_frequency, bsearch_arg(), build_attnums_array(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, offsetof, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

185 {
186  int i,
187  numattrs,
188  ngroups,
189  nitems;
190  AttrNumber *attnums;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  attnums = build_attnums_array(attrs, &numattrs);
198 
199  /* comparator for all the columns */
200  mss = build_mss(stats, numattrs);
201 
202  /* sort the rows */
203  items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
204  mss, numattrs, attnums);
205 
206  if (!items)
207  return NULL;
208 
209  /* transform the sorted rows into groups (sorted by frequency) */
210  groups = build_distinct_groups(nitems, items, mss, &ngroups);
211 
212  /*
213  * Maximum number of MCV items to store, based on the statistics target we
214  * computed for the statistics object (from target set for the object
215  * itself, attributes and the system default). In any case, we can't keep
216  * more groups than we have available.
217  */
218  nitems = stattarget;
219  if (nitems > ngroups)
220  nitems = ngroups;
221 
222  /*
223  * Decide how many items to keep in the MCV list. We can't use the same
224  * algorithm as per-column MCV lists, because that only considers the
225  * actual group frequency - but we're primarily interested in how the
226  * actual frequency differs from the base frequency (product of simple
227  * per-column frequencies, as if the columns were independent).
228  *
229  * Using the same algorithm might exclude items that are close to the
230  * "average" frequency of the sample. But that does not say whether the
231  * observed frequency is close to the base frequency or not. We also need
232  * to consider unexpectedly uncommon items (again, compared to the base
233  * frequency), and the single-column algorithm does not have to.
234  *
235  * We simply decide how many items to keep by computing minimum count
236  * using get_mincount_for_mcv_list() and then keep all items that seem to
237  * be more common than that.
238  */
239  mincount = get_mincount_for_mcv_list(numrows, totalrows);
240 
241  /*
242  * Walk the groups until we find the first group with a count below the
243  * mincount threshold (the index of that group is the number of groups we
244  * want to keep).
245  */
246  for (i = 0; i < nitems; i++)
247  {
248  if (groups[i].count < mincount)
249  {
250  nitems = i;
251  break;
252  }
253  }
254 
255  /*
256  * At this point we know the number of items for the MCV list. There might
257  * be none (for uniform distribution with many groups), and in that case
258  * there will be no MCV list. Otherwise construct the MCV list.
259  */
260  if (nitems > 0)
261  {
262  int j;
263  SortItem key;
264  MultiSortSupport tmp;
265 
266  /* frequencies for values in each attribute */
267  SortItem **freqs;
268  int *nfreqs;
269 
270  /* used to search values */
272  + sizeof(SortSupportData));
273 
274  /* compute frequencies for values in each column */
275  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
276  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
277 
278  /*
279  * Allocate the MCV list structure, set the global parameters.
280  */
281  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
282  sizeof(MCVItem) * nitems);
283 
284  mcvlist->magic = STATS_MCV_MAGIC;
285  mcvlist->type = STATS_MCV_TYPE_BASIC;
286  mcvlist->ndimensions = numattrs;
287  mcvlist->nitems = nitems;
288 
289  /* store info about data type OIDs */
290  for (i = 0; i < numattrs; i++)
291  mcvlist->types[i] = stats[i]->attrtypid;
292 
293  /* Copy the first chunk of groups into the result. */
294  for (i = 0; i < nitems; i++)
295  {
296  /* just pointer to the proper place in the list */
297  MCVItem *item = &mcvlist->items[i];
298 
299  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
300  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
301 
302  /* copy values for the group */
303  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
304  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
305 
306  /* groups should be sorted by frequency in descending order */
307  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
308 
309  /* group frequency */
310  item->frequency = (double) groups[i].count / numrows;
311 
312  /* base frequency, if the attributes were independent */
313  item->base_frequency = 1.0;
314  for (j = 0; j < numattrs; j++)
315  {
316  SortItem *freq;
317 
318  /* single dimension */
319  tmp->ndims = 1;
320  tmp->ssup[0] = mss->ssup[j];
321 
322  /* fill search key */
323  key.values = &groups[i].values[j];
324  key.isnull = &groups[i].isnull[j];
325 
326  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
327  sizeof(SortItem),
328  multi_sort_compare, tmp);
329 
330  item->base_frequency *= ((double) freq->count) / numrows;
331  }
332  }
333 
334  pfree(nfreqs);
335  pfree(freqs);
336  }
337 
338  pfree(items);
339  pfree(groups);
340 
341  return mcvlist;
342 }
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:423
uint32 nitems
Definition: statistics.h:90
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:489
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs)
Definition: mcv.c:349
SortItem * build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:151
void pfree(void *pointer)
Definition: mcxt.c:1057
Oid attrtypid
Definition: vacuum.h:124
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
AttrNumber * build_attnums_array(Bitmapset *attrs, int *numattrs)
uint32 type
Definition: statistics.h:89
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:66
struct SortSupportData SortSupportData
void * palloc0(Size size)
Definition: mcxt.c:981
uintptr_t Datum
Definition: postgres.h:367
#define Assert(condition)
Definition: c.h:746
double base_frequency
Definition: statistics.h:80
#define STATS_MCV_MAGIC
Definition: statistics.h:65
int multi_sort_compare(const void *a, const void *b, void *arg)
bool * isnull
Definition: statistics.h:81
void * palloc(Size size)
Definition: mcxt.c:950
int i
MultiSortSupportData * MultiSortSupport
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:669
double frequency
Definition: statistics.h:79

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea data)

Definition at line 993 of file mcv.c.

References Assert, MCVItem::base_frequency, elog, ERROR, fetch_att, MCVItem::frequency, i, MCVItem::isnull, MCVList::items, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, offsetof, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, repalloc(), SET_VARSIZE, SizeOfMCVList, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

994 {
995  int dim,
996  i;
997  Size expected_size;
998  MCVList *mcvlist;
999  char *raw;
1000  char *ptr;
1001  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1002 
1003  int ndims,
1004  nitems;
1005  DimensionInfo *info = NULL;
1006 
1007  /* local allocation buffer (used only for deserialization) */
1008  Datum **map = NULL;
1009 
1010  /* MCV list */
1011  Size mcvlen;
1012 
1013  /* buffer used for the result */
1014  Size datalen;
1015  char *dataptr;
1016  char *valuesptr;
1017  char *isnullptr;
1018 
1019  if (data == NULL)
1020  return NULL;
1021 
1022  /*
1023  * We can't possibly deserialize a MCV list if there's not even a complete
1024  * header. We need an explicit formula here, because we serialize the
1025  * header fields one by one, so we need to ignore struct alignment.
1026  */
1027  if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1028  elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1029  VARSIZE_ANY(data), MinSizeOfMCVList);
1030 
1031  /* read the MCV list header */
1032  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1033 
1034  /* pointer to the data part (skip the varlena header) */
1035  raw = (char *) data;
1036  ptr = VARDATA_ANY(raw);
1037  endptr = (char *) raw + VARSIZE_ANY(data);
1038 
1039  /* get the header and perform further sanity checks */
1040  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1041  ptr += sizeof(uint32);
1042 
1043  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1044  ptr += sizeof(uint32);
1045 
1046  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1047  ptr += sizeof(uint32);
1048 
1049  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1050  ptr += sizeof(AttrNumber);
1051 
1052  if (mcvlist->magic != STATS_MCV_MAGIC)
1053  elog(ERROR, "invalid MCV magic %u (expected %u)",
1054  mcvlist->magic, STATS_MCV_MAGIC);
1055 
1056  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1057  elog(ERROR, "invalid MCV type %u (expected %u)",
1058  mcvlist->type, STATS_MCV_TYPE_BASIC);
1059 
1060  if (mcvlist->ndimensions == 0)
1061  elog(ERROR, "invalid zero-length dimension array in MCVList");
1062  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1063  (mcvlist->ndimensions < 0))
1064  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1065  mcvlist->ndimensions);
1066 
1067  if (mcvlist->nitems == 0)
1068  elog(ERROR, "invalid zero-length item array in MCVList");
1069  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1070  elog(ERROR, "invalid length (%u) item array in MCVList",
1071  mcvlist->nitems);
1072 
1073  nitems = mcvlist->nitems;
1074  ndims = mcvlist->ndimensions;
1075 
1076  /*
1077  * Check amount of data including DimensionInfo for all dimensions and
1078  * also the serialized items (including uint16 indexes). Also, walk
1079  * through the dimension information and add it to the sum.
1080  */
1081  expected_size = SizeOfMCVList(ndims, nitems);
1082 
1083  /*
1084  * Check that we have at least the dimension and info records, along with
1085  * the items. We don't know the size of the serialized values yet. We need
1086  * to do this check first, before accessing the dimension info.
1087  */
1088  if (VARSIZE_ANY(data) < expected_size)
1089  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1090  VARSIZE_ANY(data), expected_size);
1091 
1092  /* Now copy the array of type Oids. */
1093  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1094  ptr += (sizeof(Oid) * ndims);
1095 
1096  /* Now it's safe to access the dimension info. */
1097  info = palloc(ndims * sizeof(DimensionInfo));
1098 
1099  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1100  ptr += (ndims * sizeof(DimensionInfo));
1101 
1102  /* account for the value arrays */
1103  for (dim = 0; dim < ndims; dim++)
1104  {
1105  /*
1106  * XXX I wonder if we can/should rely on asserts here. Maybe those
1107  * checks should be done every time?
1108  */
1109  Assert(info[dim].nvalues >= 0);
1110  Assert(info[dim].nbytes >= 0);
1111 
1112  expected_size += info[dim].nbytes;
1113  }
1114 
1115  /*
1116  * Now we know the total expected MCV size, including all the pieces
1117  * (header, dimension info. items and deduplicated data). So do the final
1118  * check on size.
1119  */
1120  if (VARSIZE_ANY(data) != expected_size)
1121  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1122  VARSIZE_ANY(data), expected_size);
1123 
1124  /*
1125  * We need an array of Datum values for each dimension, so that we can
1126  * easily translate the uint16 indexes later. We also need a top-level
1127  * array of pointers to those per-dimension arrays.
1128  *
1129  * While allocating the arrays for dimensions, compute how much space we
1130  * need for a copy of the by-ref data, as we can't simply point to the
1131  * original values (it might go away).
1132  */
1133  datalen = 0; /* space for by-ref data */
1134  map = (Datum **) palloc(ndims * sizeof(Datum *));
1135 
1136  for (dim = 0; dim < ndims; dim++)
1137  {
1138  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1139 
1140  /* space needed for a copy of data for by-ref types */
1141  datalen += info[dim].nbytes_aligned;
1142  }
1143 
1144  /*
1145  * Now resize the MCV list so that the allocation includes all the data.
1146  *
1147  * Allocate space for a copy of the data, as we can't simply reference the
1148  * serialized data - it's not aligned properly, and it may disappear while
1149  * we're still using the MCV list, e.g. due to catcache release.
1150  *
1151  * We do care about alignment here, because we will allocate all the
1152  * pieces at once, but then use pointers to different parts.
1153  */
1154  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1155 
1156  /* arrays of values and isnull flags for all MCV items */
1157  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1158  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1159 
1160  /* we don't quite need to align this, but it makes some asserts easier */
1161  mcvlen += MAXALIGN(datalen);
1162 
1163  /* now resize the deserialized MCV list, and compute pointers to parts */
1164  mcvlist = repalloc(mcvlist, mcvlen);
1165 
1166  /* pointer to the beginning of values/isnull arrays */
1167  valuesptr = (char *) mcvlist
1168  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1169 
1170  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1171 
1172  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1173 
1174  /*
1175  * Build mapping (index => value) for translating the serialized data into
1176  * the in-memory representation.
1177  */
1178  for (dim = 0; dim < ndims; dim++)
1179  {
1180  /* remember start position in the input array */
1181  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1182 
1183  if (info[dim].typbyval)
1184  {
1185  /* for by-val types we simply copy data into the mapping */
1186  for (i = 0; i < info[dim].nvalues; i++)
1187  {
1188  Datum v = 0;
1189 
1190  memcpy(&v, ptr, info[dim].typlen);
1191  ptr += info[dim].typlen;
1192 
1193  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1194 
1195  /* no under/overflow of input array */
1196  Assert(ptr <= (start + info[dim].nbytes));
1197  }
1198  }
1199  else
1200  {
1201  /* for by-ref types we need to also make a copy of the data */
1202 
1203  /* passed by reference, but fixed length (name, tid, ...) */
1204  if (info[dim].typlen > 0)
1205  {
1206  for (i = 0; i < info[dim].nvalues; i++)
1207  {
1208  memcpy(dataptr, ptr, info[dim].typlen);
1209  ptr += info[dim].typlen;
1210 
1211  /* just point into the array */
1212  map[dim][i] = PointerGetDatum(dataptr);
1213  dataptr += MAXALIGN(info[dim].typlen);
1214  }
1215  }
1216  else if (info[dim].typlen == -1)
1217  {
1218  /* varlena */
1219  for (i = 0; i < info[dim].nvalues; i++)
1220  {
1221  uint32 len;
1222 
1223  /* read the uint32 length */
1224  memcpy(&len, ptr, sizeof(uint32));
1225  ptr += sizeof(uint32);
1226 
1227  /* the length is data-only */
1228  SET_VARSIZE(dataptr, len + VARHDRSZ);
1229  memcpy(VARDATA(dataptr), ptr, len);
1230  ptr += len;
1231 
1232  /* just point into the array */
1233  map[dim][i] = PointerGetDatum(dataptr);
1234 
1235  /* skip to place of the next deserialized value */
1236  dataptr += MAXALIGN(len + VARHDRSZ);
1237  }
1238  }
1239  else if (info[dim].typlen == -2)
1240  {
1241  /* cstring */
1242  for (i = 0; i < info[dim].nvalues; i++)
1243  {
1244  uint32 len;
1245 
1246  memcpy(&len, ptr, sizeof(uint32));
1247  ptr += sizeof(uint32);
1248 
1249  memcpy(dataptr, ptr, len);
1250  ptr += len;
1251 
1252  /* just point into the array */
1253  map[dim][i] = PointerGetDatum(dataptr);
1254  dataptr += MAXALIGN(len);
1255  }
1256  }
1257 
1258  /* no under/overflow of input array */
1259  Assert(ptr <= (start + info[dim].nbytes));
1260 
1261  /* no overflow of the output mcv value */
1262  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1263  }
1264 
1265  /* check we consumed input data for this dimension exactly */
1266  Assert(ptr == (start + info[dim].nbytes));
1267  }
1268 
1269  /* we should have also filled the MCV list exactly */
1270  Assert(dataptr == ((char *) mcvlist + mcvlen));
1271 
1272  /* deserialize the MCV items and translate the indexes to Datums */
1273  for (i = 0; i < nitems; i++)
1274  {
1275  MCVItem *item = &mcvlist->items[i];
1276 
1277  item->values = (Datum *) valuesptr;
1278  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1279 
1280  item->isnull = (bool *) isnullptr;
1281  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1282 
1283  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1284  ptr += sizeof(bool) * ndims;
1285 
1286  memcpy(&item->frequency, ptr, sizeof(double));
1287  ptr += sizeof(double);
1288 
1289  memcpy(&item->base_frequency, ptr, sizeof(double));
1290  ptr += sizeof(double);
1291 
1292  /* finally translate the indexes (for non-NULL only) */
1293  for (dim = 0; dim < ndims; dim++)
1294  {
1295  uint16 index;
1296 
1297  memcpy(&index, ptr, sizeof(uint16));
1298  ptr += sizeof(uint16);
1299 
1300  if (item->isnull[dim])
1301  continue;
1302 
1303  item->values[dim] = map[dim][index];
1304  }
1305 
1306  /* check we're not overflowing the input */
1307  Assert(ptr <= endptr);
1308  }
1309 
1310  /* check that we processed all the data */
1311  Assert(ptr == endptr);
1312 
1313  /* release the buffers used for mapping */
1314  for (dim = 0; dim < ndims; dim++)
1315  pfree(map[dim]);
1316 
1317  pfree(map);
1318 
1319  return mcvlist;
1320 }
uint32 nitems
Definition: statistics.h:90
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:569
#define MinSizeOfMCVList
Definition: mcv.c:62
struct DimensionInfo DimensionInfo
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:374
void pfree(void *pointer)
Definition: mcxt.c:1057
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:375
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:71
uint32 type
Definition: statistics.h:89
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:66
void * palloc0(Size size)
Definition: mcxt.c:981
uintptr_t Datum
Definition: postgres.h:367
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:69
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
#define Assert(condition)
Definition: c.h:746
double base_frequency
Definition: statistics.h:80
#define STATS_MCV_MAGIC
Definition: statistics.h:65
size_t Size
Definition: c.h:474
#define MAXALIGN(LEN)
Definition: c.h:699
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
bool * isnull
Definition: statistics.h:81
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
void * palloc(Size size)
Definition: mcxt.c:950
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:75
#define elog(elevel,...)
Definition: elog.h:214
int i
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:669
double frequency
Definition: statistics.h:79
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:122
unsigned char bool
Definition: c.h:325

◆ statext_mcv_load()

MCVList* statext_mcv_load ( Oid  mvoid)

Definition at line 557 of file mcv.c.

References DatumGetByteaP, elog, ERROR, HeapTupleIsValid, ObjectIdGetDatum, ReleaseSysCache(), SearchSysCache1(), statext_mcv_deserialize(), STATEXTDATASTXOID, and SysCacheGetAttr().

Referenced by mcv_clauselist_selectivity().

558 {
559  MCVList *result;
560  bool isnull;
561  Datum mcvlist;
563 
564  if (!HeapTupleIsValid(htup))
565  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
566 
567  mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
568  Anum_pg_statistic_ext_data_stxdmcv, &isnull);
569 
570  if (isnull)
571  elog(ERROR,
572  "requested statistic kind \"%c\" is not yet built for statistics object %u",
573  STATS_EXT_DEPENDENCIES, mvoid);
574 
575  result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
576 
577  ReleaseSysCache(htup);
578 
579  return result;
580 }
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:993
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
#define DatumGetByteaP(X)
Definition: fmgr.h:330
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
uintptr_t Datum
Definition: postgres.h:367
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1377
int result
Definition: header.h:19
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define elog(elevel,...)
Definition: elog.h:214

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList mcvlist,
VacAttrStats **  stats 
)

Definition at line 618 of file mcv.c.

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString, DatumGetPointer, MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, PrepareSortSupportFromOrderingOp(), qsort_arg(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, store_att_byval, DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, MCVItem::values, values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

619 {
620  int i;
621  int dim;
622  int ndims = mcvlist->ndimensions;
623 
624  SortSupport ssup;
625  DimensionInfo *info;
626 
627  Size total_length;
628 
629  /* serialized items (indexes into arrays, etc.) */
630  bytea *raw;
631  char *ptr;
632  char *endptr PG_USED_FOR_ASSERTS_ONLY;
633 
634  /* values per dimension (and number of non-NULL values) */
635  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
636  int *counts = (int *) palloc0(sizeof(int) * ndims);
637 
638  /*
639  * We'll include some rudimentary information about the attribute types
640  * (length, by-val flag), so that we don't have to look them up while
641  * deserializating the MCV list (we already have the type OID in the
642  * header). This is safe, because when changing type of the attribute the
643  * statistics gets dropped automatically. We need to store the info about
644  * the arrays of deduplicated values anyway.
645  */
646  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
647 
648  /* sort support data for all attributes included in the MCV list */
649  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
650 
651  /* collect and deduplicate values for each dimension (attribute) */
652  for (dim = 0; dim < ndims; dim++)
653  {
654  int ndistinct;
655  TypeCacheEntry *typentry;
656 
657  /*
658  * Lookup the LT operator (can't get it from stats extra_data, as we
659  * don't know how to interpret that - scalar vs. array etc.).
660  */
661  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
662 
663  /* copy important info about the data type (length, by-value) */
664  info[dim].typlen = stats[dim]->attrtype->typlen;
665  info[dim].typbyval = stats[dim]->attrtype->typbyval;
666 
667  /* allocate space for values in the attribute and collect them */
668  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
669 
670  for (i = 0; i < mcvlist->nitems; i++)
671  {
672  /* skip NULL values - we don't need to deduplicate those */
673  if (mcvlist->items[i].isnull[dim])
674  continue;
675 
676  /* append the value at the end */
677  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
678  counts[dim] += 1;
679  }
680 
681  /* if there are just NULL values in this dimension, we're done */
682  if (counts[dim] == 0)
683  continue;
684 
685  /* sort and deduplicate the data */
686  ssup[dim].ssup_cxt = CurrentMemoryContext;
687  ssup[dim].ssup_collation = stats[dim]->attrcollid;
688  ssup[dim].ssup_nulls_first = false;
689 
690  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
691 
692  qsort_arg(values[dim], counts[dim], sizeof(Datum),
693  compare_scalars_simple, &ssup[dim]);
694 
695  /*
696  * Walk through the array and eliminate duplicate values, but keep the
697  * ordering (so that we can do bsearch later). We know there's at
698  * least one item as (counts[dim] != 0), so we can skip the first
699  * element.
700  */
701  ndistinct = 1; /* number of distinct values */
702  for (i = 1; i < counts[dim]; i++)
703  {
704  /* expect sorted array */
705  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
706 
707  /* if the value is the same as the previous one, we can skip it */
708  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
709  continue;
710 
711  values[dim][ndistinct] = values[dim][i];
712  ndistinct += 1;
713  }
714 
715  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
716  Assert(ndistinct <= PG_UINT16_MAX);
717 
718  /*
719  * Store additional info about the attribute - number of deduplicated
720  * values, and also size of the serialized data. For fixed-length data
721  * types this is trivial to compute, for varwidth types we need to
722  * actually walk the array and sum the sizes.
723  */
724  info[dim].nvalues = ndistinct;
725 
726  if (info[dim].typbyval) /* by-value data types */
727  {
728  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
729 
730  /*
731  * We copy the data into the MCV item during deserialization, so
732  * we don't need to allocate any extra space.
733  */
734  info[dim].nbytes_aligned = 0;
735  }
736  else if (info[dim].typlen > 0) /* fixed-length by-ref */
737  {
738  /*
739  * We don't care about alignment in the serialized data, so we
740  * pack the data as much as possible. But we also track how much
741  * data will be needed after deserialization, and in that case we
742  * need to account for alignment of each item.
743  *
744  * Note: As the items are fixed-length, we could easily compute
745  * this during deserialization, but we do it here anyway.
746  */
747  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
748  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
749  }
750  else if (info[dim].typlen == -1) /* varlena */
751  {
752  info[dim].nbytes = 0;
753  info[dim].nbytes_aligned = 0;
754  for (i = 0; i < info[dim].nvalues; i++)
755  {
756  Size len;
757 
758  /*
759  * For varlena values, we detoast the values and store the
760  * length and data separately. We don't bother with alignment
761  * here, which means that during deserialization we need to
762  * copy the fields and only access the copies.
763  */
764  values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
765 
766  /* serialized length (uint32 length + data) */
767  len = VARSIZE_ANY_EXHDR(values[dim][i]);
768  info[dim].nbytes += sizeof(uint32); /* length */
769  info[dim].nbytes += len; /* value (no header) */
770 
771  /*
772  * During deserialization we'll build regular varlena values
773  * with full headers, and we need to align them properly.
774  */
775  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
776  }
777  }
778  else if (info[dim].typlen == -2) /* cstring */
779  {
780  info[dim].nbytes = 0;
781  info[dim].nbytes_aligned = 0;
782  for (i = 0; i < info[dim].nvalues; i++)
783  {
784  Size len;
785 
786  /*
787  * For cstring, we do similar thing as for varlena - first we
788  * store the length as uint32 and then the data. We don't care
789  * about alignment, which means that during deserialization we
790  * need to copy the fields and only access the copies.
791  */
792 
793  /* c-strings include terminator, so +1 byte */
794  len = strlen(DatumGetCString(values[dim][i])) + 1;
795  info[dim].nbytes += sizeof(uint32); /* length */
796  info[dim].nbytes += len; /* value */
797 
798  /* space needed for properly aligned deserialized copies */
799  info[dim].nbytes_aligned += MAXALIGN(len);
800  }
801  }
802 
803  /* we know (count>0) so there must be some data */
804  Assert(info[dim].nbytes > 0);
805  }
806 
807  /*
808  * Now we can finally compute how much space we'll actually need for the
809  * whole serialized MCV list (varlena header, MCV header, dimension info
810  * for each attribute, deduplicated values and items).
811  */
812  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
813  + sizeof(AttrNumber) /* ndimensions */
814  + (ndims * sizeof(Oid)); /* attribute types */
815 
816  /* dimension info */
817  total_length += ndims * sizeof(DimensionInfo);
818 
819  /* add space for the arrays of deduplicated values */
820  for (i = 0; i < ndims; i++)
821  total_length += info[i].nbytes;
822 
823  /*
824  * And finally account for the items (those are fixed-length, thanks to
825  * replacing values with uint16 indexes into the deduplicated arrays).
826  */
827  total_length += mcvlist->nitems * ITEM_SIZE(dim);
828 
829  /*
830  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
831  * we set them to zero to make the result more compressible).
832  */
833  raw = (bytea *) palloc0(VARHDRSZ + total_length);
834  SET_VARSIZE(raw, VARHDRSZ + total_length);
835 
836  ptr = VARDATA(raw);
837  endptr = ptr + total_length;
838 
839  /* copy the MCV list header fields, one by one */
840  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
841  ptr += sizeof(uint32);
842 
843  memcpy(ptr, &mcvlist->type, sizeof(uint32));
844  ptr += sizeof(uint32);
845 
846  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
850  ptr += sizeof(AttrNumber);
851 
852  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
853  ptr += (sizeof(Oid) * ndims);
854 
855  /* store information about the attributes (data amounts, ...) */
856  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
857  ptr += sizeof(DimensionInfo) * ndims;
858 
859  /* Copy the deduplicated values for all attributes to the output. */
860  for (dim = 0; dim < ndims; dim++)
861  {
862  /* remember the starting point for Asserts later */
863  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
864 
865  for (i = 0; i < info[dim].nvalues; i++)
866  {
867  Datum value = values[dim][i];
868 
869  if (info[dim].typbyval) /* passed by value */
870  {
871  Datum tmp;
872 
873  /*
874  * For values passed by value, we need to copy just the
875  * significant bytes - we can't use memcpy directly, as that
876  * assumes little endian behavior. store_att_byval does
877  * almost what we need, but it requires properly aligned
878  * buffer - the output buffer does not guarantee that. So we
879  * simply use a local Datum variable (which guarantees proper
880  * alignment), and then copy the value from it.
881  */
882  store_att_byval(&tmp, value, info[dim].typlen);
883 
884  memcpy(ptr, &tmp, info[dim].typlen);
885  ptr += info[dim].typlen;
886  }
887  else if (info[dim].typlen > 0) /* passed by reference */
888  {
889  /* no special alignment needed, treated as char array */
890  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
891  ptr += info[dim].typlen;
892  }
893  else if (info[dim].typlen == -1) /* varlena */
894  {
896 
897  /* copy the length */
898  memcpy(ptr, &len, sizeof(uint32));
899  ptr += sizeof(uint32);
900 
901  /* data from the varlena value (without the header) */
902  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
903  ptr += len;
904  }
905  else if (info[dim].typlen == -2) /* cstring */
906  {
907  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
908 
909  /* copy the length */
910  memcpy(ptr, &len, sizeof(uint32));
911  ptr += sizeof(uint32);
912 
913  /* value */
914  memcpy(ptr, DatumGetCString(value), len);
915  ptr += len;
916  }
917 
918  /* no underflows or overflows */
919  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
920  }
921 
922  /* we should get exactly nbytes of data for this dimension */
923  Assert((ptr - start) == info[dim].nbytes);
924  }
925 
926  /* Serialize the items, with uint16 indexes instead of the values. */
927  for (i = 0; i < mcvlist->nitems; i++)
928  {
929  MCVItem *mcvitem = &mcvlist->items[i];
930 
931  /* don't write beyond the allocated space */
932  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
933 
934  /* copy NULL and frequency flags into the serialized MCV */
935  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
936  ptr += sizeof(bool) * ndims;
937 
938  memcpy(ptr, &mcvitem->frequency, sizeof(double));
939  ptr += sizeof(double);
940 
941  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
942  ptr += sizeof(double);
943 
944  /* store the indexes last */
945  for (dim = 0; dim < ndims; dim++)
946  {
947  uint16 index = 0;
948  Datum *value;
949 
950  /* do the lookup only for non-NULL values */
951  if (!mcvitem->isnull[dim])
952  {
953  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
954  info[dim].nvalues, sizeof(Datum),
955  compare_scalars_simple, &ssup[dim]);
956 
957  Assert(value != NULL); /* serialization or deduplication
958  * error */
959 
960  /* compute index within the deduplicated array */
961  index = (uint16) (value - values[dim]);
962 
963  /* check the index is within expected bounds */
964  Assert(index < info[dim].nvalues);
965  }
966 
967  /* copy the index into the serialized MCV */
968  memcpy(ptr, &index, sizeof(uint16));
969  ptr += sizeof(uint16);
970  }
971 
972  /* make sure we don't overflow the allocated value */
973  Assert(ptr <= endptr);
974  }
975 
976  /* at this point we expect to match the total_length exactly */
977  Assert(ptr == endptr);
978 
979  pfree(values);
980  pfree(counts);
981 
982  return raw;
983 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
uint32 nitems
Definition: statistics.h:90
bool ssup_nulls_first
Definition: sortsupport.h:75
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:569
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:135
struct DimensionInfo DimensionInfo
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:374
void pfree(void *pointer)
Definition: mcxt.c:1057
#define PG_UINT16_MAX
Definition: c.h:456
#define DatumGetCString(X)
Definition: postgres.h:566
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int compare_scalars_simple(const void *a, const void *b, void *arg)
unsigned int uint32
Definition: c.h:375
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
uint32 type
Definition: statistics.h:89
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
#define store_att_byval(T, newdatum, attlen)
Definition: tupmacs.h:226
void * palloc0(Size size)
Definition: mcxt.c:981
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define ITEM_SIZE(ndims)
Definition: mcv.c:56
static struct @143 value
#define Assert(condition)
Definition: c.h:746
double base_frequency
Definition: statistics.h:80
size_t Size
Definition: c.h:474
#define MAXALIGN(LEN)
Definition: c.h:699
Oid attrcollid
Definition: vacuum.h:127
#define DatumGetPointer(X)
Definition: postgres.h:549
bool * isnull
Definition: statistics.h:81
static Datum values[MAXATTR]
Definition: bootstrap.c:165
Form_pg_type attrtype
Definition: vacuum.h:126
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:131
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
Definition: c.h:563
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21
double frequency
Definition: statistics.h:79
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:122
unsigned char bool
Definition: c.h:325