PostgreSQL Source Code  git master
mcv.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "fmgr.h"
#include "funcapi.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/fmgroids.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
Include dependency graph for mcv.c:

Go to the source code of this file.

Macros

#define ITEM_SIZE(ndims)   ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
 
#define MinSizeOfMCVList   (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
 
#define SizeOfMCVList(ndims, nitems)
 
#define RESULT_MERGE(value, is_or, match)   ((is_or) ? ((value) || (match)) : ((value) && (match)))
 
#define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
 

Functions

static MultiSortSupport build_mss (StatsBuildData *data)
 
static SortItembuild_distinct_groups (int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
 
static SortItem ** build_column_frequencies (SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
 
static int count_distinct_groups (int numrows, SortItem *items, MultiSortSupport mss)
 
static double get_mincount_for_mcv_list (int samplerows, double totalrows)
 
MCVListstatext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
static int compare_sort_item_count (const void *a, const void *b)
 
static int sort_item_compare (const void *a, const void *b, void *arg)
 
MCVListstatext_mcv_load (Oid mvoid)
 
byteastatext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVListstatext_mcv_deserialize (bytea *data)
 
Datum pg_stats_ext_mcvlist_items (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_in (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_out (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_recv (PG_FUNCTION_ARGS)
 
Datum pg_mcv_list_send (PG_FUNCTION_ARGS)
 
static int mcv_match_expression (Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
 
static boolmcv_get_match_bitmap (PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Macro Definition Documentation

◆ ITEM_SIZE

#define ITEM_SIZE (   ndims)    ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))

Definition at line 57 of file mcv.c.

Referenced by statext_mcv_serialize().

◆ MinSizeOfMCVList

#define MinSizeOfMCVList   (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))

Definition at line 63 of file mcv.c.

Referenced by statext_mcv_deserialize().

◆ RESULT_IS_FINAL

#define RESULT_IS_FINAL (   value,
  is_or 
)    ((is_or) ? (value) : (!(value)))

Definition at line 104 of file mcv.c.

Referenced by mcv_get_match_bitmap().

◆ RESULT_MERGE

#define RESULT_MERGE (   value,
  is_or,
  match 
)    ((is_or) ? ((value) || (match)) : ((value) && (match)))

Definition at line 92 of file mcv.c.

Referenced by mcv_get_match_bitmap().

◆ SizeOfMCVList

#define SizeOfMCVList (   ndims,
  nitems 
)
Value:
((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
((ndims) * sizeof(DimensionInfo)) + \
((nitems) * ITEM_SIZE(ndims)))
#define MinSizeOfMCVList
Definition: mcv.c:63
struct DimensionInfo DimensionInfo
unsigned int Oid
Definition: postgres_ext.h:31
#define ITEM_SIZE(ndims)
Definition: mcv.c:57

Definition at line 72 of file mcv.c.

Referenced by statext_mcv_deserialize().

Function Documentation

◆ build_column_frequencies()

static SortItem ** build_column_frequencies ( SortItem groups,
int  ngroups,
MultiSortSupport  mss,
int *  ncounts 
)
static

Definition at line 494 of file mcv.c.

References Assert, SortItem::count, i, SortItem::isnull, MAXALIGN, MultiSortSupportData::ndims, palloc(), qsort_arg(), sort_item_compare(), MultiSortSupportData::ssup, and SortItem::values.

Referenced by statext_mcv_build().

496 {
497  int i,
498  dim;
499  SortItem **result;
500  char *ptr;
501 
502  Assert(groups);
503  Assert(ncounts);
504 
505  /* allocate arrays for all columns as a single chunk */
506  ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
507  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
508 
509  /* initial array of pointers */
510  result = (SortItem **) ptr;
511  ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
512 
513  for (dim = 0; dim < mss->ndims; dim++)
514  {
515  SortSupport ssup = &mss->ssup[dim];
516 
517  /* array of values for a single column */
518  result[dim] = (SortItem *) ptr;
519  ptr += MAXALIGN(sizeof(SortItem) * ngroups);
520 
521  /* extract data for the dimension */
522  for (i = 0; i < ngroups; i++)
523  {
524  /* point into the input groups */
525  result[dim][i].values = &groups[i].values[dim];
526  result[dim][i].isnull = &groups[i].isnull[dim];
527  result[dim][i].count = groups[i].count;
528  }
529 
530  /* sort the values, deduplicate */
531  qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
532  sort_item_compare, ssup);
533 
534  /*
535  * Identify distinct values, compute frequency (there might be
536  * multiple MCV items containing this value, so we need to sum counts
537  * from all of them.
538  */
539  ncounts[dim] = 1;
540  for (i = 1; i < ngroups; i++)
541  {
542  if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
543  {
544  result[dim][ncounts[dim] - 1].count += result[dim][i].count;
545  continue;
546  }
547 
548  result[dim][ncounts[dim]] = result[dim][i];
549 
550  ncounts[dim]++;
551  }
552  }
553 
554  return result;
555 }
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
static int sort_item_compare(const void *a, const void *b, void *arg)
Definition: mcv.c:469
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
int result
Definition: header.h:19
#define Assert(condition)
Definition: c.h:804
#define MAXALIGN(LEN)
Definition: c.h:757
void * palloc(Size size)
Definition: mcxt.c:1062
int i

◆ build_distinct_groups()

static SortItem * build_distinct_groups ( int  numrows,
SortItem items,
MultiSortSupport  mss,
int *  ndistinct 
)
static

Definition at line 428 of file mcv.c.

References Assert, compare_sort_item_count(), SortItem::count, count_distinct_groups(), i, multi_sort_compare(), palloc(), and pg_qsort().

Referenced by statext_mcv_build().

430 {
431  int i,
432  j;
433  int ngroups = count_distinct_groups(numrows, items, mss);
434 
435  SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
436 
437  j = 0;
438  groups[0] = items[0];
439  groups[0].count = 1;
440 
441  for (i = 1; i < numrows; i++)
442  {
443  /* Assume sorted in ascending order. */
444  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
445 
446  /* New distinct group detected. */
447  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
448  {
449  groups[++j] = items[i];
450  groups[j].count = 0;
451  }
452 
453  groups[j].count++;
454  }
455 
456  /* ensure we filled the expected number of distinct groups */
457  Assert(j + 1 == ngroups);
458 
459  /* Sort the distinct groups by frequency (in descending order). */
460  pg_qsort((void *) groups, ngroups, sizeof(SortItem),
462 
463  *ndistinct = ngroups;
464  return groups;
465 }
static int compare_sort_item_count(const void *a, const void *b)
Definition: mcv.c:407
#define Assert(condition)
Definition: c.h:804
int multi_sort_compare(const void *a, const void *b, void *arg)
void pg_qsort(void *base, size_t nel, size_t elsize, int(*cmp)(const void *, const void *))
void * palloc(Size size)
Definition: mcxt.c:1062
int i
static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
Definition: mcv.c:383

◆ build_mss()

static MultiSortSupport build_mss ( StatsBuildData data)
static

Definition at line 351 of file mcv.c.

References VacAttrStats::attrcollid, VacAttrStats::attrtypid, elog, ERROR, i, InvalidOid, lookup_type_cache(), TypeCacheEntry::lt_opr, multi_sort_add_dimension(), multi_sort_init(), StatsBuildData::nattnums, StatsBuildData::stats, generate_unaccent_rules::type, and TYPECACHE_LT_OPR.

Referenced by statext_mcv_build().

352 {
353  int i;
354  int numattrs = data->nattnums;
355 
356  /* Sort by multiple columns (using array of SortSupport) */
357  MultiSortSupport mss = multi_sort_init(numattrs);
358 
359  /* prepare the sort functions for all the attributes */
360  for (i = 0; i < numattrs; i++)
361  {
362  VacAttrStats *colstat = data->stats[i];
364 
365  type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
366  if (type->lt_opr == InvalidOid) /* shouldn't happen */
367  elog(ERROR, "cache lookup failed for ordering operator for type %u",
368  colstat->attrtypid);
369 
370  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
371  }
372 
373  return mss;
374 }
Oid attrtypid
Definition: vacuum.h:124
#define ERROR
Definition: elog.h:46
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
MultiSortSupport multi_sort_init(int ndims)
VacAttrStats ** stats
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define InvalidOid
Definition: postgres_ext.h:36
Oid attrcollid
Definition: vacuum.h:127
#define elog(elevel,...)
Definition: elog.h:232
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:137

◆ compare_sort_item_count()

static int compare_sort_item_count ( const void *  a,
const void *  b 
)
static

Definition at line 407 of file mcv.c.

References SortItem::count.

Referenced by build_distinct_groups().

408 {
409  SortItem *ia = (SortItem *) a;
410  SortItem *ib = (SortItem *) b;
411 
412  if (ia->count == ib->count)
413  return 0;
414  else if (ia->count > ib->count)
415  return -1;
416 
417  return 1;
418 }

◆ count_distinct_groups()

static int count_distinct_groups ( int  numrows,
SortItem items,
MultiSortSupport  mss 
)
static

Definition at line 383 of file mcv.c.

References Assert, i, and multi_sort_compare().

Referenced by build_distinct_groups().

384 {
385  int i;
386  int ndistinct;
387 
388  ndistinct = 1;
389  for (i = 1; i < numrows; i++)
390  {
391  /* make sure the array really is sorted */
392  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
393 
394  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
395  ndistinct += 1;
396  }
397 
398  return ndistinct;
399 }
#define Assert(condition)
Definition: c.h:804
int multi_sort_compare(const void *a, const void *b, void *arg)
int i

◆ get_mincount_for_mcv_list()

static double get_mincount_for_mcv_list ( int  samplerows,
double  totalrows 
)
static

Definition at line 152 of file mcv.c.

Referenced by statext_mcv_build().

153 {
154  double n = samplerows;
155  double N = totalrows;
156  double numer,
157  denom;
158 
159  numer = n * (N - n);
160  denom = N - n + 0.04 * n * (N - 1);
161 
162  /* Guard against division by zero (possible if n = N = 1) */
163  if (denom == 0.0)
164  return 0.0;
165 
166  return numer / denom;
167 }

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo root,
StatisticExtInfo stat,
MCVList mcv,
Node clause,
bool **  or_matches,
Selectivity basesel,
Selectivity overlap_mcvsel,
Selectivity overlap_basesel,
Selectivity totalsel 
)

Definition at line 2110 of file mcv.c.

References MCVItem::base_frequency, StatisticExtInfo::exprs, MCVItem::frequency, i, MCVList::items, StatisticExtInfo::keys, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), and pfree().

Referenced by statext_mcv_clauselist_selectivity().

2114 {
2115  Selectivity s = 0.0;
2116  bool *new_matches;
2117  int i;
2118 
2119  /* build the OR-matches bitmap, if not built already */
2120  if (*or_matches == NULL)
2121  *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2122 
2123  /* build the match bitmap for the new clause */
2124  new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2125  stat->exprs, mcv, false);
2126 
2127  /*
2128  * Sum the frequencies for all the MCV items matching this clause and also
2129  * those matching the overlap between this clause and any of the preceding
2130  * clauses as described above.
2131  */
2132  *basesel = 0.0;
2133  *overlap_mcvsel = 0.0;
2134  *overlap_basesel = 0.0;
2135  *totalsel = 0.0;
2136  for (i = 0; i < mcv->nitems; i++)
2137  {
2138  *totalsel += mcv->items[i].frequency;
2139 
2140  if (new_matches[i])
2141  {
2142  s += mcv->items[i].frequency;
2143  *basesel += mcv->items[i].base_frequency;
2144 
2145  if ((*or_matches)[i])
2146  {
2147  *overlap_mcvsel += mcv->items[i].frequency;
2148  *overlap_basesel += mcv->items[i].base_frequency;
2149  }
2150  }
2151 
2152  /* update the OR-matches bitmap for the next clause */
2153  (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2154  }
2155 
2156  pfree(new_matches);
2157 
2158  return s;
2159 }
uint32 nitems
Definition: statistics.h:91
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1606
double Selectivity
Definition: nodes.h:669
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
#define list_make1(x1)
Definition: pg_list.h:206
void pfree(void *pointer)
Definition: mcxt.c:1169
void * palloc0(Size size)
Definition: mcxt.c:1093
const symbol * s
Definition: header.h:17
double base_frequency
Definition: statistics.h:81
Bitmapset * keys
Definition: pathnodes.h:939
int i
double frequency
Definition: statistics.h:80

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo root,
StatisticExtInfo stat,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Selectivity basesel,
Selectivity totalsel 
)

Definition at line 2033 of file mcv.c.

References MCVItem::base_frequency, StatisticExtInfo::exprs, MCVItem::frequency, i, MCVList::items, StatisticExtInfo::keys, mcv_get_match_bitmap(), MCVList::nitems, statext_mcv_load(), and StatisticExtInfo::statOid.

Referenced by statext_mcv_clauselist_selectivity().

2038 {
2039  int i;
2040  MCVList *mcv;
2041  Selectivity s = 0.0;
2042 
2043  /* match/mismatch bitmap for each MCV item */
2044  bool *matches = NULL;
2045 
2046  /* load the MCV list stored in the statistics object */
2047  mcv = statext_mcv_load(stat->statOid);
2048 
2049  /* build a match bitmap for the clauses */
2050  matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2051  mcv, false);
2052 
2053  /* sum frequencies for all the matching MCV items */
2054  *basesel = 0.0;
2055  *totalsel = 0.0;
2056  for (i = 0; i < mcv->nitems; i++)
2057  {
2058  *totalsel += mcv->items[i].frequency;
2059 
2060  if (matches[i] != false)
2061  {
2062  *basesel += mcv->items[i].base_frequency;
2063  s += mcv->items[i].frequency;
2064  }
2065  }
2066 
2067  return s;
2068 }
uint32 nitems
Definition: statistics.h:91
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1606
double Selectivity
Definition: nodes.h:669
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
MCVList * statext_mcv_load(Oid mvoid)
Definition: mcv.c:562
const symbol * s
Definition: header.h:17
double base_frequency
Definition: statistics.h:81
Bitmapset * keys
Definition: pathnodes.h:939
int i
double frequency
Definition: statistics.h:80

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)

Definition at line 1991 of file mcv.c.

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

1995 {
1996  Selectivity other_sel;
1997  Selectivity sel;
1998 
1999  /* estimated selectivity of values not covered by MCV matches */
2000  other_sel = simple_sel - mcv_basesel;
2001  CLAMP_PROBABILITY(other_sel);
2002 
2003  /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2004  if (other_sel > 1.0 - mcv_totalsel)
2005  other_sel = 1.0 - mcv_totalsel;
2006 
2007  /* overall selectivity is the sum of the MCV and non-MCV parts */
2008  sel = mcv_sel + other_sel;
2009  CLAMP_PROBABILITY(sel);
2010 
2011  return sel;
2012 }
double Selectivity
Definition: nodes.h:669
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63

◆ mcv_get_match_bitmap()

static bool* mcv_get_match_bitmap ( PlannerInfo root,
List clauses,
Bitmapset keys,
List exprs,
MCVList mcvlist,
bool  is_or 
)
static

Definition at line 1606 of file mcv.c.

References NullTest::arg, OpExpr::args, ScalarArrayOpExpr::args, BoolExpr::args, ARR_ELEMTYPE, Assert, bms_member_index(), bms_num_members(), Const::constisnull, Const::constvalue, DatumGetArrayTypeP, DatumGetBool, deconstruct_array(), elog, ERROR, examine_opclause_args(), fmgr_info(), FunctionCall2Coll(), get_opcode(), get_typlenbyvalalign(), i, idx(), is_andclause(), IS_NOT_NULL, is_notclause(), IS_NULL, is_opclause(), is_orclause(), IsA, MCVItem::isnull, MCVList::items, lfirst, list_length(), mcv_match_expression(), NIL, MCVList::nitems, NullTest::nulltesttype, OpExpr::opno, ScalarArrayOpExpr::opno, palloc(), pfree(), RESULT_IS_FINAL, RESULT_MERGE, STATS_MCVLIST_MAX_ITEMS, Node::type, ScalarArrayOpExpr::useOr, MCVItem::values, Var::varattno, and Var::vartype.

Referenced by mcv_clause_selectivity_or(), and mcv_clauselist_selectivity().

1609 {
1610  int i;
1611  ListCell *l;
1612  bool *matches;
1613 
1614  /* The bitmap may be partially built. */
1615  Assert(clauses != NIL);
1616  Assert(list_length(clauses) >= 1);
1617  Assert(mcvlist != NULL);
1618  Assert(mcvlist->nitems > 0);
1619  Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1620 
1621  matches = palloc(sizeof(bool) * mcvlist->nitems);
1622  memset(matches, (is_or) ? false : true,
1623  sizeof(bool) * mcvlist->nitems);
1624 
1625  /*
1626  * Loop through the list of clauses, and for each of them evaluate all the
1627  * MCV items not yet eliminated by the preceding clauses.
1628  */
1629  foreach(l, clauses)
1630  {
1631  Node *clause = (Node *) lfirst(l);
1632 
1633  /* if it's a RestrictInfo, then extract the clause */
1634  if (IsA(clause, RestrictInfo))
1635  clause = (Node *) ((RestrictInfo *) clause)->clause;
1636 
1637  /*
1638  * Handle the various types of clauses - OpClause, NullTest and
1639  * AND/OR/NOT
1640  */
1641  if (is_opclause(clause))
1642  {
1643  OpExpr *expr = (OpExpr *) clause;
1644  FmgrInfo opproc;
1645 
1646  /* valid only after examine_opclause_args returns true */
1647  Node *clause_expr;
1648  Const *cst;
1649  bool expronleft;
1650  int idx;
1651  Oid collid;
1652 
1653  fmgr_info(get_opcode(expr->opno), &opproc);
1654 
1655  /* extract the var/expr and const from the expression */
1656  if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1657  elog(ERROR, "incompatible clause");
1658 
1659  /* match the attribute/expression to a dimension of the statistic */
1660  idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1661 
1662  Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1663 
1664  /*
1665  * Walk through the MCV items and evaluate the current clause. We
1666  * can skip items that were already ruled out, and terminate if
1667  * there are no remaining MCV items that might possibly match.
1668  */
1669  for (i = 0; i < mcvlist->nitems; i++)
1670  {
1671  bool match = true;
1672  MCVItem *item = &mcvlist->items[i];
1673 
1674  Assert(idx >= 0);
1675 
1676  /*
1677  * When the MCV item or the Const value is NULL we can treat
1678  * this as a mismatch. We must not call the operator because
1679  * of strictness.
1680  */
1681  if (item->isnull[idx] || cst->constisnull)
1682  {
1683  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1684  continue;
1685  }
1686 
1687  /*
1688  * Skip MCV items that can't change result in the bitmap. Once
1689  * the value gets false for AND-lists, or true for OR-lists,
1690  * we don't need to look at more clauses.
1691  */
1692  if (RESULT_IS_FINAL(matches[i], is_or))
1693  continue;
1694 
1695  /*
1696  * First check whether the constant is below the lower
1697  * boundary (in that case we can skip the bucket, because
1698  * there's no overlap).
1699  *
1700  * We don't store collations used to build the statistics, but
1701  * we can use the collation for the attribute itself, as
1702  * stored in varcollid. We do reset the statistics after a
1703  * type change (including collation change), so this is OK.
1704  * For expressions, we use the collation extracted from the
1705  * expression itself.
1706  */
1707  if (expronleft)
1708  match = DatumGetBool(FunctionCall2Coll(&opproc,
1709  collid,
1710  item->values[idx],
1711  cst->constvalue));
1712  else
1713  match = DatumGetBool(FunctionCall2Coll(&opproc,
1714  collid,
1715  cst->constvalue,
1716  item->values[idx]));
1717 
1718  /* update the match bitmap with the result */
1719  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1720  }
1721  }
1722  else if (IsA(clause, ScalarArrayOpExpr))
1723  {
1724  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1725  FmgrInfo opproc;
1726 
1727  /* valid only after examine_opclause_args returns true */
1728  Node *clause_expr;
1729  Const *cst;
1730  bool expronleft;
1731  Oid collid;
1732  int idx;
1733 
1734  /* array evaluation */
1735  ArrayType *arrayval;
1736  int16 elmlen;
1737  bool elmbyval;
1738  char elmalign;
1739  int num_elems;
1740  Datum *elem_values;
1741  bool *elem_nulls;
1742 
1743  fmgr_info(get_opcode(expr->opno), &opproc);
1744 
1745  /* extract the var/expr and const from the expression */
1746  if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1747  elog(ERROR, "incompatible clause");
1748 
1749  /* ScalarArrayOpExpr has the Var always on the left */
1750  Assert(expronleft);
1751 
1752  /* XXX what if (cst->constisnull == NULL)? */
1753  if (!cst->constisnull)
1754  {
1755  arrayval = DatumGetArrayTypeP(cst->constvalue);
1757  &elmlen, &elmbyval, &elmalign);
1758  deconstruct_array(arrayval,
1759  ARR_ELEMTYPE(arrayval),
1760  elmlen, elmbyval, elmalign,
1761  &elem_values, &elem_nulls, &num_elems);
1762  }
1763 
1764  /* match the attribute/expression to a dimension of the statistic */
1765  idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1766 
1767  /*
1768  * Walk through the MCV items and evaluate the current clause. We
1769  * can skip items that were already ruled out, and terminate if
1770  * there are no remaining MCV items that might possibly match.
1771  */
1772  for (i = 0; i < mcvlist->nitems; i++)
1773  {
1774  int j;
1775  bool match = !expr->useOr;
1776  MCVItem *item = &mcvlist->items[i];
1777 
1778  /*
1779  * When the MCV item or the Const value is NULL we can treat
1780  * this as a mismatch. We must not call the operator because
1781  * of strictness.
1782  */
1783  if (item->isnull[idx] || cst->constisnull)
1784  {
1785  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1786  continue;
1787  }
1788 
1789  /*
1790  * Skip MCV items that can't change result in the bitmap. Once
1791  * the value gets false for AND-lists, or true for OR-lists,
1792  * we don't need to look at more clauses.
1793  */
1794  if (RESULT_IS_FINAL(matches[i], is_or))
1795  continue;
1796 
1797  for (j = 0; j < num_elems; j++)
1798  {
1799  Datum elem_value = elem_values[j];
1800  bool elem_isnull = elem_nulls[j];
1801  bool elem_match;
1802 
1803  /* NULL values always evaluate as not matching. */
1804  if (elem_isnull)
1805  {
1806  match = RESULT_MERGE(match, expr->useOr, false);
1807  continue;
1808  }
1809 
1810  /*
1811  * Stop evaluating the array elements once we reach a
1812  * matching value that can't change - ALL() is the same as
1813  * AND-list, ANY() is the same as OR-list.
1814  */
1815  if (RESULT_IS_FINAL(match, expr->useOr))
1816  break;
1817 
1818  elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1819  collid,
1820  item->values[idx],
1821  elem_value));
1822 
1823  match = RESULT_MERGE(match, expr->useOr, elem_match);
1824  }
1825 
1826  /* update the match bitmap with the result */
1827  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1828  }
1829  }
1830  else if (IsA(clause, NullTest))
1831  {
1832  NullTest *expr = (NullTest *) clause;
1833  Node *clause_expr = (Node *) (expr->arg);
1834 
1835  /* match the attribute/expression to a dimension of the statistic */
1836  int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1837 
1838  /*
1839  * Walk through the MCV items and evaluate the current clause. We
1840  * can skip items that were already ruled out, and terminate if
1841  * there are no remaining MCV items that might possibly match.
1842  */
1843  for (i = 0; i < mcvlist->nitems; i++)
1844  {
1845  bool match = false; /* assume mismatch */
1846  MCVItem *item = &mcvlist->items[i];
1847 
1848  /* if the clause mismatches the MCV item, update the bitmap */
1849  switch (expr->nulltesttype)
1850  {
1851  case IS_NULL:
1852  match = (item->isnull[idx]) ? true : match;
1853  break;
1854 
1855  case IS_NOT_NULL:
1856  match = (!item->isnull[idx]) ? true : match;
1857  break;
1858  }
1859 
1860  /* now, update the match bitmap, depending on OR/AND type */
1861  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1862  }
1863  }
1864  else if (is_orclause(clause) || is_andclause(clause))
1865  {
1866  /* AND/OR clause, with all subclauses being compatible */
1867 
1868  int i;
1869  BoolExpr *bool_clause = ((BoolExpr *) clause);
1870  List *bool_clauses = bool_clause->args;
1871 
1872  /* match/mismatch bitmap for each MCV item */
1873  bool *bool_matches = NULL;
1874 
1875  Assert(bool_clauses != NIL);
1876  Assert(list_length(bool_clauses) >= 2);
1877 
1878  /* build the match bitmap for the OR-clauses */
1879  bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1880  mcvlist, is_orclause(clause));
1881 
1882  /*
1883  * Merge the bitmap produced by mcv_get_match_bitmap into the
1884  * current one. We need to consider if we're evaluating AND or OR
1885  * condition when merging the results.
1886  */
1887  for (i = 0; i < mcvlist->nitems; i++)
1888  matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1889 
1890  pfree(bool_matches);
1891  }
1892  else if (is_notclause(clause))
1893  {
1894  /* NOT clause, with all subclauses compatible */
1895 
1896  int i;
1897  BoolExpr *not_clause = ((BoolExpr *) clause);
1898  List *not_args = not_clause->args;
1899 
1900  /* match/mismatch bitmap for each MCV item */
1901  bool *not_matches = NULL;
1902 
1903  Assert(not_args != NIL);
1904  Assert(list_length(not_args) == 1);
1905 
1906  /* build the match bitmap for the NOT-clause */
1907  not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1908  mcvlist, false);
1909 
1910  /*
1911  * Merge the bitmap produced by mcv_get_match_bitmap into the
1912  * current one. We're handling a NOT clause, so invert the result
1913  * before merging it into the global bitmap.
1914  */
1915  for (i = 0; i < mcvlist->nitems; i++)
1916  matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1917 
1918  pfree(not_matches);
1919  }
1920  else if (IsA(clause, Var))
1921  {
1922  /* Var (has to be a boolean Var, possibly from below NOT) */
1923 
1924  Var *var = (Var *) (clause);
1925 
1926  /* match the attribute to a dimension of the statistic */
1927  int idx = bms_member_index(keys, var->varattno);
1928 
1929  Assert(var->vartype == BOOLOID);
1930 
1931  /*
1932  * Walk through the MCV items and evaluate the current clause. We
1933  * can skip items that were already ruled out, and terminate if
1934  * there are no remaining MCV items that might possibly match.
1935  */
1936  for (i = 0; i < mcvlist->nitems; i++)
1937  {
1938  MCVItem *item = &mcvlist->items[i];
1939  bool match = false;
1940 
1941  /* if the item is NULL, it's a mismatch */
1942  if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1943  match = true;
1944 
1945  /* update the result bitmap */
1946  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1947  }
1948  }
1949  else
1950  elog(ERROR, "unknown clause type: %d", clause->type);
1951  }
1952 
1953  return matches;
1954 }
Datum constvalue
Definition: primnodes.h:219
uint32 nitems
Definition: statistics.h:91
signed short int16
Definition: c.h:428
#define NIL
Definition: pg_list.h:65
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1606
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:587
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:106
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2218
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:97
Definition: nodes.h:536
AttrNumber varattno
Definition: primnodes.h:191
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
Datum * values
Definition: statistics.h:83
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1148
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:186
void pfree(void *pointer)
Definition: mcxt.c:1169
#define ERROR
Definition: elog.h:46
Oid vartype
Definition: primnodes.h:193
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
Expr * arg
Definition: primnodes.h:1265
#define RESULT_MERGE(value, is_or, match)
Definition: mcv.c:92
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
NodeTag type
Definition: nodes.h:538
#define DatumGetBool(X)
Definition: postgres.h:437
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:453
#define RESULT_IS_FINAL(value, is_or)
Definition: mcv.c:104
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:115
uintptr_t Datum
Definition: postgres.h:411
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
NullTestType nulltesttype
Definition: primnodes.h:1266
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1256
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
List * args
Definition: primnodes.h:626
bool * isnull
Definition: statistics.h:82
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3491
void * palloc(Size size)
Definition: mcxt.c:1062
static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
Definition: mcv.c:1538
#define elog(elevel,...)
Definition: elog.h:232
int i
Oid opno
Definition: primnodes.h:542
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:66
List * args
Definition: primnodes.h:548
Definition: pg_list.h:50
#define ARR_ELEMTYPE(a)
Definition: array.h:285
bool constisnull
Definition: primnodes.h:220
bool examine_opclause_args(List *args, Node **exprp, Const **cstp, bool *expronleftp)
#define DatumGetArrayTypeP(X)
Definition: array.h:254

◆ mcv_match_expression()

static int mcv_match_expression ( Node expr,
Bitmapset keys,
List exprs,
Oid collid 
)
static

Definition at line 1538 of file mcv.c.

References Assert, bms_member_index(), bms_num_members(), equal(), exprCollation(), idx(), IsA, lfirst, list_length(), Var::varattno, and Var::varcollid.

Referenced by mcv_get_match_bitmap().

1539 {
1540  int idx = -1;
1541 
1542  if (IsA(expr, Var))
1543  {
1544  /* simple Var, so just lookup using varattno */
1545  Var *var = (Var *) expr;
1546 
1547  if (collid)
1548  *collid = var->varcollid;
1549 
1550  idx = bms_member_index(keys, var->varattno);
1551 
1552  /* make sure the index is valid */
1553  Assert((idx >= 0) && (idx <= bms_num_members(keys)));
1554  }
1555  else
1556  {
1557  ListCell *lc;
1558 
1559  /* expressions are stored after the simple columns */
1560  idx = bms_num_members(keys);
1561 
1562  if (collid)
1563  *collid = exprCollation(expr);
1564 
1565  /* expression - lookup in stats expressions */
1566  foreach(lc, exprs)
1567  {
1568  Node *stat_expr = (Node *) lfirst(lc);
1569 
1570  if (equal(expr, stat_expr))
1571  break;
1572 
1573  idx++;
1574  }
1575 
1576  /* make sure the index is valid */
1577  Assert((idx >= bms_num_members(keys)) &&
1578  (idx <= bms_num_members(keys) + list_length(exprs)));
1579  }
1580 
1581  Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1582 
1583  return idx;
1584 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:587
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3149
Definition: nodes.h:536
AttrNumber varattno
Definition: primnodes.h:191
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
Definition: primnodes.h:186
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:453
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:759
Oid varcollid
Definition: primnodes.h:195

◆ pg_mcv_list_in()

Datum pg_mcv_list_in ( PG_FUNCTION_ARGS  )

Definition at line 1475 of file mcv.c.

References ereport, errcode(), errmsg(), ERROR, and PG_RETURN_VOID.

1476 {
1477  /*
1478  * pg_mcv_list stores the data in binary form and parsing text input is
1479  * not needed, so disallow this.
1480  */
1481  ereport(ERROR,
1482  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1483  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1484 
1485  PG_RETURN_VOID(); /* keep compiler quiet */
1486 }
int errcode(int sqlerrcode)
Definition: elog.c:698
#define ERROR
Definition: elog.h:46
#define ereport(elevel,...)
Definition: elog.h:157
#define PG_RETURN_VOID()
Definition: fmgr.h:349
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ pg_mcv_list_out()

Datum pg_mcv_list_out ( PG_FUNCTION_ARGS  )

Definition at line 1501 of file mcv.c.

References byteaout().

1502 {
1503  return byteaout(fcinfo);
1504 }
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:391

◆ pg_mcv_list_recv()

Datum pg_mcv_list_recv ( PG_FUNCTION_ARGS  )

Definition at line 1510 of file mcv.c.

References ereport, errcode(), errmsg(), ERROR, and PG_RETURN_VOID.

1511 {
1512  ereport(ERROR,
1513  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1514  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1515 
1516  PG_RETURN_VOID(); /* keep compiler quiet */
1517 }
int errcode(int sqlerrcode)
Definition: elog.c:698
#define ERROR
Definition: elog.h:46
#define ereport(elevel,...)
Definition: elog.h:157
#define PG_RETURN_VOID()
Definition: fmgr.h:349
int errmsg(const char *fmt,...)
Definition: elog.c:909

◆ pg_mcv_list_send()

Datum pg_mcv_list_send ( PG_FUNCTION_ARGS  )

Definition at line 1526 of file mcv.c.

References byteasend().

1527 {
1528  return byteasend(fcinfo);
1529 }
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:493

◆ pg_stats_ext_mcvlist_items()

Datum pg_stats_ext_mcvlist_items ( PG_FUNCTION_ARGS  )

Definition at line 1341 of file mcv.c.

References accumArrayResult(), Assert, FuncCallContext::attinmeta, MCVItem::base_frequency, BlessTupleDesc(), BoolGetDatum, FuncCallContext::call_cntr, cstring_to_text(), CurrentMemoryContext, DatumGetPointer, ereport, errcode(), errmsg(), ERROR, Float8GetDatum(), fmgr_info(), MCVItem::frequency, FunctionCall1, get_call_result_type(), getTypeOutputInfo(), heap_form_tuple(), HeapTupleGetDatum, i, Int32GetDatum, MCVItem::isnull, MCVList::items, makeArrayResult(), FuncCallContext::max_calls, MemoryContextSwitchTo(), FuncCallContext::multi_call_memory_ctx, MCVList::ndimensions, MCVList::nitems, PG_GETARG_BYTEA_P, PointerGetDatum, SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, statext_mcv_deserialize(), AttInMetadata::tupdesc, TupleDescGetAttInMetadata(), TYPEFUNC_COMPOSITE, MCVList::types, FuncCallContext::user_fctx, val, MCVItem::values, and values.

1342 {
1343  FuncCallContext *funcctx;
1344 
1345  /* stuff done only on the first call of the function */
1346  if (SRF_IS_FIRSTCALL())
1347  {
1348  MemoryContext oldcontext;
1349  MCVList *mcvlist;
1350  TupleDesc tupdesc;
1351 
1352  /* create a function context for cross-call persistence */
1353  funcctx = SRF_FIRSTCALL_INIT();
1354 
1355  /* switch to memory context appropriate for multiple function calls */
1356  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1357 
1359 
1360  funcctx->user_fctx = mcvlist;
1361 
1362  /* total number of tuples to be returned */
1363  funcctx->max_calls = 0;
1364  if (funcctx->user_fctx != NULL)
1365  funcctx->max_calls = mcvlist->nitems;
1366 
1367  /* Build a tuple descriptor for our result type */
1368  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1369  ereport(ERROR,
1370  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1371  errmsg("function returning record called in context "
1372  "that cannot accept type record")));
1373  tupdesc = BlessTupleDesc(tupdesc);
1374 
1375  /*
1376  * generate attribute metadata needed later to produce tuples from raw
1377  * C strings
1378  */
1379  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1380 
1381  MemoryContextSwitchTo(oldcontext);
1382  }
1383 
1384  /* stuff done on every call of the function */
1385  funcctx = SRF_PERCALL_SETUP();
1386 
1387  if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1388  * left to send */
1389  {
1390  Datum values[5];
1391  bool nulls[5];
1392  HeapTuple tuple;
1393  Datum result;
1394  ArrayBuildState *astate_values = NULL;
1395  ArrayBuildState *astate_nulls = NULL;
1396 
1397  int i;
1398  MCVList *mcvlist;
1399  MCVItem *item;
1400 
1401  mcvlist = (MCVList *) funcctx->user_fctx;
1402 
1403  Assert(funcctx->call_cntr < mcvlist->nitems);
1404 
1405  item = &mcvlist->items[funcctx->call_cntr];
1406 
1407  for (i = 0; i < mcvlist->ndimensions; i++)
1408  {
1409 
1410  astate_nulls = accumArrayResult(astate_nulls,
1411  BoolGetDatum(item->isnull[i]),
1412  false,
1413  BOOLOID,
1415 
1416  if (!item->isnull[i])
1417  {
1418  bool isvarlena;
1419  Oid outfunc;
1420  FmgrInfo fmgrinfo;
1421  Datum val;
1422  text *txt;
1423 
1424  /* lookup output func for the type */
1425  getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1426  fmgr_info(outfunc, &fmgrinfo);
1427 
1428  val = FunctionCall1(&fmgrinfo, item->values[i]);
1429  txt = cstring_to_text(DatumGetPointer(val));
1430 
1431  astate_values = accumArrayResult(astate_values,
1432  PointerGetDatum(txt),
1433  false,
1434  TEXTOID,
1436  }
1437  else
1438  astate_values = accumArrayResult(astate_values,
1439  (Datum) 0,
1440  true,
1441  TEXTOID,
1443  }
1444 
1445  values[0] = Int32GetDatum(funcctx->call_cntr);
1446  values[1] = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
1447  values[2] = PointerGetDatum(makeArrayResult(astate_nulls, CurrentMemoryContext));
1448  values[3] = Float8GetDatum(item->frequency);
1449  values[4] = Float8GetDatum(item->base_frequency);
1450 
1451  /* no NULLs in the tuple */
1452  memset(nulls, 0, sizeof(nulls));
1453 
1454  /* build a tuple */
1455  tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1456 
1457  /* make the tuple into a datum */
1458  result = HeapTupleGetDatum(tuple);
1459 
1460  SRF_RETURN_NEXT(funcctx, result);
1461  }
1462  else /* do when there is no more left */
1463  {
1464  SRF_RETURN_DONE(funcctx);
1465  }
1466 }
uint32 nitems
Definition: statistics.h:91
uint64 call_cntr
Definition: funcapi.h:65
Definition: fmgr.h:56
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:207
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2854
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:999
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:293
#define PointerGetDatum(X)
Definition: postgres.h:600
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
int errcode(int sqlerrcode)
Definition: elog.c:698
AttrNumber ndimensions
Definition: statistics.h:92
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
Datum * values
Definition: statistics.h:83
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
unsigned int Oid
Definition: postgres_ext.h:31
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:297
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1706
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:299
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:335
#define ERROR
Definition: elog.h:46
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2082
AttInMetadata * attinmeta
Definition: funcapi.h:91
TupleDesc tupdesc
Definition: funcapi.h:38
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5186
uintptr_t Datum
Definition: postgres.h:411
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2097
#define BoolGetDatum(X)
Definition: postgres.h:446
#define ereport(elevel,...)
Definition: elog.h:157
int result
Definition: header.h:19
text * cstring_to_text(const char *s)
Definition: varlena.c:189
#define Assert(condition)
Definition: c.h:804
double base_frequency
Definition: statistics.h:81
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
#define DatumGetPointer(X)
Definition: postgres.h:593
bool * isnull
Definition: statistics.h:82
static Datum values[MAXATTR]
Definition: bootstrap.c:156
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5122
#define Int32GetDatum(X)
Definition: postgres.h:523
void * user_fctx
Definition: funcapi.h:82
int errmsg(const char *fmt,...)
Definition: elog.c:909
int i
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:644
Definition: c.h:621
long val
Definition: informix.c:664
uint64 max_calls
Definition: funcapi.h:74
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:317
double frequency
Definition: statistics.h:80
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:295

◆ sort_item_compare()

static int sort_item_compare ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 469 of file mcv.c.

References ApplySortComparator(), SortItem::isnull, and SortItem::values.

Referenced by build_column_frequencies().

470 {
471  SortSupport ssup = (SortSupport) arg;
472  SortItem *ia = (SortItem *) a;
473  SortItem *ib = (SortItem *) b;
474 
475  return ApplySortComparator(ia->values[0], ia->isnull[0],
476  ib->values[0], ib->isnull[0],
477  ssup);
478 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
void * arg
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ statext_mcv_build()

MCVList* statext_mcv_build ( StatsBuildData data,
double  totalrows,
int  stattarget 
)

Definition at line 184 of file mcv.c.

References Assert, StatsBuildData::attnums, VacAttrStats::attrtypid, MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, sort-test::key, MCVList::magic, multi_sort_compare(), StatsBuildData::nattnums, MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, StatsBuildData::numrows, offsetof, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, StatsBuildData::stats, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

185 {
186  int i,
187  numattrs,
188  numrows,
189  ngroups,
190  nitems;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  /* comparator for all the columns */
198  mss = build_mss(data);
199 
200  /* sort the rows */
201  items = build_sorted_items(data, &nitems, mss,
202  data->nattnums, data->attnums);
203 
204  if (!items)
205  return NULL;
206 
207  /* for convenience */
208  numattrs = data->nattnums;
209  numrows = data->numrows;
210 
211  /* transform the sorted rows into groups (sorted by frequency) */
212  groups = build_distinct_groups(nitems, items, mss, &ngroups);
213 
214  /*
215  * The maximum number of MCV items to store, based on the statistics
216  * target we computed for the statistics object (from the target set for
217  * the object itself, attributes and the system default). In any case, we
218  * can't keep more groups than we have available.
219  */
220  nitems = stattarget;
221  if (nitems > ngroups)
222  nitems = ngroups;
223 
224  /*
225  * Decide how many items to keep in the MCV list. We can't use the same
226  * algorithm as per-column MCV lists, because that only considers the
227  * actual group frequency - but we're primarily interested in how the
228  * actual frequency differs from the base frequency (product of simple
229  * per-column frequencies, as if the columns were independent).
230  *
231  * Using the same algorithm might exclude items that are close to the
232  * "average" frequency of the sample. But that does not say whether the
233  * observed frequency is close to the base frequency or not. We also need
234  * to consider unexpectedly uncommon items (again, compared to the base
235  * frequency), and the single-column algorithm does not have to.
236  *
237  * We simply decide how many items to keep by computing the minimum count
238  * using get_mincount_for_mcv_list() and then keep all items that seem to
239  * be more common than that.
240  */
241  mincount = get_mincount_for_mcv_list(numrows, totalrows);
242 
243  /*
244  * Walk the groups until we find the first group with a count below the
245  * mincount threshold (the index of that group is the number of groups we
246  * want to keep).
247  */
248  for (i = 0; i < nitems; i++)
249  {
250  if (groups[i].count < mincount)
251  {
252  nitems = i;
253  break;
254  }
255  }
256 
257  /*
258  * At this point, we know the number of items for the MCV list. There
259  * might be none (for uniform distribution with many groups), and in that
260  * case, there will be no MCV list. Otherwise, construct the MCV list.
261  */
262  if (nitems > 0)
263  {
264  int j;
265  SortItem key;
266  MultiSortSupport tmp;
267 
268  /* frequencies for values in each attribute */
269  SortItem **freqs;
270  int *nfreqs;
271 
272  /* used to search values */
274  + sizeof(SortSupportData));
275 
276  /* compute frequencies for values in each column */
277  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279 
280  /*
281  * Allocate the MCV list structure, set the global parameters.
282  */
283  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284  sizeof(MCVItem) * nitems);
285 
286  mcvlist->magic = STATS_MCV_MAGIC;
287  mcvlist->type = STATS_MCV_TYPE_BASIC;
288  mcvlist->ndimensions = numattrs;
289  mcvlist->nitems = nitems;
290 
291  /* store info about data type OIDs */
292  for (i = 0; i < numattrs; i++)
293  mcvlist->types[i] = data->stats[i]->attrtypid;
294 
295  /* Copy the first chunk of groups into the result. */
296  for (i = 0; i < nitems; i++)
297  {
298  /* just pointer to the proper place in the list */
299  MCVItem *item = &mcvlist->items[i];
300 
301  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303 
304  /* copy values for the group */
305  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307 
308  /* groups should be sorted by frequency in descending order */
309  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310 
311  /* group frequency */
312  item->frequency = (double) groups[i].count / numrows;
313 
314  /* base frequency, if the attributes were independent */
315  item->base_frequency = 1.0;
316  for (j = 0; j < numattrs; j++)
317  {
318  SortItem *freq;
319 
320  /* single dimension */
321  tmp->ndims = 1;
322  tmp->ssup[0] = mss->ssup[j];
323 
324  /* fill search key */
325  key.values = &groups[i].values[j];
326  key.isnull = &groups[i].isnull[j];
327 
328  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329  sizeof(SortItem),
330  multi_sort_compare, tmp);
331 
332  item->base_frequency *= ((double) freq->count) / numrows;
333  }
334  }
335 
336  pfree(nfreqs);
337  pfree(freqs);
338  }
339 
340  pfree(items);
341  pfree(groups);
342 
343  return mcvlist;
344 }
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:428
uint32 nitems
Definition: statistics.h:91
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:494
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:83
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:152
void pfree(void *pointer)
Definition: mcxt.c:1169
Oid attrtypid
Definition: vacuum.h:124
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
uint32 type
Definition: statistics.h:90
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
struct SortSupportData SortSupportData
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:351
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
void * palloc0(Size size)
Definition: mcxt.c:1093
uintptr_t Datum
Definition: postgres.h:411
VacAttrStats ** stats
#define Assert(condition)
Definition: c.h:804
double base_frequency
Definition: statistics.h:81
#define STATS_MCV_MAGIC
Definition: statistics.h:66
int multi_sort_compare(const void *a, const void *b, void *arg)
bool * isnull
Definition: statistics.h:82
void * palloc(Size size)
Definition: mcxt.c:1062
int i
MultiSortSupportData * MultiSortSupport
#define offsetof(type, field)
Definition: c.h:727
double frequency
Definition: statistics.h:80

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea data)

Definition at line 999 of file mcv.c.

References Assert, MCVItem::base_frequency, elog, ERROR, fetch_att, MCVItem::frequency, i, MCVItem::isnull, MCVList::items, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, offsetof, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, repalloc(), SET_VARSIZE, SizeOfMCVList, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

1000 {
1001  int dim,
1002  i;
1003  Size expected_size;
1004  MCVList *mcvlist;
1005  char *raw;
1006  char *ptr;
1007  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1008 
1009  int ndims,
1010  nitems;
1011  DimensionInfo *info = NULL;
1012 
1013  /* local allocation buffer (used only for deserialization) */
1014  Datum **map = NULL;
1015 
1016  /* MCV list */
1017  Size mcvlen;
1018 
1019  /* buffer used for the result */
1020  Size datalen;
1021  char *dataptr;
1022  char *valuesptr;
1023  char *isnullptr;
1024 
1025  if (data == NULL)
1026  return NULL;
1027 
1028  /*
1029  * We can't possibly deserialize a MCV list if there's not even a complete
1030  * header. We need an explicit formula here, because we serialize the
1031  * header fields one by one, so we need to ignore struct alignment.
1032  */
1033  if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1034  elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1035  VARSIZE_ANY(data), MinSizeOfMCVList);
1036 
1037  /* read the MCV list header */
1038  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1039 
1040  /* pointer to the data part (skip the varlena header) */
1041  raw = (char *) data;
1042  ptr = VARDATA_ANY(raw);
1043  endptr = (char *) raw + VARSIZE_ANY(data);
1044 
1045  /* get the header and perform further sanity checks */
1046  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1047  ptr += sizeof(uint32);
1048 
1049  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1050  ptr += sizeof(uint32);
1051 
1052  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1053  ptr += sizeof(uint32);
1054 
1055  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1056  ptr += sizeof(AttrNumber);
1057 
1058  if (mcvlist->magic != STATS_MCV_MAGIC)
1059  elog(ERROR, "invalid MCV magic %u (expected %u)",
1060  mcvlist->magic, STATS_MCV_MAGIC);
1061 
1062  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1063  elog(ERROR, "invalid MCV type %u (expected %u)",
1064  mcvlist->type, STATS_MCV_TYPE_BASIC);
1065 
1066  if (mcvlist->ndimensions == 0)
1067  elog(ERROR, "invalid zero-length dimension array in MCVList");
1068  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1069  (mcvlist->ndimensions < 0))
1070  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1071  mcvlist->ndimensions);
1072 
1073  if (mcvlist->nitems == 0)
1074  elog(ERROR, "invalid zero-length item array in MCVList");
1075  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1076  elog(ERROR, "invalid length (%u) item array in MCVList",
1077  mcvlist->nitems);
1078 
1079  nitems = mcvlist->nitems;
1080  ndims = mcvlist->ndimensions;
1081 
1082  /*
1083  * Check amount of data including DimensionInfo for all dimensions and
1084  * also the serialized items (including uint16 indexes). Also, walk
1085  * through the dimension information and add it to the sum.
1086  */
1087  expected_size = SizeOfMCVList(ndims, nitems);
1088 
1089  /*
1090  * Check that we have at least the dimension and info records, along with
1091  * the items. We don't know the size of the serialized values yet. We need
1092  * to do this check first, before accessing the dimension info.
1093  */
1094  if (VARSIZE_ANY(data) < expected_size)
1095  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1096  VARSIZE_ANY(data), expected_size);
1097 
1098  /* Now copy the array of type Oids. */
1099  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1100  ptr += (sizeof(Oid) * ndims);
1101 
1102  /* Now it's safe to access the dimension info. */
1103  info = palloc(ndims * sizeof(DimensionInfo));
1104 
1105  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1106  ptr += (ndims * sizeof(DimensionInfo));
1107 
1108  /* account for the value arrays */
1109  for (dim = 0; dim < ndims; dim++)
1110  {
1111  /*
1112  * XXX I wonder if we can/should rely on asserts here. Maybe those
1113  * checks should be done every time?
1114  */
1115  Assert(info[dim].nvalues >= 0);
1116  Assert(info[dim].nbytes >= 0);
1117 
1118  expected_size += info[dim].nbytes;
1119  }
1120 
1121  /*
1122  * Now we know the total expected MCV size, including all the pieces
1123  * (header, dimension info. items and deduplicated data). So do the final
1124  * check on size.
1125  */
1126  if (VARSIZE_ANY(data) != expected_size)
1127  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1128  VARSIZE_ANY(data), expected_size);
1129 
1130  /*
1131  * We need an array of Datum values for each dimension, so that we can
1132  * easily translate the uint16 indexes later. We also need a top-level
1133  * array of pointers to those per-dimension arrays.
1134  *
1135  * While allocating the arrays for dimensions, compute how much space we
1136  * need for a copy of the by-ref data, as we can't simply point to the
1137  * original values (it might go away).
1138  */
1139  datalen = 0; /* space for by-ref data */
1140  map = (Datum **) palloc(ndims * sizeof(Datum *));
1141 
1142  for (dim = 0; dim < ndims; dim++)
1143  {
1144  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1145 
1146  /* space needed for a copy of data for by-ref types */
1147  datalen += info[dim].nbytes_aligned;
1148  }
1149 
1150  /*
1151  * Now resize the MCV list so that the allocation includes all the data.
1152  *
1153  * Allocate space for a copy of the data, as we can't simply reference the
1154  * serialized data - it's not aligned properly, and it may disappear while
1155  * we're still using the MCV list, e.g. due to catcache release.
1156  *
1157  * We do care about alignment here, because we will allocate all the
1158  * pieces at once, but then use pointers to different parts.
1159  */
1160  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1161 
1162  /* arrays of values and isnull flags for all MCV items */
1163  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1164  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1165 
1166  /* we don't quite need to align this, but it makes some asserts easier */
1167  mcvlen += MAXALIGN(datalen);
1168 
1169  /* now resize the deserialized MCV list, and compute pointers to parts */
1170  mcvlist = repalloc(mcvlist, mcvlen);
1171 
1172  /* pointer to the beginning of values/isnull arrays */
1173  valuesptr = (char *) mcvlist
1174  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1175 
1176  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1177 
1178  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1179 
1180  /*
1181  * Build mapping (index => value) for translating the serialized data into
1182  * the in-memory representation.
1183  */
1184  for (dim = 0; dim < ndims; dim++)
1185  {
1186  /* remember start position in the input array */
1187  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1188 
1189  if (info[dim].typbyval)
1190  {
1191  /* for by-val types we simply copy data into the mapping */
1192  for (i = 0; i < info[dim].nvalues; i++)
1193  {
1194  Datum v = 0;
1195 
1196  memcpy(&v, ptr, info[dim].typlen);
1197  ptr += info[dim].typlen;
1198 
1199  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1200 
1201  /* no under/overflow of input array */
1202  Assert(ptr <= (start + info[dim].nbytes));
1203  }
1204  }
1205  else
1206  {
1207  /* for by-ref types we need to also make a copy of the data */
1208 
1209  /* passed by reference, but fixed length (name, tid, ...) */
1210  if (info[dim].typlen > 0)
1211  {
1212  for (i = 0; i < info[dim].nvalues; i++)
1213  {
1214  memcpy(dataptr, ptr, info[dim].typlen);
1215  ptr += info[dim].typlen;
1216 
1217  /* just point into the array */
1218  map[dim][i] = PointerGetDatum(dataptr);
1219  dataptr += MAXALIGN(info[dim].typlen);
1220  }
1221  }
1222  else if (info[dim].typlen == -1)
1223  {
1224  /* varlena */
1225  for (i = 0; i < info[dim].nvalues; i++)
1226  {
1227  uint32 len;
1228 
1229  /* read the uint32 length */
1230  memcpy(&len, ptr, sizeof(uint32));
1231  ptr += sizeof(uint32);
1232 
1233  /* the length is data-only */
1234  SET_VARSIZE(dataptr, len + VARHDRSZ);
1235  memcpy(VARDATA(dataptr), ptr, len);
1236  ptr += len;
1237 
1238  /* just point into the array */
1239  map[dim][i] = PointerGetDatum(dataptr);
1240 
1241  /* skip to place of the next deserialized value */
1242  dataptr += MAXALIGN(len + VARHDRSZ);
1243  }
1244  }
1245  else if (info[dim].typlen == -2)
1246  {
1247  /* cstring */
1248  for (i = 0; i < info[dim].nvalues; i++)
1249  {
1250  uint32 len;
1251 
1252  memcpy(&len, ptr, sizeof(uint32));
1253  ptr += sizeof(uint32);
1254 
1255  memcpy(dataptr, ptr, len);
1256  ptr += len;
1257 
1258  /* just point into the array */
1259  map[dim][i] = PointerGetDatum(dataptr);
1260  dataptr += MAXALIGN(len);
1261  }
1262  }
1263 
1264  /* no under/overflow of input array */
1265  Assert(ptr <= (start + info[dim].nbytes));
1266 
1267  /* no overflow of the output mcv value */
1268  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1269  }
1270 
1271  /* check we consumed input data for this dimension exactly */
1272  Assert(ptr == (start + info[dim].nbytes));
1273  }
1274 
1275  /* we should have also filled the MCV list exactly */
1276  Assert(dataptr == ((char *) mcvlist + mcvlen));
1277 
1278  /* deserialize the MCV items and translate the indexes to Datums */
1279  for (i = 0; i < nitems; i++)
1280  {
1281  MCVItem *item = &mcvlist->items[i];
1282 
1283  item->values = (Datum *) valuesptr;
1284  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1285 
1286  item->isnull = (bool *) isnullptr;
1287  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1288 
1289  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1290  ptr += sizeof(bool) * ndims;
1291 
1292  memcpy(&item->frequency, ptr, sizeof(double));
1293  ptr += sizeof(double);
1294 
1295  memcpy(&item->base_frequency, ptr, sizeof(double));
1296  ptr += sizeof(double);
1297 
1298  /* finally translate the indexes (for non-NULL only) */
1299  for (dim = 0; dim < ndims; dim++)
1300  {
1301  uint16 index;
1302 
1303  memcpy(&index, ptr, sizeof(uint16));
1304  ptr += sizeof(uint16);
1305 
1306  if (item->isnull[dim])
1307  continue;
1308 
1309  item->values[dim] = map[dim][index];
1310  }
1311 
1312  /* check we're not overflowing the input */
1313  Assert(ptr <= endptr);
1314  }
1315 
1316  /* check that we processed all the data */
1317  Assert(ptr == endptr);
1318 
1319  /* release the buffers used for mapping */
1320  for (dim = 0; dim < ndims; dim++)
1321  pfree(map[dim]);
1322 
1323  pfree(map);
1324 
1325  return mcvlist;
1326 }
uint32 nitems
Definition: statistics.h:91
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define VARDATA(PTR)
Definition: postgres.h:315
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
#define PointerGetDatum(X)
Definition: postgres.h:600
#define VARHDRSZ
Definition: c.h:627
#define MinSizeOfMCVList
Definition: mcv.c:63
struct DimensionInfo DimensionInfo
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:83
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:440
void pfree(void *pointer)
Definition: mcxt.c:1169
#define ERROR
Definition: elog.h:46
unsigned int uint32
Definition: c.h:441
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:72
uint32 type
Definition: statistics.h:90
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
void * palloc0(Size size)
Definition: mcxt.c:1093
uintptr_t Datum
Definition: postgres.h:411
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
#define VARSIZE_ANY(PTR)
Definition: postgres.h:348
#define Assert(condition)
Definition: c.h:804
double base_frequency
Definition: statistics.h:81
#define STATS_MCV_MAGIC
Definition: statistics.h:66
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
bool * isnull
Definition: statistics.h:82
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
void * palloc(Size size)
Definition: mcxt.c:1062
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:75
#define elog(elevel,...)
Definition: elog.h:232
int i
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:727
double frequency
Definition: statistics.h:80
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
unsigned char bool
Definition: c.h:391

◆ statext_mcv_load()

MCVList* statext_mcv_load ( Oid  mvoid)

Definition at line 562 of file mcv.c.

References DatumGetByteaP, elog, ERROR, HeapTupleIsValid, ObjectIdGetDatum, ReleaseSysCache(), SearchSysCache1(), statext_mcv_deserialize(), STATEXTDATASTXOID, and SysCacheGetAttr().

Referenced by mcv_clauselist_selectivity(), and statext_mcv_clauselist_selectivity().

563 {
564  MCVList *result;
565  bool isnull;
566  Datum mcvlist;
568 
569  if (!HeapTupleIsValid(htup))
570  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
571 
572  mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
573  Anum_pg_statistic_ext_data_stxdmcv, &isnull);
574 
575  if (isnull)
576  elog(ERROR,
577  "requested statistics kind \"%c\" is not yet built for statistics object %u",
578  STATS_EXT_DEPENDENCIES, mvoid);
579 
580  result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
581 
582  ReleaseSysCache(htup);
583 
584  return result;
585 }
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:999
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
#define DatumGetByteaP(X)
Definition: fmgr.h:331
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1127
uintptr_t Datum
Definition: postgres.h:411
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1175
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1388
int result
Definition: header.h:19
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define elog(elevel,...)
Definition: elog.h:232

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList mcvlist,
VacAttrStats **  stats 
)

Definition at line 624 of file mcv.c.

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString, DatumGetPointer, MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, PrepareSortSupportFromOrderingOp(), qsort_arg(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, store_att_byval, DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, MCVItem::values, values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

625 {
626  int i;
627  int dim;
628  int ndims = mcvlist->ndimensions;
629 
630  SortSupport ssup;
631  DimensionInfo *info;
632 
633  Size total_length;
634 
635  /* serialized items (indexes into arrays, etc.) */
636  bytea *raw;
637  char *ptr;
638  char *endptr PG_USED_FOR_ASSERTS_ONLY;
639 
640  /* values per dimension (and number of non-NULL values) */
641  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
642  int *counts = (int *) palloc0(sizeof(int) * ndims);
643 
644  /*
645  * We'll include some rudimentary information about the attribute types
646  * (length, by-val flag), so that we don't have to look them up while
647  * deserializing the MCV list (we already have the type OID in the
648  * header). This is safe because when changing the type of the attribute
649  * the statistics gets dropped automatically. We need to store the info
650  * about the arrays of deduplicated values anyway.
651  */
652  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
653 
654  /* sort support data for all attributes included in the MCV list */
655  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
656 
657  /* collect and deduplicate values for each dimension (attribute) */
658  for (dim = 0; dim < ndims; dim++)
659  {
660  int ndistinct;
661  TypeCacheEntry *typentry;
662 
663  /*
664  * Lookup the LT operator (can't get it from stats extra_data, as we
665  * don't know how to interpret that - scalar vs. array etc.).
666  */
667  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
668 
669  /* copy important info about the data type (length, by-value) */
670  info[dim].typlen = stats[dim]->attrtype->typlen;
671  info[dim].typbyval = stats[dim]->attrtype->typbyval;
672 
673  /* allocate space for values in the attribute and collect them */
674  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
675 
676  for (i = 0; i < mcvlist->nitems; i++)
677  {
678  /* skip NULL values - we don't need to deduplicate those */
679  if (mcvlist->items[i].isnull[dim])
680  continue;
681 
682  /* append the value at the end */
683  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
684  counts[dim] += 1;
685  }
686 
687  /* if there are just NULL values in this dimension, we're done */
688  if (counts[dim] == 0)
689  continue;
690 
691  /* sort and deduplicate the data */
692  ssup[dim].ssup_cxt = CurrentMemoryContext;
693  ssup[dim].ssup_collation = stats[dim]->attrcollid;
694  ssup[dim].ssup_nulls_first = false;
695 
696  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
697 
698  qsort_arg(values[dim], counts[dim], sizeof(Datum),
699  compare_scalars_simple, &ssup[dim]);
700 
701  /*
702  * Walk through the array and eliminate duplicate values, but keep the
703  * ordering (so that we can do a binary search later). We know there's
704  * at least one item as (counts[dim] != 0), so we can skip the first
705  * element.
706  */
707  ndistinct = 1; /* number of distinct values */
708  for (i = 1; i < counts[dim]; i++)
709  {
710  /* expect sorted array */
711  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
712 
713  /* if the value is the same as the previous one, we can skip it */
714  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
715  continue;
716 
717  values[dim][ndistinct] = values[dim][i];
718  ndistinct += 1;
719  }
720 
721  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
722  Assert(ndistinct <= PG_UINT16_MAX);
723 
724  /*
725  * Store additional info about the attribute - number of deduplicated
726  * values, and also size of the serialized data. For fixed-length data
727  * types this is trivial to compute, for varwidth types we need to
728  * actually walk the array and sum the sizes.
729  */
730  info[dim].nvalues = ndistinct;
731 
732  if (info[dim].typbyval) /* by-value data types */
733  {
734  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
735 
736  /*
737  * We copy the data into the MCV item during deserialization, so
738  * we don't need to allocate any extra space.
739  */
740  info[dim].nbytes_aligned = 0;
741  }
742  else if (info[dim].typlen > 0) /* fixed-length by-ref */
743  {
744  /*
745  * We don't care about alignment in the serialized data, so we
746  * pack the data as much as possible. But we also track how much
747  * data will be needed after deserialization, and in that case we
748  * need to account for alignment of each item.
749  *
750  * Note: As the items are fixed-length, we could easily compute
751  * this during deserialization, but we do it here anyway.
752  */
753  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
754  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
755  }
756  else if (info[dim].typlen == -1) /* varlena */
757  {
758  info[dim].nbytes = 0;
759  info[dim].nbytes_aligned = 0;
760  for (i = 0; i < info[dim].nvalues; i++)
761  {
762  Size len;
763 
764  /*
765  * For varlena values, we detoast the values and store the
766  * length and data separately. We don't bother with alignment
767  * here, which means that during deserialization we need to
768  * copy the fields and only access the copies.
769  */
770  values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
771 
772  /* serialized length (uint32 length + data) */
773  len = VARSIZE_ANY_EXHDR(values[dim][i]);
774  info[dim].nbytes += sizeof(uint32); /* length */
775  info[dim].nbytes += len; /* value (no header) */
776 
777  /*
778  * During deserialization we'll build regular varlena values
779  * with full headers, and we need to align them properly.
780  */
781  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
782  }
783  }
784  else if (info[dim].typlen == -2) /* cstring */
785  {
786  info[dim].nbytes = 0;
787  info[dim].nbytes_aligned = 0;
788  for (i = 0; i < info[dim].nvalues; i++)
789  {
790  Size len;
791 
792  /*
793  * cstring is handled similar to varlena - first we store the
794  * length as uint32 and then the data. We don't care about
795  * alignment, which means that during deserialization we need
796  * to copy the fields and only access the copies.
797  */
798 
799  /* c-strings include terminator, so +1 byte */
800  len = strlen(DatumGetCString(values[dim][i])) + 1;
801  info[dim].nbytes += sizeof(uint32); /* length */
802  info[dim].nbytes += len; /* value */
803 
804  /* space needed for properly aligned deserialized copies */
805  info[dim].nbytes_aligned += MAXALIGN(len);
806  }
807  }
808 
809  /* we know (count>0) so there must be some data */
810  Assert(info[dim].nbytes > 0);
811  }
812 
813  /*
814  * Now we can finally compute how much space we'll actually need for the
815  * whole serialized MCV list (varlena header, MCV header, dimension info
816  * for each attribute, deduplicated values and items).
817  */
818  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
819  + sizeof(AttrNumber) /* ndimensions */
820  + (ndims * sizeof(Oid)); /* attribute types */
821 
822  /* dimension info */
823  total_length += ndims * sizeof(DimensionInfo);
824 
825  /* add space for the arrays of deduplicated values */
826  for (i = 0; i < ndims; i++)
827  total_length += info[i].nbytes;
828 
829  /*
830  * And finally account for the items (those are fixed-length, thanks to
831  * replacing values with uint16 indexes into the deduplicated arrays).
832  */
833  total_length += mcvlist->nitems * ITEM_SIZE(dim);
834 
835  /*
836  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
837  * we set them to zero to make the result more compressible).
838  */
839  raw = (bytea *) palloc0(VARHDRSZ + total_length);
840  SET_VARSIZE(raw, VARHDRSZ + total_length);
841 
842  ptr = VARDATA(raw);
843  endptr = ptr + total_length;
844 
845  /* copy the MCV list header fields, one by one */
846  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->type, sizeof(uint32));
850  ptr += sizeof(uint32);
851 
852  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
853  ptr += sizeof(uint32);
854 
855  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
856  ptr += sizeof(AttrNumber);
857 
858  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
859  ptr += (sizeof(Oid) * ndims);
860 
861  /* store information about the attributes (data amounts, ...) */
862  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
863  ptr += sizeof(DimensionInfo) * ndims;
864 
865  /* Copy the deduplicated values for all attributes to the output. */
866  for (dim = 0; dim < ndims; dim++)
867  {
868  /* remember the starting point for Asserts later */
869  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
870 
871  for (i = 0; i < info[dim].nvalues; i++)
872  {
873  Datum value = values[dim][i];
874 
875  if (info[dim].typbyval) /* passed by value */
876  {
877  Datum tmp;
878 
879  /*
880  * For byval types, we need to copy just the significant bytes
881  * - we can't use memcpy directly, as that assumes
882  * little-endian behavior. store_att_byval does almost what
883  * we need, but it requires a properly aligned buffer - the
884  * output buffer does not guarantee that. So we simply use a
885  * local Datum variable (which guarantees proper alignment),
886  * and then copy the value from it.
887  */
888  store_att_byval(&tmp, value, info[dim].typlen);
889 
890  memcpy(ptr, &tmp, info[dim].typlen);
891  ptr += info[dim].typlen;
892  }
893  else if (info[dim].typlen > 0) /* passed by reference */
894  {
895  /* no special alignment needed, treated as char array */
896  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
897  ptr += info[dim].typlen;
898  }
899  else if (info[dim].typlen == -1) /* varlena */
900  {
902 
903  /* copy the length */
904  memcpy(ptr, &len, sizeof(uint32));
905  ptr += sizeof(uint32);
906 
907  /* data from the varlena value (without the header) */
908  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
909  ptr += len;
910  }
911  else if (info[dim].typlen == -2) /* cstring */
912  {
913  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
914 
915  /* copy the length */
916  memcpy(ptr, &len, sizeof(uint32));
917  ptr += sizeof(uint32);
918 
919  /* value */
920  memcpy(ptr, DatumGetCString(value), len);
921  ptr += len;
922  }
923 
924  /* no underflows or overflows */
925  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
926  }
927 
928  /* we should get exactly nbytes of data for this dimension */
929  Assert((ptr - start) == info[dim].nbytes);
930  }
931 
932  /* Serialize the items, with uint16 indexes instead of the values. */
933  for (i = 0; i < mcvlist->nitems; i++)
934  {
935  MCVItem *mcvitem = &mcvlist->items[i];
936 
937  /* don't write beyond the allocated space */
938  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
939 
940  /* copy NULL and frequency flags into the serialized MCV */
941  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
942  ptr += sizeof(bool) * ndims;
943 
944  memcpy(ptr, &mcvitem->frequency, sizeof(double));
945  ptr += sizeof(double);
946 
947  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
948  ptr += sizeof(double);
949 
950  /* store the indexes last */
951  for (dim = 0; dim < ndims; dim++)
952  {
953  uint16 index = 0;
954  Datum *value;
955 
956  /* do the lookup only for non-NULL values */
957  if (!mcvitem->isnull[dim])
958  {
959  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
960  info[dim].nvalues, sizeof(Datum),
961  compare_scalars_simple, &ssup[dim]);
962 
963  Assert(value != NULL); /* serialization or deduplication
964  * error */
965 
966  /* compute index within the deduplicated array */
967  index = (uint16) (value - values[dim]);
968 
969  /* check the index is within expected bounds */
970  Assert(index < info[dim].nvalues);
971  }
972 
973  /* copy the index into the serialized MCV */
974  memcpy(ptr, &index, sizeof(uint16));
975  ptr += sizeof(uint16);
976  }
977 
978  /* make sure we don't overflow the allocated value */
979  Assert(ptr <= endptr);
980  }
981 
982  /* at this point we expect to match the total_length exactly */
983  Assert(ptr == endptr);
984 
985  pfree(values);
986  pfree(counts);
987 
988  return raw;
989 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
uint32 nitems
Definition: statistics.h:91
bool ssup_nulls_first
Definition: sortsupport.h:75
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define VARDATA(PTR)
Definition: postgres.h:315
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
#define PointerGetDatum(X)
Definition: postgres.h:600
#define VARHDRSZ
Definition: c.h:627
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:135
struct DimensionInfo DimensionInfo
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
Datum * values
Definition: statistics.h:83
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:440
void pfree(void *pointer)
Definition: mcxt.c:1169
#define PG_UINT16_MAX
Definition: c.h:522
#define DatumGetCString(X)
Definition: postgres.h:610
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int compare_scalars_simple(const void *a, const void *b, void *arg)
unsigned int uint32
Definition: c.h:441
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
uint32 type
Definition: statistics.h:90
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
#define store_att_byval(T, newdatum, attlen)
Definition: tupmacs.h:226
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
void * palloc0(Size size)
Definition: mcxt.c:1093
uintptr_t Datum
Definition: postgres.h:411
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define ITEM_SIZE(ndims)
Definition: mcv.c:57
static struct @143 value
#define Assert(condition)
Definition: c.h:804
double base_frequency
Definition: statistics.h:81
size_t Size
Definition: c.h:540
#define MAXALIGN(LEN)
Definition: c.h:757
Oid attrcollid
Definition: vacuum.h:127
#define DatumGetPointer(X)
Definition: postgres.h:593
bool * isnull
Definition: statistics.h:82
static Datum values[MAXATTR]
Definition: bootstrap.c:156
Form_pg_type attrtype
Definition: vacuum.h:126
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:137
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
Definition: c.h:621
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
int16 AttrNumber
Definition: attnum.h:21
double frequency
Definition: statistics.h:80
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
unsigned char bool
Definition: c.h:391