PostgreSQL Source Code  git master
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 
struct  StatsBuildData
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportData * MultiSortSupport
 
typedef struct SortItem SortItem
 
typedef struct StatsBuildData StatsBuildData
 

Functions

MVNDistinct * statext_ndistinct_build (double totalrows, StatsBuildData *data)
 
bytea * statext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinct * statext_ndistinct_deserialize (bytea *data)
 
MVDependencies * statext_dependencies_build (StatsBuildData *data)
 
bytea * statext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependencies * statext_dependencies_deserialize (bytea *data)
 
MCVList * statext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
bytea * statext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVList * statext_mcv_deserialize (bytea *data)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
AttrNumber * build_attnums_array (Bitmapset *attrs, int nexprs, int *numattrs)
 
SortItem * build_sorted_items (StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_opclause_args (List *args, Node **exprp, Const **cstp, bool *expronleftp)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

typedef MultiSortSupportData* MultiSortSupport

Definition at line 51 of file extended_stats_internal.h.

◆ MultiSortSupportData

typedef struct MultiSortSupportData MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

◆ StatsBuildData

typedef struct StatsBuildData StatsBuildData

Function Documentation

◆ build_attnums_array()

AttrNumber* build_attnums_array ( Bitmapset *  attrs,
int  nexprs,
int *  numattrs 
)

Definition at line 941 of file extended_stats.c.

942 {
943  int i,
944  j;
945  AttrNumber *attnums;
946  int num = bms_num_members(attrs);
947 
948  if (numattrs)
949  *numattrs = num;
950 
951  /* build attnums from the bitmapset */
952  attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
953  i = 0;
954  j = -1;
955  while ((j = bms_next_member(attrs, j)) >= 0)
956  {
957  int attnum = (j - nexprs);
958 
959  /*
960  * Make sure the bitmap contains only user-defined attributes. As
961  * bitmaps can't contain negative values, this can be violated in two
962  * ways. Firstly, the bitmap might contain 0 as a member, and secondly
963  * the integer value might be larger than MaxAttrNumber.
964  */
967  Assert(attnum >= (-nexprs));
968 
969  attnums[i++] = (AttrNumber) attnum;
970 
971  /* protect against overflows */
972  Assert(i <= num);
973  }
974 
975  return attnums;
976 }
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define MaxAttrNumber
Definition: attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1306
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
#define Assert(condition)
Definition: c.h:858
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1316
int16 attnum
Definition: pg_attribute.h:74

References Assert, attnum, AttributeNumberIsValid, bms_next_member(), bms_num_members(), i, j, MaxAttrNumber, and palloc().

◆ build_sorted_items()

SortItem* build_sorted_items ( StatsBuildData *  data,
int *  nitems,
MultiSortSupport  mss,
int  numattrs,
AttrNumber *  attnums 
)

Definition at line 986 of file extended_stats.c.

989 {
990  int i,
991  j,
992  len,
993  nrows;
994  int nvalues = data->numrows * numattrs;
995 
996  SortItem *items;
997  Datum *values;
998  bool *isnull;
999  char *ptr;
1000  int *typlen;
1001 
1002  /* Compute the total amount of memory we need (both items and values). */
1003  len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
1004 
1005  /* Allocate the memory and split it into the pieces. */
1006  ptr = palloc0(len);
1007 
1008  /* items to sort */
1009  items = (SortItem *) ptr;
1010  ptr += data->numrows * sizeof(SortItem);
1011 
1012  /* values and null flags */
1013  values = (Datum *) ptr;
1014  ptr += nvalues * sizeof(Datum);
1015 
1016  isnull = (bool *) ptr;
1017  ptr += nvalues * sizeof(bool);
1018 
1019  /* make sure we consumed the whole buffer exactly */
1020  Assert((ptr - (char *) items) == len);
1021 
1022  /* fix the pointers to Datum and bool arrays */
1023  nrows = 0;
1024  for (i = 0; i < data->numrows; i++)
1025  {
1026  items[nrows].values = &values[nrows * numattrs];
1027  items[nrows].isnull = &isnull[nrows * numattrs];
1028 
1029  nrows++;
1030  }
1031 
1032  /* build a local cache of typlen for all attributes */
1033  typlen = (int *) palloc(sizeof(int) * data->nattnums);
1034  for (i = 0; i < data->nattnums; i++)
1035  typlen[i] = get_typlen(data->stats[i]->attrtypid);
1036 
1037  nrows = 0;
1038  for (i = 0; i < data->numrows; i++)
1039  {
1040  bool toowide = false;
1041 
1042  /* load the values/null flags from sample rows */
1043  for (j = 0; j < numattrs; j++)
1044  {
1045  Datum value;
1046  bool isnull;
1047  int attlen;
1048  AttrNumber attnum = attnums[j];
1049 
1050  int idx;
1051 
1052  /* match attnum to the pre-calculated data */
1053  for (idx = 0; idx < data->nattnums; idx++)
1054  {
1055  if (attnum == data->attnums[idx])
1056  break;
1057  }
1058 
1059  Assert(idx < data->nattnums);
1060 
1061  value = data->values[idx][i];
1062  isnull = data->nulls[idx][i];
1063  attlen = typlen[idx];
1064 
1065  /*
1066  * If this is a varlena value, check if it's too wide and if yes
1067  * then skip the whole item. Otherwise detoast the value.
1068  *
1069  * XXX It may happen that we've already detoasted some preceding
1070  * values for the current item. We don't bother to cleanup those
1071  * on the assumption that those are small (below WIDTH_THRESHOLD)
1072  * and will be discarded at the end of analyze.
1073  */
1074  if ((!isnull) && (attlen == -1))
1075  {
1077  {
1078  toowide = true;
1079  break;
1080  }
1081 
1083  }
1084 
1085  items[nrows].values[j] = value;
1086  items[nrows].isnull[j] = isnull;
1087  }
1088 
1089  if (toowide)
1090  continue;
1091 
1092  nrows++;
1093  }
1094 
1095  /* store the actual number of items (ignoring the too-wide ones) */
1096  *nitems = nrows;
1097 
1098  /* all items were too wide */
1099  if (nrows == 0)
1100  {
1101  /* everything is allocated as a single chunk */
1102  pfree(items);
1103  return NULL;
1104  }
1105 
1106  /* do the sort, using the multi-sort */
1107  qsort_interruptible(items, nrows, sizeof(SortItem),
1108  multi_sort_compare, mss);
1109 
1110  return items;
1111 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
static Datum values[MAXATTR]
Definition: bootstrap.c:152
unsigned char bool
Definition: c.h:456
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
#define WIDTH_THRESHOLD
int multi_sort_compare(const void *a, const void *b, void *arg)
struct SortItem SortItem
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define nitems(x)
Definition: indent.h:31
static struct @155 value
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2197
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc0(Size size)
Definition: mcxt.c:1346
int16 attlen
Definition: pg_attribute.h:59
const void size_t len
const void * data
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static ItemArray items
Definition: test_tidstore.c:49

References Assert, attlen, attnum, data, get_typlen(), i, idx(), items, j, len, multi_sort_compare(), nitems, palloc(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum(), qsort_interruptible(), toast_raw_datum_size(), value, values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)

Definition at line 927 of file extended_stats.c.

928 {
929  return ApplySortComparator(a, false, b, false, ssup);
930 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

References a, ApplySortComparator(), and b.

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

◆ compare_scalars_simple()

int compare_scalars_simple ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 919 of file extended_stats.c.

920 {
921  return compare_datums_simple(*(Datum *) a,
922  *(Datum *) b,
923  (SortSupport) arg);
924 }
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
void * arg

References a, arg, b, and compare_datums_simple().

Referenced by statext_mcv_serialize().

◆ examine_opclause_args()

bool examine_opclause_args ( List *  args,
Node **  exprp,
Const **  cstp,
bool *  expronleftp 
)

Definition at line 2055 of file extended_stats.c.

2057 {
2058  Node *expr;
2059  Const *cst;
2060  bool expronleft;
2061  Node *leftop,
2062  *rightop;
2063 
2064  /* enforced by statext_is_compatible_clause_internal */
2065  Assert(list_length(args) == 2);
2066 
2067  leftop = linitial(args);
2068  rightop = lsecond(args);
2069 
2070  /* strip RelabelType from either side of the expression */
2071  if (IsA(leftop, RelabelType))
2072  leftop = (Node *) ((RelabelType *) leftop)->arg;
2073 
2074  if (IsA(rightop, RelabelType))
2075  rightop = (Node *) ((RelabelType *) rightop)->arg;
2076 
2077  if (IsA(rightop, Const))
2078  {
2079  expr = (Node *) leftop;
2080  cst = (Const *) rightop;
2081  expronleft = true;
2082  }
2083  else if (IsA(leftop, Const))
2084  {
2085  expr = (Node *) rightop;
2086  cst = (Const *) leftop;
2087  expronleft = false;
2088  }
2089  else
2090  return false;
2091 
2092  /* return pointers to the extracted parts if requested */
2093  if (exprp)
2094  *exprp = expr;
2095 
2096  if (cstp)
2097  *cstp = cst;
2098 
2099  if (expronleftp)
2100  *expronleftp = expronleft;
2101 
2102  return true;
2103 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
Definition: nodes.h:129

References arg, generate_unaccent_rules::args, Assert, IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo *  root,
StatisticExtInfo *  stat,
MCVList *  mcv,
Node *  clause,
bool **  or_matches,
Selectivity *  basesel,
Selectivity *  overlap_mcvsel,
Selectivity *  overlap_basesel,
Selectivity *  totalsel 
)

Definition at line 2126 of file mcv.c.

2130 {
2131  Selectivity s = 0.0;
2132  bool *new_matches;
2133  int i;
2134 
2135  /* build the OR-matches bitmap, if not built already */
2136  if (*or_matches == NULL)
2137  *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2138 
2139  /* build the match bitmap for the new clause */
2140  new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2141  stat->exprs, mcv, false);
2142 
2143  /*
2144  * Sum the frequencies for all the MCV items matching this clause and also
2145  * those matching the overlap between this clause and any of the preceding
2146  * clauses as described above.
2147  */
2148  *basesel = 0.0;
2149  *overlap_mcvsel = 0.0;
2150  *overlap_basesel = 0.0;
2151  *totalsel = 0.0;
2152  for (i = 0; i < mcv->nitems; i++)
2153  {
2154  *totalsel += mcv->items[i].frequency;
2155 
2156  if (new_matches[i])
2157  {
2158  s += mcv->items[i].frequency;
2159  *basesel += mcv->items[i].base_frequency;
2160 
2161  if ((*or_matches)[i])
2162  {
2163  *overlap_mcvsel += mcv->items[i].frequency;
2164  *overlap_basesel += mcv->items[i].base_frequency;
2165  }
2166  }
2167 
2168  /* update the OR-matches bitmap for the next clause */
2169  (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2170  }
2171 
2172  pfree(new_matches);
2173 
2174  return s;
2175 }
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1599
double Selectivity
Definition: nodes.h:250
#define list_make1(x1)
Definition: pg_list.h:212
tree ctl root
Definition: radixtree.h:1880
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), pfree(), and root.

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo *  root,
StatisticExtInfo *  stat,
List *  clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo *  sjinfo,
RelOptInfo *  rel,
Selectivity *  basesel,
Selectivity *  totalsel 
)

Definition at line 2048 of file mcv.c.

2053 {
2054  int i;
2055  MCVList *mcv;
2056  Selectivity s = 0.0;
2057  RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2058 
2059  /* match/mismatch bitmap for each MCV item */
2060  bool *matches = NULL;
2061 
2062  /* load the MCV list stored in the statistics object */
2063  mcv = statext_mcv_load(stat->statOid, rte->inh);
2064 
2065  /* build a match bitmap for the clauses */
2066  matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2067  mcv, false);
2068 
2069  /* sum frequencies for all the matching MCV items */
2070  *basesel = 0.0;
2071  *totalsel = 0.0;
2072  for (i = 0; i < mcv->nitems; i++)
2073  {
2074  *totalsel += mcv->items[i].frequency;
2075 
2076  if (matches[i] != false)
2077  {
2078  *basesel += mcv->items[i].base_frequency;
2079  s += mcv->items[i].frequency;
2080  }
2081  }
2082 
2083  return s;
2084 }
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition: mcv.c:558
Index relid
Definition: pathnodes.h:908

References MCVItem::base_frequency, MCVItem::frequency, i, RangeTblEntry::inh, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, RelOptInfo::relid, root, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)

Definition at line 2006 of file mcv.c.

2010 {
2011  Selectivity other_sel;
2012  Selectivity sel;
2013 
2014  /* estimated selectivity of values not covered by MCV matches */
2015  other_sel = simple_sel - mcv_basesel;
2016  CLAMP_PROBABILITY(other_sel);
2017 
2018  /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2019  if (other_sel > 1.0 - mcv_totalsel)
2020  other_sel = 1.0 - mcv_totalsel;
2021 
2022  /* overall selectivity is the sum of the MCV and non-MCV parts */
2023  sel = mcv_sel + other_sel;
2024  CLAMP_PROBABILITY(sel);
2025 
2026  return sel;
2027 }
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)

Definition at line 851 of file extended_stats.c.

853 {
854  SortSupport ssup = &mss->ssup[sortdim];
855 
857  ssup->ssup_collation = collation;
858  ssup->ssup_nulls_first = false;
859 
861 }
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:370
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66

References CurrentMemoryContext, oper(), PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ multi_sort_compare()

int multi_sort_compare ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 865 of file extended_stats.c.

866 {
868  SortItem *ia = (SortItem *) a;
869  SortItem *ib = (SortItem *) b;
870  int i;
871 
872  for (i = 0; i < mss->ndims; i++)
873  {
874  int compare;
875 
877  ib->values[i], ib->isnull[i],
878  &mss->ssup[i]);
879 
880  if (compare != 0)
881  return compare;
882  }
883 
884  /* equal by default */
885  return 0;
886 }
MultiSortSupportData * MultiSortSupport
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145

References a, ApplySortComparator(), arg, b, compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem *  a,
const SortItem *  b,
MultiSortSupport  mss 
)

Definition at line 890 of file extended_stats.c.

892 {
893  return ApplySortComparator(a->values[dim], a->isnull[dim],
894  b->values[dim], b->isnull[dim],
895  &mss->ssup[dim]);
896 }

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem *  a,
const SortItem *  b,
MultiSortSupport  mss 
)

Definition at line 899 of file extended_stats.c.

902 {
903  int dim;
904 
905  for (dim = start; dim <= end; dim++)
906  {
907  int r = ApplySortComparator(a->values[dim], a->isnull[dim],
908  b->values[dim], b->isnull[dim],
909  &mss->ssup[dim]);
910 
911  if (r != 0)
912  return r;
913  }
914 
915  return 0;
916 }
return str start

References a, ApplySortComparator(), b, MultiSortSupportData::ssup, and start.

Referenced by dependency_degree().

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)

Definition at line 832 of file extended_stats.c.

833 {
834  MultiSortSupport mss;
835 
836  Assert(ndims >= 2);
837 
838  mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
839  + sizeof(SortSupportData) * ndims);
840 
841  mss->ndims = ndims;
842 
843  return mss;
844 }
struct SortSupportData SortSupportData

References Assert, MultiSortSupportData::ndims, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ statext_dependencies_build()

MVDependencies* statext_dependencies_build ( StatsBuildData *  data)

Definition at line 348 of file dependencies.c.

349 {
350  int i,
351  k;
352 
353  /* result */
354  MVDependencies *dependencies = NULL;
355  MemoryContext cxt;
356 
357  Assert(data->nattnums >= 2);
358 
359  /* tracks memory allocated by dependency_degree calls */
361  "dependency_degree cxt",
363 
364  /*
365  * We'll try build functional dependencies starting from the smallest ones
366  * covering just 2 columns, to the largest ones, covering all columns
367  * included in the statistics object. We start from the smallest ones
368  * because we want to be able to skip already implied ones.
369  */
370  for (k = 2; k <= data->nattnums; k++)
371  {
372  AttrNumber *dependency; /* array with k elements */
373 
374  /* prepare a DependencyGenerator of variation */
376 
377  /* generate all possible variations of k values (out of n) */
378  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
379  {
380  double degree;
381  MVDependency *d;
382  MemoryContext oldcxt;
383 
384  /* release memory used by dependency degree calculation */
385  oldcxt = MemoryContextSwitchTo(cxt);
386 
387  /* compute how valid the dependency seems */
388  degree = dependency_degree(data, k, dependency);
389 
390  MemoryContextSwitchTo(oldcxt);
391  MemoryContextReset(cxt);
392 
393  /*
394  * if the dependency seems entirely invalid, don't store it
395  */
396  if (degree == 0.0)
397  continue;
398 
399  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
400  + k * sizeof(AttrNumber));
401 
402  /* copy the dependency (and keep the indexes into stxkeys) */
403  d->degree = degree;
404  d->nattributes = k;
405  for (i = 0; i < k; i++)
406  d->attributes[i] = data->attnums[dependency[i]];
407 
408  /* initialize the list of dependencies */
409  if (dependencies == NULL)
410  {
411  dependencies
412  = (MVDependencies *) palloc0(sizeof(MVDependencies));
413 
414  dependencies->magic = STATS_DEPS_MAGIC;
415  dependencies->type = STATS_DEPS_TYPE_BASIC;
416  dependencies->ndeps = 0;
417  }
418 
419  dependencies->ndeps++;
420  dependencies = (MVDependencies *) repalloc(dependencies,
421  offsetof(MVDependencies, deps)
422  + dependencies->ndeps * sizeof(MVDependency *));
423 
424  dependencies->deps[dependencies->ndeps - 1] = d;
425  }
426 
427  /*
428  * we're done with variations of k elements, so free the
429  * DependencyGenerator
430  */
432  }
433 
434  MemoryContextDelete(cxt);
435 
436  return dependencies;
437 }
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:204
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:196
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:173
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:221
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
MemoryContextSwitchTo(old_ctx)
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, if(), MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies* statext_dependencies_deserialize ( bytea *  data)

Definition at line 499 of file dependencies.c.

500 {
501  int i;
502  Size min_expected_size;
503  MVDependencies *dependencies;
504  char *tmp;
505 
506  if (data == NULL)
507  return NULL;
508 
510  elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
512 
513  /* read the MVDependencies header */
514  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
515 
516  /* initialize pointer to the data part (skip the varlena header) */
517  tmp = VARDATA_ANY(data);
518 
519  /* read the header fields and perform basic sanity checks */
520  memcpy(&dependencies->magic, tmp, sizeof(uint32));
521  tmp += sizeof(uint32);
522  memcpy(&dependencies->type, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526 
527  if (dependencies->magic != STATS_DEPS_MAGIC)
528  elog(ERROR, "invalid dependency magic %d (expected %d)",
529  dependencies->magic, STATS_DEPS_MAGIC);
530 
531  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
532  elog(ERROR, "invalid dependency type %d (expected %d)",
533  dependencies->type, STATS_DEPS_TYPE_BASIC);
534 
535  if (dependencies->ndeps == 0)
536  elog(ERROR, "invalid zero-length item array in MVDependencies");
537 
538  /* what minimum bytea size do we expect for those parameters */
539  min_expected_size = SizeOfItem(dependencies->ndeps);
540 
541  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
542  elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
543  VARSIZE_ANY_EXHDR(data), min_expected_size);
544 
545  /* allocate space for the MCV items */
546  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
547  + (dependencies->ndeps * sizeof(MVDependency *)));
548 
549  for (i = 0; i < dependencies->ndeps; i++)
550  {
551  double degree;
552  AttrNumber k;
553  MVDependency *d;
554 
555  /* degree of validity */
556  memcpy(&degree, tmp, sizeof(double));
557  tmp += sizeof(double);
558 
559  /* number of attributes */
560  memcpy(&k, tmp, sizeof(AttrNumber));
561  tmp += sizeof(AttrNumber);
562 
563  /* is the number of attributes valid? */
564  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
565 
566  /* now that we know the number of attributes, allocate the dependency */
567  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
568  + (k * sizeof(AttrNumber)));
569 
570  d->degree = degree;
571  d->nattributes = k;
572 
573  /* copy attribute numbers */
574  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
575  tmp += sizeof(AttrNumber) * d->nattributes;
576 
577  dependencies->deps[i] = d;
578 
579  /* still within the bytea */
580  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
581  }
582 
583  /* we should have consumed the whole bytea exactly */
584  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
585 
586  return dependencies;
587 }
unsigned int uint32
Definition: c.h:506
size_t Size
Definition: c.h:605
#define SizeOfHeader
Definition: dependencies.c:38
#define SizeOfItem(natts)
Definition: dependencies.c:41
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References Assert, MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog, ERROR, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_serialize()

bytea* statext_dependencies_serialize ( MVDependencies *  dependencies)

Definition at line 444 of file dependencies.c.

445 {
446  int i;
447  bytea *output;
448  char *tmp;
449  Size len;
450 
451  /* we need to store ndeps, with a number of attributes for each one */
453 
454  /* and also include space for the actual attribute numbers and degrees */
455  for (i = 0; i < dependencies->ndeps; i++)
456  len += SizeOfItem(dependencies->deps[i]->nattributes);
457 
458  output = (bytea *) palloc0(len);
460 
461  tmp = VARDATA(output);
462 
463  /* Store the base struct values (magic, type, ndeps) */
464  memcpy(tmp, &dependencies->magic, sizeof(uint32));
465  tmp += sizeof(uint32);
466  memcpy(tmp, &dependencies->type, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
469  tmp += sizeof(uint32);
470 
471  /* store number of attributes and attribute numbers for each dependency */
472  for (i = 0; i < dependencies->ndeps; i++)
473  {
474  MVDependency *d = dependencies->deps[i];
475 
476  memcpy(tmp, &d->degree, sizeof(double));
477  tmp += sizeof(double);
478 
479  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
480  tmp += sizeof(AttrNumber);
481 
482  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
483  tmp += sizeof(AttrNumber) * d->nattributes;
484 
485  /* protect against overflow */
486  Assert(tmp <= ((char *) output + len));
487  }
488 
489  /* make sure we've produced exactly the right amount of data */
490  Assert(tmp == ((char *) output + len));
491 
492  return output;
493 }
#define VARHDRSZ
Definition: c.h:692
FILE * output
Definition: c.h:687
#define VARDATA(PTR)
Definition: varatt.h:278
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References Assert, MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output, palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

◆ statext_mcv_build()

MCVList* statext_mcv_build ( StatsBuildData *  data,
double  totalrows,
int  stattarget 
)

Definition at line 180 of file mcv.c.

181 {
182  int i,
183  numattrs,
184  numrows,
185  ngroups,
186  nitems;
187  double mincount;
188  SortItem *items;
189  SortItem *groups;
190  MCVList *mcvlist = NULL;
191  MultiSortSupport mss;
192 
193  /* comparator for all the columns */
194  mss = build_mss(data);
195 
196  /* sort the rows */
197  items = build_sorted_items(data, &nitems, mss,
198  data->nattnums, data->attnums);
199 
200  if (!items)
201  return NULL;
202 
203  /* for convenience */
204  numattrs = data->nattnums;
205  numrows = data->numrows;
206 
207  /* transform the sorted rows into groups (sorted by frequency) */
208  groups = build_distinct_groups(nitems, items, mss, &ngroups);
209 
210  /*
211  * The maximum number of MCV items to store, based on the statistics
212  * target we computed for the statistics object (from the target set for
213  * the object itself, attributes and the system default). In any case, we
214  * can't keep more groups than we have available.
215  */
216  nitems = stattarget;
217  if (nitems > ngroups)
218  nitems = ngroups;
219 
220  /*
221  * Decide how many items to keep in the MCV list. We can't use the same
222  * algorithm as per-column MCV lists, because that only considers the
223  * actual group frequency - but we're primarily interested in how the
224  * actual frequency differs from the base frequency (product of simple
225  * per-column frequencies, as if the columns were independent).
226  *
227  * Using the same algorithm might exclude items that are close to the
228  * "average" frequency of the sample. But that does not say whether the
229  * observed frequency is close to the base frequency or not. We also need
230  * to consider unexpectedly uncommon items (again, compared to the base
231  * frequency), and the single-column algorithm does not have to.
232  *
233  * We simply decide how many items to keep by computing the minimum count
234  * using get_mincount_for_mcv_list() and then keep all items that seem to
235  * be more common than that.
236  */
237  mincount = get_mincount_for_mcv_list(numrows, totalrows);
238 
239  /*
240  * Walk the groups until we find the first group with a count below the
241  * mincount threshold (the index of that group is the number of groups we
242  * want to keep).
243  */
244  for (i = 0; i < nitems; i++)
245  {
246  if (groups[i].count < mincount)
247  {
248  nitems = i;
249  break;
250  }
251  }
252 
253  /*
254  * At this point, we know the number of items for the MCV list. There
255  * might be none (for uniform distribution with many groups), and in that
256  * case, there will be no MCV list. Otherwise, construct the MCV list.
257  */
258  if (nitems > 0)
259  {
260  int j;
261  SortItem key;
262  MultiSortSupport tmp;
263 
264  /* frequencies for values in each attribute */
265  SortItem **freqs;
266  int *nfreqs;
267 
268  /* used to search values */
269  tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
270  + sizeof(SortSupportData));
271 
272  /* compute frequencies for values in each column */
273  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
274  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
275 
276  /*
277  * Allocate the MCV list structure, set the global parameters.
278  */
279  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
280  sizeof(MCVItem) * nitems);
281 
282  mcvlist->magic = STATS_MCV_MAGIC;
283  mcvlist->type = STATS_MCV_TYPE_BASIC;
284  mcvlist->ndimensions = numattrs;
285  mcvlist->nitems = nitems;
286 
287  /* store info about data type OIDs */
288  for (i = 0; i < numattrs; i++)
289  mcvlist->types[i] = data->stats[i]->attrtypid;
290 
291  /* Copy the first chunk of groups into the result. */
292  for (i = 0; i < nitems; i++)
293  {
294  /* just point to the proper place in the list */
295  MCVItem *item = &mcvlist->items[i];
296 
297  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
298  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
299 
300  /* copy values for the group */
301  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
302  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
303 
304  /* groups should be sorted by frequency in descending order */
305  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
306 
307  /* group frequency */
308  item->frequency = (double) groups[i].count / numrows;
309 
310  /* base frequency, if the attributes were independent */
311  item->base_frequency = 1.0;
312  for (j = 0; j < numattrs; j++)
313  {
314  SortItem *freq;
315 
316  /* single dimension */
317  tmp->ndims = 1;
318  tmp->ssup[0] = mss->ssup[j];
319 
320  /* fill search key */
321  key.values = &groups[i].values[j];
322  key.isnull = &groups[i].isnull[j];
323 
324  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
325  sizeof(SortItem),
326  multi_sort_compare, tmp);
327 
328  item->base_frequency *= ((double) freq->count) / numrows;
329  }
330  }
331 
332  pfree(nfreqs);
333  pfree(freqs);
334  }
335 
336  pfree(items);
337  pfree(groups);
338 
339  return mcvlist;
340 }
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
for(;;)
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:347
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:148
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:490
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:424
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
struct MCVItem MCVItem
bool * isnull
Definition: statistics.h:82
Datum * values
Definition: statistics.h:83
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93

References Assert, MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, data, for(), MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, items, j, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, nitems, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea * data)

Definition at line 996 of file mcv.c.

997 {
998  int dim,
999  i;
1000  Size expected_size;
1001  MCVList *mcvlist;
1002  char *raw;
1003  char *ptr;
1004  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1005 
1006  int ndims,
1007  nitems;
1008  DimensionInfo *info = NULL;
1009 
1010  /* local allocation buffer (used only for deserialization) */
1011  Datum **map = NULL;
1012 
1013  /* MCV list */
1014  Size mcvlen;
1015 
1016  /* buffer used for the result */
1017  Size datalen;
1018  char *dataptr;
1019  char *valuesptr;
1020  char *isnullptr;
1021 
1022  if (data == NULL)
1023  return NULL;
1024 
1025  /*
1026  * We can't possibly deserialize a MCV list if there's not even a complete
1027  * header. We need an explicit formula here, because we serialize the
1028  * header fields one by one, so we need to ignore struct alignment.
1029  */
1030  if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1031  elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1032  VARSIZE_ANY(data), MinSizeOfMCVList);
1033 
1034  /* read the MCV list header */
1035  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1036 
1037  /* pointer to the data part (skip the varlena header) */
1038  raw = (char *) data;
1039  ptr = VARDATA_ANY(raw);
1040  endptr = (char *) raw + VARSIZE_ANY(data);
1041 
1042  /* get the header and perform further sanity checks */
1043  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1044  ptr += sizeof(uint32);
1045 
1046  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1047  ptr += sizeof(uint32);
1048 
1049  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1050  ptr += sizeof(uint32);
1051 
1052  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1053  ptr += sizeof(AttrNumber);
1054 
1055  if (mcvlist->magic != STATS_MCV_MAGIC)
1056  elog(ERROR, "invalid MCV magic %u (expected %u)",
1057  mcvlist->magic, STATS_MCV_MAGIC);
1058 
1059  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1060  elog(ERROR, "invalid MCV type %u (expected %u)",
1061  mcvlist->type, STATS_MCV_TYPE_BASIC);
1062 
1063  if (mcvlist->ndimensions == 0)
1064  elog(ERROR, "invalid zero-length dimension array in MCVList");
1065  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1066  (mcvlist->ndimensions < 0))
1067  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1068  mcvlist->ndimensions);
1069 
1070  if (mcvlist->nitems == 0)
1071  elog(ERROR, "invalid zero-length item array in MCVList");
1072  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1073  elog(ERROR, "invalid length (%u) item array in MCVList",
1074  mcvlist->nitems);
1075 
1076  nitems = mcvlist->nitems;
1077  ndims = mcvlist->ndimensions;
1078 
1079  /*
1080  * Check amount of data including DimensionInfo for all dimensions and
1081  * also the serialized items (including uint16 indexes). Also, walk
1082  * through the dimension information and add it to the sum.
1083  */
1084  expected_size = SizeOfMCVList(ndims, nitems);
1085 
1086  /*
1087  * Check that we have at least the dimension and info records, along with
1088  * the items. We don't know the size of the serialized values yet. We need
1089  * to do this check first, before accessing the dimension info.
1090  */
1091  if (VARSIZE_ANY(data) < expected_size)
1092  elog(ERROR, "invalid MCV size %zu (expected %zu)",
1093  VARSIZE_ANY(data), expected_size);
1094 
1095  /* Now copy the array of type Oids. */
1096  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1097  ptr += (sizeof(Oid) * ndims);
1098 
1099  /* Now it's safe to access the dimension info. */
1100  info = palloc(ndims * sizeof(DimensionInfo));
1101 
1102  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1103  ptr += (ndims * sizeof(DimensionInfo));
1104 
1105  /* account for the value arrays */
1106  for (dim = 0; dim < ndims; dim++)
1107  {
1108  /*
1109  * XXX I wonder if we can/should rely on asserts here. Maybe those
1110  * checks should be done every time?
1111  */
1112  Assert(info[dim].nvalues >= 0);
1113  Assert(info[dim].nbytes >= 0);
1114 
1115  expected_size += info[dim].nbytes;
1116  }
1117 
1118  /*
1119  * Now we know the total expected MCV size, including all the pieces
1120  * (header, dimension info. items and deduplicated data). So do the final
1121  * check on size.
1122  */
1123  if (VARSIZE_ANY(data) != expected_size)
1124  elog(ERROR, "invalid MCV size %zu (expected %zu)",
1125  VARSIZE_ANY(data), expected_size);
1126 
1127  /*
1128  * We need an array of Datum values for each dimension, so that we can
1129  * easily translate the uint16 indexes later. We also need a top-level
1130  * array of pointers to those per-dimension arrays.
1131  *
1132  * While allocating the arrays for dimensions, compute how much space we
1133  * need for a copy of the by-ref data, as we can't simply point to the
1134  * original values (it might go away).
1135  */
1136  datalen = 0; /* space for by-ref data */
1137  map = (Datum **) palloc(ndims * sizeof(Datum *));
1138 
1139  for (dim = 0; dim < ndims; dim++)
1140  {
1141  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1142 
1143  /* space needed for a copy of data for by-ref types */
1144  datalen += info[dim].nbytes_aligned;
1145  }
1146 
1147  /*
1148  * Now resize the MCV list so that the allocation includes all the data.
1149  *
1150  * Allocate space for a copy of the data, as we can't simply reference the
1151  * serialized data - it's not aligned properly, and it may disappear while
1152  * we're still using the MCV list, e.g. due to catcache release.
1153  *
1154  * We do care about alignment here, because we will allocate all the
1155  * pieces at once, but then use pointers to different parts.
1156  */
1157  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1158 
1159  /* arrays of values and isnull flags for all MCV items */
1160  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1161  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1162 
1163  /* we don't quite need to align this, but it makes some asserts easier */
1164  mcvlen += MAXALIGN(datalen);
1165 
1166  /* now resize the deserialized MCV list, and compute pointers to parts */
1167  mcvlist = repalloc(mcvlist, mcvlen);
1168 
1169  /* pointer to the beginning of values/isnull arrays */
1170  valuesptr = (char *) mcvlist
1171  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1172 
1173  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1174 
1175  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1176 
1177  /*
1178  * Build mapping (index => value) for translating the serialized data into
1179  * the in-memory representation.
1180  */
1181  for (dim = 0; dim < ndims; dim++)
1182  {
1183  /* remember start position in the input array */
1184  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1185 
1186  if (info[dim].typbyval)
1187  {
1188  /* for by-val types we simply copy data into the mapping */
1189  for (i = 0; i < info[dim].nvalues; i++)
1190  {
1191  Datum v = 0;
1192 
1193  memcpy(&v, ptr, info[dim].typlen);
1194  ptr += info[dim].typlen;
1195 
1196  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1197 
1198  /* no under/overflow of input array */
1199  Assert(ptr <= (start + info[dim].nbytes));
1200  }
1201  }
1202  else
1203  {
1204  /* for by-ref types we need to also make a copy of the data */
1205 
1206  /* passed by reference, but fixed length (name, tid, ...) */
1207  if (info[dim].typlen > 0)
1208  {
1209  for (i = 0; i < info[dim].nvalues; i++)
1210  {
1211  memcpy(dataptr, ptr, info[dim].typlen);
1212  ptr += info[dim].typlen;
1213 
1214  /* just point into the array */
1215  map[dim][i] = PointerGetDatum(dataptr);
1216  dataptr += MAXALIGN(info[dim].typlen);
1217  }
1218  }
1219  else if (info[dim].typlen == -1)
1220  {
1221  /* varlena */
1222  for (i = 0; i < info[dim].nvalues; i++)
1223  {
1224  uint32 len;
1225 
1226  /* read the uint32 length */
1227  memcpy(&len, ptr, sizeof(uint32));
1228  ptr += sizeof(uint32);
1229 
1230  /* the length is data-only */
1231  SET_VARSIZE(dataptr, len + VARHDRSZ);
1232  memcpy(VARDATA(dataptr), ptr, len);
1233  ptr += len;
1234 
1235  /* just point into the array */
1236  map[dim][i] = PointerGetDatum(dataptr);
1237 
1238  /* skip to place of the next deserialized value */
1239  dataptr += MAXALIGN(len + VARHDRSZ);
1240  }
1241  }
1242  else if (info[dim].typlen == -2)
1243  {
1244  /* cstring */
1245  for (i = 0; i < info[dim].nvalues; i++)
1246  {
1247  uint32 len;
1248 
1249  memcpy(&len, ptr, sizeof(uint32));
1250  ptr += sizeof(uint32);
1251 
1252  memcpy(dataptr, ptr, len);
1253  ptr += len;
1254 
1255  /* just point into the array */
1256  map[dim][i] = PointerGetDatum(dataptr);
1257  dataptr += MAXALIGN(len);
1258  }
1259  }
1260 
1261  /* no under/overflow of input array */
1262  Assert(ptr <= (start + info[dim].nbytes));
1263 
1264  /* no overflow of the output mcv value */
1265  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1266  }
1267 
1268  /* check we consumed input data for this dimension exactly */
1269  Assert(ptr == (start + info[dim].nbytes));
1270  }
1271 
1272  /* we should have also filled the MCV list exactly */
1273  Assert(dataptr == ((char *) mcvlist + mcvlen));
1274 
1275  /* deserialize the MCV items and translate the indexes to Datums */
1276  for (i = 0; i < nitems; i++)
1277  {
1278  MCVItem *item = &mcvlist->items[i];
1279 
1280  item->values = (Datum *) valuesptr;
1281  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1282 
1283  item->isnull = (bool *) isnullptr;
1284  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1285 
1286  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1287  ptr += sizeof(bool) * ndims;
1288 
1289  memcpy(&item->frequency, ptr, sizeof(double));
1290  ptr += sizeof(double);
1291 
1292  memcpy(&item->base_frequency, ptr, sizeof(double));
1293  ptr += sizeof(double);
1294 
1295  /* finally translate the indexes (for non-NULL only) */
1296  for (dim = 0; dim < ndims; dim++)
1297  {
1298  uint16 index;
1299 
1300  memcpy(&index, ptr, sizeof(uint16));
1301  ptr += sizeof(uint16);
1302 
1303  if (item->isnull[dim])
1304  continue;
1305 
1306  item->values[dim] = map[dim][index];
1307  }
1308 
1309  /* check we're not overflowing the input */
1310  Assert(ptr <= endptr);
1311  }
1312 
1313  /* check that we processed all the data */
1314  Assert(ptr == endptr);
1315 
1316  /* release the buffers used for mapping */
1317  for (dim = 0; dim < ndims; dim++)
1318  pfree(map[dim]);
1319 
1320  pfree(map);
1321 
1322  return mcvlist;
1323 }
unsigned short uint16
Definition: c.h:505
#define MAXALIGN(LEN)
Definition: c.h:811
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
struct DimensionInfo DimensionInfo
#define MinSizeOfMCVList
Definition: mcv.c:59
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:68
unsigned int Oid
Definition: postgres_ext.h:31
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
Definition: type.h:95
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:52

References Assert, MCVItem::base_frequency, data, elog, ERROR, fetch_att(), MCVItem::frequency, i, MCVItem::isnull, MCVList::items, items, len, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, nitems, DimensionInfo::nvalues, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), repalloc(), SET_VARSIZE, SizeOfMCVList, start, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList *  mcvlist,
VacAttrStats **  stats 
)

Definition at line 621 of file mcv.c.

622 {
623  int i;
624  int dim;
625  int ndims = mcvlist->ndimensions;
626 
627  SortSupport ssup;
628  DimensionInfo *info;
629 
630  Size total_length;
631 
632  /* serialized items (indexes into arrays, etc.) */
633  bytea *raw;
634  char *ptr;
635  char *endptr PG_USED_FOR_ASSERTS_ONLY;
636 
637  /* values per dimension (and number of non-NULL values) */
638  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
639  int *counts = (int *) palloc0(sizeof(int) * ndims);
640 
641  /*
642  * We'll include some rudimentary information about the attribute types
643  * (length, by-val flag), so that we don't have to look them up while
644  * deserializing the MCV list (we already have the type OID in the
645  * header). This is safe because when changing the type of the attribute
646  * the statistics gets dropped automatically. We need to store the info
647  * about the arrays of deduplicated values anyway.
648  */
649  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
650 
651  /* sort support data for all attributes included in the MCV list */
652  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
653 
654  /* collect and deduplicate values for each dimension (attribute) */
655  for (dim = 0; dim < ndims; dim++)
656  {
657  int ndistinct;
658  TypeCacheEntry *typentry;
659 
660  /*
661  * Lookup the LT operator (can't get it from stats extra_data, as we
662  * don't know how to interpret that - scalar vs. array etc.).
663  */
664  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
665 
666  /* copy important info about the data type (length, by-value) */
667  info[dim].typlen = stats[dim]->attrtype->typlen;
668  info[dim].typbyval = stats[dim]->attrtype->typbyval;
669 
670  /* allocate space for values in the attribute and collect them */
671  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
672 
673  for (i = 0; i < mcvlist->nitems; i++)
674  {
675  /* skip NULL values - we don't need to deduplicate those */
676  if (mcvlist->items[i].isnull[dim])
677  continue;
678 
679  /* append the value at the end */
680  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
681  counts[dim] += 1;
682  }
683 
684  /* if there are just NULL values in this dimension, we're done */
685  if (counts[dim] == 0)
686  continue;
687 
688  /* sort and deduplicate the data */
689  ssup[dim].ssup_cxt = CurrentMemoryContext;
690  ssup[dim].ssup_collation = stats[dim]->attrcollid;
691  ssup[dim].ssup_nulls_first = false;
692 
693  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
694 
695  qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
696  compare_scalars_simple, &ssup[dim]);
697 
698  /*
699  * Walk through the array and eliminate duplicate values, but keep the
700  * ordering (so that we can do a binary search later). We know there's
701  * at least one item as (counts[dim] != 0), so we can skip the first
702  * element.
703  */
704  ndistinct = 1; /* number of distinct values */
705  for (i = 1; i < counts[dim]; i++)
706  {
707  /* expect sorted array */
708  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
709 
710  /* if the value is the same as the previous one, we can skip it */
711  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
712  continue;
713 
714  values[dim][ndistinct] = values[dim][i];
715  ndistinct += 1;
716  }
717 
718  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
719  Assert(ndistinct <= PG_UINT16_MAX);
720 
721  /*
722  * Store additional info about the attribute - number of deduplicated
723  * values, and also size of the serialized data. For fixed-length data
724  * types this is trivial to compute, for varwidth types we need to
725  * actually walk the array and sum the sizes.
726  */
727  info[dim].nvalues = ndistinct;
728 
729  if (info[dim].typbyval) /* by-value data types */
730  {
731  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
732 
733  /*
734  * We copy the data into the MCV item during deserialization, so
735  * we don't need to allocate any extra space.
736  */
737  info[dim].nbytes_aligned = 0;
738  }
739  else if (info[dim].typlen > 0) /* fixed-length by-ref */
740  {
741  /*
742  * We don't care about alignment in the serialized data, so we
743  * pack the data as much as possible. But we also track how much
744  * data will be needed after deserialization, and in that case we
745  * need to account for alignment of each item.
746  *
747  * Note: As the items are fixed-length, we could easily compute
748  * this during deserialization, but we do it here anyway.
749  */
750  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
751  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
752  }
753  else if (info[dim].typlen == -1) /* varlena */
754  {
755  info[dim].nbytes = 0;
756  info[dim].nbytes_aligned = 0;
757  for (i = 0; i < info[dim].nvalues; i++)
758  {
759  Size len;
760 
761  /*
762  * For varlena values, we detoast the values and store the
763  * length and data separately. We don't bother with alignment
764  * here, which means that during deserialization we need to
765  * copy the fields and only access the copies.
766  */
767  values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
768 
769  /* serialized length (uint32 length + data) */
770  len = VARSIZE_ANY_EXHDR(values[dim][i]);
771  info[dim].nbytes += sizeof(uint32); /* length */
772  info[dim].nbytes += len; /* value (no header) */
773 
774  /*
775  * During deserialization we'll build regular varlena values
776  * with full headers, and we need to align them properly.
777  */
778  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
779  }
780  }
781  else if (info[dim].typlen == -2) /* cstring */
782  {
783  info[dim].nbytes = 0;
784  info[dim].nbytes_aligned = 0;
785  for (i = 0; i < info[dim].nvalues; i++)
786  {
787  Size len;
788 
789  /*
790  * cstring is handled similar to varlena - first we store the
791  * length as uint32 and then the data. We don't care about
792  * alignment, which means that during deserialization we need
793  * to copy the fields and only access the copies.
794  */
795 
796  /* c-strings include terminator, so +1 byte */
797  len = strlen(DatumGetCString(values[dim][i])) + 1;
798  info[dim].nbytes += sizeof(uint32); /* length */
799  info[dim].nbytes += len; /* value */
800 
801  /* space needed for properly aligned deserialized copies */
802  info[dim].nbytes_aligned += MAXALIGN(len);
803  }
804  }
805 
806  /* we know (count>0) so there must be some data */
807  Assert(info[dim].nbytes > 0);
808  }
809 
810  /*
811  * Now we can finally compute how much space we'll actually need for the
812  * whole serialized MCV list (varlena header, MCV header, dimension info
813  * for each attribute, deduplicated values and items).
814  */
815  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
816  + sizeof(AttrNumber) /* ndimensions */
817  + (ndims * sizeof(Oid)); /* attribute types */
818 
819  /* dimension info */
820  total_length += ndims * sizeof(DimensionInfo);
821 
822  /* add space for the arrays of deduplicated values */
823  for (i = 0; i < ndims; i++)
824  total_length += info[i].nbytes;
825 
826  /*
827  * And finally account for the items (those are fixed-length, thanks to
828  * replacing values with uint16 indexes into the deduplicated arrays).
829  */
830  total_length += mcvlist->nitems * ITEM_SIZE(dim);
831 
832  /*
833  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
834  * we set them to zero to make the result more compressible).
835  */
836  raw = (bytea *) palloc0(VARHDRSZ + total_length);
837  SET_VARSIZE(raw, VARHDRSZ + total_length);
838 
839  ptr = VARDATA(raw);
840  endptr = ptr + total_length;
841 
842  /* copy the MCV list header fields, one by one */
843  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
844  ptr += sizeof(uint32);
845 
846  memcpy(ptr, &mcvlist->type, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
850  ptr += sizeof(uint32);
851 
852  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
853  ptr += sizeof(AttrNumber);
854 
855  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
856  ptr += (sizeof(Oid) * ndims);
857 
858  /* store information about the attributes (data amounts, ...) */
859  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
860  ptr += sizeof(DimensionInfo) * ndims;
861 
862  /* Copy the deduplicated values for all attributes to the output. */
863  for (dim = 0; dim < ndims; dim++)
864  {
865  /* remember the starting point for Asserts later */
866  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
867 
868  for (i = 0; i < info[dim].nvalues; i++)
869  {
870  Datum value = values[dim][i];
871 
872  if (info[dim].typbyval) /* passed by value */
873  {
874  Datum tmp;
875 
876  /*
877  * For byval types, we need to copy just the significant bytes
878  * - we can't use memcpy directly, as that assumes
879  * little-endian behavior. store_att_byval does almost what
880  * we need, but it requires a properly aligned buffer - the
881  * output buffer does not guarantee that. So we simply use a
882  * local Datum variable (which guarantees proper alignment),
883  * and then copy the value from it.
884  */
885  store_att_byval(&tmp, value, info[dim].typlen);
886 
887  memcpy(ptr, &tmp, info[dim].typlen);
888  ptr += info[dim].typlen;
889  }
890  else if (info[dim].typlen > 0) /* passed by reference */
891  {
892  /* no special alignment needed, treated as char array */
893  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
894  ptr += info[dim].typlen;
895  }
896  else if (info[dim].typlen == -1) /* varlena */
897  {
898  uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
899 
900  /* copy the length */
901  memcpy(ptr, &len, sizeof(uint32));
902  ptr += sizeof(uint32);
903 
904  /* data from the varlena value (without the header) */
905  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
906  ptr += len;
907  }
908  else if (info[dim].typlen == -2) /* cstring */
909  {
910  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
911 
912  /* copy the length */
913  memcpy(ptr, &len, sizeof(uint32));
914  ptr += sizeof(uint32);
915 
916  /* value */
917  memcpy(ptr, DatumGetCString(value), len);
918  ptr += len;
919  }
920 
921  /* no underflows or overflows */
922  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
923  }
924 
925  /* we should get exactly nbytes of data for this dimension */
926  Assert((ptr - start) == info[dim].nbytes);
927  }
928 
929  /* Serialize the items, with uint16 indexes instead of the values. */
930  for (i = 0; i < mcvlist->nitems; i++)
931  {
932  MCVItem *mcvitem = &mcvlist->items[i];
933 
934  /* don't write beyond the allocated space */
935  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
936 
937  /* copy NULL and frequency flags into the serialized MCV */
938  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
939  ptr += sizeof(bool) * ndims;
940 
941  memcpy(ptr, &mcvitem->frequency, sizeof(double));
942  ptr += sizeof(double);
943 
944  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
945  ptr += sizeof(double);
946 
947  /* store the indexes last */
948  for (dim = 0; dim < ndims; dim++)
949  {
950  uint16 index = 0;
951  Datum *value;
952 
953  /* do the lookup only for non-NULL values */
954  if (!mcvitem->isnull[dim])
955  {
956  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
957  info[dim].nvalues, sizeof(Datum),
958  compare_scalars_simple, &ssup[dim]);
959 
960  Assert(value != NULL); /* serialization or deduplication
961  * error */
962 
963  /* compute index within the deduplicated array */
964  index = (uint16) (value - values[dim]);
965 
966  /* check the index is within expected bounds */
967  Assert(index < info[dim].nvalues);
968  }
969 
970  /* copy the index into the serialized MCV */
971  memcpy(ptr, &index, sizeof(uint16));
972  ptr += sizeof(uint16);
973  }
974 
975  /* make sure we don't overflow the allocated value */
976  Assert(ptr <= endptr);
977  }
978 
979  /* at this point we expect to match the total_length exactly */
980  Assert(ptr == endptr);
981 
982  pfree(values);
983  pfree(counts);
984 
985  return raw;
986 }
#define PG_UINT16_MAX
Definition: c.h:587
int compare_scalars_simple(const void *a, const void *b, void *arg)
#define ITEM_SIZE(ndims)
Definition: mcv.c:53
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Form_pg_type attrtype
Definition: vacuum.h:128
Oid attrcollid
Definition: vacuum.h:129
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition: tupmacs.h:183
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:346
#define TYPECACHE_LT_OPR
Definition: typcache.h:138

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString(), DatumGetPointer(), MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), PrepareSortSupportFromOrderingOp(), qsort_interruptible(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, start, store_att_byval(), DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, values, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

◆ statext_ndistinct_build()

MVNDistinct* statext_ndistinct_build ( double  totalrows,
StatsBuildData * data 
)

Definition at line 88 of file mvdistinct.c.

89 {
90  MVNDistinct *result;
91  int k;
92  int itemcnt;
93  int numattrs = data->nattnums;
94  int numcombs = num_combinations(numattrs);
95 
96  result = palloc(offsetof(MVNDistinct, items) +
97  numcombs * sizeof(MVNDistinctItem));
98  result->magic = STATS_NDISTINCT_MAGIC;
99  result->type = STATS_NDISTINCT_TYPE_BASIC;
100  result->nitems = numcombs;
101 
102  itemcnt = 0;
103  for (k = 2; k <= numattrs; k++)
104  {
105  int *combination;
106  CombinationGenerator *generator;
107 
108  /* generate combinations of K out of N elements */
109  generator = generator_init(numattrs, k);
110 
111  while ((combination = generator_next(generator)))
112  {
113  MVNDistinctItem *item = &result->items[itemcnt];
114  int j;
115 
116  item->attributes = palloc(sizeof(AttrNumber) * k);
117  item->nattributes = k;
118 
119  /* translate the indexes to attnums */
120  for (j = 0; j < k; j++)
121  {
122  item->attributes[j] = data->attnums[combination[j]];
123 
124  Assert(AttributeNumberIsValid(item->attributes[j]));
125  }
126 
127  item->ndistinct =
128  ndistinct_for_combination(totalrows, data, k, combination);
129 
130  itemcnt++;
131  Assert(itemcnt <= result->nitems);
132  }
133 
134  generator_free(generator);
135  }
136 
137  /* must consume exactly the whole output array */
138  Assert(itemcnt == result->nitems);
139 
140  return result;
141 }
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition: mvdistinct.c:425
static int num_combinations(int n)
Definition: mvdistinct.c:575
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:642
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:589
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:627
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
uint32 type
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39

References Assert, AttributeNumberIsValid, MVNDistinctItem::attributes, data, generator_free(), generator_init(), generator_next(), MVNDistinct::items, items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct* statext_ndistinct_deserialize ( bytea * data)

Definition at line 250 of file mvdistinct.c.

251 {
252  int i;
253  Size minimum_size;
254  MVNDistinct ndist;
255  MVNDistinct *ndistinct;
256  char *tmp;
257 
258  if (data == NULL)
259  return NULL;
260 
261  /* we expect at least the basic fields of MVNDistinct struct */
262  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
263  elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
264  VARSIZE_ANY_EXHDR(data), SizeOfHeader);
265 
266  /* initialize pointer to the data part (skip the varlena header) */
267  tmp = VARDATA_ANY(data);
268 
269  /* read the header fields and perform basic sanity checks */
270  memcpy(&ndist.magic, tmp, sizeof(uint32));
271  tmp += sizeof(uint32);
272  memcpy(&ndist.type, tmp, sizeof(uint32));
273  tmp += sizeof(uint32);
274  memcpy(&ndist.nitems, tmp, sizeof(uint32));
275  tmp += sizeof(uint32);
276 
277  if (ndist.magic != STATS_NDISTINCT_MAGIC)
278  elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
279  ndist.magic, STATS_NDISTINCT_MAGIC);
280  if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
281  elog(ERROR, "invalid ndistinct type %d (expected %d)",
282  ndist.type, STATS_NDISTINCT_TYPE_BASIC);
283  if (ndist.nitems == 0)
284  elog(ERROR, "invalid zero-length item array in MVNDistinct");
285 
286  /* what minimum bytea size do we expect for those parameters */
287  minimum_size = MinSizeOfItems(ndist.nitems);
288  if (VARSIZE_ANY_EXHDR(data) < minimum_size)
289  elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
290  VARSIZE_ANY_EXHDR(data), minimum_size);
291 
292  /*
293  * Allocate space for the ndistinct items (no space for each item's
294  * attnos: those live in bitmapsets allocated separately)
295  */
296  ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
297  (ndist.nitems * sizeof(MVNDistinctItem)));
298  ndistinct->magic = ndist.magic;
299  ndistinct->type = ndist.type;
300  ndistinct->nitems = ndist.nitems;
301 
302  for (i = 0; i < ndistinct->nitems; i++)
303  {
304  MVNDistinctItem *item = &ndistinct->items[i];
305 
306  /* ndistinct value */
307  memcpy(&item->ndistinct, tmp, sizeof(double));
308  tmp += sizeof(double);
309 
310  /* number of attributes */
311  memcpy(&item->nattributes, tmp, sizeof(int));
312  tmp += sizeof(int);
313  Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
314 
315  item->attributes
316  = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
317 
318  memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
319  tmp += sizeof(AttrNumber) * item->nattributes;
320 
321  /* still within the bytea */
322  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
323  }
324 
325  /* we should have consumed the whole bytea exactly */
326  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
327 
328  return ndistinct;
329 }
#define SizeOfHeader
Definition: mvdistinct.c:45
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:55

References Assert, MVNDistinctItem::attributes, data, elog, ERROR, i, MVNDistinct::items, items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_serialize()

bytea* statext_ndistinct_serialize ( MVNDistinct * ndistinct)

Definition at line 179 of file mvdistinct.c.

180 {
181  int i;
182  bytea *output;
183  char *tmp;
184  Size len;
185 
186  Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
187  Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
188 
189  /*
190  * Base size is size of scalar fields in the struct, plus one base struct
191  * for each item, including number of items for each.
192  */
193  len = VARHDRSZ + SizeOfHeader;
194 
195  /* and also include space for the actual attribute numbers */
196  for (i = 0; i < ndistinct->nitems; i++)
197  {
198  int nmembers;
199 
200  nmembers = ndistinct->items[i].nattributes;
201  Assert(nmembers >= 2);
202 
203  len += SizeOfItem(nmembers);
204  }
205 
206  output = (bytea *) palloc(len);
207  SET_VARSIZE(output, len);
208 
209  tmp = VARDATA(output);
210 
211  /* Store the base struct values (magic, type, nitems) */
212  memcpy(tmp, &ndistinct->magic, sizeof(uint32));
213  tmp += sizeof(uint32);
214  memcpy(tmp, &ndistinct->type, sizeof(uint32));
215  tmp += sizeof(uint32);
216  memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
217  tmp += sizeof(uint32);
218 
219  /*
220  * store number of attributes and attribute numbers for each entry
221  */
222  for (i = 0; i < ndistinct->nitems; i++)
223  {
224  MVNDistinctItem item = ndistinct->items[i];
225  int nmembers = item.nattributes;
226 
227  memcpy(tmp, &item.ndistinct, sizeof(double));
228  tmp += sizeof(double);
229  memcpy(tmp, &nmembers, sizeof(int));
230  tmp += sizeof(int);
231 
232  memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
233  tmp += nmembers * sizeof(AttrNumber);
234 
235  /* protect against overflows */
236  Assert(tmp <= ((char *) output + len));
237  }
238 
239  /* check we used exactly the expected space */
240  Assert(tmp == ((char *) output + len));
241 
242  return output;
243 }
#define SizeOfItem(natts)
Definition: mvdistinct.c:48

References Assert, MVNDistinctItem::attributes, i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().