PostgreSQL Source Code  git master
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 
struct  StatsBuildData
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportDataMultiSortSupport
 
typedef struct SortItem SortItem
 
typedef struct StatsBuildData StatsBuildData
 

Functions

MVNDistinctstatext_ndistinct_build (double totalrows, StatsBuildData *data)
 
byteastatext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinctstatext_ndistinct_deserialize (bytea *data)
 
MVDependenciesstatext_dependencies_build (StatsBuildData *data)
 
byteastatext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependenciesstatext_dependencies_deserialize (bytea *data)
 
MCVListstatext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
byteastatext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVListstatext_mcv_deserialize (bytea *data)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
AttrNumberbuild_attnums_array (Bitmapset *attrs, int nexprs, int *numattrs)
 
SortItembuild_sorted_items (StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_opclause_args (List *args, Node **exprp, Const **cstp, bool *expronleftp)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

Definition at line 51 of file extended_stats_internal.h.

◆ MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

◆ StatsBuildData

Function Documentation

◆ build_attnums_array()

AttrNumber* build_attnums_array ( Bitmapset attrs,
int  nexprs,
int *  numattrs 
)

Definition at line 942 of file extended_stats.c.

943 {
944  int i,
945  j;
946  AttrNumber *attnums;
947  int num = bms_num_members(attrs);
948 
949  if (numattrs)
950  *numattrs = num;
951 
952  /* build attnums from the bitmapset */
953  attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
954  i = 0;
955  j = -1;
956  while ((j = bms_next_member(attrs, j)) >= 0)
957  {
958  int attnum = (j - nexprs);
959 
960  /*
961  * Make sure the bitmap contains only user-defined attributes. As
962  * bitmaps can't contain negative values, this can be violated in two
963  * ways. Firstly, the bitmap might contain 0 as a member, and secondly
964  * the integer value might be larger than MaxAttrNumber.
965  */
968  Assert(attnum >= (-nexprs));
969 
970  attnums[i++] = (AttrNumber) attnum;
971 
972  /* protect against overflows */
973  Assert(i <= num);
974  }
975 
976  return attnums;
977 }
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define MaxAttrNumber
Definition: attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1106
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:685
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc(Size size)
Definition: mcxt.c:1226
int16 attnum
Definition: pg_attribute.h:74

References Assert(), attnum, AttributeNumberIsValid, bms_next_member(), bms_num_members(), i, j, MaxAttrNumber, and palloc().

◆ build_sorted_items()

SortItem* build_sorted_items ( StatsBuildData data,
int *  nitems,
MultiSortSupport  mss,
int  numattrs,
AttrNumber attnums 
)

Definition at line 987 of file extended_stats.c.

990 {
991  int i,
992  j,
993  len,
994  nrows;
995  int nvalues = data->numrows * numattrs;
996 
997  SortItem *items;
998  Datum *values;
999  bool *isnull;
1000  char *ptr;
1001  int *typlen;
1002 
1003  /* Compute the total amount of memory we need (both items and values). */
1004  len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
1005 
1006  /* Allocate the memory and split it into the pieces. */
1007  ptr = palloc0(len);
1008 
1009  /* items to sort */
1010  items = (SortItem *) ptr;
1011  ptr += data->numrows * sizeof(SortItem);
1012 
1013  /* values and null flags */
1014  values = (Datum *) ptr;
1015  ptr += nvalues * sizeof(Datum);
1016 
1017  isnull = (bool *) ptr;
1018  ptr += nvalues * sizeof(bool);
1019 
1020  /* make sure we consumed the whole buffer exactly */
1021  Assert((ptr - (char *) items) == len);
1022 
1023  /* fix the pointers to Datum and bool arrays */
1024  nrows = 0;
1025  for (i = 0; i < data->numrows; i++)
1026  {
1027  items[nrows].values = &values[nrows * numattrs];
1028  items[nrows].isnull = &isnull[nrows * numattrs];
1029 
1030  nrows++;
1031  }
1032 
1033  /* build a local cache of typlen for all attributes */
1034  typlen = (int *) palloc(sizeof(int) * data->nattnums);
1035  for (i = 0; i < data->nattnums; i++)
1036  typlen[i] = get_typlen(data->stats[i]->attrtypid);
1037 
1038  nrows = 0;
1039  for (i = 0; i < data->numrows; i++)
1040  {
1041  bool toowide = false;
1042 
1043  /* load the values/null flags from sample rows */
1044  for (j = 0; j < numattrs; j++)
1045  {
1046  Datum value;
1047  bool isnull;
1048  int attlen;
1049  AttrNumber attnum = attnums[j];
1050 
1051  int idx;
1052 
1053  /* match attnum to the pre-calculated data */
1054  for (idx = 0; idx < data->nattnums; idx++)
1055  {
1056  if (attnum == data->attnums[idx])
1057  break;
1058  }
1059 
1060  Assert(idx < data->nattnums);
1061 
1062  value = data->values[idx][i];
1063  isnull = data->nulls[idx][i];
1064  attlen = typlen[idx];
1065 
1066  /*
1067  * If this is a varlena value, check if it's too wide and if yes
1068  * then skip the whole item. Otherwise detoast the value.
1069  *
1070  * XXX It may happen that we've already detoasted some preceding
1071  * values for the current item. We don't bother to cleanup those
1072  * on the assumption that those are small (below WIDTH_THRESHOLD)
1073  * and will be discarded at the end of analyze.
1074  */
1075  if ((!isnull) && (attlen == -1))
1076  {
1078  {
1079  toowide = true;
1080  break;
1081  }
1082 
1084  }
1085 
1086  items[nrows].values[j] = value;
1087  items[nrows].isnull[j] = isnull;
1088  }
1089 
1090  if (toowide)
1091  continue;
1092 
1093  nrows++;
1094  }
1095 
1096  /* store the actual number of items (ignoring the too-wide ones) */
1097  *nitems = nrows;
1098 
1099  /* all items were too wide */
1100  if (nrows == 0)
1101  {
1102  /* everything is allocated as a single chunk */
1103  pfree(items);
1104  return NULL;
1105  }
1106 
1107  /* do the sort, using the multi-sort */
1108  qsort_interruptible(items, nrows, sizeof(SortItem),
1109  multi_sort_compare, mss);
1110 
1111  return items;
1112 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
static Datum values[MAXATTR]
Definition: bootstrap.c:156
unsigned char bool
Definition: c.h:445
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
#define WIDTH_THRESHOLD
int multi_sort_compare(const void *a, const void *b, void *arg)
struct SortItem SortItem
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define nitems(x)
Definition: indent.h:31
static struct @148 value
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2179
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc0(Size size)
Definition: mcxt.c:1257
int16 attlen
Definition: pg_attribute.h:59
const void size_t len
const void * data
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64

References Assert(), attlen, attnum, data, get_typlen(), i, idx(), SortItem::isnull, j, len, multi_sort_compare(), nitems, palloc(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum(), qsort_interruptible(), toast_raw_datum_size(), value, values, SortItem::values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)

Definition at line 928 of file extended_stats.c.

929 {
930  return ApplySortComparator(a, false, b, false, ssup);
931 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

References a, ApplySortComparator(), and b.

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

◆ compare_scalars_simple()

int compare_scalars_simple ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 920 of file extended_stats.c.

921 {
922  return compare_datums_simple(*(Datum *) a,
923  *(Datum *) b,
924  (SortSupport) arg);
925 }
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
void * arg

References a, arg, b, and compare_datums_simple().

Referenced by statext_mcv_serialize().

◆ examine_opclause_args()

bool examine_opclause_args ( List args,
Node **  exprp,
Const **  cstp,
bool expronleftp 
)

Definition at line 2056 of file extended_stats.c.

2058 {
2059  Node *expr;
2060  Const *cst;
2061  bool expronleft;
2062  Node *leftop,
2063  *rightop;
2064 
2065  /* enforced by statext_is_compatible_clause_internal */
2066  Assert(list_length(args) == 2);
2067 
2068  leftop = linitial(args);
2069  rightop = lsecond(args);
2070 
2071  /* strip RelabelType from either side of the expression */
2072  if (IsA(leftop, RelabelType))
2073  leftop = (Node *) ((RelabelType *) leftop)->arg;
2074 
2075  if (IsA(rightop, RelabelType))
2076  rightop = (Node *) ((RelabelType *) rightop)->arg;
2077 
2078  if (IsA(rightop, Const))
2079  {
2080  expr = (Node *) leftop;
2081  cst = (Const *) rightop;
2082  expronleft = true;
2083  }
2084  else if (IsA(leftop, Const))
2085  {
2086  expr = (Node *) rightop;
2087  cst = (Const *) leftop;
2088  expronleft = false;
2089  }
2090  else
2091  return false;
2092 
2093  /* return pointers to the extracted parts if requested */
2094  if (exprp)
2095  *exprp = expr;
2096 
2097  if (cstp)
2098  *cstp = cst;
2099 
2100  if (expronleftp)
2101  *expronleftp = expronleft;
2102 
2103  return true;
2104 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:179
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
Definition: nodes.h:129

References arg, generate_unaccent_rules::args, Assert(), IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo root,
StatisticExtInfo stat,
MCVList mcv,
Node clause,
bool **  or_matches,
Selectivity basesel,
Selectivity overlap_mcvsel,
Selectivity overlap_basesel,
Selectivity totalsel 
)

Definition at line 2130 of file mcv.c.

2134 {
2135  Selectivity s = 0.0;
2136  bool *new_matches;
2137  int i;
2138 
2139  /* build the OR-matches bitmap, if not built already */
2140  if (*or_matches == NULL)
2141  *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2142 
2143  /* build the match bitmap for the new clause */
2144  new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2145  stat->exprs, mcv, false);
2146 
2147  /*
2148  * Sum the frequencies for all the MCV items matching this clause and also
2149  * those matching the overlap between this clause and any of the preceding
2150  * clauses as described above.
2151  */
2152  *basesel = 0.0;
2153  *overlap_mcvsel = 0.0;
2154  *overlap_basesel = 0.0;
2155  *totalsel = 0.0;
2156  for (i = 0; i < mcv->nitems; i++)
2157  {
2158  *totalsel += mcv->items[i].frequency;
2159 
2160  if (new_matches[i])
2161  {
2162  s += mcv->items[i].frequency;
2163  *basesel += mcv->items[i].base_frequency;
2164 
2165  if ((*or_matches)[i])
2166  {
2167  *overlap_mcvsel += mcv->items[i].frequency;
2168  *overlap_basesel += mcv->items[i].base_frequency;
2169  }
2170  }
2171 
2172  /* update the OR-matches bitmap for the next clause */
2173  (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2174  }
2175 
2176  pfree(new_matches);
2177 
2178  return s;
2179 }
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1603
double Selectivity
Definition: nodes.h:261
#define list_make1(x1)
Definition: pg_list.h:212
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), and pfree().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo root,
StatisticExtInfo stat,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Selectivity basesel,
Selectivity totalsel 
)

Definition at line 2052 of file mcv.c.

2057 {
2058  int i;
2059  MCVList *mcv;
2060  Selectivity s = 0.0;
2061  RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2062 
2063  /* match/mismatch bitmap for each MCV item */
2064  bool *matches = NULL;
2065 
2066  /* load the MCV list stored in the statistics object */
2067  mcv = statext_mcv_load(stat->statOid, rte->inh);
2068 
2069  /* build a match bitmap for the clauses */
2070  matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2071  mcv, false);
2072 
2073  /* sum frequencies for all the matching MCV items */
2074  *basesel = 0.0;
2075  *totalsel = 0.0;
2076  for (i = 0; i < mcv->nitems; i++)
2077  {
2078  *totalsel += mcv->items[i].frequency;
2079 
2080  if (matches[i] != false)
2081  {
2082  *basesel += mcv->items[i].base_frequency;
2083  s += mcv->items[i].frequency;
2084  }
2085  }
2086 
2087  return s;
2088 }
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition: mcv.c:562
Index relid
Definition: pathnodes.h:903

References MCVItem::base_frequency, MCVItem::frequency, i, RangeTblEntry::inh, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, RelOptInfo::relid, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)

Definition at line 2010 of file mcv.c.

2014 {
2015  Selectivity other_sel;
2016  Selectivity sel;
2017 
2018  /* estimated selectivity of values not covered by MCV matches */
2019  other_sel = simple_sel - mcv_basesel;
2020  CLAMP_PROBABILITY(other_sel);
2021 
2022  /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2023  if (other_sel > 1.0 - mcv_totalsel)
2024  other_sel = 1.0 - mcv_totalsel;
2025 
2026  /* overall selectivity is the sum of the MCV and non-MCV parts */
2027  sel = mcv_sel + other_sel;
2028  CLAMP_PROBABILITY(sel);
2029 
2030  return sel;
2031 }
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)

Definition at line 852 of file extended_stats.c.

854 {
855  SortSupport ssup = &mss->ssup[sortdim];
856 
858  ssup->ssup_collation = collation;
859  ssup->ssup_nulls_first = false;
860 
862 }
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:370
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:135
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66

References CurrentMemoryContext, oper(), PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ multi_sort_compare()

int multi_sort_compare ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 866 of file extended_stats.c.

867 {
869  SortItem *ia = (SortItem *) a;
870  SortItem *ib = (SortItem *) b;
871  int i;
872 
873  for (i = 0; i < mss->ndims; i++)
874  {
875  int compare;
876 
878  ib->values[i], ib->isnull[i],
879  &mss->ssup[i]);
880 
881  if (compare != 0)
882  return compare;
883  }
884 
885  /* equal by default */
886  return 0;
887 }
MultiSortSupportData * MultiSortSupport
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145

References a, ApplySortComparator(), arg, b, compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem a,
const SortItem b,
MultiSortSupport  mss 
)

Definition at line 891 of file extended_stats.c.

893 {
894  return ApplySortComparator(a->values[dim], a->isnull[dim],
895  b->values[dim], b->isnull[dim],
896  &mss->ssup[dim]);
897 }

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem a,
const SortItem b,
MultiSortSupport  mss 
)

Definition at line 900 of file extended_stats.c.

903 {
904  int dim;
905 
906  for (dim = start; dim <= end; dim++)
907  {
908  int r = ApplySortComparator(a->values[dim], a->isnull[dim],
909  b->values[dim], b->isnull[dim],
910  &mss->ssup[dim]);
911 
912  if (r != 0)
913  return r;
914  }
915 
916  return 0;
917 }

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)

Definition at line 833 of file extended_stats.c.

834 {
835  MultiSortSupport mss;
836 
837  Assert(ndims >= 2);
838 
839  mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
840  + sizeof(SortSupportData) * ndims);
841 
842  mss->ndims = ndims;
843 
844  return mss;
845 }
struct SortSupportData SortSupportData

References Assert(), MultiSortSupportData::ndims, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ statext_dependencies_build()

MVDependencies* statext_dependencies_build ( StatsBuildData data)

Definition at line 350 of file dependencies.c.

351 {
352  int i,
353  k;
354 
355  /* result */
356  MVDependencies *dependencies = NULL;
357  MemoryContext cxt;
358 
359  Assert(data->nattnums >= 2);
360 
361  /* tracks memory allocated by dependency_degree calls */
363  "dependency_degree cxt",
365 
366  /*
367  * We'll try build functional dependencies starting from the smallest ones
368  * covering just 2 columns, to the largest ones, covering all columns
369  * included in the statistics object. We start from the smallest ones
370  * because we want to be able to skip already implied ones.
371  */
372  for (k = 2; k <= data->nattnums; k++)
373  {
374  AttrNumber *dependency; /* array with k elements */
375 
376  /* prepare a DependencyGenerator of variation */
378 
379  /* generate all possible variations of k values (out of n) */
380  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
381  {
382  double degree;
383  MVDependency *d;
384  MemoryContext oldcxt;
385 
386  /* release memory used by dependency degree calculation */
387  oldcxt = MemoryContextSwitchTo(cxt);
388 
389  /* compute how valid the dependency seems */
390  degree = dependency_degree(data, k, dependency);
391 
392  MemoryContextSwitchTo(oldcxt);
393  MemoryContextReset(cxt);
394 
395  /*
396  * if the dependency seems entirely invalid, don't store it
397  */
398  if (degree == 0.0)
399  continue;
400 
401  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
402  + k * sizeof(AttrNumber));
403 
404  /* copy the dependency (and keep the indexes into stxkeys) */
405  d->degree = degree;
406  d->nattributes = k;
407  for (i = 0; i < k; i++)
408  d->attributes[i] = data->attnums[dependency[i]];
409 
410  /* initialize the list of dependencies */
411  if (dependencies == NULL)
412  {
413  dependencies
414  = (MVDependencies *) palloc0(sizeof(MVDependencies));
415 
416  dependencies->magic = STATS_DEPS_MAGIC;
417  dependencies->type = STATS_DEPS_TYPE_BASIC;
418  dependencies->ndeps = 0;
419  }
420 
421  dependencies->ndeps++;
422  dependencies = (MVDependencies *) repalloc(dependencies,
423  offsetof(MVDependencies, deps)
424  + dependencies->ndeps * sizeof(MVDependency *));
425 
426  dependencies->deps[dependencies->ndeps - 1] = d;
427  }
428 
429  /*
430  * we're done with variations of k elements, so free the
431  * DependencyGenerator
432  */
434  }
435 
436  MemoryContextDelete(cxt);
437 
438  return dependencies;
439 }
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:206
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:198
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:175
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:223
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:330
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1476
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:403
#define AllocSetContextCreate
Definition: memutils.h:126
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:150
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, if(), MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies* statext_dependencies_deserialize ( bytea data)

Definition at line 501 of file dependencies.c.

502 {
503  int i;
504  Size min_expected_size;
505  MVDependencies *dependencies;
506  char *tmp;
507 
508  if (data == NULL)
509  return NULL;
510 
512  elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
514 
515  /* read the MVDependencies header */
516  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
517 
518  /* initialize pointer to the data part (skip the varlena header) */
519  tmp = VARDATA_ANY(data);
520 
521  /* read the header fields and perform basic sanity checks */
522  memcpy(&dependencies->magic, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->type, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527  tmp += sizeof(uint32);
528 
529  if (dependencies->magic != STATS_DEPS_MAGIC)
530  elog(ERROR, "invalid dependency magic %d (expected %d)",
531  dependencies->magic, STATS_DEPS_MAGIC);
532 
533  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
534  elog(ERROR, "invalid dependency type %d (expected %d)",
535  dependencies->type, STATS_DEPS_TYPE_BASIC);
536 
537  if (dependencies->ndeps == 0)
538  elog(ERROR, "invalid zero-length item array in MVDependencies");
539 
540  /* what minimum bytea size do we expect for those parameters */
541  min_expected_size = SizeOfItem(dependencies->ndeps);
542 
543  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
544  elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
545  VARSIZE_ANY_EXHDR(data), min_expected_size);
546 
547  /* allocate space for the MCV items */
548  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
549  + (dependencies->ndeps * sizeof(MVDependency *)));
550 
551  for (i = 0; i < dependencies->ndeps; i++)
552  {
553  double degree;
554  AttrNumber k;
555  MVDependency *d;
556 
557  /* degree of validity */
558  memcpy(&degree, tmp, sizeof(double));
559  tmp += sizeof(double);
560 
561  /* number of attributes */
562  memcpy(&k, tmp, sizeof(AttrNumber));
563  tmp += sizeof(AttrNumber);
564 
565  /* is the number of attributes valid? */
566  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
567 
568  /* now that we know the number of attributes, allocate the dependency */
569  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
570  + (k * sizeof(AttrNumber)));
571 
572  d->degree = degree;
573  d->nattributes = k;
574 
575  /* copy attribute numbers */
576  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577  tmp += sizeof(AttrNumber) * d->nattributes;
578 
579  dependencies->deps[i] = d;
580 
581  /* still within the bytea */
582  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
583  }
584 
585  /* we should have consumed the whole bytea exactly */
586  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
587 
588  return dependencies;
589 }
unsigned int uint32
Definition: c.h:495
size_t Size
Definition: c.h:594
#define SizeOfHeader
Definition: dependencies.c:40
#define SizeOfItem(natts)
Definition: dependencies.c:43
#define ERROR
Definition: elog.h:39
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References Assert(), MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog(), ERROR, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_serialize()

bytea* statext_dependencies_serialize ( MVDependencies dependencies)

Definition at line 446 of file dependencies.c.

447 {
448  int i;
449  bytea *output;
450  char *tmp;
451  Size len;
452 
453  /* we need to store ndeps, with a number of attributes for each one */
455 
456  /* and also include space for the actual attribute numbers and degrees */
457  for (i = 0; i < dependencies->ndeps; i++)
458  len += SizeOfItem(dependencies->deps[i]->nattributes);
459 
460  output = (bytea *) palloc0(len);
462 
463  tmp = VARDATA(output);
464 
465  /* Store the base struct values (magic, type, ndeps) */
466  memcpy(tmp, &dependencies->magic, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->type, sizeof(uint32));
469  tmp += sizeof(uint32);
470  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471  tmp += sizeof(uint32);
472 
473  /* store number of attributes and attribute numbers for each dependency */
474  for (i = 0; i < dependencies->ndeps; i++)
475  {
476  MVDependency *d = dependencies->deps[i];
477 
478  memcpy(tmp, &d->degree, sizeof(double));
479  tmp += sizeof(double);
480 
481  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482  tmp += sizeof(AttrNumber);
483 
484  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485  tmp += sizeof(AttrNumber) * d->nattributes;
486 
487  /* protect against overflow */
488  Assert(tmp <= ((char *) output + len));
489  }
490 
491  /* make sure we've produced exactly the right amount of data */
492  Assert(tmp == ((char *) output + len));
493 
494  return output;
495 }
#define VARHDRSZ
Definition: c.h:681
FILE * output
Definition: c.h:676
#define VARDATA(PTR)
Definition: varatt.h:278
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References Assert(), MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output, palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

◆ statext_mcv_build()

MCVList* statext_mcv_build ( StatsBuildData data,
double  totalrows,
int  stattarget 
)

Definition at line 184 of file mcv.c.

185 {
186  int i,
187  numattrs,
188  numrows,
189  ngroups,
190  nitems;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  /* comparator for all the columns */
198  mss = build_mss(data);
199 
200  /* sort the rows */
201  items = build_sorted_items(data, &nitems, mss,
202  data->nattnums, data->attnums);
203 
204  if (!items)
205  return NULL;
206 
207  /* for convenience */
208  numattrs = data->nattnums;
209  numrows = data->numrows;
210 
211  /* transform the sorted rows into groups (sorted by frequency) */
212  groups = build_distinct_groups(nitems, items, mss, &ngroups);
213 
214  /*
215  * The maximum number of MCV items to store, based on the statistics
216  * target we computed for the statistics object (from the target set for
217  * the object itself, attributes and the system default). In any case, we
218  * can't keep more groups than we have available.
219  */
220  nitems = stattarget;
221  if (nitems > ngroups)
222  nitems = ngroups;
223 
224  /*
225  * Decide how many items to keep in the MCV list. We can't use the same
226  * algorithm as per-column MCV lists, because that only considers the
227  * actual group frequency - but we're primarily interested in how the
228  * actual frequency differs from the base frequency (product of simple
229  * per-column frequencies, as if the columns were independent).
230  *
231  * Using the same algorithm might exclude items that are close to the
232  * "average" frequency of the sample. But that does not say whether the
233  * observed frequency is close to the base frequency or not. We also need
234  * to consider unexpectedly uncommon items (again, compared to the base
235  * frequency), and the single-column algorithm does not have to.
236  *
237  * We simply decide how many items to keep by computing the minimum count
238  * using get_mincount_for_mcv_list() and then keep all items that seem to
239  * be more common than that.
240  */
241  mincount = get_mincount_for_mcv_list(numrows, totalrows);
242 
243  /*
244  * Walk the groups until we find the first group with a count below the
245  * mincount threshold (the index of that group is the number of groups we
246  * want to keep).
247  */
248  for (i = 0; i < nitems; i++)
249  {
250  if (groups[i].count < mincount)
251  {
252  nitems = i;
253  break;
254  }
255  }
256 
257  /*
258  * At this point, we know the number of items for the MCV list. There
259  * might be none (for uniform distribution with many groups), and in that
260  * case, there will be no MCV list. Otherwise, construct the MCV list.
261  */
262  if (nitems > 0)
263  {
264  int j;
265  SortItem key;
266  MultiSortSupport tmp;
267 
268  /* frequencies for values in each attribute */
269  SortItem **freqs;
270  int *nfreqs;
271 
272  /* used to search values */
273  tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
274  + sizeof(SortSupportData));
275 
276  /* compute frequencies for values in each column */
277  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279 
280  /*
281  * Allocate the MCV list structure, set the global parameters.
282  */
283  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284  sizeof(MCVItem) * nitems);
285 
286  mcvlist->magic = STATS_MCV_MAGIC;
287  mcvlist->type = STATS_MCV_TYPE_BASIC;
288  mcvlist->ndimensions = numattrs;
289  mcvlist->nitems = nitems;
290 
291  /* store info about data type OIDs */
292  for (i = 0; i < numattrs; i++)
293  mcvlist->types[i] = data->stats[i]->attrtypid;
294 
295  /* Copy the first chunk of groups into the result. */
296  for (i = 0; i < nitems; i++)
297  {
298  /* just point to the proper place in the list */
299  MCVItem *item = &mcvlist->items[i];
300 
301  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303 
304  /* copy values for the group */
305  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307 
308  /* groups should be sorted by frequency in descending order */
309  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310 
311  /* group frequency */
312  item->frequency = (double) groups[i].count / numrows;
313 
314  /* base frequency, if the attributes were independent */
315  item->base_frequency = 1.0;
316  for (j = 0; j < numattrs; j++)
317  {
318  SortItem *freq;
319 
320  /* single dimension */
321  tmp->ndims = 1;
322  tmp->ssup[0] = mss->ssup[j];
323 
324  /* fill search key */
325  key.values = &groups[i].values[j];
326  key.isnull = &groups[i].isnull[j];
327 
328  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329  sizeof(SortItem),
330  multi_sort_compare, tmp);
331 
332  item->base_frequency *= ((double) freq->count) / numrows;
333  }
334  }
335 
336  pfree(nfreqs);
337  pfree(freqs);
338  }
339 
340  pfree(items);
341  pfree(groups);
342 
343  return mcvlist;
344 }
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:351
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:152
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:494
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:428
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
struct MCVItem MCVItem
bool * isnull
Definition: statistics.h:82
Datum * values
Definition: statistics.h:83
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93

References Assert(), MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, data, MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, j, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, nitems, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea data)

Definition at line 1000 of file mcv.c.

1001 {
1002  int dim,
1003  i;
1004  Size expected_size;
1005  MCVList *mcvlist;
1006  char *raw;
1007  char *ptr;
1008  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1009 
1010  int ndims,
1011  nitems;
1012  DimensionInfo *info = NULL;
1013 
1014  /* local allocation buffer (used only for deserialization) */
1015  Datum **map = NULL;
1016 
1017  /* MCV list */
1018  Size mcvlen;
1019 
1020  /* buffer used for the result */
1021  Size datalen;
1022  char *dataptr;
1023  char *valuesptr;
1024  char *isnullptr;
1025 
1026  if (data == NULL)
1027  return NULL;
1028 
1029  /*
1030  * We can't possibly deserialize a MCV list if there's not even a complete
1031  * header. We need an explicit formula here, because we serialize the
1032  * header fields one by one, so we need to ignore struct alignment.
1033  */
1035  elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1037 
1038  /* read the MCV list header */
1039  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1040 
1041  /* pointer to the data part (skip the varlena header) */
1042  raw = (char *) data;
1043  ptr = VARDATA_ANY(raw);
1044  endptr = (char *) raw + VARSIZE_ANY(data);
1045 
1046  /* get the header and perform further sanity checks */
1047  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1048  ptr += sizeof(uint32);
1049 
1050  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1051  ptr += sizeof(uint32);
1052 
1053  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1054  ptr += sizeof(uint32);
1055 
1056  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1057  ptr += sizeof(AttrNumber);
1058 
1059  if (mcvlist->magic != STATS_MCV_MAGIC)
1060  elog(ERROR, "invalid MCV magic %u (expected %u)",
1061  mcvlist->magic, STATS_MCV_MAGIC);
1062 
1063  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1064  elog(ERROR, "invalid MCV type %u (expected %u)",
1065  mcvlist->type, STATS_MCV_TYPE_BASIC);
1066 
1067  if (mcvlist->ndimensions == 0)
1068  elog(ERROR, "invalid zero-length dimension array in MCVList");
1069  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1070  (mcvlist->ndimensions < 0))
1071  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1072  mcvlist->ndimensions);
1073 
1074  if (mcvlist->nitems == 0)
1075  elog(ERROR, "invalid zero-length item array in MCVList");
1076  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1077  elog(ERROR, "invalid length (%u) item array in MCVList",
1078  mcvlist->nitems);
1079 
1080  nitems = mcvlist->nitems;
1081  ndims = mcvlist->ndimensions;
1082 
1083  /*
1084  * Check amount of data including DimensionInfo for all dimensions and
1085  * also the serialized items (including uint16 indexes). Also, walk
1086  * through the dimension information and add it to the sum.
1087  */
1088  expected_size = SizeOfMCVList(ndims, nitems);
1089 
1090  /*
1091  * Check that we have at least the dimension and info records, along with
1092  * the items. We don't know the size of the serialized values yet. We need
1093  * to do this check first, before accessing the dimension info.
1094  */
1095  if (VARSIZE_ANY(data) < expected_size)
1096  elog(ERROR, "invalid MCV size %zu (expected %zu)",
1097  VARSIZE_ANY(data), expected_size);
1098 
1099  /* Now copy the array of type Oids. */
1100  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1101  ptr += (sizeof(Oid) * ndims);
1102 
1103  /* Now it's safe to access the dimension info. */
1104  info = palloc(ndims * sizeof(DimensionInfo));
1105 
1106  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1107  ptr += (ndims * sizeof(DimensionInfo));
1108 
1109  /* account for the value arrays */
1110  for (dim = 0; dim < ndims; dim++)
1111  {
1112  /*
1113  * XXX I wonder if we can/should rely on asserts here. Maybe those
1114  * checks should be done every time?
1115  */
1116  Assert(info[dim].nvalues >= 0);
1117  Assert(info[dim].nbytes >= 0);
1118 
1119  expected_size += info[dim].nbytes;
1120  }
1121 
1122  /*
1123  * Now we know the total expected MCV size, including all the pieces
1124  * (header, dimension info. items and deduplicated data). So do the final
1125  * check on size.
1126  */
1127  if (VARSIZE_ANY(data) != expected_size)
1128  elog(ERROR, "invalid MCV size %zu (expected %zu)",
1129  VARSIZE_ANY(data), expected_size);
1130 
1131  /*
1132  * We need an array of Datum values for each dimension, so that we can
1133  * easily translate the uint16 indexes later. We also need a top-level
1134  * array of pointers to those per-dimension arrays.
1135  *
1136  * While allocating the arrays for dimensions, compute how much space we
1137  * need for a copy of the by-ref data, as we can't simply point to the
1138  * original values (it might go away).
1139  */
1140  datalen = 0; /* space for by-ref data */
1141  map = (Datum **) palloc(ndims * sizeof(Datum *));
1142 
1143  for (dim = 0; dim < ndims; dim++)
1144  {
1145  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1146 
1147  /* space needed for a copy of data for by-ref types */
1148  datalen += info[dim].nbytes_aligned;
1149  }
1150 
1151  /*
1152  * Now resize the MCV list so that the allocation includes all the data.
1153  *
1154  * Allocate space for a copy of the data, as we can't simply reference the
1155  * serialized data - it's not aligned properly, and it may disappear while
1156  * we're still using the MCV list, e.g. due to catcache release.
1157  *
1158  * We do care about alignment here, because we will allocate all the
1159  * pieces at once, but then use pointers to different parts.
1160  */
1161  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1162 
1163  /* arrays of values and isnull flags for all MCV items */
1164  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1165  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1166 
1167  /* we don't quite need to align this, but it makes some asserts easier */
1168  mcvlen += MAXALIGN(datalen);
1169 
1170  /* now resize the deserialized MCV list, and compute pointers to parts */
1171  mcvlist = repalloc(mcvlist, mcvlen);
1172 
1173  /* pointer to the beginning of values/isnull arrays */
1174  valuesptr = (char *) mcvlist
1175  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1176 
1177  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1178 
1179  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1180 
1181  /*
1182  * Build mapping (index => value) for translating the serialized data into
1183  * the in-memory representation.
1184  */
1185  for (dim = 0; dim < ndims; dim++)
1186  {
1187  /* remember start position in the input array */
1188  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1189 
1190  if (info[dim].typbyval)
1191  {
1192  /* for by-val types we simply copy data into the mapping */
1193  for (i = 0; i < info[dim].nvalues; i++)
1194  {
1195  Datum v = 0;
1196 
1197  memcpy(&v, ptr, info[dim].typlen);
1198  ptr += info[dim].typlen;
1199 
1200  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1201 
1202  /* no under/overflow of input array */
1203  Assert(ptr <= (start + info[dim].nbytes));
1204  }
1205  }
1206  else
1207  {
1208  /* for by-ref types we need to also make a copy of the data */
1209 
1210  /* passed by reference, but fixed length (name, tid, ...) */
1211  if (info[dim].typlen > 0)
1212  {
1213  for (i = 0; i < info[dim].nvalues; i++)
1214  {
1215  memcpy(dataptr, ptr, info[dim].typlen);
1216  ptr += info[dim].typlen;
1217 
1218  /* just point into the array */
1219  map[dim][i] = PointerGetDatum(dataptr);
1220  dataptr += MAXALIGN(info[dim].typlen);
1221  }
1222  }
1223  else if (info[dim].typlen == -1)
1224  {
1225  /* varlena */
1226  for (i = 0; i < info[dim].nvalues; i++)
1227  {
1228  uint32 len;
1229 
1230  /* read the uint32 length */
1231  memcpy(&len, ptr, sizeof(uint32));
1232  ptr += sizeof(uint32);
1233 
1234  /* the length is data-only */
1235  SET_VARSIZE(dataptr, len + VARHDRSZ);
1236  memcpy(VARDATA(dataptr), ptr, len);
1237  ptr += len;
1238 
1239  /* just point into the array */
1240  map[dim][i] = PointerGetDatum(dataptr);
1241 
1242  /* skip to place of the next deserialized value */
1243  dataptr += MAXALIGN(len + VARHDRSZ);
1244  }
1245  }
1246  else if (info[dim].typlen == -2)
1247  {
1248  /* cstring */
1249  for (i = 0; i < info[dim].nvalues; i++)
1250  {
1251  uint32 len;
1252 
1253  memcpy(&len, ptr, sizeof(uint32));
1254  ptr += sizeof(uint32);
1255 
1256  memcpy(dataptr, ptr, len);
1257  ptr += len;
1258 
1259  /* just point into the array */
1260  map[dim][i] = PointerGetDatum(dataptr);
1261  dataptr += MAXALIGN(len);
1262  }
1263  }
1264 
1265  /* no under/overflow of input array */
1266  Assert(ptr <= (start + info[dim].nbytes));
1267 
1268  /* no overflow of the output mcv value */
1269  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1270  }
1271 
1272  /* check we consumed input data for this dimension exactly */
1273  Assert(ptr == (start + info[dim].nbytes));
1274  }
1275 
1276  /* we should have also filled the MCV list exactly */
1277  Assert(dataptr == ((char *) mcvlist + mcvlen));
1278 
1279  /* deserialize the MCV items and translate the indexes to Datums */
1280  for (i = 0; i < nitems; i++)
1281  {
1282  MCVItem *item = &mcvlist->items[i];
1283 
1284  item->values = (Datum *) valuesptr;
1285  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1286 
1287  item->isnull = (bool *) isnullptr;
1288  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1289 
1290  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1291  ptr += sizeof(bool) * ndims;
1292 
1293  memcpy(&item->frequency, ptr, sizeof(double));
1294  ptr += sizeof(double);
1295 
1296  memcpy(&item->base_frequency, ptr, sizeof(double));
1297  ptr += sizeof(double);
1298 
1299  /* finally translate the indexes (for non-NULL only) */
1300  for (dim = 0; dim < ndims; dim++)
1301  {
1302  uint16 index;
1303 
1304  memcpy(&index, ptr, sizeof(uint16));
1305  ptr += sizeof(uint16);
1306 
1307  if (item->isnull[dim])
1308  continue;
1309 
1310  item->values[dim] = map[dim][index];
1311  }
1312 
1313  /* check we're not overflowing the input */
1314  Assert(ptr <= endptr);
1315  }
1316 
1317  /* check that we processed all the data */
1318  Assert(ptr == endptr);
1319 
1320  /* release the buffers used for mapping */
1321  for (dim = 0; dim < ndims; dim++)
1322  pfree(map[dim]);
1323 
1324  pfree(map);
1325 
1326  return mcvlist;
1327 }
unsigned short uint16
Definition: c.h:494
#define MAXALIGN(LEN)
Definition: c.h:800
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:171
struct DimensionInfo DimensionInfo
#define MinSizeOfMCVList
Definition: mcv.c:63
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:72
unsigned int Oid
Definition: postgres_ext.h:31
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
Definition: type.h:95
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:52

References Assert(), MCVItem::base_frequency, data, elog(), ERROR, fetch_att(), MCVItem::frequency, i, MCVItem::isnull, MCVList::items, len, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, nitems, DimensionInfo::nvalues, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), repalloc(), SET_VARSIZE, SizeOfMCVList, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList mcvlist,
VacAttrStats **  stats 
)

Definition at line 625 of file mcv.c.

626 {
627  int i;
628  int dim;
629  int ndims = mcvlist->ndimensions;
630 
631  SortSupport ssup;
632  DimensionInfo *info;
633 
634  Size total_length;
635 
636  /* serialized items (indexes into arrays, etc.) */
637  bytea *raw;
638  char *ptr;
639  char *endptr PG_USED_FOR_ASSERTS_ONLY;
640 
641  /* values per dimension (and number of non-NULL values) */
642  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
643  int *counts = (int *) palloc0(sizeof(int) * ndims);
644 
645  /*
646  * We'll include some rudimentary information about the attribute types
647  * (length, by-val flag), so that we don't have to look them up while
648  * deserializing the MCV list (we already have the type OID in the
649  * header). This is safe because when changing the type of the attribute
650  * the statistics gets dropped automatically. We need to store the info
651  * about the arrays of deduplicated values anyway.
652  */
653  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
654 
655  /* sort support data for all attributes included in the MCV list */
656  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
657 
658  /* collect and deduplicate values for each dimension (attribute) */
659  for (dim = 0; dim < ndims; dim++)
660  {
661  int ndistinct;
662  TypeCacheEntry *typentry;
663 
664  /*
665  * Lookup the LT operator (can't get it from stats extra_data, as we
666  * don't know how to interpret that - scalar vs. array etc.).
667  */
668  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
669 
670  /* copy important info about the data type (length, by-value) */
671  info[dim].typlen = stats[dim]->attrtype->typlen;
672  info[dim].typbyval = stats[dim]->attrtype->typbyval;
673 
674  /* allocate space for values in the attribute and collect them */
675  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
676 
677  for (i = 0; i < mcvlist->nitems; i++)
678  {
679  /* skip NULL values - we don't need to deduplicate those */
680  if (mcvlist->items[i].isnull[dim])
681  continue;
682 
683  /* append the value at the end */
684  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
685  counts[dim] += 1;
686  }
687 
688  /* if there are just NULL values in this dimension, we're done */
689  if (counts[dim] == 0)
690  continue;
691 
692  /* sort and deduplicate the data */
693  ssup[dim].ssup_cxt = CurrentMemoryContext;
694  ssup[dim].ssup_collation = stats[dim]->attrcollid;
695  ssup[dim].ssup_nulls_first = false;
696 
697  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
698 
699  qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
700  compare_scalars_simple, &ssup[dim]);
701 
702  /*
703  * Walk through the array and eliminate duplicate values, but keep the
704  * ordering (so that we can do a binary search later). We know there's
705  * at least one item as (counts[dim] != 0), so we can skip the first
706  * element.
707  */
708  ndistinct = 1; /* number of distinct values */
709  for (i = 1; i < counts[dim]; i++)
710  {
711  /* expect sorted array */
712  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
713 
714  /* if the value is the same as the previous one, we can skip it */
715  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
716  continue;
717 
718  values[dim][ndistinct] = values[dim][i];
719  ndistinct += 1;
720  }
721 
722  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
723  Assert(ndistinct <= PG_UINT16_MAX);
724 
725  /*
726  * Store additional info about the attribute - number of deduplicated
727  * values, and also size of the serialized data. For fixed-length data
728  * types this is trivial to compute, for varwidth types we need to
729  * actually walk the array and sum the sizes.
730  */
731  info[dim].nvalues = ndistinct;
732 
733  if (info[dim].typbyval) /* by-value data types */
734  {
735  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
736 
737  /*
738  * We copy the data into the MCV item during deserialization, so
739  * we don't need to allocate any extra space.
740  */
741  info[dim].nbytes_aligned = 0;
742  }
743  else if (info[dim].typlen > 0) /* fixed-length by-ref */
744  {
745  /*
746  * We don't care about alignment in the serialized data, so we
747  * pack the data as much as possible. But we also track how much
748  * data will be needed after deserialization, and in that case we
749  * need to account for alignment of each item.
750  *
751  * Note: As the items are fixed-length, we could easily compute
752  * this during deserialization, but we do it here anyway.
753  */
754  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
755  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
756  }
757  else if (info[dim].typlen == -1) /* varlena */
758  {
759  info[dim].nbytes = 0;
760  info[dim].nbytes_aligned = 0;
761  for (i = 0; i < info[dim].nvalues; i++)
762  {
763  Size len;
764 
765  /*
766  * For varlena values, we detoast the values and store the
767  * length and data separately. We don't bother with alignment
768  * here, which means that during deserialization we need to
769  * copy the fields and only access the copies.
770  */
772 
773  /* serialized length (uint32 length + data) */
774  len = VARSIZE_ANY_EXHDR(values[dim][i]);
775  info[dim].nbytes += sizeof(uint32); /* length */
776  info[dim].nbytes += len; /* value (no header) */
777 
778  /*
779  * During deserialization we'll build regular varlena values
780  * with full headers, and we need to align them properly.
781  */
782  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
783  }
784  }
785  else if (info[dim].typlen == -2) /* cstring */
786  {
787  info[dim].nbytes = 0;
788  info[dim].nbytes_aligned = 0;
789  for (i = 0; i < info[dim].nvalues; i++)
790  {
791  Size len;
792 
793  /*
794  * cstring is handled similar to varlena - first we store the
795  * length as uint32 and then the data. We don't care about
796  * alignment, which means that during deserialization we need
797  * to copy the fields and only access the copies.
798  */
799 
800  /* c-strings include terminator, so +1 byte */
801  len = strlen(DatumGetCString(values[dim][i])) + 1;
802  info[dim].nbytes += sizeof(uint32); /* length */
803  info[dim].nbytes += len; /* value */
804 
805  /* space needed for properly aligned deserialized copies */
806  info[dim].nbytes_aligned += MAXALIGN(len);
807  }
808  }
809 
810  /* we know (count>0) so there must be some data */
811  Assert(info[dim].nbytes > 0);
812  }
813 
814  /*
815  * Now we can finally compute how much space we'll actually need for the
816  * whole serialized MCV list (varlena header, MCV header, dimension info
817  * for each attribute, deduplicated values and items).
818  */
819  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
820  + sizeof(AttrNumber) /* ndimensions */
821  + (ndims * sizeof(Oid)); /* attribute types */
822 
823  /* dimension info */
824  total_length += ndims * sizeof(DimensionInfo);
825 
826  /* add space for the arrays of deduplicated values */
827  for (i = 0; i < ndims; i++)
828  total_length += info[i].nbytes;
829 
830  /*
831  * And finally account for the items (those are fixed-length, thanks to
832  * replacing values with uint16 indexes into the deduplicated arrays).
833  */
834  total_length += mcvlist->nitems * ITEM_SIZE(dim);
835 
836  /*
837  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
838  * we set them to zero to make the result more compressible).
839  */
840  raw = (bytea *) palloc0(VARHDRSZ + total_length);
841  SET_VARSIZE(raw, VARHDRSZ + total_length);
842 
843  ptr = VARDATA(raw);
844  endptr = ptr + total_length;
845 
846  /* copy the MCV list header fields, one by one */
847  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
848  ptr += sizeof(uint32);
849 
850  memcpy(ptr, &mcvlist->type, sizeof(uint32));
851  ptr += sizeof(uint32);
852 
853  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
854  ptr += sizeof(uint32);
855 
856  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
857  ptr += sizeof(AttrNumber);
858 
859  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
860  ptr += (sizeof(Oid) * ndims);
861 
862  /* store information about the attributes (data amounts, ...) */
863  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
864  ptr += sizeof(DimensionInfo) * ndims;
865 
866  /* Copy the deduplicated values for all attributes to the output. */
867  for (dim = 0; dim < ndims; dim++)
868  {
869  /* remember the starting point for Asserts later */
870  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
871 
872  for (i = 0; i < info[dim].nvalues; i++)
873  {
874  Datum value = values[dim][i];
875 
876  if (info[dim].typbyval) /* passed by value */
877  {
878  Datum tmp;
879 
880  /*
881  * For byval types, we need to copy just the significant bytes
882  * - we can't use memcpy directly, as that assumes
883  * little-endian behavior. store_att_byval does almost what
884  * we need, but it requires a properly aligned buffer - the
885  * output buffer does not guarantee that. So we simply use a
886  * local Datum variable (which guarantees proper alignment),
887  * and then copy the value from it.
888  */
889  store_att_byval(&tmp, value, info[dim].typlen);
890 
891  memcpy(ptr, &tmp, info[dim].typlen);
892  ptr += info[dim].typlen;
893  }
894  else if (info[dim].typlen > 0) /* passed by reference */
895  {
896  /* no special alignment needed, treated as char array */
897  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
898  ptr += info[dim].typlen;
899  }
900  else if (info[dim].typlen == -1) /* varlena */
901  {
903 
904  /* copy the length */
905  memcpy(ptr, &len, sizeof(uint32));
906  ptr += sizeof(uint32);
907 
908  /* data from the varlena value (without the header) */
909  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
910  ptr += len;
911  }
912  else if (info[dim].typlen == -2) /* cstring */
913  {
914  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
915 
916  /* copy the length */
917  memcpy(ptr, &len, sizeof(uint32));
918  ptr += sizeof(uint32);
919 
920  /* value */
921  memcpy(ptr, DatumGetCString(value), len);
922  ptr += len;
923  }
924 
925  /* no underflows or overflows */
926  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
927  }
928 
929  /* we should get exactly nbytes of data for this dimension */
930  Assert((ptr - start) == info[dim].nbytes);
931  }
932 
933  /* Serialize the items, with uint16 indexes instead of the values. */
934  for (i = 0; i < mcvlist->nitems; i++)
935  {
936  MCVItem *mcvitem = &mcvlist->items[i];
937 
938  /* don't write beyond the allocated space */
939  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
940 
941  /* copy NULL and frequency flags into the serialized MCV */
942  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
943  ptr += sizeof(bool) * ndims;
944 
945  memcpy(ptr, &mcvitem->frequency, sizeof(double));
946  ptr += sizeof(double);
947 
948  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
949  ptr += sizeof(double);
950 
951  /* store the indexes last */
952  for (dim = 0; dim < ndims; dim++)
953  {
954  uint16 index = 0;
955  Datum *value;
956 
957  /* do the lookup only for non-NULL values */
958  if (!mcvitem->isnull[dim])
959  {
960  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
961  info[dim].nvalues, sizeof(Datum),
962  compare_scalars_simple, &ssup[dim]);
963 
964  Assert(value != NULL); /* serialization or deduplication
965  * error */
966 
967  /* compute index within the deduplicated array */
968  index = (uint16) (value - values[dim]);
969 
970  /* check the index is within expected bounds */
971  Assert(index < info[dim].nvalues);
972  }
973 
974  /* copy the index into the serialized MCV */
975  memcpy(ptr, &index, sizeof(uint16));
976  ptr += sizeof(uint16);
977  }
978 
979  /* make sure we don't overflow the allocated value */
980  Assert(ptr <= endptr);
981  }
982 
983  /* at this point we expect to match the total_length exactly */
984  Assert(ptr == endptr);
985 
986  pfree(values);
987  pfree(counts);
988 
989  return raw;
990 }
#define PG_UINT16_MAX
Definition: c.h:576
int compare_scalars_simple(const void *a, const void *b, void *arg)
#define ITEM_SIZE(ndims)
Definition: mcv.c:57
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Form_pg_type attrtype
Definition: vacuum.h:127
Oid attrcollid
Definition: vacuum.h:128
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition: tupmacs.h:183
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:344
#define TYPECACHE_LT_OPR
Definition: typcache.h:137

References Assert(), VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString(), DatumGetPointer(), MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), PrepareSortSupportFromOrderingOp(), qsort_interruptible(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, store_att_byval(), DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, values, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

◆ statext_ndistinct_build()

MVNDistinct* statext_ndistinct_build ( double  totalrows,
StatsBuildData data 
)

Definition at line 89 of file mvdistinct.c.

90 {
91  MVNDistinct *result;
92  int k;
93  int itemcnt;
94  int numattrs = data->nattnums;
95  int numcombs = num_combinations(numattrs);
96 
97  result = palloc(offsetof(MVNDistinct, items) +
98  numcombs * sizeof(MVNDistinctItem));
99  result->magic = STATS_NDISTINCT_MAGIC;
101  result->nitems = numcombs;
102 
103  itemcnt = 0;
104  for (k = 2; k <= numattrs; k++)
105  {
106  int *combination;
108 
109  /* generate combinations of K out of N elements */
110  generator = generator_init(numattrs, k);
111 
112  while ((combination = generator_next(generator)))
113  {
114  MVNDistinctItem *item = &result->items[itemcnt];
115  int j;
116 
117  item->attributes = palloc(sizeof(AttrNumber) * k);
118  item->nattributes = k;
119 
120  /* translate the indexes to attnums */
121  for (j = 0; j < k; j++)
122  {
123  item->attributes[j] = data->attnums[combination[j]];
124 
126  }
127 
128  item->ndistinct =
129  ndistinct_for_combination(totalrows, data, k, combination);
130 
131  itemcnt++;
132  Assert(itemcnt <= result->nitems);
133  }
134 
136  }
137 
138  /* must consume exactly the whole output array */
139  Assert(itemcnt == result->nitems);
140 
141  return result;
142 }
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition: mvdistinct.c:426
static int num_combinations(int n)
Definition: mvdistinct.c:576
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:643
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:590
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:628
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
uint32 type
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39

References Assert(), AttributeNumberIsValid, MVNDistinctItem::attributes, data, generator_free(), generator_init(), generator_next(), MVNDistinct::items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct* statext_ndistinct_deserialize ( bytea data)

Definition at line 251 of file mvdistinct.c.

252 {
253  int i;
254  Size minimum_size;
255  MVNDistinct ndist;
256  MVNDistinct *ndistinct;
257  char *tmp;
258 
259  if (data == NULL)
260  return NULL;
261 
262  /* we expect at least the basic fields of MVNDistinct struct */
264  elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
266 
267  /* initialize pointer to the data part (skip the varlena header) */
268  tmp = VARDATA_ANY(data);
269 
270  /* read the header fields and perform basic sanity checks */
271  memcpy(&ndist.magic, tmp, sizeof(uint32));
272  tmp += sizeof(uint32);
273  memcpy(&ndist.type, tmp, sizeof(uint32));
274  tmp += sizeof(uint32);
275  memcpy(&ndist.nitems, tmp, sizeof(uint32));
276  tmp += sizeof(uint32);
277 
278  if (ndist.magic != STATS_NDISTINCT_MAGIC)
279  elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
281  if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
282  elog(ERROR, "invalid ndistinct type %d (expected %d)",
284  if (ndist.nitems == 0)
285  elog(ERROR, "invalid zero-length item array in MVNDistinct");
286 
287  /* what minimum bytea size do we expect for those parameters */
288  minimum_size = MinSizeOfItems(ndist.nitems);
289  if (VARSIZE_ANY_EXHDR(data) < minimum_size)
290  elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
291  VARSIZE_ANY_EXHDR(data), minimum_size);
292 
293  /*
294  * Allocate space for the ndistinct items (no space for each item's
295  * attnos: those live in bitmapsets allocated separately)
296  */
297  ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
298  (ndist.nitems * sizeof(MVNDistinctItem)));
299  ndistinct->magic = ndist.magic;
300  ndistinct->type = ndist.type;
301  ndistinct->nitems = ndist.nitems;
302 
303  for (i = 0; i < ndistinct->nitems; i++)
304  {
305  MVNDistinctItem *item = &ndistinct->items[i];
306 
307  /* ndistinct value */
308  memcpy(&item->ndistinct, tmp, sizeof(double));
309  tmp += sizeof(double);
310 
311  /* number of attributes */
312  memcpy(&item->nattributes, tmp, sizeof(int));
313  tmp += sizeof(int);
314  Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
315 
316  item->attributes
317  = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
318 
319  memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
320  tmp += sizeof(AttrNumber) * item->nattributes;
321 
322  /* still within the bytea */
323  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
324  }
325 
326  /* we should have consumed the whole bytea exactly */
327  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
328 
329  return ndistinct;
330 }
#define SizeOfHeader
Definition: mvdistinct.c:46
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:56

References Assert(), MVNDistinctItem::attributes, data, elog(), ERROR, i, MVNDistinct::items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_serialize()

bytea* statext_ndistinct_serialize ( MVNDistinct ndistinct)

Definition at line 180 of file mvdistinct.c.

181 {
182  int i;
183  bytea *output;
184  char *tmp;
185  Size len;
186 
187  Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
188  Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
189 
190  /*
191  * Base size is size of scalar fields in the struct, plus one base struct
192  * for each item, including number of items for each.
193  */
195 
196  /* and also include space for the actual attribute numbers */
197  for (i = 0; i < ndistinct->nitems; i++)
198  {
199  int nmembers;
200 
201  nmembers = ndistinct->items[i].nattributes;
202  Assert(nmembers >= 2);
203 
204  len += SizeOfItem(nmembers);
205  }
206 
207  output = (bytea *) palloc(len);
209 
210  tmp = VARDATA(output);
211 
212  /* Store the base struct values (magic, type, nitems) */
213  memcpy(tmp, &ndistinct->magic, sizeof(uint32));
214  tmp += sizeof(uint32);
215  memcpy(tmp, &ndistinct->type, sizeof(uint32));
216  tmp += sizeof(uint32);
217  memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
218  tmp += sizeof(uint32);
219 
220  /*
221  * store number of attributes and attribute numbers for each entry
222  */
223  for (i = 0; i < ndistinct->nitems; i++)
224  {
225  MVNDistinctItem item = ndistinct->items[i];
226  int nmembers = item.nattributes;
227 
228  memcpy(tmp, &item.ndistinct, sizeof(double));
229  tmp += sizeof(double);
230  memcpy(tmp, &nmembers, sizeof(int));
231  tmp += sizeof(int);
232 
233  memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
234  tmp += nmembers * sizeof(AttrNumber);
235 
236  /* protect against overflows */
237  Assert(tmp <= ((char *) output + len));
238  }
239 
240  /* check we used exactly the expected space */
241  Assert(tmp == ((char *) output + len));
242 
243  return output;
244 }
#define SizeOfItem(natts)
Definition: mvdistinct.c:49

References Assert(), MVNDistinctItem::attributes, i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().