PostgreSQL Source Code  git master
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 
struct  StatsBuildData
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportData * MultiSortSupport
 
typedef struct SortItem SortItem
 
typedef struct StatsBuildData StatsBuildData
 

Functions

MVNDistinct * statext_ndistinct_build (double totalrows, StatsBuildData *data)
 
bytea * statext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinct * statext_ndistinct_deserialize (bytea *data)
 
MVDependencies * statext_dependencies_build (StatsBuildData *data)
 
bytea * statext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependencies * statext_dependencies_deserialize (bytea *data)
 
MCVList * statext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
bytea * statext_mcv_serialize (MCVList *mcv, VacAttrStats **stats)
 
MCVList * statext_mcv_deserialize (bytea *data)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
AttrNumber * build_attnums_array (Bitmapset *attrs, int nexprs, int *numattrs)
 
SortItem * build_sorted_items (StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_opclause_args (List *args, Node **exprp, Const **cstp, bool *expronleftp)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

typedef MultiSortSupportData* MultiSortSupport

Definition at line 51 of file extended_stats_internal.h.

◆ MultiSortSupportData

typedef struct MultiSortSupportData MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

◆ StatsBuildData

typedef struct StatsBuildData StatsBuildData

Function Documentation

◆ build_attnums_array()

AttrNumber* build_attnums_array ( Bitmapset * attrs,
int  nexprs,
int *  numattrs 
)

Definition at line 969 of file extended_stats.c.

970 {
971  int i,
972  j;
973  AttrNumber *attnums;
974  int num = bms_num_members(attrs);
975 
976  if (numattrs)
977  *numattrs = num;
978 
979  /* build attnums from the bitmapset */
980  attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
981  i = 0;
982  j = -1;
983  while ((j = bms_next_member(attrs, j)) >= 0)
984  {
985  int attnum = (j - nexprs);
986 
987  /*
988  * Make sure the bitmap contains only user-defined attributes. As
989  * bitmaps can't contain negative values, this can be violated in two
990  * ways. Firstly, the bitmap might contain 0 as a member, and secondly
991  * the integer value might be larger than MaxAttrNumber.
992  */
993  Assert(AttributeNumberIsValid(attnum));
994  Assert((attnum <= MaxAttrNumber));
995  Assert(attnum >= (-nexprs));
996 
997  attnums[i++] = (AttrNumber) attnum;
998 
999  /* protect against overflows */
1000  Assert(i <= num);
1001  }
1002 
1003  return attnums;
1004 }
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define MaxAttrNumber
Definition: attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1045
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:648
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc(Size size)
Definition: mcxt.c:1062
int16 attnum
Definition: pg_attribute.h:83

References Assert(), attnum, AttributeNumberIsValid, bms_next_member(), bms_num_members(), i, j, MaxAttrNumber, and palloc().

◆ build_sorted_items()

SortItem* build_sorted_items ( StatsBuildData * data,
int *  nitems,
MultiSortSupport  mss,
int  numattrs,
AttrNumber * attnums 
)

Definition at line 1014 of file extended_stats.c.

1017 {
1018  int i,
1019  j,
1020  len,
1021  nrows;
1022  int nvalues = data->numrows * numattrs;
1023 
1024  SortItem *items;
1025  Datum *values;
1026  bool *isnull;
1027  char *ptr;
1028  int *typlen;
1029 
1030  /* Compute the total amount of memory we need (both items and values). */
1031  len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
1032 
1033  /* Allocate the memory and split it into the pieces. */
1034  ptr = palloc0(len);
1035 
1036  /* items to sort */
1037  items = (SortItem *) ptr;
1038  ptr += data->numrows * sizeof(SortItem);
1039 
1040  /* values and null flags */
1041  values = (Datum *) ptr;
1042  ptr += nvalues * sizeof(Datum);
1043 
1044  isnull = (bool *) ptr;
1045  ptr += nvalues * sizeof(bool);
1046 
1047  /* make sure we consumed the whole buffer exactly */
1048  Assert((ptr - (char *) items) == len);
1049 
1050  /* fix the pointers to Datum and bool arrays */
1051  nrows = 0;
1052  for (i = 0; i < data->numrows; i++)
1053  {
1054  items[nrows].values = &values[nrows * numattrs];
1055  items[nrows].isnull = &isnull[nrows * numattrs];
1056 
1057  nrows++;
1058  }
1059 
1060  /* build a local cache of typlen for all attributes */
1061  typlen = (int *) palloc(sizeof(int) * data->nattnums);
1062  for (i = 0; i < data->nattnums; i++)
1063  typlen[i] = get_typlen(data->stats[i]->attrtypid);
1064 
1065  nrows = 0;
1066  for (i = 0; i < data->numrows; i++)
1067  {
1068  bool toowide = false;
1069 
1070  /* load the values/null flags from sample rows */
1071  for (j = 0; j < numattrs; j++)
1072  {
1073  Datum value;
1074  bool isnull;
1075  int attlen;
1076  AttrNumber attnum = attnums[j];
1077 
1078  int idx;
1079 
1080  /* match attnum to the pre-calculated data */
1081  for (idx = 0; idx < data->nattnums; idx++)
1082  {
1083  if (attnum == data->attnums[idx])
1084  break;
1085  }
1086 
1087  Assert(idx < data->nattnums);
1088 
1089  value = data->values[idx][i];
1090  isnull = data->nulls[idx][i];
1091  attlen = typlen[idx];
1092 
1093  /*
1094  * If this is a varlena value, check if it's too wide and if yes
1095  * then skip the whole item. Otherwise detoast the value.
1096  *
1097  * XXX It may happen that we've already detoasted some preceding
1098  * values for the current item. We don't bother to cleanup those
1099  * on the assumption that those are small (below WIDTH_THRESHOLD)
1100  * and will be discarded at the end of analyze.
1101  */
1102  if ((!isnull) && (attlen == -1))
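1103  {
1104  if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
1105  {
1106  toowide = true;
1107  break;
1108  }
1109 
1110  value = PointerGetDatum(PG_DETOAST_DATUM(value));
1111  }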
1112 
1113  items[nrows].values[j] = value;
1114  items[nrows].isnull[j] = isnull;
1115  }
1116 
1117  if (toowide)
1118  continue;
1119 
1120  nrows++;
1121  }
1122 
1123  /* store the actual number of items (ignoring the too-wide ones) */
1124  *nitems = nrows;
1125 
1126  /* all items were too wide */
1127  if (nrows == 0)
1128  {
1129  /* everything is allocated as a single chunk */
1130  pfree(items);
1131  return NULL;
1132  }
1133 
1134  /* do the sort, using the multi-sort */
1135  qsort_arg((void *) items, nrows, sizeof(SortItem),
1136  multi_sort_compare, mss);
1137 
1138  return items;
1139 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
static Datum values[MAXATTR]
Definition: bootstrap.c:156
unsigned char bool
Definition: c.h:391
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
#define WIDTH_THRESHOLD
int multi_sort_compare(const void *a, const void *b, void *arg)
struct SortItem SortItem
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
static struct @142 value
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2144
void pfree(void *pointer)
Definition: mcxt.c:1169
void * palloc0(Size size)
Definition: mcxt.c:1093
int16 attlen
Definition: pg_attribute.h:68
const void size_t len
const void * data
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
uintptr_t Datum
Definition: postgres.h:411
#define PointerGetDatum(X)
Definition: postgres.h:600

References Assert(), attlen, attnum, data, get_typlen(), i, idx(), SortItem::isnull, j, len, multi_sort_compare(), palloc(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum, qsort_arg(), toast_raw_datum_size(), value, values, SortItem::values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)

Definition at line 955 of file extended_stats.c.

956 {
957  return ApplySortComparator(a, false, b, false, ssup);
958 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

References a, ApplySortComparator(), and b.

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

◆ compare_scalars_simple()

int compare_scalars_simple ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 947 of file extended_stats.c.

948 {
949  return compare_datums_simple(*(Datum *) a,
950  *(Datum *) b,
951  (SortSupport) arg);
952 }
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
void * arg

References a, arg, b, and compare_datums_simple().

Referenced by statext_mcv_serialize().

◆ examine_opclause_args()

bool examine_opclause_args ( List * args,
Node **  exprp,
Const **  cstp,
bool * expronleftp 
)

Definition at line 2025 of file extended_stats.c.

2027 {
2028  Node *expr;
2029  Const *cst;
2030  bool expronleft;
2031  Node *leftop,
2032  *rightop;
2033 
2034  /* enforced by statext_is_compatible_clause_internal */
2035  Assert(list_length(args) == 2);
2036 
2037  leftop = linitial(args);
2038  rightop = lsecond(args);
2039 
2040  /* strip RelabelType from either side of the expression */
2041  if (IsA(leftop, RelabelType))
2042  leftop = (Node *) ((RelabelType *) leftop)->arg;
2043 
2044  if (IsA(rightop, RelabelType))
2045  rightop = (Node *) ((RelabelType *) rightop)->arg;
2046 
2047  if (IsA(rightop, Const))
2048  {
2049  expr = (Node *) leftop;
2050  cst = (Const *) rightop;
2051  expronleft = true;
2052  }
2053  else if (IsA(leftop, Const))
2054  {
2055  expr = (Node *) rightop;
2056  cst = (Const *) leftop;
2057  expronleft = false;
2058  }
2059  else
2060  return false;
2061 
2062  /* return pointers to the extracted parts if requested */
2063  if (exprp)
2064  *exprp = expr;
2065 
2066  if (cstp)
2067  *cstp = cst;
2068 
2069  if (expronleftp)
2070  *expronleftp = expronleft;
2071 
2072  return true;
2073 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:589
static int list_length(const List *l)
Definition: pg_list.h:149
#define linitial(l)
Definition: pg_list.h:174
#define lsecond(l)
Definition: pg_list.h:179
Definition: nodes.h:539

References arg, generate_unaccent_rules::args, Assert(), IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo * root,
StatisticExtInfo * stat,
MCVList * mcv,
Node * clause,
bool **  or_matches,
Selectivity * basesel,
Selectivity * overlap_mcvsel,
Selectivity * overlap_basesel,
Selectivity * totalsel 
)

Definition at line 2109 of file mcv.c.

2113 {
2114  Selectivity s = 0.0;
2115  bool *new_matches;
2116  int i;
2117 
2118  /* build the OR-matches bitmap, if not built already */
2119  if (*or_matches == NULL)
2120  *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2121 
2122  /* build the match bitmap for the new clause */
2123  new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2124  stat->exprs, mcv, false);
2125 
2126  /*
2127  * Sum the frequencies for all the MCV items matching this clause and also
2128  * those matching the overlap between this clause and any of the preceding
2129  * clauses as described above.
2130  */
2131  *basesel = 0.0;
2132  *overlap_mcvsel = 0.0;
2133  *overlap_basesel = 0.0;
2134  *totalsel = 0.0;
2135  for (i = 0; i < mcv->nitems; i++)
2136  {
2137  *totalsel += mcv->items[i].frequency;
2138 
2139  if (new_matches[i])
2140  {
2141  s += mcv->items[i].frequency;
2142  *basesel += mcv->items[i].base_frequency;
2143 
2144  if ((*or_matches)[i])
2145  {
2146  *overlap_mcvsel += mcv->items[i].frequency;
2147  *overlap_basesel += mcv->items[i].base_frequency;
2148  }
2149  }
2150 
2151  /* update the OR-matches bitmap for the next clause */
2152  (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2153  }
2154 
2155  pfree(new_matches);
2156 
2157  return s;
2158 }
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1606
double Selectivity
Definition: nodes.h:671
#define list_make1(x1)
Definition: pg_list.h:206
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), and pfree().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo * root,
StatisticExtInfo * stat,
List * clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo * sjinfo,
RelOptInfo * rel,
Selectivity * basesel,
Selectivity * totalsel 
)

Definition at line 2032 of file mcv.c.

2037 {
2038  int i;
2039  MCVList *mcv;
2040  Selectivity s = 0.0;
2041 
2042  /* match/mismatch bitmap for each MCV item */
2043  bool *matches = NULL;
2044 
2045  /* load the MCV list stored in the statistics object */
2046  mcv = statext_mcv_load(stat->statOid);
2047 
2048  /* build a match bitmap for the clauses */
2049  matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2050  mcv, false);
2051 
2052  /* sum frequencies for all the matching MCV items */
2053  *basesel = 0.0;
2054  *totalsel = 0.0;
2055  for (i = 0; i < mcv->nitems; i++)
2056  {
2057  *totalsel += mcv->items[i].frequency;
2058 
2059  if (matches[i] != false)
2060  {
2061  *basesel += mcv->items[i].base_frequency;
2062  s += mcv->items[i].frequency;
2063  }
2064  }
2065 
2066  return s;
2067 }
MCVList * statext_mcv_load(Oid mvoid)
Definition: mcv.c:562

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)

Definition at line 1990 of file mcv.c.

1994 {
1995  Selectivity other_sel;
1996  Selectivity sel;
1997 
1998  /* estimated selectivity of values not covered by MCV matches */
1999  other_sel = simple_sel - mcv_basesel;
2000  CLAMP_PROBABILITY(other_sel);
2001 
2002  /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2003  if (other_sel > 1.0 - mcv_totalsel)
2004  other_sel = 1.0 - mcv_totalsel;
2005 
2006  /* overall selectivity is the sum of the MCV and non-MCV parts */
2007  sel = mcv_sel + other_sel;
2008  CLAMP_PROBABILITY(sel);
2009 
2010  return sel;
2011 }
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)

Definition at line 879 of file extended_stats.c.

881 {
882  SortSupport ssup = &mss->ssup[sortdim];
883 
884  ssup->ssup_cxt = CurrentMemoryContext;
885  ssup->ssup_collation = collation;
886  ssup->ssup_nulls_first = false;
887 
888  PrepareSortSupportFromOrderingOp(oper, ssup);
889 }
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:382
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:135
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66

References CurrentMemoryContext, oper(), PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ multi_sort_compare()

int multi_sort_compare ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 893 of file extended_stats.c.

894 {
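895  MultiSortSupport mss = (MultiSortSupport) arg;
896  SortItem *ia = (SortItem *) a;
897  SortItem *ib = (SortItem *) b;
898  int i;
899 
900  for (i = 0; i < mss->ndims; i++)
901  {
902  int compare;
903 
904  compare = ApplySortComparator(ia->values[i], ia->isnull[i],
905  ib->values[i], ib->isnull[i],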
906  &mss->ssup[i]);
907 
908  if (compare != 0)
909  return compare;
910  }
911 
912  /* equal by default */
913  return 0;
914 }
MultiSortSupportData * MultiSortSupport
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145

References a, ApplySortComparator(), arg, b, compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem * a,
const SortItem * b,
MultiSortSupport  mss 
)

Definition at line 918 of file extended_stats.c.

920 {
921  return ApplySortComparator(a->values[dim], a->isnull[dim],
922  b->values[dim], b->isnull[dim],
923  &mss->ssup[dim]);
924 }

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem * a,
const SortItem * b,
MultiSortSupport  mss 
)

Definition at line 927 of file extended_stats.c.

930 {
931  int dim;
932 
933  for (dim = start; dim <= end; dim++)
934  {
935  int r = ApplySortComparator(a->values[dim], a->isnull[dim],
936  b->values[dim], b->isnull[dim],
937  &mss->ssup[dim]);
938 
939  if (r != 0)
940  return r;
941  }
942 
943  return 0;
944 }

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)

Definition at line 860 of file extended_stats.c.

861 {
862  MultiSortSupport mss;
863 
864  Assert(ndims >= 2);
865 
866  mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
867  + sizeof(SortSupportData) * ndims);
868 
869  mss->ndims = ndims;
870 
871  return mss;
872 }
#define offsetof(type, field)
Definition: c.h:727
struct SortSupportData SortSupportData

References Assert(), MultiSortSupportData::ndims, offsetof, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ statext_dependencies_build()

MVDependencies* statext_dependencies_build ( StatsBuildData * data)

Definition at line 350 of file dependencies.c.

351 {
352  int i,
353  k;
354 
355  /* result */
356  MVDependencies *dependencies = NULL;
357  MemoryContext cxt;
358 
359  Assert(data->nattnums >= 2);
360 
361  /* tracks memory allocated by dependency_degree calls */
362  cxt = AllocSetContextCreate(CurrentMemoryContext,
363  "dependency_degree cxt",
364  ALLOCSET_DEFAULT_SIZES);
365 
366  /*
367  * We'll try build functional dependencies starting from the smallest ones
368  * covering just 2 columns, to the largest ones, covering all columns
369  * included in the statistics object. We start from the smallest ones
370  * because we want to be able to skip already implied ones.
371  */
372  for (k = 2; k <= data->nattnums; k++)
373  {
374  AttrNumber *dependency; /* array with k elements */
375 
376  /* prepare a DependencyGenerator of variation */
377  DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
378 
379  /* generate all possible variations of k values (out of n) */
380  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
381  {
382  double degree;
383  MVDependency *d;
384  MemoryContext oldcxt;
385 
386  /* release memory used by dependency degree calculation */
387  oldcxt = MemoryContextSwitchTo(cxt);
388 
389  /* compute how valid the dependency seems */
390  degree = dependency_degree(data, k, dependency);
391 
392  MemoryContextSwitchTo(oldcxt);
393  MemoryContextReset(cxt);
394 
395  /*
396  * if the dependency seems entirely invalid, don't store it
397  */
398  if (degree == 0.0)
399  continue;
400 
401  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
402  + k * sizeof(AttrNumber));
403 
404  /* copy the dependency (and keep the indexes into stxkeys) */
405  d->degree = degree;
406  d->nattributes = k;
407  for (i = 0; i < k; i++)
408  d->attributes[i] = data->attnums[dependency[i]];
409 
410  /* initialize the list of dependencies */
411  if (dependencies == NULL)
412  {
413  dependencies
414  = (MVDependencies *) palloc0(sizeof(MVDependencies));
415 
416  dependencies->magic = STATS_DEPS_MAGIC;
417  dependencies->type = STATS_DEPS_TYPE_BASIC;
418  dependencies->ndeps = 0;
419  }
420 
421  dependencies->ndeps++;
422  dependencies = (MVDependencies *) repalloc(dependencies,
423  offsetof(MVDependencies, deps)
424  + dependencies->ndeps * sizeof(MVDependency *));
425 
426  dependencies->deps[dependencies->ndeps - 1] = d;
427  }
428 
429  /*
430  * we're done with variations of k elements, so free the
431  * DependencyGenerator
432  */
433  DependencyGenerator_free(DependencyGenerator);
434  }
435 
436  MemoryContextDelete(cxt);
437 
438  return dependencies;
439 }
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:206
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:197
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:174
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:223
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:143
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, if(), MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, offsetof, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies* statext_dependencies_deserialize ( bytea * data)

Definition at line 501 of file dependencies.c.

502 {
503  int i;
504  Size min_expected_size;
505  MVDependencies *dependencies;
506  char *tmp;
507 
508  if (data == NULL)
509  return NULL;
510 
511  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
512  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
513  VARSIZE_ANY_EXHDR(data), SizeOfHeader);
514 
515  /* read the MVDependencies header */
516  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
517 
518  /* initialize pointer to the data part (skip the varlena header) */
519  tmp = VARDATA_ANY(data);
520 
521  /* read the header fields and perform basic sanity checks */
522  memcpy(&dependencies->magic, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->type, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527  tmp += sizeof(uint32);
528 
529  if (dependencies->magic != STATS_DEPS_MAGIC)
530  elog(ERROR, "invalid dependency magic %d (expected %d)",
531  dependencies->magic, STATS_DEPS_MAGIC);
532 
533  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
534  elog(ERROR, "invalid dependency type %d (expected %d)",
535  dependencies->type, STATS_DEPS_TYPE_BASIC);
536 
537  if (dependencies->ndeps == 0)
538  elog(ERROR, "invalid zero-length item array in MVDependencies");
539 
540  /* what minimum bytea size do we expect for those parameters */
541  min_expected_size = SizeOfItem(dependencies->ndeps);
542 
543  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
544  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
545  VARSIZE_ANY_EXHDR(data), min_expected_size);
546 
547  /* allocate space for the MCV items */
548  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
549  + (dependencies->ndeps * sizeof(MVDependency *)));
550 
551  for (i = 0; i < dependencies->ndeps; i++)
552  {
553  double degree;
554  AttrNumber k;
555  MVDependency *d;
556 
557  /* degree of validity */
558  memcpy(&degree, tmp, sizeof(double));
559  tmp += sizeof(double);
560 
561  /* number of attributes */
562  memcpy(&k, tmp, sizeof(AttrNumber));
563  tmp += sizeof(AttrNumber);
564 
565  /* is the number of attributes valid? */
566  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
567 
568  /* now that we know the number of attributes, allocate the dependency */
569  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
570  + (k * sizeof(AttrNumber)));
571 
572  d->degree = degree;
573  d->nattributes = k;
574 
575  /* copy attribute numbers */
576  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577  tmp += sizeof(AttrNumber) * d->nattributes;
578 
579  dependencies->deps[i] = d;
580 
581  /* still within the bytea */
582  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
583  }
584 
585  /* we should have consumed the whole bytea exactly */
586  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
587 
588  return dependencies;
589 }
unsigned int uint32
Definition: c.h:441
size_t Size
Definition: c.h:540
#define SizeOfHeader
Definition: dependencies.c:39
#define SizeOfItem(natts)
Definition: dependencies.c:42
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define VARSIZE_ANY(PTR)
Definition: postgres.h:348
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19

References Assert(), MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog, ERROR, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, offsetof, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_serialize()

bytea* statext_dependencies_serialize ( MVDependencies * dependencies)

Definition at line 446 of file dependencies.c.

447 {
448  int i;
449  bytea *output;
450  char *tmp;
451  Size len;
452 
453  /* we need to store ndeps, with a number of attributes for each one */
454  len = VARHDRSZ + SizeOfHeader;
455 
456  /* and also include space for the actual attribute numbers and degrees */
457  for (i = 0; i < dependencies->ndeps; i++)
458  len += SizeOfItem(dependencies->deps[i]->nattributes);
459 
460  output = (bytea *) palloc0(len);
461  SET_VARSIZE(output, len);
462 
463  tmp = VARDATA(output);
464 
465  /* Store the base struct values (magic, type, ndeps) */
466  memcpy(tmp, &dependencies->magic, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->type, sizeof(uint32));
469  tmp += sizeof(uint32);
470  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471  tmp += sizeof(uint32);
472 
473  /* store number of attributes and attribute numbers for each dependency */
474  for (i = 0; i < dependencies->ndeps; i++)
475  {
476  MVDependency *d = dependencies->deps[i];
477 
478  memcpy(tmp, &d->degree, sizeof(double));
479  tmp += sizeof(double);
480 
481  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482  tmp += sizeof(AttrNumber);
483 
484  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485  tmp += sizeof(AttrNumber) * d->nattributes;
486 
487  /* protect against overflow */
488  Assert(tmp <= ((char *) output + len));
489  }
490 
491  /* make sure we've produced exactly the right amount of data */
492  Assert(tmp == ((char *) output + len));
493 
494  return output;
495 }
#define VARHDRSZ
Definition: c.h:627
static void output(uint64 loop_count)
#define VARDATA(PTR)
Definition: postgres.h:315
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
Definition: c.h:622

References Assert(), MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output(), palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

◆ statext_mcv_build()

MCVList* statext_mcv_build ( StatsBuildData * data,
double  totalrows,
int  stattarget 
)

Definition at line 184 of file mcv.c.

185 {
186  int i,
187  numattrs,
188  numrows,
189  ngroups,
190  nitems;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  /* comparator for all the columns */
198  mss = build_mss(data);
199 
200  /* sort the rows */
201  items = build_sorted_items(data, &nitems, mss,
202  data->nattnums, data->attnums);
203 
204  if (!items)
205  return NULL;
206 
207  /* for convenience */
208  numattrs = data->nattnums;
209  numrows = data->numrows;
210 
211  /* transform the sorted rows into groups (sorted by frequency) */
212  groups = build_distinct_groups(nitems, items, mss, &ngroups);
213 
214  /*
215  * The maximum number of MCV items to store, based on the statistics
216  * target we computed for the statistics object (from the target set for
217  * the object itself, attributes and the system default). In any case, we
218  * can't keep more groups than we have available.
219  */
220  nitems = stattarget;
221  if (nitems > ngroups)
222  nitems = ngroups;
223 
224  /*
225  * Decide how many items to keep in the MCV list. We can't use the same
226  * algorithm as per-column MCV lists, because that only considers the
227  * actual group frequency - but we're primarily interested in how the
228  * actual frequency differs from the base frequency (product of simple
229  * per-column frequencies, as if the columns were independent).
230  *
231  * Using the same algorithm might exclude items that are close to the
232  * "average" frequency of the sample. But that does not say whether the
233  * observed frequency is close to the base frequency or not. We also need
234  * to consider unexpectedly uncommon items (again, compared to the base
235  * frequency), and the single-column algorithm does not have to.
236  *
237  * We simply decide how many items to keep by computing the minimum count
238  * using get_mincount_for_mcv_list() and then keep all items that seem to
239  * be more common than that.
240  */
241  mincount = get_mincount_for_mcv_list(numrows, totalrows);
242 
243  /*
244  * Walk the groups until we find the first group with a count below the
245  * mincount threshold (the index of that group is the number of groups we
246  * want to keep).
247  */
248  for (i = 0; i < nitems; i++)
249  {
250  if (groups[i].count < mincount)
251  {
252  nitems = i;
253  break;
254  }
255  }
256 
257  /*
258  * At this point, we know the number of items for the MCV list. There
259  * might be none (for uniform distribution with many groups), and in that
260  * case, there will be no MCV list. Otherwise, construct the MCV list.
261  */
262  if (nitems > 0)
263  {
264  int j;
265  SortItem key;
266  MultiSortSupport tmp;
267 
268  /* frequencies for values in each attribute */
269  SortItem **freqs;
270  int *nfreqs;
271 
272  /* used to search values */
273  tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
274  + sizeof(SortSupportData));
275 
276  /* compute frequencies for values in each column */
277  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279 
280  /*
281  * Allocate the MCV list structure, set the global parameters.
282  */
283  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284  sizeof(MCVItem) * nitems);
285 
286  mcvlist->magic = STATS_MCV_MAGIC;
287  mcvlist->type = STATS_MCV_TYPE_BASIC;
288  mcvlist->ndimensions = numattrs;
289  mcvlist->nitems = nitems;
290 
291  /* store info about data type OIDs */
292  for (i = 0; i < numattrs; i++)
293  mcvlist->types[i] = data->stats[i]->attrtypid;
294 
295  /* Copy the first chunk of groups into the result. */
296  for (i = 0; i < nitems; i++)
297  {
298  /* just pointer to the proper place in the list */
299  MCVItem *item = &mcvlist->items[i];
300 
301  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303 
304  /* copy values for the group */
305  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307 
308  /* groups should be sorted by frequency in descending order */
309  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310 
311  /* group frequency */
312  item->frequency = (double) groups[i].count / numrows;
313 
314  /* base frequency, if the attributes were independent */
315  item->base_frequency = 1.0;
316  for (j = 0; j < numattrs; j++)
317  {
318  SortItem *freq;
319 
320  /* single dimension */
321  tmp->ndims = 1;
322  tmp->ssup[0] = mss->ssup[j];
323 
324  /* fill search key */
325  key.values = &groups[i].values[j];
326  key.isnull = &groups[i].isnull[j];
327 
328  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329  sizeof(SortItem),
330  multi_sort_compare, tmp);
331 
332  item->base_frequency *= ((double) freq->count) / numrows;
333  }
334  }
335 
336  pfree(nfreqs);
337  pfree(freqs);
338  }
339 
340  pfree(items);
341  pfree(groups);
342 
343  return mcvlist;
344 }
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:351
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:152
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:494
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:428
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
struct MCVItem MCVItem
bool * isnull
Definition: statistics.h:82
Datum * values
Definition: statistics.h:83
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93

References Assert(), MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, data, MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, j, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, offsetof, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea * data)

Definition at line 999 of file mcv.c.

1000 {
1001  int dim,
1002  i;
1003  Size expected_size;
1004  MCVList *mcvlist;
1005  char *raw;
1006  char *ptr;
1007  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1008 
1009  int ndims,
1010  nitems;
1011  DimensionInfo *info = NULL;
1012 
1013  /* local allocation buffer (used only for deserialization) */
1014  Datum **map = NULL;
1015 
1016  /* MCV list */
1017  Size mcvlen;
1018 
1019  /* buffer used for the result */
1020  Size datalen;
1021  char *dataptr;
1022  char *valuesptr;
1023  char *isnullptr;
1024 
1025  if (data == NULL)
1026  return NULL;
1027 
1028  /*
1029  * We can't possibly deserialize a MCV list if there's not even a complete
1030  * header. We need an explicit formula here, because we serialize the
1031  * header fields one by one, so we need to ignore struct alignment.
1032  */
1033  if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1034  elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1035  VARSIZE_ANY(data), MinSizeOfMCVList);
1036 
1037  /* read the MCV list header */
1038  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1039 
1040  /* pointer to the data part (skip the varlena header) */
1041  raw = (char *) data;
1042  ptr = VARDATA_ANY(raw);
1043  endptr = (char *) raw + VARSIZE_ANY(data);
1044 
1045  /* get the header and perform further sanity checks */
1046  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1047  ptr += sizeof(uint32);
1048 
1049  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1050  ptr += sizeof(uint32);
1051 
1052  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1053  ptr += sizeof(uint32);
1054 
1055  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1056  ptr += sizeof(AttrNumber);
1057 
1058  if (mcvlist->magic != STATS_MCV_MAGIC)
1059  elog(ERROR, "invalid MCV magic %u (expected %u)",
1060  mcvlist->magic, STATS_MCV_MAGIC);
1061 
1062  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1063  elog(ERROR, "invalid MCV type %u (expected %u)",
1064  mcvlist->type, STATS_MCV_TYPE_BASIC);
1065 
1066  if (mcvlist->ndimensions == 0)
1067  elog(ERROR, "invalid zero-length dimension array in MCVList");
1068  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1069  (mcvlist->ndimensions < 0))
1070  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1071  mcvlist->ndimensions);
1072 
1073  if (mcvlist->nitems == 0)
1074  elog(ERROR, "invalid zero-length item array in MCVList");
1075  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1076  elog(ERROR, "invalid length (%u) item array in MCVList",
1077  mcvlist->nitems);
1078 
1079  nitems = mcvlist->nitems;
1080  ndims = mcvlist->ndimensions;
1081 
1082  /*
1083  * Check amount of data including DimensionInfo for all dimensions and
1084  * also the serialized items (including uint16 indexes). Also, walk
1085  * through the dimension information and add it to the sum.
1086  */
1087  expected_size = SizeOfMCVList(ndims, nitems);
1088 
1089  /*
1090  * Check that we have at least the dimension and info records, along with
1091  * the items. We don't know the size of the serialized values yet. We need
1092  * to do this check first, before accessing the dimension info.
1093  */
1094  if (VARSIZE_ANY(data) < expected_size)
1095  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1096  VARSIZE_ANY(data), expected_size);
1097 
1098  /* Now copy the array of type Oids. */
1099  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1100  ptr += (sizeof(Oid) * ndims);
1101 
1102  /* Now it's safe to access the dimension info. */
1103  info = palloc(ndims * sizeof(DimensionInfo));
1104 
1105  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1106  ptr += (ndims * sizeof(DimensionInfo));
1107 
1108  /* account for the value arrays */
1109  for (dim = 0; dim < ndims; dim++)
1110  {
1111  /*
1112  * XXX I wonder if we can/should rely on asserts here. Maybe those
1113  * checks should be done every time?
1114  */
1115  Assert(info[dim].nvalues >= 0);
1116  Assert(info[dim].nbytes >= 0);
1117 
1118  expected_size += info[dim].nbytes;
1119  }
1120 
1121  /*
1122  * Now we know the total expected MCV size, including all the pieces
1123  * (header, dimension info. items and deduplicated data). So do the final
1124  * check on size.
1125  */
1126  if (VARSIZE_ANY(data) != expected_size)
1127  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1128  VARSIZE_ANY(data), expected_size);
1129 
1130  /*
1131  * We need an array of Datum values for each dimension, so that we can
1132  * easily translate the uint16 indexes later. We also need a top-level
1133  * array of pointers to those per-dimension arrays.
1134  *
1135  * While allocating the arrays for dimensions, compute how much space we
1136  * need for a copy of the by-ref data, as we can't simply point to the
1137  * original values (it might go away).
1138  */
1139  datalen = 0; /* space for by-ref data */
1140  map = (Datum **) palloc(ndims * sizeof(Datum *));
1141 
1142  for (dim = 0; dim < ndims; dim++)
1143  {
1144  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1145 
1146  /* space needed for a copy of data for by-ref types */
1147  datalen += info[dim].nbytes_aligned;
1148  }
1149 
1150  /*
1151  * Now resize the MCV list so that the allocation includes all the data.
1152  *
1153  * Allocate space for a copy of the data, as we can't simply reference the
1154  * serialized data - it's not aligned properly, and it may disappear while
1155  * we're still using the MCV list, e.g. due to catcache release.
1156  *
1157  * We do care about alignment here, because we will allocate all the
1158  * pieces at once, but then use pointers to different parts.
1159  */
1160  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1161 
1162  /* arrays of values and isnull flags for all MCV items */
1163  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1164  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1165 
1166  /* we don't quite need to align this, but it makes some asserts easier */
1167  mcvlen += MAXALIGN(datalen);
1168 
1169  /* now resize the deserialized MCV list, and compute pointers to parts */
1170  mcvlist = repalloc(mcvlist, mcvlen);
1171 
1172  /* pointer to the beginning of values/isnull arrays */
1173  valuesptr = (char *) mcvlist
1174  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1175 
1176  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1177 
1178  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1179 
1180  /*
1181  * Build mapping (index => value) for translating the serialized data into
1182  * the in-memory representation.
1183  */
1184  for (dim = 0; dim < ndims; dim++)
1185  {
1186  /* remember start position in the input array */
1187  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1188 
1189  if (info[dim].typbyval)
1190  {
1191  /* for by-val types we simply copy data into the mapping */
1192  for (i = 0; i < info[dim].nvalues; i++)
1193  {
1194  Datum v = 0;
1195 
1196  memcpy(&v, ptr, info[dim].typlen);
1197  ptr += info[dim].typlen;
1198 
1199  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1200 
1201  /* no under/overflow of input array */
1202  Assert(ptr <= (start + info[dim].nbytes));
1203  }
1204  }
1205  else
1206  {
1207  /* for by-ref types we need to also make a copy of the data */
1208 
1209  /* passed by reference, but fixed length (name, tid, ...) */
1210  if (info[dim].typlen > 0)
1211  {
1212  for (i = 0; i < info[dim].nvalues; i++)
1213  {
1214  memcpy(dataptr, ptr, info[dim].typlen);
1215  ptr += info[dim].typlen;
1216 
1217  /* just point into the array */
1218  map[dim][i] = PointerGetDatum(dataptr);
1219  dataptr += MAXALIGN(info[dim].typlen);
1220  }
1221  }
1222  else if (info[dim].typlen == -1)
1223  {
1224  /* varlena */
1225  for (i = 0; i < info[dim].nvalues; i++)
1226  {
1227  uint32 len;
1228 
1229  /* read the uint32 length */
1230  memcpy(&len, ptr, sizeof(uint32));
1231  ptr += sizeof(uint32);
1232 
1233  /* the length is data-only */
1234  SET_VARSIZE(dataptr, len + VARHDRSZ);
1235  memcpy(VARDATA(dataptr), ptr, len);
1236  ptr += len;
1237 
1238  /* just point into the array */
1239  map[dim][i] = PointerGetDatum(dataptr);
1240 
1241  /* skip to place of the next deserialized value */
1242  dataptr += MAXALIGN(len + VARHDRSZ);
1243  }
1244  }
1245  else if (info[dim].typlen == -2)
1246  {
1247  /* cstring */
1248  for (i = 0; i < info[dim].nvalues; i++)
1249  {
1250  uint32 len;
1251 
1252  memcpy(&len, ptr, sizeof(uint32));
1253  ptr += sizeof(uint32);
1254 
1255  memcpy(dataptr, ptr, len);
1256  ptr += len;
1257 
1258  /* just point into the array */
1259  map[dim][i] = PointerGetDatum(dataptr);
1260  dataptr += MAXALIGN(len);
1261  }
1262  }
1263 
1264  /* no under/overflow of input array */
1265  Assert(ptr <= (start + info[dim].nbytes));
1266 
1267  /* no overflow of the output mcv value */
1268  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1269  }
1270 
1271  /* check we consumed input data for this dimension exactly */
1272  Assert(ptr == (start + info[dim].nbytes));
1273  }
1274 
1275  /* we should have also filled the MCV list exactly */
1276  Assert(dataptr == ((char *) mcvlist + mcvlen));
1277 
1278  /* deserialize the MCV items and translate the indexes to Datums */
1279  for (i = 0; i < nitems; i++)
1280  {
1281  MCVItem *item = &mcvlist->items[i];
1282 
1283  item->values = (Datum *) valuesptr;
1284  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1285 
1286  item->isnull = (bool *) isnullptr;
1287  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1288 
1289  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1290  ptr += sizeof(bool) * ndims;
1291 
1292  memcpy(&item->frequency, ptr, sizeof(double));
1293  ptr += sizeof(double);
1294 
1295  memcpy(&item->base_frequency, ptr, sizeof(double));
1296  ptr += sizeof(double);
1297 
1298  /* finally translate the indexes (for non-NULL only) */
1299  for (dim = 0; dim < ndims; dim++)
1300  {
1301  uint16 index;
1302 
1303  memcpy(&index, ptr, sizeof(uint16));
1304  ptr += sizeof(uint16);
1305 
1306  if (item->isnull[dim])
1307  continue;
1308 
1309  item->values[dim] = map[dim][index];
1310  }
1311 
1312  /* check we're not overflowing the input */
1313  Assert(ptr <= endptr);
1314  }
1315 
1316  /* check that we processed all the data */
1317  Assert(ptr == endptr);
1318 
1319  /* release the buffers used for mapping */
1320  for (dim = 0; dim < ndims; dim++)
1321  pfree(map[dim]);
1322 
1323  pfree(map);
1324 
1325  return mcvlist;
1326 }
unsigned short uint16
Definition: c.h:440
#define MAXALIGN(LEN)
Definition: c.h:757
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
struct DimensionInfo DimensionInfo
#define MinSizeOfMCVList
Definition: mcv.c:63
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:72
unsigned int Oid
Definition: postgres_ext.h:31
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
Definition: type.h:90
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:75

References Assert(), MCVItem::base_frequency, data, elog, ERROR, fetch_att, MCVItem::frequency, i, MCVItem::isnull, MCVList::items, len, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, offsetof, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, repalloc(), SET_VARSIZE, SizeOfMCVList, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList * mcv,
VacAttrStats **  stats 
)

Definition at line 624 of file mcv.c.

625 {
626  int i;
627  int dim;
628  int ndims = mcvlist->ndimensions;
629 
630  SortSupport ssup;
631  DimensionInfo *info;
632 
633  Size total_length;
634 
635  /* serialized items (indexes into arrays, etc.) */
636  bytea *raw;
637  char *ptr;
638  char *endptr PG_USED_FOR_ASSERTS_ONLY;
639 
640  /* values per dimension (and number of non-NULL values) */
641  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
642  int *counts = (int *) palloc0(sizeof(int) * ndims);
643 
644  /*
645  * We'll include some rudimentary information about the attribute types
646  * (length, by-val flag), so that we don't have to look them up while
647  * deserializing the MCV list (we already have the type OID in the
648  * header). This is safe because when changing the type of the attribute
649  * the statistics gets dropped automatically. We need to store the info
650  * about the arrays of deduplicated values anyway.
651  */
652  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
653 
654  /* sort support data for all attributes included in the MCV list */
655  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
656 
657  /* collect and deduplicate values for each dimension (attribute) */
658  for (dim = 0; dim < ndims; dim++)
659  {
660  int ndistinct;
661  TypeCacheEntry *typentry;
662 
663  /*
664  * Lookup the LT operator (can't get it from stats extra_data, as we
665  * don't know how to interpret that - scalar vs. array etc.).
666  */
667  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
668 
669  /* copy important info about the data type (length, by-value) */
670  info[dim].typlen = stats[dim]->attrtype->typlen;
671  info[dim].typbyval = stats[dim]->attrtype->typbyval;
672 
673  /* allocate space for values in the attribute and collect them */
674  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
675 
676  for (i = 0; i < mcvlist->nitems; i++)
677  {
678  /* skip NULL values - we don't need to deduplicate those */
679  if (mcvlist->items[i].isnull[dim])
680  continue;
681 
682  /* append the value at the end */
683  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
684  counts[dim] += 1;
685  }
686 
687  /* if there are just NULL values in this dimension, we're done */
688  if (counts[dim] == 0)
689  continue;
690 
691  /* sort and deduplicate the data */
692  ssup[dim].ssup_cxt = CurrentMemoryContext;
693  ssup[dim].ssup_collation = stats[dim]->attrcollid;
694  ssup[dim].ssup_nulls_first = false;
695 
696  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
697 
698  qsort_arg(values[dim], counts[dim], sizeof(Datum),
699  compare_scalars_simple, &ssup[dim]);
700 
701  /*
702  * Walk through the array and eliminate duplicate values, but keep the
703  * ordering (so that we can do a binary search later). We know there's
704  * at least one item as (counts[dim] != 0), so we can skip the first
705  * element.
706  */
707  ndistinct = 1; /* number of distinct values */
708  for (i = 1; i < counts[dim]; i++)
709  {
710  /* expect sorted array */
711  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
712 
713  /* if the value is the same as the previous one, we can skip it */
714  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
715  continue;
716 
717  values[dim][ndistinct] = values[dim][i];
718  ndistinct += 1;
719  }
720 
721  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
722  Assert(ndistinct <= PG_UINT16_MAX);
723 
724  /*
725  * Store additional info about the attribute - number of deduplicated
726  * values, and also size of the serialized data. For fixed-length data
727  * types this is trivial to compute, for varwidth types we need to
728  * actually walk the array and sum the sizes.
729  */
730  info[dim].nvalues = ndistinct;
731 
732  if (info[dim].typbyval) /* by-value data types */
733  {
734  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
735 
736  /*
737  * We copy the data into the MCV item during deserialization, so
738  * we don't need to allocate any extra space.
739  */
740  info[dim].nbytes_aligned = 0;
741  }
742  else if (info[dim].typlen > 0) /* fixed-length by-ref */
743  {
744  /*
745  * We don't care about alignment in the serialized data, so we
746  * pack the data as much as possible. But we also track how much
747  * data will be needed after deserialization, and in that case we
748  * need to account for alignment of each item.
749  *
750  * Note: As the items are fixed-length, we could easily compute
751  * this during deserialization, but we do it here anyway.
752  */
753  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
754  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
755  }
756  else if (info[dim].typlen == -1) /* varlena */
757  {
758  info[dim].nbytes = 0;
759  info[dim].nbytes_aligned = 0;
760  for (i = 0; i < info[dim].nvalues; i++)
761  {
762  Size len;
763 
764  /*
765  * For varlena values, we detoast the values and store the
766  * length and data separately. We don't bother with alignment
767  * here, which means that during deserialization we need to
768  * copy the fields and only access the copies.
769  */
770  values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
771 
772  /* serialized length (uint32 length + data) */
773  len = VARSIZE_ANY_EXHDR(values[dim][i]);
774  info[dim].nbytes += sizeof(uint32); /* length */
775  info[dim].nbytes += len; /* value (no header) */
776 
777  /*
778  * During deserialization we'll build regular varlena values
779  * with full headers, and we need to align them properly.
780  */
781  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
782  }
783  }
784  else if (info[dim].typlen == -2) /* cstring */
785  {
786  info[dim].nbytes = 0;
787  info[dim].nbytes_aligned = 0;
788  for (i = 0; i < info[dim].nvalues; i++)
789  {
790  Size len;
791 
792  /*
793  * cstring is handled similar to varlena - first we store the
794  * length as uint32 and then the data. We don't care about
795  * alignment, which means that during deserialization we need
796  * to copy the fields and only access the copies.
797  */
798 
799  /* c-strings include terminator, so +1 byte */
800  len = strlen(DatumGetCString(values[dim][i])) + 1;
801  info[dim].nbytes += sizeof(uint32); /* length */
802  info[dim].nbytes += len; /* value */
803 
804  /* space needed for properly aligned deserialized copies */
805  info[dim].nbytes_aligned += MAXALIGN(len);
806  }
807  }
808 
809  /* we know (count>0) so there must be some data */
810  Assert(info[dim].nbytes > 0);
811  }
812 
813  /*
814  * Now we can finally compute how much space we'll actually need for the
815  * whole serialized MCV list (varlena header, MCV header, dimension info
816  * for each attribute, deduplicated values and items).
817  */
818  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
819  + sizeof(AttrNumber) /* ndimensions */
820  + (ndims * sizeof(Oid)); /* attribute types */
821 
822  /* dimension info */
823  total_length += ndims * sizeof(DimensionInfo);
824 
825  /* add space for the arrays of deduplicated values */
826  for (i = 0; i < ndims; i++)
827  total_length += info[i].nbytes;
828 
829  /*
830  * And finally account for the items (those are fixed-length, thanks to
831  * replacing values with uint16 indexes into the deduplicated arrays).
832  */
833  total_length += mcvlist->nitems * ITEM_SIZE(dim);
834 
835  /*
836  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
837  * we set them to zero to make the result more compressible).
838  */
839  raw = (bytea *) palloc0(VARHDRSZ + total_length);
840  SET_VARSIZE(raw, VARHDRSZ + total_length);
841 
842  ptr = VARDATA(raw);
843  endptr = ptr + total_length;
844 
845  /* copy the MCV list header fields, one by one */
846  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->type, sizeof(uint32));
850  ptr += sizeof(uint32);
851 
852  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
853  ptr += sizeof(uint32);
854 
855  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
856  ptr += sizeof(AttrNumber);
857 
858  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
859  ptr += (sizeof(Oid) * ndims);
860 
861  /* store information about the attributes (data amounts, ...) */
862  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
863  ptr += sizeof(DimensionInfo) * ndims;
864 
865  /* Copy the deduplicated values for all attributes to the output. */
866  for (dim = 0; dim < ndims; dim++)
867  {
868  /* remember the starting point for Asserts later */
869  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
870 
871  for (i = 0; i < info[dim].nvalues; i++)
872  {
873  Datum value = values[dim][i];
874 
875  if (info[dim].typbyval) /* passed by value */
876  {
877  Datum tmp;
878 
879  /*
880  * For byval types, we need to copy just the significant bytes
881  * - we can't use memcpy directly, as that assumes
882  * little-endian behavior. store_att_byval does almost what
883  * we need, but it requires a properly aligned buffer - the
884  * output buffer does not guarantee that. So we simply use a
885  * local Datum variable (which guarantees proper alignment),
886  * and then copy the value from it.
887  */
888  store_att_byval(&tmp, value, info[dim].typlen);
889 
890  memcpy(ptr, &tmp, info[dim].typlen);
891  ptr += info[dim].typlen;
892  }
893  else if (info[dim].typlen > 0) /* passed by reference */
894  {
895  /* no special alignment needed, treated as char array */
896  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
897  ptr += info[dim].typlen;
898  }
899  else if (info[dim].typlen == -1) /* varlena */
900  {
901  uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
902 
903  /* copy the length */
904  memcpy(ptr, &len, sizeof(uint32));
905  ptr += sizeof(uint32);
906 
907  /* data from the varlena value (without the header) */
908  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
909  ptr += len;
910  }
911  else if (info[dim].typlen == -2) /* cstring */
912  {
913  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
914 
915  /* copy the length */
916  memcpy(ptr, &len, sizeof(uint32));
917  ptr += sizeof(uint32);
918 
919  /* value */
920  memcpy(ptr, DatumGetCString(value), len);
921  ptr += len;
922  }
923 
924  /* no underflows or overflows */
925  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
926  }
927 
928  /* we should get exactly nbytes of data for this dimension */
929  Assert((ptr - start) == info[dim].nbytes);
930  }
931 
932  /* Serialize the items, with uint16 indexes instead of the values. */
933  for (i = 0; i < mcvlist->nitems; i++)
934  {
935  MCVItem *mcvitem = &mcvlist->items[i];
936 
937  /* don't write beyond the allocated space */
938  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
939 
940  /* copy NULL and frequency flags into the serialized MCV */
941  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
942  ptr += sizeof(bool) * ndims;
943 
944  memcpy(ptr, &mcvitem->frequency, sizeof(double));
945  ptr += sizeof(double);
946 
947  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
948  ptr += sizeof(double);
949 
950  /* store the indexes last */
951  for (dim = 0; dim < ndims; dim++)
952  {
953  uint16 index = 0;
954  Datum *value;
955 
956  /* do the lookup only for non-NULL values */
957  if (!mcvitem->isnull[dim])
958  {
959  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
960  info[dim].nvalues, sizeof(Datum),
961  compare_scalars_simple, &ssup[dim]);
962 
963  Assert(value != NULL); /* serialization or deduplication
964  * error */
965 
966  /* compute index within the deduplicated array */
967  index = (uint16) (value - values[dim]);
968 
969  /* check the index is within expected bounds */
970  Assert(index < info[dim].nvalues);
971  }
972 
973  /* copy the index into the serialized MCV */
974  memcpy(ptr, &index, sizeof(uint16));
975  ptr += sizeof(uint16);
976  }
977 
978  /* make sure we don't overflow the allocated value */
979  Assert(ptr <= endptr);
980  }
981 
982  /* at this point we expect to match the total_length exactly */
983  Assert(ptr == endptr);
984 
985  pfree(values);
986  pfree(counts);
987 
988  return raw;
989 }
#define PG_UINT16_MAX
Definition: c.h:522
int compare_scalars_simple(const void *a, const void *b, void *arg)
#define ITEM_SIZE(ndims)
Definition: mcv.c:57
#define DatumGetPointer(X)
Definition: postgres.h:593
#define DatumGetCString(X)
Definition: postgres.h:610
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Form_pg_type attrtype
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:127
#define store_att_byval(T, newdatum, attlen)
Definition: tupmacs.h:226
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define TYPECACHE_LT_OPR
Definition: typcache.h:137

References Assert(), VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString, DatumGetPointer, MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, PrepareSortSupportFromOrderingOp(), qsort_arg(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, store_att_byval, DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, values, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

◆ statext_ndistinct_build()

MVNDistinct* statext_ndistinct_build ( double  totalrows,
StatsBuildData * data 
)

Definition at line 89 of file mvdistinct.c.

90 {
91  MVNDistinct *result;
92  int k;
93  int itemcnt;
94  int numattrs = data->nattnums;
95  int numcombs = num_combinations(numattrs);
96 
97  result = palloc(offsetof(MVNDistinct, items) +
98  numcombs * sizeof(MVNDistinctItem));
99  result->magic = STATS_NDISTINCT_MAGIC;
100  result->type = STATS_NDISTINCT_TYPE_BASIC;
101  result->nitems = numcombs;
102 
103  itemcnt = 0;
104  for (k = 2; k <= numattrs; k++)
105  {
106  int *combination;
107  CombinationGenerator *generator;
108 
109  /* generate combinations of K out of N elements */
110  generator = generator_init(numattrs, k);
111 
112  while ((combination = generator_next(generator)))
113  {
114  MVNDistinctItem *item = &result->items[itemcnt];
115  int j;
116 
117  item->attributes = palloc(sizeof(AttrNumber) * k);
118  item->nattributes = k;
119 
120  /* translate the indexes to attnums */
121  for (j = 0; j < k; j++)
122  {
123  item->attributes[j] = data->attnums[combination[j]];
124 
125  Assert(AttributeNumberIsValid(item->attributes[j]));
126  }
127 
128  item->ndistinct =
129  ndistinct_for_combination(totalrows, data, k, combination);
130 
131  itemcnt++;
132  Assert(itemcnt <= result->nitems);
133  }
134 
135  generator_free(generator);
136  }
137 
138  /* must consume exactly the whole output array */
139  Assert(itemcnt == result->nitems);
140 
141  return result;
142 }
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition: mvdistinct.c:425
static int num_combinations(int n)
Definition: mvdistinct.c:575
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:642
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:589
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:627
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
uint32 type
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39

References Assert(), AttributeNumberIsValid, MVNDistinctItem::attributes, data, generator_free(), generator_init(), generator_next(), MVNDistinct::items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, num_combinations(), offsetof, palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct* statext_ndistinct_deserialize ( bytea * data)

Definition at line 250 of file mvdistinct.c.

251 {
252  int i;
253  Size minimum_size;
254  MVNDistinct ndist;
255  MVNDistinct *ndistinct;
256  char *tmp;
257 
258  if (data == NULL)
259  return NULL;
260 
261  /* we expect at least the basic fields of MVNDistinct struct */
262  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
263  elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
264  VARSIZE_ANY_EXHDR(data), SizeOfHeader);
265 
266  /* initialize pointer to the data part (skip the varlena header) */
267  tmp = VARDATA_ANY(data);
268 
269  /* read the header fields and perform basic sanity checks */
270  memcpy(&ndist.magic, tmp, sizeof(uint32));
271  tmp += sizeof(uint32);
272  memcpy(&ndist.type, tmp, sizeof(uint32));
273  tmp += sizeof(uint32);
274  memcpy(&ndist.nitems, tmp, sizeof(uint32));
275  tmp += sizeof(uint32);
276 
277  if (ndist.magic != STATS_NDISTINCT_MAGIC)
278  elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
279  ndist.magic, STATS_NDISTINCT_MAGIC);
280  if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
281  elog(ERROR, "invalid ndistinct type %d (expected %d)",
282  ndist.type, STATS_NDISTINCT_TYPE_BASIC);
283  if (ndist.nitems == 0)
284  elog(ERROR, "invalid zero-length item array in MVNDistinct");
285 
286  /* what minimum bytea size do we expect for those parameters */
287  minimum_size = MinSizeOfItems(ndist.nitems);
288  if (VARSIZE_ANY_EXHDR(data) < minimum_size)
289  elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
290  VARSIZE_ANY_EXHDR(data), minimum_size);
291 
292  /*
293  * Allocate space for the ndistinct items (no space for each item's
294  * attnos: those live in AttrNumber arrays allocated separately)
295  */
296  ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
297  (ndist.nitems * sizeof(MVNDistinctItem)));
298  ndistinct->magic = ndist.magic;
299  ndistinct->type = ndist.type;
300  ndistinct->nitems = ndist.nitems;
301 
302  for (i = 0; i < ndistinct->nitems; i++)
303  {
304  MVNDistinctItem *item = &ndistinct->items[i];
305 
306  /* ndistinct value */
307  memcpy(&item->ndistinct, tmp, sizeof(double));
308  tmp += sizeof(double);
309 
310  /* number of attributes */
311  memcpy(&item->nattributes, tmp, sizeof(int));
312  tmp += sizeof(int);
313  Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
314 
315  item->attributes
316  = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
317 
318  memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
319  tmp += sizeof(AttrNumber) * item->nattributes;
320 
321  /* still within the bytea */
322  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
323  }
324 
325  /* we should have consumed the whole bytea exactly */
326  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
327 
328  return ndistinct;
329 }
#define SizeOfHeader
Definition: mvdistinct.c:46
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:56

References Assert(), MVNDistinctItem::attributes, data, elog, ERROR, i, MVNDistinct::items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, offsetof, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_serialize()

bytea* statext_ndistinct_serialize ( MVNDistinct * ndistinct)

Definition at line 179 of file mvdistinct.c.

180 {
181  int i;
182  bytea *output;
183  char *tmp;
184  Size len;
185 
186  Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
187  Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
188 
189  /*
190  * Base size is size of scalar fields in the struct, plus one base struct
191  * for each item, including number of items for each.
192  */
193  len = VARHDRSZ + SizeOfHeader;
194 
195  /* and also include space for the actual attribute numbers */
196  for (i = 0; i < ndistinct->nitems; i++)
197  {
198  int nmembers;
199 
200  nmembers = ndistinct->items[i].nattributes;
201  Assert(nmembers >= 2);
202 
203  len += SizeOfItem(nmembers);
204  }
205 
206  output = (bytea *) palloc(len);
207  SET_VARSIZE(output, len);
208 
209  tmp = VARDATA(output);
210 
211  /* Store the base struct values (magic, type, nitems) */
212  memcpy(tmp, &ndistinct->magic, sizeof(uint32));
213  tmp += sizeof(uint32);
214  memcpy(tmp, &ndistinct->type, sizeof(uint32));
215  tmp += sizeof(uint32);
216  memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
217  tmp += sizeof(uint32);
218 
219  /*
220  * store the ndistinct value, number of attributes and attribute numbers for each entry
221  */
222  for (i = 0; i < ndistinct->nitems; i++)
223  {
224  MVNDistinctItem item = ndistinct->items[i];
225  int nmembers = item.nattributes;
226 
227  memcpy(tmp, &item.ndistinct, sizeof(double));
228  tmp += sizeof(double);
229  memcpy(tmp, &nmembers, sizeof(int));
230  tmp += sizeof(int);
231 
232  memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
233  tmp += nmembers * sizeof(AttrNumber);
234 
235  /* protect against overflows */
236  Assert(tmp <= ((char *) output + len));
237  }
238 
239  /* check we used exactly the expected space */
240  Assert(tmp == ((char *) output + len));
241 
242  return output;
243 }
#define SizeOfItem(natts)
Definition: mvdistinct.c:49

References Assert(), MVNDistinctItem::attributes, i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output(), palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().