PostgreSQL Source Code  git master
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportDataMultiSortSupport
 
typedef struct SortItem SortItem
 

Functions

MVNDistinctstatext_ndistinct_build (double totalrows, int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats)
 
byteastatext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinctstatext_ndistinct_deserialize (bytea *data)
 
MVDependenciesstatext_dependencies_build (int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats)
 
byteastatext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependenciesstatext_dependencies_deserialize (bytea *data)
 
MCVListstatext_mcv_build (int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats, double totalrows, int stattarget)
 
byteastatext_mcv_serialize (MCVList *mcv, VacAttrStats **stats)
 
MCVListstatext_mcv_deserialize (bytea *data)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
void * bsearch_arg (const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
 
AttrNumberbuild_attnums_array (Bitmapset *attrs, int *numattrs)
 
SortItembuild_sorted_items (int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_opclause_expression (OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

Definition at line 52 of file extended_stats_internal.h.

◆ MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

Function Documentation

◆ bsearch_arg()

void* bsearch_arg ( const void *  key,
const void *  base,
size_t  nmemb,
size_t  size,
int(*)(const void *, const void *, void *)  compar,
void *  arg 
)

Definition at line 640 of file extended_stats.c.

References arg, idx(), and sort-test::key.

Referenced by statext_mcv_build(), and statext_mcv_serialize().

643 {
644  size_t l,
645  u,
646  idx;
647  const void *p;
648  int comparison;
649 
650  l = 0;
651  u = nmemb;
652  while (l < u)
653  {
654  idx = (l + u) / 2;
655  p = (void *) (((const char *) base) + (idx * size));
656  comparison = (*compar) (key, p, arg);
657 
658  if (comparison < 0)
659  u = idx;
660  else if (comparison > 0)
661  l = idx + 1;
662  else
663  return (void *) p;
664  }
665 
666  return NULL;
667 }
symbol * p
Definition: api.h:15
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
void * arg
int l
Definition: api.h:16

◆ build_attnums_array()

AttrNumber* build_attnums_array ( Bitmapset attrs,
int *  numattrs 
)

Definition at line 678 of file extended_stats.c.

References Assert, AttrNumberIsForUserDefinedAttr, bms_next_member(), bms_num_members(), i, MaxAttrNumber, and palloc().

Referenced by dependency_degree(), statext_dependencies_build(), and statext_mcv_build().

679 {
680  int i,
681  j;
682  AttrNumber *attnums;
683  int num = bms_num_members(attrs);
684 
685  if (numattrs)
686  *numattrs = num;
687 
688  /* build attnums from the bitmapset */
689  attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
690  i = 0;
691  j = -1;
692  while ((j = bms_next_member(attrs, j)) >= 0)
693  {
694  /*
695  * Make sure the bitmap contains only user-defined attributes. As
696  * bitmaps can't contain negative values, this can be violated in two
697  * ways. Firstly, the bitmap might contain 0 as a member, and secondly
698  * the integer value might be larger than MaxAttrNumber.
699  */
701  Assert(j <= MaxAttrNumber);
702 
703  attnums[i++] = (AttrNumber) j;
704 
705  /* protect against overflows */
706  Assert(i <= num);
707  }
708 
709  return attnums;
710 }
#define MaxAttrNumber
Definition: attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1043
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
#define Assert(condition)
Definition: c.h:732
void * palloc(Size size)
Definition: mcxt.c:924
int i
int16 AttrNumber
Definition: attnum.h:21

◆ build_sorted_items()

SortItem* build_sorted_items ( int  numrows,
int *  nitems,
HeapTuple rows,
TupleDesc  tdesc,
MultiSortSupport  mss,
int  numattrs,
AttrNumber attnums 
)

Definition at line 720 of file extended_stats.c.

References Assert, attlen, heap_getattr, i, idx(), multi_sort_compare(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum, qsort_arg(), toast_raw_datum_size(), TupleDescAttr, value, values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

722 {
723  int i,
724  j,
725  len,
726  idx;
727  int nvalues = numrows * numattrs;
728 
729  SortItem *items;
730  Datum *values;
731  bool *isnull;
732  char *ptr;
733 
734  /* Compute the total amount of memory we need (both items and values). */
735  len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
736 
737  /* Allocate the memory and split it into the pieces. */
738  ptr = palloc0(len);
739 
740  /* items to sort */
741  items = (SortItem *) ptr;
742  ptr += numrows * sizeof(SortItem);
743 
744  /* values and null flags */
745  values = (Datum *) ptr;
746  ptr += nvalues * sizeof(Datum);
747 
748  isnull = (bool *) ptr;
749  ptr += nvalues * sizeof(bool);
750 
751  /* make sure we consumed the whole buffer exactly */
752  Assert((ptr - (char *) items) == len);
753 
754  /* fix the pointers to Datum and bool arrays */
755  idx = 0;
756  for (i = 0; i < numrows; i++)
757  {
758  bool toowide = false;
759 
760  items[idx].values = &values[idx * numattrs];
761  items[idx].isnull = &isnull[idx * numattrs];
762 
763  /* load the values/null flags from sample rows */
764  for (j = 0; j < numattrs; j++)
765  {
766  Datum value;
767  bool isnull;
768 
769  value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
770 
771  /*
772  * If this is a varlena value, check if it's too wide and if yes
773  * then skip the whole item. Otherwise detoast the value.
774  *
775  * XXX It may happen that we've already detoasted some preceding
776  * values for the current item. We don't bother to cleanup those
777  * on the assumption that those are small (below WIDTH_THRESHOLD)
778  * and will be discarded at the end of analyze.
779  */
780  if ((!isnull) &&
781  (TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
782  {
784  {
785  toowide = true;
786  break;
787  }
788 
789  value = PointerGetDatum(PG_DETOAST_DATUM(value));
790  }
791 
792  items[idx].values[j] = value;
793  items[idx].isnull[j] = isnull;
794  }
795 
796  if (toowide)
797  continue;
798 
799  idx++;
800  }
801 
802  /* store the actual number of items (ignoring the too-wide ones) */
803  *nitems = idx;
804 
805  /* all items were too wide */
806  if (idx == 0)
807  {
808  /* everything is allocated as a single chunk */
809  pfree(items);
810  return NULL;
811  }
812 
813  /* do the sort, using the multi-sort */
814  qsort_arg((void *) items, idx, sizeof(SortItem),
815  multi_sort_compare, mss);
816 
817  return items;
818 }
#define PointerGetDatum(X)
Definition: postgres.h:556
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
static struct @144 value
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
void pfree(void *pointer)
Definition: mcxt.c:1031
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:768
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
int16 attlen
Definition: pg_attribute.h:64
#define heap_getattr(tup, attnum, tupleDesc, isnull)
Definition: htup_details.h:762
void * palloc0(Size size)
Definition: mcxt.c:955
uintptr_t Datum
Definition: postgres.h:367
struct SortItem SortItem
#define WIDTH_THRESHOLD
#define Assert(condition)
Definition: c.h:732
int multi_sort_compare(const void *a, const void *b, void *arg)
static Datum values[MAXATTR]
Definition: bootstrap.c:167
int i
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:235
unsigned char bool
Definition: c.h:308

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)

Definition at line 633 of file extended_stats.c.

References ApplySortComparator().

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

634 {
635  return ApplySortComparator(a, false, b, false, ssup);
636 }
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ compare_scalars_simple()

int compare_scalars_simple ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 625 of file extended_stats.c.

References compare_datums_simple().

Referenced by statext_mcv_serialize().

626 {
627  return compare_datums_simple(*(Datum *) a,
628  *(Datum *) b,
629  (SortSupport) arg);
630 }
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
uintptr_t Datum
Definition: postgres.h:367
void * arg

◆ examine_opclause_expression()

bool examine_opclause_expression ( OpExpr expr,
Var **  varp,
Const **  cstp,
bool varonleftp 
)

Definition at line 1350 of file extended_stats.c.

References arg, OpExpr::args, Assert, IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

1351 {
1352  Var *var;
1353  Const *cst;
1354  bool varonleft;
1355  Node *leftop,
1356  *rightop;
1357 
1358  /* enforced by statext_is_compatible_clause_internal */
1359  Assert(list_length(expr->args) == 2);
1360 
1361  leftop = linitial(expr->args);
1362  rightop = lsecond(expr->args);
1363 
1364  /* strip RelabelType from either side of the expression */
1365  if (IsA(leftop, RelabelType))
1366  leftop = (Node *) ((RelabelType *) leftop)->arg;
1367 
1368  if (IsA(rightop, RelabelType))
1369  rightop = (Node *) ((RelabelType *) rightop)->arg;
1370 
1371  if (IsA(leftop, Var) && IsA(rightop, Const))
1372  {
1373  var = (Var *) leftop;
1374  cst = (Const *) rightop;
1375  varonleft = true;
1376  }
1377  else if (IsA(leftop, Const) && IsA(rightop, Var))
1378  {
1379  var = (Var *) rightop;
1380  cst = (Const *) leftop;
1381  varonleft = false;
1382  }
1383  else
1384  return false;
1385 
1386  /* return pointers to the extracted parts if requested */
1387  if (varp)
1388  *varp = var;
1389 
1390  if (cstp)
1391  *cstp = cst;
1392 
1393  if (varonleftp)
1394  *varonleftp = varonleft;
1395 
1396  return true;
1397 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
Definition: nodes.h:525
Definition: primnodes.h:167
#define lsecond(l)
Definition: pg_list.h:200
#define linitial(l)
Definition: pg_list.h:195
#define Assert(condition)
Definition: c.h:732
static int list_length(const List *l)
Definition: pg_list.h:169
void * arg
List * args
Definition: primnodes.h:508

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo root,
StatisticExtInfo stat,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Selectivity basesel,
Selectivity totalsel 
)

Definition at line 1794 of file mcv.c.

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, StatisticExtInfo::keys, mcv_get_match_bitmap(), MCVList::nitems, statext_mcv_load(), and StatisticExtInfo::statOid.

Referenced by statext_mcv_clauselist_selectivity().

1799 {
1800  int i;
1801  MCVList *mcv;
1802  Selectivity s = 0.0;
1803 
1804  /* match/mismatch bitmap for each MCV item */
1805  bool *matches = NULL;
1806 
1807  /* load the MCV list stored in the statistics object */
1808  mcv = statext_mcv_load(stat->statOid);
1809 
1810  /* build a match bitmap for the clauses */
1811  matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
1812 
1813  /* sum frequencies for all the matching MCV items */
1814  *basesel = 0.0;
1815  *totalsel = 0.0;
1816  for (i = 0; i < mcv->nitems; i++)
1817  {
1818  *totalsel += mcv->items[i].frequency;
1819 
1820  if (matches[i] != false)
1821  {
1822  /* XXX Shouldn't the basesel be outside the if condition? */
1823  *basesel += mcv->items[i].base_frequency;
1824  s += mcv->items[i].frequency;
1825  }
1826  }
1827 
1828  return s;
1829 }
uint32 nitems
Definition: statistics.h:90
double Selectivity
Definition: nodes.h:658
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1543
MCVList * statext_mcv_load(Oid mvoid)
Definition: mcv.c:557
double base_frequency
Definition: statistics.h:80
Bitmapset * keys
Definition: pathnodes.h:885
int i
double frequency
Definition: statistics.h:79

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)

Definition at line 557 of file extended_stats.c.

References CurrentMemoryContext, PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

559 {
560  SortSupport ssup = &mss->ssup[sortdim];
561 
563  ssup->ssup_collation = collation;
564  ssup->ssup_nulls_first = false;
565 
567 }
bool ssup_nulls_first
Definition: sortsupport.h:75
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
MemoryContext ssup_cxt
Definition: sortsupport.h:66
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:377

◆ multi_sort_compare()

int multi_sort_compare ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 571 of file extended_stats.c.

References ApplySortComparator(), compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

572 {
574  SortItem *ia = (SortItem *) a;
575  SortItem *ib = (SortItem *) b;
576  int i;
577 
578  for (i = 0; i < mss->ndims; i++)
579  {
580  int compare;
581 
582  compare = ApplySortComparator(ia->values[i], ia->isnull[i],
583  ib->values[i], ib->isnull[i],
584  &mss->ssup[i]);
585 
586  if (compare != 0)
587  return compare;
588  }
589 
590  /* equal by default */
591  return 0;
592 }
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
int i
void * arg
MultiSortSupportData * MultiSortSupport
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem a,
const SortItem b,
MultiSortSupport  mss 
)

Definition at line 596 of file extended_stats.c.

References ApplySortComparator(), SortItem::isnull, MultiSortSupportData::ssup, and SortItem::values.

Referenced by dependency_degree().

598 {
599  return ApplySortComparator(a->values[dim], a->isnull[dim],
600  b->values[dim], b->isnull[dim],
601  &mss->ssup[dim]);
602 }
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem a,
const SortItem b,
MultiSortSupport  mss 
)

Definition at line 605 of file extended_stats.c.

References ApplySortComparator(), SortItem::isnull, MultiSortSupportData::ssup, and SortItem::values.

Referenced by dependency_degree().

608 {
609  int dim;
610 
611  for (dim = start; dim <= end; dim++)
612  {
613  int r = ApplySortComparator(a->values[dim], a->isnull[dim],
614  b->values[dim], b->isnull[dim],
615  &mss->ssup[dim]);
616 
617  if (r != 0)
618  return r;
619  }
620 
621  return 0;
622 }
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)

Definition at line 538 of file extended_stats.c.

References Assert, MultiSortSupportData::ndims, offsetof, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

539 {
540  MultiSortSupport mss;
541 
542  Assert(ndims >= 2);
543 
545  + sizeof(SortSupportData) * ndims);
546 
547  mss->ndims = ndims;
548 
549  return mss;
550 }
struct SortSupportData SortSupportData
void * palloc0(Size size)
Definition: mcxt.c:955
#define Assert(condition)
Definition: c.h:732
MultiSortSupportData * MultiSortSupport
#define offsetof(type, field)
Definition: c.h:655

◆ statext_dependencies_build()

MVDependencies* statext_dependencies_build ( int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats 
)

Definition at line 357 of file dependencies.c.

References Assert, MVDependency::attributes, build_attnums_array(), MVDependency::degree, DependencyGeneratorData::dependencies, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, DependencyGeneratorData::k, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, offsetof, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

359 {
360  int i,
361  k;
362  int numattrs;
363  AttrNumber *attnums;
364 
365  /* result */
366  MVDependencies *dependencies = NULL;
367 
368  /*
369  * Transform the bms into an array, to make accessing i-th member easier.
370  */
371  attnums = build_attnums_array(attrs, &numattrs);
372 
373  Assert(numattrs >= 2);
374 
375  /*
376  * We'll try build functional dependencies starting from the smallest ones
377  * covering just 2 columns, to the largest ones, covering all columns
378  * included in the statistics object. We start from the smallest ones
379  * because we want to be able to skip already implied ones.
380  */
381  for (k = 2; k <= numattrs; k++)
382  {
383  AttrNumber *dependency; /* array with k elements */
384 
385  /* prepare a DependencyGenerator of variation */
387 
388  /* generate all possible variations of k values (out of n) */
389  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
390  {
391  double degree;
392  MVDependency *d;
393 
394  /* compute how valid the dependency seems */
395  degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
396 
397  /*
398  * if the dependency seems entirely invalid, don't store it
399  */
400  if (degree == 0.0)
401  continue;
402 
403  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
404  + k * sizeof(AttrNumber));
405 
406  /* copy the dependency (and keep the indexes into stxkeys) */
407  d->degree = degree;
408  d->nattributes = k;
409  for (i = 0; i < k; i++)
410  d->attributes[i] = attnums[dependency[i]];
411 
412  /* initialize the list of dependencies */
413  if (dependencies == NULL)
414  {
415  dependencies
416  = (MVDependencies *) palloc0(sizeof(MVDependencies));
417 
418  dependencies->magic = STATS_DEPS_MAGIC;
419  dependencies->type = STATS_DEPS_TYPE_BASIC;
420  dependencies->ndeps = 0;
421  }
422 
423  dependencies->ndeps++;
424  dependencies = (MVDependencies *) repalloc(dependencies,
425  offsetof(MVDependencies, deps)
426  + dependencies->ndeps * sizeof(MVDependency *));
427 
428  dependencies->deps[dependencies->ndeps - 1] = d;
429  }
430 
431  /*
432  * we're done with variations of k elements, so free the
433  * DependencyGenerator
434  */
435  DependencyGenerator_free(DependencyGenerator);
436  }
437 
438  return dependencies;
439 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define STATS_DEPS_MAGIC
Definition: statistics.h:42
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:43
AttrNumber nattributes
Definition: statistics.h:52
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:167
AttrNumber * build_attnums_array(Bitmapset *attrs, int *numattrs)
static double dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs)
Definition: dependencies.c:216
void * palloc0(Size size)
Definition: mcxt.c:955
uint32 magic
Definition: statistics.h:58
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:732
double degree
Definition: statistics.h:51
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:199
int i
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:190
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:655

◆ statext_dependencies_deserialize()

MVDependencies* statext_dependencies_deserialize ( bytea data)

Definition at line 501 of file dependencies.c.

References Assert, MVDependency::attributes, MVDependency::degree, DependencyGeneratorData::dependencies, MVDependencies::deps, elog, ERROR, i, DependencyGeneratorData::k, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, offsetof, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

502 {
503  int i;
504  Size min_expected_size;
505  MVDependencies *dependencies;
506  char *tmp;
507 
508  if (data == NULL)
509  return NULL;
510 
511  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
512  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
514 
515  /* read the MVDependencies header */
516  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
517 
518  /* initialize pointer to the data part (skip the varlena header) */
519  tmp = VARDATA_ANY(data);
520 
521  /* read the header fields and perform basic sanity checks */
522  memcpy(&dependencies->magic, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->type, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527  tmp += sizeof(uint32);
528 
529  if (dependencies->magic != STATS_DEPS_MAGIC)
530  elog(ERROR, "invalid dependency magic %d (expected %d)",
531  dependencies->magic, STATS_DEPS_MAGIC);
532 
533  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
534  elog(ERROR, "invalid dependency type %d (expected %d)",
535  dependencies->type, STATS_DEPS_TYPE_BASIC);
536 
537  if (dependencies->ndeps == 0)
538  elog(ERROR, "invalid zero-length item array in MVDependencies");
539 
540  /* what minimum bytea size do we expect for those parameters */
541  min_expected_size = SizeOfItem(dependencies->ndeps);
542 
543  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
544  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
545  VARSIZE_ANY_EXHDR(data), min_expected_size);
546 
547  /* allocate space for the MCV items */
548  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
549  + (dependencies->ndeps * sizeof(MVDependency *)));
550 
551  for (i = 0; i < dependencies->ndeps; i++)
552  {
553  double degree;
554  AttrNumber k;
555  MVDependency *d;
556 
557  /* degree of validity */
558  memcpy(&degree, tmp, sizeof(double));
559  tmp += sizeof(double);
560 
561  /* number of attributes */
562  memcpy(&k, tmp, sizeof(AttrNumber));
563  tmp += sizeof(AttrNumber);
564 
565  /* is the number of attributes valid? */
566  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
567 
568  /* now that we know the number of attributes, allocate the dependency */
569  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
570  + (k * sizeof(AttrNumber)));
571 
572  d->degree = degree;
573  d->nattributes = k;
574 
575  /* copy attribute numbers */
576  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577  tmp += sizeof(AttrNumber) * d->nattributes;
578 
579  dependencies->deps[i] = d;
580 
581  /* still within the bytea */
582  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
583  }
584 
585  /* we should have consumed the whole bytea exactly */
586  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
587 
588  return dependencies;
589 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define STATS_DEPS_MAGIC
Definition: statistics.h:42
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:43
#define SizeOfItem(natts)
Definition: dependencies.c:40
AttrNumber nattributes
Definition: statistics.h:52
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:358
void * palloc0(Size size)
Definition: mcxt.c:955
uint32 magic
Definition: statistics.h:58
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:732
double degree
Definition: statistics.h:51
size_t Size
Definition: c.h:466
#define SizeOfHeader
Definition: dependencies.c:37
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
#define elog(elevel,...)
Definition: elog.h:226
int i
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:655

◆ statext_dependencies_serialize()

bytea* statext_dependencies_serialize ( MVDependencies dependencies)

Definition at line 446 of file dependencies.c.

References Assert, MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output(), palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

447 {
448  int i;
449  bytea *output;
450  char *tmp;
451  Size len;
452 
453  /* we need to store ndeps, with a number of attributes for each one */
454  len = VARHDRSZ + SizeOfHeader;
455 
456  /* and also include space for the actual attribute numbers and degrees */
457  for (i = 0; i < dependencies->ndeps; i++)
458  len += SizeOfItem(dependencies->deps[i]->nattributes);
459 
460  output = (bytea *) palloc0(len);
461  SET_VARSIZE(output, len);
462 
463  tmp = VARDATA(output);
464 
465  /* Store the base struct values (magic, type, ndeps) */
466  memcpy(tmp, &dependencies->magic, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->type, sizeof(uint32));
469  tmp += sizeof(uint32);
470  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471  tmp += sizeof(uint32);
472 
473  /* store number of attributes and attribute numbers for each dependency */
474  for (i = 0; i < dependencies->ndeps; i++)
475  {
476  MVDependency *d = dependencies->deps[i];
477 
478  memcpy(tmp, &d->degree, sizeof(double));
479  tmp += sizeof(double);
480 
481  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482  tmp += sizeof(AttrNumber);
483 
484  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485  tmp += sizeof(AttrNumber) * d->nattributes;
486 
487  /* protect against overflow */
488  Assert(tmp <= ((char *) output + len));
489  }
490 
491  /* make sure we've produced exactly the right amount of data */
492  Assert(tmp == ((char *) output + len));
493 
494  return output;
495 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define VARDATA(PTR)
Definition: postgres.h:302
static void output(uint64 loop_count)
#define VARHDRSZ
Definition: c.h:555
#define SizeOfItem(natts)
Definition: dependencies.c:40
AttrNumber nattributes
Definition: statistics.h:52
unsigned int uint32
Definition: c.h:358
void * palloc0(Size size)
Definition: mcxt.c:955
uint32 magic
Definition: statistics.h:58
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:732
double degree
Definition: statistics.h:51
size_t Size
Definition: c.h:466
#define SizeOfHeader
Definition: dependencies.c:37
int i
Definition: c.h:549
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21

◆ statext_mcv_build()

MCVList* statext_mcv_build ( int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats,
double  totalrows,
int  stattarget 
)

Definition at line 183 of file mcv.c.

References Assert, VacAttrStats::attrtypid, MCVItem::base_frequency, bsearch_arg(), build_attnums_array(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, offsetof, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

185 {
186  int i,
187  numattrs,
188  ngroups,
189  nitems;
190  AttrNumber *attnums;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  attnums = build_attnums_array(attrs, &numattrs);
198 
199  /* comparator for all the columns */
200  mss = build_mss(stats, numattrs);
201 
202  /* sort the rows */
203  items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
204  mss, numattrs, attnums);
205 
206  if (!items)
207  return NULL;
208 
209  /* transform the sorted rows into groups (sorted by frequency) */
210  groups = build_distinct_groups(nitems, items, mss, &ngroups);
211 
212  /*
213  * Maximum number of MCV items to store, based on the statistics target
214  * we computed for the statistics object (from target set for the object
215  * itself, attributes and the system default). In any case, we can't keep
216  * more groups than we have available.
217  */
218  nitems = stattarget;
219  if (nitems > ngroups)
220  nitems = ngroups;
221 
222  /*
223  * Decide how many items to keep in the MCV list. We can't use the same
224  * algorithm as per-column MCV lists, because that only considers the
225  * actual group frequency - but we're primarily interested in how the
226  * actual frequency differs from the base frequency (product of simple
227  * per-column frequencies, as if the columns were independent).
228  *
229  * Using the same algorithm might exclude items that are close to the
230  * "average" frequency of the sample. But that does not say whether the
231  * observed frequency is close to the base frequency or not. We also need
232  * to consider unexpectedly uncommon items (again, compared to the base
233  * frequency), and the single-column algorithm does not have to.
234  *
235  * We simply decide how many items to keep by computing minimum count
236  * using get_mincount_for_mcv_list() and then keep all items that seem to
237  * be more common than that.
238  */
239  mincount = get_mincount_for_mcv_list(numrows, totalrows);
240 
241  /*
242  * Walk the groups until we find the first group with a count below the
243  * mincount threshold (the index of that group is the number of groups we
244  * want to keep).
245  */
246  for (i = 0; i < nitems; i++)
247  {
248  if (groups[i].count < mincount)
249  {
250  nitems = i;
251  break;
252  }
253  }
254 
255  /*
256  * At this point we know the number of items for the MCV list. There might
257  * be none (for uniform distribution with many groups), and in that case
258  * there will be no MCV list. Otherwise construct the MCV list.
259  */
260  if (nitems > 0)
261  {
262  int j;
263  SortItem key;
264  MultiSortSupport tmp;
265 
266  /* frequencies for values in each attribute */
267  SortItem **freqs;
268  int *nfreqs;
269 
270  /* used to search values */
272  + sizeof(SortSupportData));
273 
274  /* compute frequencies for values in each column */
275  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
276  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
277 
278  /*
279  * Allocate the MCV list structure, set the global parameters.
280  */
281  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
282  sizeof(MCVItem) * nitems);
283 
284  mcvlist->magic = STATS_MCV_MAGIC;
285  mcvlist->type = STATS_MCV_TYPE_BASIC;
286  mcvlist->ndimensions = numattrs;
287  mcvlist->nitems = nitems;
288 
289  /* store info about data type OIDs */
290  for (i = 0; i < numattrs; i++)
291  mcvlist->types[i] = stats[i]->attrtypid;
292 
293  /* Copy the first chunk of groups into the result. */
294  for (i = 0; i < nitems; i++)
295  {
296  /* just pointer to the proper place in the list */
297  MCVItem *item = &mcvlist->items[i];
298 
299  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
300  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
301 
302  /* copy values for the group */
303  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
304  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
305 
306  /* groups should be sorted by frequency in descending order */
307  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
308 
309  /* group frequency */
310  item->frequency = (double) groups[i].count / numrows;
311 
312  /* base frequency, if the attributes were independent */
313  item->base_frequency = 1.0;
314  for (j = 0; j < numattrs; j++)
315  {
316  SortItem *freq;
317 
318  /* single dimension */
319  tmp->ndims = 1;
320  tmp->ssup[0] = mss->ssup[j];
321 
322  /* fill search key */
323  key.values = &groups[i].values[j];
324  key.isnull = &groups[i].isnull[j];
325 
326  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
327  sizeof(SortItem),
328  multi_sort_compare, tmp);
329 
330  item->base_frequency *= ((double) freq->count) / numrows;
331  }
332  }
333 
334  pfree(nfreqs);
335  pfree(freqs);
336  }
337 
338  pfree(items);
339  pfree(groups);
340 
341  return mcvlist;
342 }
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:423
uint32 nitems
Definition: statistics.h:90
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:489
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs)
Definition: mcv.c:349
SortItem * build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:151
void pfree(void *pointer)
Definition: mcxt.c:1031
Oid attrtypid
Definition: vacuum.h:86
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
AttrNumber * build_attnums_array(Bitmapset *attrs, int *numattrs)
uint32 type
Definition: statistics.h:89
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:66
struct SortSupportData SortSupportData
void * palloc0(Size size)
Definition: mcxt.c:955
uintptr_t Datum
Definition: postgres.h:367
#define Assert(condition)
Definition: c.h:732
double base_frequency
Definition: statistics.h:80
#define STATS_MCV_MAGIC
Definition: statistics.h:65
int multi_sort_compare(const void *a, const void *b, void *arg)
bool * isnull
Definition: statistics.h:81
void * palloc(Size size)
Definition: mcxt.c:924
int i
MultiSortSupportData * MultiSortSupport
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:655
double frequency
Definition: statistics.h:79

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea data)

Definition at line 992 of file mcv.c.

References Assert, MCVItem::base_frequency, elog, ERROR, fetch_att, MCVItem::frequency, i, MCVItem::isnull, MCVList::items, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, offsetof, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, repalloc(), SET_VARSIZE, SizeOfMCVList, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

993 {
994  int dim,
995  i;
996  Size expected_size;
997  MCVList *mcvlist;
998  char *raw;
999  char *ptr;
1000  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1001 
1002  int ndims,
1003  nitems;
1004  DimensionInfo *info = NULL;
1005 
1006  /* local allocation buffer (used only for deserialization) */
1007  Datum **map = NULL;
1008 
1009  /* MCV list */
1010  Size mcvlen;
1011 
1012  /* buffer used for the result */
1013  Size datalen;
1014  char *dataptr;
1015  char *valuesptr;
1016  char *isnullptr;
1017 
1018  if (data == NULL)
1019  return NULL;
1020 
1021  /*
1022  * We can't possibly deserialize a MCV list if there's not even a complete
1023  * header. We need an explicit formula here, because we serialize the
1024  * header fields one by one, so we need to ignore struct alignment.
1025  */
1026  if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1027  elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1028  VARSIZE_ANY(data), MinSizeOfMCVList);
1029 
1030  /* read the MCV list header */
1031  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1032 
1033  /* pointer to the data part (skip the varlena header) */
1034  raw = (char *) data;
1035  ptr = VARDATA_ANY(raw);
1036  endptr = (char *) raw + VARSIZE_ANY(data);
1037 
1038  /* get the header and perform further sanity checks */
1039  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1040  ptr += sizeof(uint32);
1041 
1042  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1043  ptr += sizeof(uint32);
1044 
1045  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1046  ptr += sizeof(uint32);
1047 
1048  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1049  ptr += sizeof(AttrNumber);
1050 
1051  if (mcvlist->magic != STATS_MCV_MAGIC)
1052  elog(ERROR, "invalid MCV magic %u (expected %u)",
1053  mcvlist->magic, STATS_MCV_MAGIC);
1054 
1055  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1056  elog(ERROR, "invalid MCV type %u (expected %u)",
1057  mcvlist->type, STATS_MCV_TYPE_BASIC);
1058 
1059  if (mcvlist->ndimensions == 0)
1060  elog(ERROR, "invalid zero-length dimension array in MCVList");
1061  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1062  (mcvlist->ndimensions < 0))
1063  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1064  mcvlist->ndimensions);
1065 
1066  if (mcvlist->nitems == 0)
1067  elog(ERROR, "invalid zero-length item array in MCVList");
1068  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1069  elog(ERROR, "invalid length (%u) item array in MCVList",
1070  mcvlist->nitems);
1071 
1072  nitems = mcvlist->nitems;
1073  ndims = mcvlist->ndimensions;
1074 
1075  /*
1076  * Check amount of data including DimensionInfo for all dimensions and
1077  * also the serialized items (including uint16 indexes). Also, walk
1078  * through the dimension information and add it to the sum.
1079  */
1080  expected_size = SizeOfMCVList(ndims, nitems);
1081 
1082  /*
1083  * Check that we have at least the dimension and info records, along with
1084  * the items. We don't know the size of the serialized values yet. We need
1085  * to do this check first, before accessing the dimension info.
1086  */
1087  if (VARSIZE_ANY(data) < expected_size)
1088  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1089  VARSIZE_ANY(data), expected_size);
1090 
1091  /* Now copy the array of type Oids. */
1092  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1093  ptr += (sizeof(Oid) * ndims);
1094 
1095  /* Now it's safe to access the dimension info. */
1096  info = palloc(ndims * sizeof(DimensionInfo));
1097 
1098  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1099  ptr += (ndims * sizeof(DimensionInfo));
1100 
1101  /* account for the value arrays */
1102  for (dim = 0; dim < ndims; dim++)
1103  {
1104  /*
1105  * XXX I wonder if we can/should rely on asserts here. Maybe those
1106  * checks should be done every time?
1107  */
1108  Assert(info[dim].nvalues >= 0);
1109  Assert(info[dim].nbytes >= 0);
1110 
1111  expected_size += info[dim].nbytes;
1112  }
1113 
1114  /*
1115  * Now we know the total expected MCV size, including all the pieces
1116  * (header, dimension info. items and deduplicated data). So do the final
1117  * check on size.
1118  */
1119  if (VARSIZE_ANY(data) != expected_size)
1120  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1121  VARSIZE_ANY(data), expected_size);
1122 
1123  /*
1124  * We need an array of Datum values for each dimension, so that we can
1125  * easily translate the uint16 indexes later. We also need a top-level
1126  * array of pointers to those per-dimension arrays.
1127  *
1128  * While allocating the arrays for dimensions, compute how much space we
1129  * need for a copy of the by-ref data, as we can't simply point to the
1130  * original values (it might go away).
1131  */
1132  datalen = 0; /* space for by-ref data */
1133  map = (Datum **) palloc(ndims * sizeof(Datum *));
1134 
1135  for (dim = 0; dim < ndims; dim++)
1136  {
1137  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1138 
1139  /* space needed for a copy of data for by-ref types */
1140  datalen += info[dim].nbytes_aligned;
1141  }
1142 
1143  /*
1144  * Now resize the MCV list so that the allocation includes all the data.
1145  *
1146  * Allocate space for a copy of the data, as we can't simply reference the
1147  * serialized data - it's not aligned properly, and it may disappear while
1148  * we're still using the MCV list, e.g. due to catcache release.
1149  *
1150  * We do care about alignment here, because we will allocate all the pieces
1151  * at once, but then use pointers to different parts.
1152  */
1153  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1154 
1155  /* arrays of values and isnull flags for all MCV items */
1156  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1157  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1158 
1159  /* we don't quite need to align this, but it makes some asserts easier */
1160  mcvlen += MAXALIGN(datalen);
1161 
1162  /* now resize the deserialized MCV list, and compute pointers to parts */
1163  mcvlist = repalloc(mcvlist, mcvlen);
1164 
1165  /* pointer to the beginning of values/isnull arrays */
1166  valuesptr = (char *) mcvlist
1167  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1168 
1169  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1170 
1171  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1172 
1173  /*
1174  * Build mapping (index => value) for translating the serialized data into
1175  * the in-memory representation.
1176  */
1177  for (dim = 0; dim < ndims; dim++)
1178  {
1179  /* remember start position in the input array */
1180  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1181 
1182  if (info[dim].typbyval)
1183  {
1184  /* for by-val types we simply copy data into the mapping */
1185  for (i = 0; i < info[dim].nvalues; i++)
1186  {
1187  Datum v = 0;
1188 
1189  memcpy(&v, ptr, info[dim].typlen);
1190  ptr += info[dim].typlen;
1191 
1192  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1193 
1194  /* no under/overflow of input array */
1195  Assert(ptr <= (start + info[dim].nbytes));
1196  }
1197  }
1198  else
1199  {
1200  /* for by-ref types we need to also make a copy of the data */
1201 
1202  /* passed by reference, but fixed length (name, tid, ...) */
1203  if (info[dim].typlen > 0)
1204  {
1205  for (i = 0; i < info[dim].nvalues; i++)
1206  {
1207  memcpy(dataptr, ptr, info[dim].typlen);
1208  ptr += info[dim].typlen;
1209 
1210  /* just point into the array */
1211  map[dim][i] = PointerGetDatum(dataptr);
1212  dataptr += MAXALIGN(info[dim].typlen);
1213  }
1214  }
1215  else if (info[dim].typlen == -1)
1216  {
1217  /* varlena */
1218  for (i = 0; i < info[dim].nvalues; i++)
1219  {
1220  uint32 len;
1221 
1222  /* read the uint32 length */
1223  memcpy(&len, ptr, sizeof(uint32));
1224  ptr += sizeof(uint32);
1225 
1226  /* the length is data-only */
1227  SET_VARSIZE(dataptr, len + VARHDRSZ);
1228  memcpy(VARDATA(dataptr), ptr, len);
1229  ptr += len;
1230 
1231  /* just point into the array */
1232  map[dim][i] = PointerGetDatum(dataptr);
1233 
1234  /* skip to place of the next deserialized value */
1235  dataptr += MAXALIGN(len + VARHDRSZ);
1236  }
1237  }
1238  else if (info[dim].typlen == -2)
1239  {
1240  /* cstring */
1241  for (i = 0; i < info[dim].nvalues; i++)
1242  {
1243  uint32 len;
1244 
1245  memcpy(&len, ptr, sizeof(uint32));
1246  ptr += sizeof(uint32);
1247 
1248  memcpy(dataptr, ptr, len);
1249  ptr += len;
1250 
1251  /* just point into the array */
1252  map[dim][i] = PointerGetDatum(dataptr);
1253  dataptr += MAXALIGN(len);
1254  }
1255  }
1256 
1257  /* no under/overflow of input array */
1258  Assert(ptr <= (start + info[dim].nbytes));
1259 
1260  /* no overflow of the output mcv value */
1261  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1262  }
1263 
1264  /* check we consumed input data for this dimension exactly */
1265  Assert(ptr == (start + info[dim].nbytes));
1266  }
1267 
1268  /* we should have also filled the MCV list exactly */
1269  Assert(dataptr == ((char *) mcvlist + mcvlen));
1270 
1271  /* deserialize the MCV items and translate the indexes to Datums */
1272  for (i = 0; i < nitems; i++)
1273  {
1274  MCVItem *item = &mcvlist->items[i];
1275 
1276  item->values = (Datum *) valuesptr;
1277  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1278 
1279  item->isnull = (bool *) isnullptr;
1280  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1281 
1282  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1283  ptr += sizeof(bool) * ndims;
1284 
1285  memcpy(&item->frequency, ptr, sizeof(double));
1286  ptr += sizeof(double);
1287 
1288  memcpy(&item->base_frequency, ptr, sizeof(double));
1289  ptr += sizeof(double);
1290 
1291  /* finally translate the indexes (for non-NULL only) */
1292  for (dim = 0; dim < ndims; dim++)
1293  {
1294  uint16 index;
1295 
1296  memcpy(&index, ptr, sizeof(uint16));
1297  ptr += sizeof(uint16);
1298 
1299  if (item->isnull[dim])
1300  continue;
1301 
1302  item->values[dim] = map[dim][index];
1303  }
1304 
1305  /* check we're not overflowing the input */
1306  Assert(ptr <= endptr);
1307  }
1308 
1309  /* check that we processed all the data */
1310  Assert(ptr == endptr);
1311 
1312  /* release the buffers used for mapping */
1313  for (dim = 0; dim < ndims; dim++)
1314  pfree(map[dim]);
1315 
1316  pfree(map);
1317 
1318  return mcvlist;
1319 }
uint32 nitems
Definition: statistics.h:90
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:555
#define MinSizeOfMCVList
Definition: mcv.c:62
struct DimensionInfo DimensionInfo
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:357
void pfree(void *pointer)
Definition: mcxt.c:1031
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:358
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:71
uint32 type
Definition: statistics.h:89
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:66
void * palloc0(Size size)
Definition: mcxt.c:955
uintptr_t Datum
Definition: postgres.h:367
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:69
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
#define Assert(condition)
Definition: c.h:732
double base_frequency
Definition: statistics.h:80
#define STATS_MCV_MAGIC
Definition: statistics.h:65
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:685
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044
bool * isnull
Definition: statistics.h:81
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
void * palloc(Size size)
Definition: mcxt.c:924
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:73
#define elog(elevel,...)
Definition: elog.h:226
int i
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:123
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:655
double frequency
Definition: statistics.h:79
unsigned char bool
Definition: c.h:308

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList mcv,
VacAttrStats **  stats 
)

Definition at line 618 of file mcv.c.

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString, DatumGetPointer, MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, PrepareSortSupportFromOrderingOp(), qsort_arg(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, store_att_byval, DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, MCVItem::values, values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

619 {
620  int i;
621  int dim;
622  int ndims = mcvlist->ndimensions;
623 
624  SortSupport ssup;
625  DimensionInfo *info;
626 
627  Size total_length;
628 
629  /* serialized items (indexes into arrays, etc.) */
630  bytea *raw;
631  char *ptr;
632  char *endptr PG_USED_FOR_ASSERTS_ONLY;
633 
634  /* values per dimension (and number of non-NULL values) */
635  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
636  int *counts = (int *) palloc0(sizeof(int) * ndims);
637 
638  /*
639  * We'll include some rudimentary information about the attribute types
640  * (length, by-val flag), so that we don't have to look them up while
641  * deserializating the MCV list (we already have the type OID in the
642  * header). This is safe, because when changing type of the attribute the
643  * statistics gets dropped automatically. We need to store the info about
644  * the arrays of deduplicated values anyway.
645  */
646  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
647 
648  /* sort support data for all attributes included in the MCV list */
649  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
650 
651  /* collect and deduplicate values for each dimension (attribute) */
652  for (dim = 0; dim < ndims; dim++)
653  {
654  int ndistinct;
655  TypeCacheEntry *typentry;
656 
657  /*
658  * Lookup the LT operator (can't get it from stats extra_data, as we
659  * don't know how to interpret that - scalar vs. array etc.).
660  */
661  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
662 
663  /* copy important info about the data type (length, by-value) */
664  info[dim].typlen = stats[dim]->attrtype->typlen;
665  info[dim].typbyval = stats[dim]->attrtype->typbyval;
666 
667  /* allocate space for values in the attribute and collect them */
668  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
669 
670  for (i = 0; i < mcvlist->nitems; i++)
671  {
672  /* skip NULL values - we don't need to deduplicate those */
673  if (mcvlist->items[i].isnull[dim])
674  continue;
675 
676  /* append the value at the end */
677  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
678  counts[dim] += 1;
679  }
680 
681  /* if there are just NULL values in this dimension, we're done */
682  if (counts[dim] == 0)
683  continue;
684 
685  /* sort and deduplicate the data */
686  ssup[dim].ssup_cxt = CurrentMemoryContext;
687  ssup[dim].ssup_collation = stats[dim]->attrcollid;
688  ssup[dim].ssup_nulls_first = false;
689 
690  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
691 
692  qsort_arg(values[dim], counts[dim], sizeof(Datum),
693  compare_scalars_simple, &ssup[dim]);
694 
695  /*
696  * Walk through the array and eliminate duplicate values, but keep the
697  * ordering (so that we can do bsearch later). We know there's at
698  * least one item as (counts[dim] != 0), so we can skip the first
699  * element.
700  */
701  ndistinct = 1; /* number of distinct values */
702  for (i = 1; i < counts[dim]; i++)
703  {
704  /* expect sorted array */
705  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
706 
707  /* if the value is the same as the previous one, we can skip it */
708  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
709  continue;
710 
711  values[dim][ndistinct] = values[dim][i];
712  ndistinct += 1;
713  }
714 
715  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
716  Assert(ndistinct <= PG_UINT16_MAX);
717 
718  /*
719  * Store additional info about the attribute - number of deduplicated
720  * values, and also size of the serialized data. For fixed-length data
721  * types this is trivial to compute, for varwidth types we need to
722  * actually walk the array and sum the sizes.
723  */
724  info[dim].nvalues = ndistinct;
725 
726  if (info[dim].typbyval) /* by-value data types */
727  {
728  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
729 
730  /*
731  * We copy the data into the MCV item during deserialization, so
732  * we don't need to allocate any extra space.
733  */
734  info[dim].nbytes_aligned = 0;
735  }
736  else if (info[dim].typlen > 0) /* fixed-length by-ref */
737  {
738  /*
739  * We don't care about alignment in the serialized data, so we
740  * pack the data as much as possible. But we also track how much
741  * data will be needed after deserialization, and in that case
742  * we need to account for alignment of each item.
743  *
744  * Note: As the items are fixed-length, we could easily compute
745  * this during deserialization, but we do it here anyway.
746  */
747  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
748  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
749  }
750  else if (info[dim].typlen == -1) /* varlena */
751  {
752  info[dim].nbytes = 0;
753  info[dim].nbytes_aligned = 0;
754  for (i = 0; i < info[dim].nvalues; i++)
755  {
756  Size len;
757 
758  /*
759  * For varlena values, we detoast the values and store the
760  * length and data separately. We don't bother with alignment
761  * here, which means that during deserialization we need to
762  * copy the fields and only access the copies.
763  */
764  values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
765 
766  /* serialized length (uint32 length + data) */
767  len = VARSIZE_ANY_EXHDR(values[dim][i]);
768  info[dim].nbytes += sizeof(uint32); /* length */
769  info[dim].nbytes += len; /* value (no header) */
770 
771  /*
772  * During deserialization we'll build regular varlena values
773  * with full headers, and we need to align them properly.
774  */
775  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
776  }
777  }
778  else if (info[dim].typlen == -2) /* cstring */
779  {
780  info[dim].nbytes = 0;
781  info[dim].nbytes_aligned = 0;
782  for (i = 0; i < info[dim].nvalues; i++)
783  {
784  Size len;
785 
786  /*
787  * For cstring, we do similar thing as for varlena - first we
788  * store the length as uint32 and then the data. We don't care
789  * about alignment, which means that during deserialization we
790  * need to copy the fields and only access the copies.
791  */
792 
793  /* c-strings include terminator, so +1 byte */
794  len = strlen(DatumGetCString(values[dim][i])) + 1;
795  info[dim].nbytes += sizeof(uint32); /* length */
796  info[dim].nbytes += len; /* value */
797 
798  /* space needed for properly aligned deserialized copies */
799  info[dim].nbytes_aligned += MAXALIGN(len);
800  }
801  }
802 
803  /* we know (count>0) so there must be some data */
804  Assert(info[dim].nbytes > 0);
805  }
806 
807  /*
808  * Now we can finally compute how much space we'll actually need for the
809  * whole serialized MCV list (varlena header, MCV header, dimension info
810  * for each attribute, deduplicated values and items).
811  */
812  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
813  + sizeof(AttrNumber) /* ndimensions */
814  + (ndims * sizeof(Oid)); /* attribute types */
815 
816  /* dimension info */
817  total_length += ndims * sizeof(DimensionInfo);
818 
819  /* add space for the arrays of deduplicated values */
820  for (i = 0; i < ndims; i++)
821  total_length += info[i].nbytes;
822 
823  /*
824  * And finally account for the items (those are fixed-length, thanks to
825  * replacing values with uint16 indexes into the deduplicated arrays).
826  */
827  total_length += mcvlist->nitems * ITEM_SIZE(dim);
828 
829  /*
830  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
831  * we set them to zero to make the result more compressible).
832  */
833  raw = (bytea *) palloc0(VARHDRSZ + total_length);
834  SET_VARSIZE(raw, VARHDRSZ + total_length);
835 
836  ptr = VARDATA(raw);
837  endptr = ptr + total_length;
838 
839  /* copy the MCV list header fields, one by one */
840  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
841  ptr += sizeof(uint32);
842 
843  memcpy(ptr, &mcvlist->type, sizeof(uint32));
844  ptr += sizeof(uint32);
845 
846  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
850  ptr += sizeof(AttrNumber);
851 
852  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
853  ptr += (sizeof(Oid) * ndims);
854 
855  /* store information about the attributes (data amounts, ...) */
856  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
857  ptr += sizeof(DimensionInfo) * ndims;
858 
859  /* Copy the deduplicated values for all attributes to the output. */
860  for (dim = 0; dim < ndims; dim++)
861  {
862  /* remember the starting point for Asserts later */
863  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
864 
865  for (i = 0; i < info[dim].nvalues; i++)
866  {
867  Datum value = values[dim][i];
868 
869  if (info[dim].typbyval) /* passed by value */
870  {
871  Datum tmp;
872 
873  /*
874  * For values passed by value, we need to copy just the
875  * significant bytes - we can't use memcpy directly, as that
876  * assumes little endian behavior. store_att_byval does
877  * almost what we need, but it requires properly aligned
878  * buffer - the output buffer does not guarantee that. So we
879  * simply use a local Datum variable (which guarantees proper
880  * alignment), and then copy the value from it.
881  */
882  store_att_byval(&tmp, value, info[dim].typlen);
883 
884  memcpy(ptr, &tmp, info[dim].typlen);
885  ptr += info[dim].typlen;
886  }
887  else if (info[dim].typlen > 0) /* passed by reference */
888  {
889  /* no special alignment needed, treated as char array */
890  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
891  ptr += info[dim].typlen;
892  }
893  else if (info[dim].typlen == -1) /* varlena */
894  {
896 
897  /* copy the length */
898  memcpy(ptr, &len, sizeof(uint32));
899  ptr += sizeof(uint32);
900 
901  /* data from the varlena value (without the header) */
902  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
903  ptr += len;
904  }
905  else if (info[dim].typlen == -2) /* cstring */
906  {
907  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
908 
909  /* copy the length */
910  memcpy(ptr, &len, sizeof(uint32));
911  ptr += sizeof(uint32);
912 
913  /* value */
914  memcpy(ptr, DatumGetCString(value), len);
915  ptr += len;
916  }
917 
918  /* no underflows or overflows */
919  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
920  }
921 
922  /* we should get exactly nbytes of data for this dimension */
923  Assert((ptr - start) == info[dim].nbytes);
924  }
925 
926  /* Serialize the items, with uint16 indexes instead of the values. */
927  for (i = 0; i < mcvlist->nitems; i++)
928  {
929  MCVItem *mcvitem = &mcvlist->items[i];
930 
931  /* don't write beyond the allocated space */
932  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
933 
934  /* copy NULL and frequency flags into the serialized MCV */
935  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
936  ptr += sizeof(bool) * ndims;
937 
938  memcpy(ptr, &mcvitem->frequency, sizeof(double));
939  ptr += sizeof(double);
940 
941  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
942  ptr += sizeof(double);
943 
944  /* store the indexes last */
945  for (dim = 0; dim < ndims; dim++)
946  {
947  uint16 index = 0;
948  Datum *value;
949 
950  /* do the lookup only for non-NULL values */
951  if (!mcvitem->isnull[dim])
952  {
953  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
954  info[dim].nvalues, sizeof(Datum),
955  compare_scalars_simple, &ssup[dim]);
956 
957  Assert(value != NULL); /* serialization or deduplication error */
958 
959  /* compute index within the deduplicated array */
960  index = (uint16) (value - values[dim]);
961 
962  /* check the index is within expected bounds */
963  Assert(index < info[dim].nvalues);
964  }
965 
966  /* copy the index into the serialized MCV */
967  memcpy(ptr, &index, sizeof(uint16));
968  ptr += sizeof(uint16);
969  }
970 
971  /* make sure we don't overflow the allocated value */
972  Assert(ptr <= endptr);
973  }
974 
975  /* at this point we expect to match the total_length exactly */
976  Assert(ptr == endptr);
977 
978  pfree(values);
979  pfree(counts);
980 
981  return raw;
982 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
bool ssup_nulls_first
Definition: sortsupport.h:75
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:555
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
static struct @144 value
struct DimensionInfo DimensionInfo
Datum * values
Definition: statistics.h:82
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:357
void pfree(void *pointer)
Definition: mcxt.c:1031
#define PG_UINT16_MAX
Definition: c.h:439
#define DatumGetCString(X)
Definition: postgres.h:566
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int compare_scalars_simple(const void *a, const void *b, void *arg)
unsigned int uint32
Definition: c.h:358
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
#define store_att_byval(T, newdatum, attlen)
Definition: tupmacs.h:224
void * palloc0(Size size)
Definition: mcxt.c:955
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define ITEM_SIZE(ndims)
Definition: mcv.c:56
#define Assert(condition)
Definition: c.h:732
double base_frequency
Definition: statistics.h:80
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:685
Oid attrcollid
Definition: vacuum.h:89
#define DatumGetPointer(X)
Definition: postgres.h:549
bool * isnull
Definition: statistics.h:81
static Datum values[MAXATTR]
Definition: bootstrap.c:167
Form_pg_type attrtype
Definition: vacuum.h:88
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:129
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:235
Definition: c.h:549
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:123
int16 AttrNumber
Definition: attnum.h:21
double frequency
Definition: statistics.h:79
unsigned char bool
Definition: c.h:308

◆ statext_ndistinct_build()

MVNDistinct* statext_ndistinct_build ( double  totalrows,
int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats 
)

Definition at line 87 of file mvdistinct.c.

References Assert, VacAttrStats::attr, MVNDistinctItem::attrs, bms_add_member(), bms_num_members(), generator_free(), generator_init(), generator_next(), MVNDistinct::items, CombinationGenerator::k, MVNDistinct::magic, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, num_combinations(), offsetof, palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

89 {
90  MVNDistinct *result;
91  int k;
92  int itemcnt;
93  int numattrs = bms_num_members(attrs);
94  int numcombs = num_combinations(numattrs);
95 
96  result = palloc(offsetof(MVNDistinct, items) +
97  numcombs * sizeof(MVNDistinctItem));
98  result->magic = STATS_NDISTINCT_MAGIC;
100  result->nitems = numcombs;
101 
102  itemcnt = 0;
103  for (k = 2; k <= numattrs; k++)
104  {
105  int *combination;
107 
108  /* generate combinations of K out of N elements */
109  generator = generator_init(numattrs, k);
110 
111  while ((combination = generator_next(generator)))
112  {
113  MVNDistinctItem *item = &result->items[itemcnt];
114  int j;
115 
116  item->attrs = NULL;
117  for (j = 0; j < k; j++)
118  item->attrs = bms_add_member(item->attrs,
119  stats[combination[j]]->attr->attnum);
120  item->ndistinct =
121  ndistinct_for_combination(totalrows, numrows, rows,
122  stats, k, combination);
123 
124  itemcnt++;
125  Assert(itemcnt <= result->nitems);
126  }
127 
128  generator_free(generator);
129  }
130 
131  /* must consume exactly the whole output array */
132  Assert(itemcnt == result->nitems);
133 
134  return result;
135 }
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:38
static double ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, VacAttrStats **stats, int k, int *combination)
Definition: mvdistinct.c:431
double ndistinct
Definition: statistics.h:28
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:638
Form_pg_attribute attr
Definition: vacuum.h:85
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
uint32 nitems
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:35
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:653
uint32 type
Definition: statistics.h:36
#define Assert(condition)
Definition: c.h:732
Bitmapset * attrs
Definition: statistics.h:29
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
void * palloc(Size size)
Definition: mcxt.c:924
static int num_combinations(int n)
Definition: mvdistinct.c:578
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:600
#define offsetof(type, field)
Definition: c.h:655

◆ statext_ndistinct_deserialize()

MVNDistinct* statext_ndistinct_deserialize ( bytea data)

Definition at line 250 of file mvdistinct.c.

References Assert, MVNDistinctItem::attrs, bms_add_member(), elog, ERROR, i, MVNDistinct::items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::ndistinct, MVNDistinct::nitems, offsetof, palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

251 {
252  int i;
253  Size minimum_size;
254  MVNDistinct ndist;
255  MVNDistinct *ndistinct;
256  char *tmp;
257 
258  if (data == NULL)
259  return NULL;
260 
261  /* we expect at least the basic fields of MVNDistinct struct */
262  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
263  elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
265 
266  /* initialize pointer to the data part (skip the varlena header) */
267  tmp = VARDATA_ANY(data);
268 
269  /* read the header fields and perform basic sanity checks */
270  memcpy(&ndist.magic, tmp, sizeof(uint32));
271  tmp += sizeof(uint32);
272  memcpy(&ndist.type, tmp, sizeof(uint32));
273  tmp += sizeof(uint32);
274  memcpy(&ndist.nitems, tmp, sizeof(uint32));
275  tmp += sizeof(uint32);
276 
277  if (ndist.magic != STATS_NDISTINCT_MAGIC)
278  elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
280  if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
281  elog(ERROR, "invalid ndistinct type %d (expected %d)",
283  if (ndist.nitems == 0)
284  elog(ERROR, "invalid zero-length item array in MVNDistinct");
285 
286  /* what minimum bytea size do we expect for those parameters */
287  minimum_size = MinSizeOfItems(ndist.nitems);
288  if (VARSIZE_ANY_EXHDR(data) < minimum_size)
289  elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
290  VARSIZE_ANY_EXHDR(data), minimum_size);
291 
292  /*
293  * Allocate space for the ndistinct items (no space for each item's
294  * attnos: those live in bitmapsets allocated separately)
295  */
296  ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
297  (ndist.nitems * sizeof(MVNDistinctItem)));
298  ndistinct->magic = ndist.magic;
299  ndistinct->type = ndist.type;
300  ndistinct->nitems = ndist.nitems;
301 
302  for (i = 0; i < ndistinct->nitems; i++)
303  {
304  MVNDistinctItem *item = &ndistinct->items[i];
305  int nelems;
306 
307  item->attrs = NULL;
308 
309  /* ndistinct value */
310  memcpy(&item->ndistinct, tmp, sizeof(double));
311  tmp += sizeof(double);
312 
313  /* number of attributes */
314  memcpy(&nelems, tmp, sizeof(int));
315  tmp += sizeof(int);
316  Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
317 
318  while (nelems-- > 0)
319  {
320  AttrNumber attno;
321 
322  memcpy(&attno, tmp, sizeof(AttrNumber));
323  tmp += sizeof(AttrNumber);
324  item->attrs = bms_add_member(item->attrs, attno);
325  }
326 
327  /* still within the bytea */
328  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
329  }
330 
331  /* we should have consumed the whole bytea exactly */
332  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
333 
334  return ndistinct;
335 }
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:58
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:38
double ndistinct
Definition: statistics.h:28
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:358
uint32 nitems
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:35
void * palloc0(Size size)
Definition: mcxt.c:955
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
uint32 type
Definition: statistics.h:36
#define SizeOfHeader
Definition: mvdistinct.c:48
#define Assert(condition)
Definition: c.h:732
size_t Size
Definition: c.h:466
Bitmapset * attrs
Definition: statistics.h:29
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define MAXALIGN(LEN)
Definition: c.h:685
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
#define elog(elevel,...)
Definition: elog.h:226
int i
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:655

◆ statext_ndistinct_serialize()

bytea* statext_ndistinct_serialize ( MVNDistinct ndistinct)

Definition at line 172 of file mvdistinct.c.

References Assert, MVNDistinctItem::attrs, bms_next_member(), bms_num_members(), i, MVNDistinct::items, MVNDistinct::magic, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output(), palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, value, VARDATA, and VARHDRSZ.

Referenced by statext_store().

173 {
174  int i;
175  bytea *output;
176  char *tmp;
177  Size len;
178 
179  Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
180  Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
181 
182  /*
183  * Base size is size of scalar fields in the struct, plus one base struct
184  * for each item, including number of items for each.
185  */
186  len = VARHDRSZ + SizeOfHeader;
187 
188  /* and also include space for the actual attribute numbers */
189  for (i = 0; i < ndistinct->nitems; i++)
190  {
191  int nmembers;
192 
193  nmembers = bms_num_members(ndistinct->items[i].attrs);
194  Assert(nmembers >= 2);
195 
196  len += SizeOfItem(nmembers);
197  }
198 
199  output = (bytea *) palloc(len);
200  SET_VARSIZE(output, len);
201 
202  tmp = VARDATA(output);
203 
204  /* Store the base struct values (magic, type, nitems) */
205  memcpy(tmp, &ndistinct->magic, sizeof(uint32));
206  tmp += sizeof(uint32);
207  memcpy(tmp, &ndistinct->type, sizeof(uint32));
208  tmp += sizeof(uint32);
209  memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
210  tmp += sizeof(uint32);
211 
212  /*
213  * store number of attributes and attribute numbers for each entry
214  */
215  for (i = 0; i < ndistinct->nitems; i++)
216  {
217  MVNDistinctItem item = ndistinct->items[i];
218  int nmembers = bms_num_members(item.attrs);
219  int x;
220 
221  memcpy(tmp, &item.ndistinct, sizeof(double));
222  tmp += sizeof(double);
223  memcpy(tmp, &nmembers, sizeof(int));
224  tmp += sizeof(int);
225 
226  x = -1;
227  while ((x = bms_next_member(item.attrs, x)) >= 0)
228  {
230 
231  memcpy(tmp, &value, sizeof(AttrNumber));
232  tmp += sizeof(AttrNumber);
233  }
234 
235  /* protect against overflows */
236  Assert(tmp <= ((char *) output + len));
237  }
238 
239  /* check we used exactly the expected space */
240  Assert(tmp == ((char *) output + len));
241 
242  return output;
243 }
#define VARDATA(PTR)
Definition: postgres.h:302
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:38
static void output(uint64 loop_count)
#define VARHDRSZ
Definition: c.h:555
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1043
double ndistinct
Definition: statistics.h:28
static struct @144 value
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
#define SizeOfItem(natts)
Definition: mvdistinct.c:51
unsigned int uint32
Definition: c.h:358
uint32 nitems
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:35
uint32 type
Definition: statistics.h:36
#define SizeOfHeader
Definition: mvdistinct.c:48
#define Assert(condition)
Definition: c.h:732
size_t Size
Definition: c.h:466
Bitmapset * attrs
Definition: statistics.h:29
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
void * palloc(Size size)
Definition: mcxt.c:924
int i
Definition: c.h:549
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21