PostgreSQL Source Code  git master
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportDataMultiSortSupport
 
typedef struct SortItem SortItem
 

Functions

MVNDistinctstatext_ndistinct_build (double totalrows, int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats)
 
byteastatext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinctstatext_ndistinct_deserialize (bytea *data)
 
MVDependenciesstatext_dependencies_build (int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats)
 
byteastatext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependenciesstatext_dependencies_deserialize (bytea *data)
 
MCVListstatext_mcv_build (int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats, double totalrows, int stattarget)
 
byteastatext_mcv_serialize (MCVList *mcv, VacAttrStats **stats)
 
MCVListstatext_mcv_deserialize (bytea *data)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
void * bsearch_arg (const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
 
AttrNumberbuild_attnums_array (Bitmapset *attrs, int *numattrs)
 
SortItembuild_sorted_items (int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_clause_args (List *args, Var **varp, Const **cstp, bool *varonleftp)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

Definition at line 51 of file extended_stats_internal.h.

◆ MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

Function Documentation

◆ bsearch_arg()

void* bsearch_arg ( const void *  key,
const void *  base,
size_t  nmemb,
size_t  size,
int(*)(const void *, const void *, void *)  compar,
void *  arg 
)

Definition at line 664 of file extended_stats.c.

References arg, idx(), and sort-test::key.

Referenced by statext_mcv_build(), and statext_mcv_serialize().

667 {
668  size_t l,
669  u,
670  idx;
671  const void *p;
672  int comparison;
673 
674  l = 0;
675  u = nmemb;
676  while (l < u)
677  {
678  idx = (l + u) / 2;
679  p = (void *) (((const char *) base) + (idx * size));
680  comparison = (*compar) (key, p, arg);
681 
682  if (comparison < 0)
683  u = idx;
684  else if (comparison > 0)
685  l = idx + 1;
686  else
687  return (void *) p;
688  }
689 
690  return NULL;
691 }
symbol * p
Definition: api.h:15
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
void * arg
int l
Definition: api.h:16

◆ build_attnums_array()

AttrNumber* build_attnums_array ( Bitmapset attrs,
int *  numattrs 
)

Definition at line 702 of file extended_stats.c.

References Assert, AttrNumberIsForUserDefinedAttr, bms_next_member(), bms_num_members(), i, MaxAttrNumber, and palloc().

Referenced by dependency_degree(), statext_dependencies_build(), and statext_mcv_build().

703 {
704  int i,
705  j;
706  AttrNumber *attnums;
707  int num = bms_num_members(attrs);
708 
709  if (numattrs)
710  *numattrs = num;
711 
712  /* build attnums from the bitmapset */
713  attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
714  i = 0;
715  j = -1;
716  while ((j = bms_next_member(attrs, j)) >= 0)
717  {
718  /*
719  * Make sure the bitmap contains only user-defined attributes. As
720  * bitmaps can't contain negative values, this can be violated in two
721  * ways. Firstly, the bitmap might contain 0 as a member, and secondly
722  * the integer value might be larger than MaxAttrNumber.
723  */
725  Assert(j <= MaxAttrNumber);
726 
727  attnums[i++] = (AttrNumber) j;
728 
729  /* protect against overflows */
730  Assert(i <= num);
731  }
732 
733  return attnums;
734 }
#define MaxAttrNumber
Definition: attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1043
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
#define Assert(condition)
Definition: c.h:738
void * palloc(Size size)
Definition: mcxt.c:949
int i
int16 AttrNumber
Definition: attnum.h:21

◆ build_sorted_items()

SortItem* build_sorted_items ( int  numrows,
int *  nitems,
HeapTuple rows,
TupleDesc  tdesc,
MultiSortSupport  mss,
int  numattrs,
AttrNumber attnums 
)

Definition at line 744 of file extended_stats.c.

References Assert, attlen, heap_getattr, i, idx(), multi_sort_compare(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum, qsort_arg(), toast_raw_datum_size(), TupleDescAttr, value, values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

746 {
747  int i,
748  j,
749  len,
750  idx;
751  int nvalues = numrows * numattrs;
752 
753  SortItem *items;
754  Datum *values;
755  bool *isnull;
756  char *ptr;
757 
758  /* Compute the total amount of memory we need (both items and values). */
759  len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
760 
761  /* Allocate the memory and split it into the pieces. */
762  ptr = palloc0(len);
763 
764  /* items to sort */
765  items = (SortItem *) ptr;
766  ptr += numrows * sizeof(SortItem);
767 
768  /* values and null flags */
769  values = (Datum *) ptr;
770  ptr += nvalues * sizeof(Datum);
771 
772  isnull = (bool *) ptr;
773  ptr += nvalues * sizeof(bool);
774 
775  /* make sure we consumed the whole buffer exactly */
776  Assert((ptr - (char *) items) == len);
777 
778  /* fix the pointers to Datum and bool arrays */
779  idx = 0;
780  for (i = 0; i < numrows; i++)
781  {
782  bool toowide = false;
783 
784  items[idx].values = &values[idx * numattrs];
785  items[idx].isnull = &isnull[idx * numattrs];
786 
787  /* load the values/null flags from sample rows */
788  for (j = 0; j < numattrs; j++)
789  {
790  Datum value;
791  bool isnull;
792 
793  value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
794 
795  /*
796  * If this is a varlena value, check if it's too wide and if yes
797  * then skip the whole item. Otherwise detoast the value.
798  *
799  * XXX It may happen that we've already detoasted some preceding
800  * values for the current item. We don't bother to cleanup those
801  * on the assumption that those are small (below WIDTH_THRESHOLD)
802  * and will be discarded at the end of analyze.
803  */
804  if ((!isnull) &&
805  (TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
806  {
808  {
809  toowide = true;
810  break;
811  }
812 
813  value = PointerGetDatum(PG_DETOAST_DATUM(value));
814  }
815 
816  items[idx].values[j] = value;
817  items[idx].isnull[j] = isnull;
818  }
819 
820  if (toowide)
821  continue;
822 
823  idx++;
824  }
825 
826  /* store the actual number of items (ignoring the too-wide ones) */
827  *nitems = idx;
828 
829  /* all items were too wide */
830  if (idx == 0)
831  {
832  /* everything is allocated as a single chunk */
833  pfree(items);
834  return NULL;
835  }
836 
837  /* do the sort, using the multi-sort */
838  qsort_arg((void *) items, idx, sizeof(SortItem),
839  multi_sort_compare, mss);
840 
841  return items;
842 }
#define PointerGetDatum(X)
Definition: postgres.h:556
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
void pfree(void *pointer)
Definition: mcxt.c:1056
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:502
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
int16 attlen
Definition: pg_attribute.h:64
#define heap_getattr(tup, attnum, tupleDesc, isnull)
Definition: htup_details.h:762
void * palloc0(Size size)
Definition: mcxt.c:980
uintptr_t Datum
Definition: postgres.h:367
struct SortItem SortItem
#define WIDTH_THRESHOLD
static struct @143 value
#define Assert(condition)
Definition: c.h:738
int multi_sort_compare(const void *a, const void *b, void *arg)
static Datum values[MAXATTR]
Definition: bootstrap.c:167
int i
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
unsigned char bool
Definition: c.h:317

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)

Definition at line 657 of file extended_stats.c.

References ApplySortComparator().

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

658 {
659  return ApplySortComparator(a, false, b, false, ssup);
660 }
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ compare_scalars_simple()

int compare_scalars_simple ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 649 of file extended_stats.c.

References compare_datums_simple().

Referenced by statext_mcv_serialize().

650 {
651  return compare_datums_simple(*(Datum *) a,
652  *(Datum *) b,
653  (SortSupport) arg);
654 }
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
uintptr_t Datum
Definition: postgres.h:367
void * arg

◆ examine_clause_args()

bool examine_clause_args ( List args,
Var **  varp,
Const **  cstp,
bool varonleftp 
)

Definition at line 1461 of file extended_stats.c.

References arg, Assert, IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

1462 {
1463  Var *var;
1464  Const *cst;
1465  bool varonleft;
1466  Node *leftop,
1467  *rightop;
1468 
1469  /* enforced by statext_is_compatible_clause_internal */
1470  Assert(list_length(args) == 2);
1471 
1472  leftop = linitial(args);
1473  rightop = lsecond(args);
1474 
1475  /* strip RelabelType from either side of the expression */
1476  if (IsA(leftop, RelabelType))
1477  leftop = (Node *) ((RelabelType *) leftop)->arg;
1478 
1479  if (IsA(rightop, RelabelType))
1480  rightop = (Node *) ((RelabelType *) rightop)->arg;
1481 
1482  if (IsA(leftop, Var) && IsA(rightop, Const))
1483  {
1484  var = (Var *) leftop;
1485  cst = (Const *) rightop;
1486  varonleft = true;
1487  }
1488  else if (IsA(leftop, Const) && IsA(rightop, Var))
1489  {
1490  var = (Var *) rightop;
1491  cst = (Const *) leftop;
1492  varonleft = false;
1493  }
1494  else
1495  return false;
1496 
1497  /* return pointers to the extracted parts if requested */
1498  if (varp)
1499  *varp = var;
1500 
1501  if (cstp)
1502  *cstp = cst;
1503 
1504  if (varonleftp)
1505  *varonleftp = varonleft;
1506 
1507  return true;
1508 }
#define IsA(nodeptr, _type_)
Definition: nodes.h:580
Definition: nodes.h:529
Definition: primnodes.h:181
#define lsecond(l)
Definition: pg_list.h:200
#define linitial(l)
Definition: pg_list.h:195
#define Assert(condition)
Definition: c.h:738
static int list_length(const List *l)
Definition: pg_list.h:169
void * arg

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo root,
StatisticExtInfo stat,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Selectivity basesel,
Selectivity totalsel 
)

Definition at line 1903 of file mcv.c.

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, StatisticExtInfo::keys, mcv_get_match_bitmap(), MCVList::nitems, statext_mcv_load(), and StatisticExtInfo::statOid.

Referenced by statext_mcv_clauselist_selectivity().

1908 {
1909  int i;
1910  MCVList *mcv;
1911  Selectivity s = 0.0;
1912 
1913  /* match/mismatch bitmap for each MCV item */
1914  bool *matches = NULL;
1915 
1916  /* load the MCV list stored in the statistics object */
1917  mcv = statext_mcv_load(stat->statOid);
1918 
1919  /* build a match bitmap for the clauses */
1920  matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
1921 
1922  /* sum frequencies for all the matching MCV items */
1923  *basesel = 0.0;
1924  *totalsel = 0.0;
1925  for (i = 0; i < mcv->nitems; i++)
1926  {
1927  *totalsel += mcv->items[i].frequency;
1928 
1929  if (matches[i] != false)
1930  {
1931  /* XXX Shouldn't the basesel be outside the if condition? */
1932  *basesel += mcv->items[i].base_frequency;
1933  s += mcv->items[i].frequency;
1934  }
1935  }
1936 
1937  return s;
1938 }
uint32 nitems
Definition: statistics.h:90
double Selectivity
Definition: nodes.h:662
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1545
MCVList * statext_mcv_load(Oid mvoid)
Definition: mcv.c:557
double base_frequency
Definition: statistics.h:80
Bitmapset * keys
Definition: pathnodes.h:915
int i
double frequency
Definition: statistics.h:79

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)

Definition at line 581 of file extended_stats.c.

References CurrentMemoryContext, PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

583 {
584  SortSupport ssup = &mss->ssup[sortdim];
585 
587  ssup->ssup_collation = collation;
588  ssup->ssup_nulls_first = false;
589 
591 }
bool ssup_nulls_first
Definition: sortsupport.h:75
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
MemoryContext ssup_cxt
Definition: sortsupport.h:66
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:377

◆ multi_sort_compare()

int multi_sort_compare ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 595 of file extended_stats.c.

References ApplySortComparator(), compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

596 {
598  SortItem *ia = (SortItem *) a;
599  SortItem *ib = (SortItem *) b;
600  int i;
601 
602  for (i = 0; i < mss->ndims; i++)
603  {
604  int compare;
605 
606  compare = ApplySortComparator(ia->values[i], ia->isnull[i],
607  ib->values[i], ib->isnull[i],
608  &mss->ssup[i]);
609 
610  if (compare != 0)
611  return compare;
612  }
613 
614  /* equal by default */
615  return 0;
616 }
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
int i
void * arg
MultiSortSupportData * MultiSortSupport
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem a,
const SortItem b,
MultiSortSupport  mss 
)

Definition at line 620 of file extended_stats.c.

References ApplySortComparator(), SortItem::isnull, MultiSortSupportData::ssup, and SortItem::values.

Referenced by dependency_degree().

622 {
623  return ApplySortComparator(a->values[dim], a->isnull[dim],
624  b->values[dim], b->isnull[dim],
625  &mss->ssup[dim]);
626 }
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem a,
const SortItem b,
MultiSortSupport  mss 
)

Definition at line 629 of file extended_stats.c.

References ApplySortComparator(), SortItem::isnull, MultiSortSupportData::ssup, and SortItem::values.

Referenced by dependency_degree().

632 {
633  int dim;
634 
635  for (dim = start; dim <= end; dim++)
636  {
637  int r = ApplySortComparator(a->values[dim], a->isnull[dim],
638  b->values[dim], b->isnull[dim],
639  &mss->ssup[dim]);
640 
641  if (r != 0)
642  return r;
643  }
644 
645  return 0;
646 }
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)

Definition at line 562 of file extended_stats.c.

References Assert, MultiSortSupportData::ndims, offsetof, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

563 {
564  MultiSortSupport mss;
565 
566  Assert(ndims >= 2);
567 
569  + sizeof(SortSupportData) * ndims);
570 
571  mss->ndims = ndims;
572 
573  return mss;
574 }
struct SortSupportData SortSupportData
void * palloc0(Size size)
Definition: mcxt.c:980
#define Assert(condition)
Definition: c.h:738
MultiSortSupportData * MultiSortSupport
#define offsetof(type, field)
Definition: c.h:661

◆ statext_dependencies_build()

MVDependencies* statext_dependencies_build ( int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats 
)

Definition at line 363 of file dependencies.c.

References Assert, MVDependency::attributes, build_attnums_array(), MVDependency::degree, DependencyGeneratorData::dependencies, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, DependencyGeneratorData::k, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, offsetof, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

365 {
366  int i,
367  k;
368  int numattrs;
369  AttrNumber *attnums;
370 
371  /* result */
372  MVDependencies *dependencies = NULL;
373 
374  /*
375  * Transform the bms into an array, to make accessing i-th member easier.
376  */
377  attnums = build_attnums_array(attrs, &numattrs);
378 
379  Assert(numattrs >= 2);
380 
381  /*
382  * We'll try build functional dependencies starting from the smallest ones
383  * covering just 2 columns, to the largest ones, covering all columns
384  * included in the statistics object. We start from the smallest ones
385  * because we want to be able to skip already implied ones.
386  */
387  for (k = 2; k <= numattrs; k++)
388  {
389  AttrNumber *dependency; /* array with k elements */
390 
391  /* prepare a DependencyGenerator of variation */
393 
394  /* generate all possible variations of k values (out of n) */
395  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
396  {
397  double degree;
398  MVDependency *d;
399 
400  /* compute how valid the dependency seems */
401  degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
402 
403  /*
404  * if the dependency seems entirely invalid, don't store it
405  */
406  if (degree == 0.0)
407  continue;
408 
409  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
410  + k * sizeof(AttrNumber));
411 
412  /* copy the dependency (and keep the indexes into stxkeys) */
413  d->degree = degree;
414  d->nattributes = k;
415  for (i = 0; i < k; i++)
416  d->attributes[i] = attnums[dependency[i]];
417 
418  /* initialize the list of dependencies */
419  if (dependencies == NULL)
420  {
421  dependencies
422  = (MVDependencies *) palloc0(sizeof(MVDependencies));
423 
424  dependencies->magic = STATS_DEPS_MAGIC;
425  dependencies->type = STATS_DEPS_TYPE_BASIC;
426  dependencies->ndeps = 0;
427  }
428 
429  dependencies->ndeps++;
430  dependencies = (MVDependencies *) repalloc(dependencies,
431  offsetof(MVDependencies, deps)
432  + dependencies->ndeps * sizeof(MVDependency *));
433 
434  dependencies->deps[dependencies->ndeps - 1] = d;
435  }
436 
437  /*
438  * we're done with variations of k elements, so free the
439  * DependencyGenerator
440  */
441  DependencyGenerator_free(DependencyGenerator);
442  }
443 
444  return dependencies;
445 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define STATS_DEPS_MAGIC
Definition: statistics.h:42
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:43
AttrNumber nattributes
Definition: statistics.h:52
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:173
AttrNumber * build_attnums_array(Bitmapset *attrs, int *numattrs)
static double dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs)
Definition: dependencies.c:222
void * palloc0(Size size)
Definition: mcxt.c:980
uint32 magic
Definition: statistics.h:58
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:738
double degree
Definition: statistics.h:51
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:205
int i
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:196
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:661

◆ statext_dependencies_deserialize()

MVDependencies* statext_dependencies_deserialize ( bytea data)

Definition at line 507 of file dependencies.c.

References Assert, MVDependency::attributes, MVDependency::degree, DependencyGeneratorData::dependencies, MVDependencies::deps, elog, ERROR, i, DependencyGeneratorData::k, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, offsetof, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

508 {
509  int i;
510  Size min_expected_size;
511  MVDependencies *dependencies;
512  char *tmp;
513 
514  if (data == NULL)
515  return NULL;
516 
517  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
518  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
520 
521  /* read the MVDependencies header */
522  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
523 
524  /* initialize pointer to the data part (skip the varlena header) */
525  tmp = VARDATA_ANY(data);
526 
527  /* read the header fields and perform basic sanity checks */
528  memcpy(&dependencies->magic, tmp, sizeof(uint32));
529  tmp += sizeof(uint32);
530  memcpy(&dependencies->type, tmp, sizeof(uint32));
531  tmp += sizeof(uint32);
532  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
533  tmp += sizeof(uint32);
534 
535  if (dependencies->magic != STATS_DEPS_MAGIC)
536  elog(ERROR, "invalid dependency magic %d (expected %d)",
537  dependencies->magic, STATS_DEPS_MAGIC);
538 
539  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
540  elog(ERROR, "invalid dependency type %d (expected %d)",
541  dependencies->type, STATS_DEPS_TYPE_BASIC);
542 
543  if (dependencies->ndeps == 0)
544  elog(ERROR, "invalid zero-length item array in MVDependencies");
545 
546  /* what minimum bytea size do we expect for those parameters */
547  min_expected_size = SizeOfItem(dependencies->ndeps);
548 
549  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
550  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
551  VARSIZE_ANY_EXHDR(data), min_expected_size);
552 
553  /* allocate space for the MCV items */
554  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
555  + (dependencies->ndeps * sizeof(MVDependency *)));
556 
557  for (i = 0; i < dependencies->ndeps; i++)
558  {
559  double degree;
560  AttrNumber k;
561  MVDependency *d;
562 
563  /* degree of validity */
564  memcpy(&degree, tmp, sizeof(double));
565  tmp += sizeof(double);
566 
567  /* number of attributes */
568  memcpy(&k, tmp, sizeof(AttrNumber));
569  tmp += sizeof(AttrNumber);
570 
571  /* is the number of attributes valid? */
572  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
573 
574  /* now that we know the number of attributes, allocate the dependency */
575  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
576  + (k * sizeof(AttrNumber)));
577 
578  d->degree = degree;
579  d->nattributes = k;
580 
581  /* copy attribute numbers */
582  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
583  tmp += sizeof(AttrNumber) * d->nattributes;
584 
585  dependencies->deps[i] = d;
586 
587  /* still within the bytea */
588  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
589  }
590 
591  /* we should have consumed the whole bytea exactly */
592  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
593 
594  return dependencies;
595 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define STATS_DEPS_MAGIC
Definition: statistics.h:42
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:43
#define SizeOfItem(natts)
Definition: dependencies.c:41
AttrNumber nattributes
Definition: statistics.h:52
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:367
void * palloc0(Size size)
Definition: mcxt.c:980
uint32 magic
Definition: statistics.h:58
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:738
double degree
Definition: statistics.h:51
size_t Size
Definition: c.h:466
#define SizeOfHeader
Definition: dependencies.c:38
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
#define elog(elevel,...)
Definition: elog.h:214
int i
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:661

◆ statext_dependencies_serialize()

bytea* statext_dependencies_serialize ( MVDependencies dependencies)

Definition at line 452 of file dependencies.c.

References Assert, MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output(), palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

453 {
454  int i;
455  bytea *output;
456  char *tmp;
457  Size len;
458 
459  /* we need to store ndeps, with a number of attributes for each one */
460  len = VARHDRSZ + SizeOfHeader;
461 
462  /* and also include space for the actual attribute numbers and degrees */
463  for (i = 0; i < dependencies->ndeps; i++)
464  len += SizeOfItem(dependencies->deps[i]->nattributes);
465 
466  output = (bytea *) palloc0(len);
467  SET_VARSIZE(output, len);
468 
469  tmp = VARDATA(output);
470 
471  /* Store the base struct values (magic, type, ndeps) */
472  memcpy(tmp, &dependencies->magic, sizeof(uint32));
473  tmp += sizeof(uint32);
474  memcpy(tmp, &dependencies->type, sizeof(uint32));
475  tmp += sizeof(uint32);
476  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
477  tmp += sizeof(uint32);
478 
479  /* store number of attributes and attribute numbers for each dependency */
480  for (i = 0; i < dependencies->ndeps; i++)
481  {
482  MVDependency *d = dependencies->deps[i];
483 
484  memcpy(tmp, &d->degree, sizeof(double));
485  tmp += sizeof(double);
486 
487  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
488  tmp += sizeof(AttrNumber);
489 
490  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
491  tmp += sizeof(AttrNumber) * d->nattributes;
492 
493  /* protect against overflow */
494  Assert(tmp <= ((char *) output + len));
495  }
496 
497  /* make sure we've produced exactly the right amount of data */
498  Assert(tmp == ((char *) output + len));
499 
500  return output;
501 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define VARDATA(PTR)
Definition: postgres.h:302
static void output(uint64 loop_count)
#define VARHDRSZ
Definition: c.h:561
#define SizeOfItem(natts)
Definition: dependencies.c:41
AttrNumber nattributes
Definition: statistics.h:52
unsigned int uint32
Definition: c.h:367
void * palloc0(Size size)
Definition: mcxt.c:980
uint32 magic
Definition: statistics.h:58
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:738
double degree
Definition: statistics.h:51
size_t Size
Definition: c.h:466
#define SizeOfHeader
Definition: dependencies.c:38
int i
Definition: c.h:555
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21

◆ statext_mcv_build()

MCVList* statext_mcv_build ( int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats,
double  totalrows,
int  stattarget 
)

Definition at line 183 of file mcv.c.

References Assert, VacAttrStats::attrtypid, MCVItem::base_frequency, bsearch_arg(), build_attnums_array(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, offsetof, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

185 {
186  int i,
187  numattrs,
188  ngroups,
189  nitems;
190  AttrNumber *attnums;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  attnums = build_attnums_array(attrs, &numattrs);
198 
199  /* comparator for all the columns */
200  mss = build_mss(stats, numattrs);
201 
202  /* sort the rows */
203  items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
204  mss, numattrs, attnums);
205 
206  if (!items)
207  return NULL;
208 
209  /* transform the sorted rows into groups (sorted by frequency) */
210  groups = build_distinct_groups(nitems, items, mss, &ngroups);
211 
212  /*
213  * Maximum number of MCV items to store, based on the statistics target we
214  * computed for the statistics object (from target set for the object
215  * itself, attributes and the system default). In any case, we can't keep
216  * more groups than we have available.
217  */
218  nitems = stattarget;
219  if (nitems > ngroups)
220  nitems = ngroups;
221 
222  /*
223  * Decide how many items to keep in the MCV list. We can't use the same
224  * algorithm as per-column MCV lists, because that only considers the
225  * actual group frequency - but we're primarily interested in how the
226  * actual frequency differs from the base frequency (product of simple
227  * per-column frequencies, as if the columns were independent).
228  *
229  * Using the same algorithm might exclude items that are close to the
230  * "average" frequency of the sample. But that does not say whether the
231  * observed frequency is close to the base frequency or not. We also need
232  * to consider unexpectedly uncommon items (again, compared to the base
233  * frequency), and the single-column algorithm does not have to.
234  *
235  * We simply decide how many items to keep by computing minimum count
236  * using get_mincount_for_mcv_list() and then keep all items that seem to
237  * be more common than that.
238  */
239  mincount = get_mincount_for_mcv_list(numrows, totalrows);
240 
241  /*
242  * Walk the groups until we find the first group with a count below the
243  * mincount threshold (the index of that group is the number of groups we
244  * want to keep).
245  */
246  for (i = 0; i < nitems; i++)
247  {
248  if (groups[i].count < mincount)
249  {
250  nitems = i;
251  break;
252  }
253  }
254 
255  /*
256  * At this point we know the number of items for the MCV list. There might
257  * be none (for uniform distribution with many groups), and in that case
258  * there will be no MCV list. Otherwise construct the MCV list.
259  */
260  if (nitems > 0)
261  {
262  int j;
263  SortItem key;
264  MultiSortSupport tmp;
265 
266  /* frequencies for values in each attribute */
267  SortItem **freqs;
268  int *nfreqs;
269 
270  /* used to search values */
272  + sizeof(SortSupportData));
273 
274  /* compute frequencies for values in each column */
275  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
276  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
277 
278  /*
279  * Allocate the MCV list structure, set the global parameters.
280  */
281  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
282  sizeof(MCVItem) * nitems);
283 
284  mcvlist->magic = STATS_MCV_MAGIC;
285  mcvlist->type = STATS_MCV_TYPE_BASIC;
286  mcvlist->ndimensions = numattrs;
287  mcvlist->nitems = nitems;
288 
289  /* store info about data type OIDs */
290  for (i = 0; i < numattrs; i++)
291  mcvlist->types[i] = stats[i]->attrtypid;
292 
293  /* Copy the first chunk of groups into the result. */
294  for (i = 0; i < nitems; i++)
295  {
296  /* just pointer to the proper place in the list */
297  MCVItem *item = &mcvlist->items[i];
298 
299  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
300  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
301 
302  /* copy values for the group */
303  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
304  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
305 
306  /* groups should be sorted by frequency in descending order */
307  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
308 
309  /* group frequency */
310  item->frequency = (double) groups[i].count / numrows;
311 
312  /* base frequency, if the attributes were independent */
313  item->base_frequency = 1.0;
314  for (j = 0; j < numattrs; j++)
315  {
316  SortItem *freq;
317 
318  /* single dimension */
319  tmp->ndims = 1;
320  tmp->ssup[0] = mss->ssup[j];
321 
322  /* fill search key */
323  key.values = &groups[i].values[j];
324  key.isnull = &groups[i].isnull[j];
325 
326  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
327  sizeof(SortItem),
328  multi_sort_compare, tmp);
329 
330  item->base_frequency *= ((double) freq->count) / numrows;
331  }
332  }
333 
334  pfree(nfreqs);
335  pfree(freqs);
336  }
337 
338  pfree(items);
339  pfree(groups);
340 
341  return mcvlist;
342 }
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:423
uint32 nitems
Definition: statistics.h:90
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:489
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs)
Definition: mcv.c:349
SortItem * build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:151
void pfree(void *pointer)
Definition: mcxt.c:1056
Oid attrtypid
Definition: vacuum.h:124
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
AttrNumber * build_attnums_array(Bitmapset *attrs, int *numattrs)
uint32 type
Definition: statistics.h:89
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:66
struct SortSupportData SortSupportData
void * palloc0(Size size)
Definition: mcxt.c:980
uintptr_t Datum
Definition: postgres.h:367
#define Assert(condition)
Definition: c.h:738
double base_frequency
Definition: statistics.h:80
#define STATS_MCV_MAGIC
Definition: statistics.h:65
int multi_sort_compare(const void *a, const void *b, void *arg)
bool * isnull
Definition: statistics.h:81
void * palloc(Size size)
Definition: mcxt.c:949
int i
MultiSortSupportData * MultiSortSupport
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:661
double frequency
Definition: statistics.h:79

◆ statext_mcv_deserialize()

MCVList* statext_mcv_deserialize ( bytea data)

Definition at line 993 of file mcv.c.

References Assert, MCVItem::base_frequency, elog, ERROR, fetch_att, MCVItem::frequency, i, MCVItem::isnull, MCVList::items, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, offsetof, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, repalloc(), SET_VARSIZE, SizeOfMCVList, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

994 {
995  int dim,
996  i;
997  Size expected_size;
998  MCVList *mcvlist;
999  char *raw;
1000  char *ptr;
1001  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1002 
1003  int ndims,
1004  nitems;
1005  DimensionInfo *info = NULL;
1006 
1007  /* local allocation buffer (used only for deserialization) */
1008  Datum **map = NULL;
1009 
1010  /* MCV list */
1011  Size mcvlen;
1012 
1013  /* buffer used for the result */
1014  Size datalen;
1015  char *dataptr;
1016  char *valuesptr;
1017  char *isnullptr;
1018 
1019  if (data == NULL)
1020  return NULL;
1021 
1022  /*
1023  * We can't possibly deserialize a MCV list if there's not even a complete
1024  * header. We need an explicit formula here, because we serialize the
1025  * header fields one by one, so we need to ignore struct alignment.
1026  */
1027  if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1028  elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1029  VARSIZE_ANY(data), MinSizeOfMCVList);
1030 
1031  /* read the MCV list header */
1032  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1033 
1034  /* pointer to the data part (skip the varlena header) */
1035  raw = (char *) data;
1036  ptr = VARDATA_ANY(raw);
1037  endptr = (char *) raw + VARSIZE_ANY(data);
1038 
1039  /* get the header and perform further sanity checks */
1040  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1041  ptr += sizeof(uint32);
1042 
1043  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1044  ptr += sizeof(uint32);
1045 
1046  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1047  ptr += sizeof(uint32);
1048 
1049  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1050  ptr += sizeof(AttrNumber);
1051 
1052  if (mcvlist->magic != STATS_MCV_MAGIC)
1053  elog(ERROR, "invalid MCV magic %u (expected %u)",
1054  mcvlist->magic, STATS_MCV_MAGIC);
1055 
1056  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1057  elog(ERROR, "invalid MCV type %u (expected %u)",
1058  mcvlist->type, STATS_MCV_TYPE_BASIC);
1059 
1060  if (mcvlist->ndimensions == 0)
1061  elog(ERROR, "invalid zero-length dimension array in MCVList");
1062  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1063  (mcvlist->ndimensions < 0))
1064  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1065  mcvlist->ndimensions);
1066 
1067  if (mcvlist->nitems == 0)
1068  elog(ERROR, "invalid zero-length item array in MCVList");
1069  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1070  elog(ERROR, "invalid length (%u) item array in MCVList",
1071  mcvlist->nitems);
1072 
1073  nitems = mcvlist->nitems;
1074  ndims = mcvlist->ndimensions;
1075 
1076  /*
1077  * Check amount of data including DimensionInfo for all dimensions and
1078  * also the serialized items (including uint16 indexes). Also, walk
1079  * through the dimension information and add it to the sum.
1080  */
1081  expected_size = SizeOfMCVList(ndims, nitems);
1082 
1083  /*
1084  * Check that we have at least the dimension and info records, along with
1085  * the items. We don't know the size of the serialized values yet. We need
1086  * to do this check first, before accessing the dimension info.
1087  */
1088  if (VARSIZE_ANY(data) < expected_size)
1089  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1090  VARSIZE_ANY(data), expected_size);
1091 
1092  /* Now copy the array of type Oids. */
1093  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1094  ptr += (sizeof(Oid) * ndims);
1095 
1096  /* Now it's safe to access the dimension info. */
1097  info = palloc(ndims * sizeof(DimensionInfo));
1098 
1099  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1100  ptr += (ndims * sizeof(DimensionInfo));
1101 
1102  /* account for the value arrays */
1103  for (dim = 0; dim < ndims; dim++)
1104  {
1105  /*
1106  * XXX I wonder if we can/should rely on asserts here. Maybe those
1107  * checks should be done every time?
1108  */
1109  Assert(info[dim].nvalues >= 0);
1110  Assert(info[dim].nbytes >= 0);
1111 
1112  expected_size += info[dim].nbytes;
1113  }
1114 
1115  /*
1116  * Now we know the total expected MCV size, including all the pieces
1117  * (header, dimension info. items and deduplicated data). So do the final
1118  * check on size.
1119  */
1120  if (VARSIZE_ANY(data) != expected_size)
1121  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1122  VARSIZE_ANY(data), expected_size);
1123 
1124  /*
1125  * We need an array of Datum values for each dimension, so that we can
1126  * easily translate the uint16 indexes later. We also need a top-level
1127  * array of pointers to those per-dimension arrays.
1128  *
1129  * While allocating the arrays for dimensions, compute how much space we
1130  * need for a copy of the by-ref data, as we can't simply point to the
1131  * original values (it might go away).
1132  */
1133  datalen = 0; /* space for by-ref data */
1134  map = (Datum **) palloc(ndims * sizeof(Datum *));
1135 
1136  for (dim = 0; dim < ndims; dim++)
1137  {
1138  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1139 
1140  /* space needed for a copy of data for by-ref types */
1141  datalen += info[dim].nbytes_aligned;
1142  }
1143 
1144  /*
1145  * Now resize the MCV list so that the allocation includes all the data.
1146  *
1147  * Allocate space for a copy of the data, as we can't simply reference the
1148  * serialized data - it's not aligned properly, and it may disappear while
1149  * we're still using the MCV list, e.g. due to catcache release.
1150  *
1151  * We do care about alignment here, because we will allocate all the
1152  * pieces at once, but then use pointers to different parts.
1153  */
1154  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1155 
1156  /* arrays of values and isnull flags for all MCV items */
1157  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1158  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1159 
1160  /* we don't quite need to align this, but it makes some asserts easier */
1161  mcvlen += MAXALIGN(datalen);
1162 
1163  /* now resize the deserialized MCV list, and compute pointers to parts */
1164  mcvlist = repalloc(mcvlist, mcvlen);
1165 
1166  /* pointer to the beginning of values/isnull arrays */
1167  valuesptr = (char *) mcvlist
1168  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1169 
1170  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1171 
1172  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1173 
1174  /*
1175  * Build mapping (index => value) for translating the serialized data into
1176  * the in-memory representation.
1177  */
1178  for (dim = 0; dim < ndims; dim++)
1179  {
1180  /* remember start position in the input array */
1181  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1182 
1183  if (info[dim].typbyval)
1184  {
1185  /* for by-val types we simply copy data into the mapping */
1186  for (i = 0; i < info[dim].nvalues; i++)
1187  {
1188  Datum v = 0;
1189 
1190  memcpy(&v, ptr, info[dim].typlen);
1191  ptr += info[dim].typlen;
1192 
1193  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1194 
1195  /* no under/overflow of input array */
1196  Assert(ptr <= (start + info[dim].nbytes));
1197  }
1198  }
1199  else
1200  {
1201  /* for by-ref types we need to also make a copy of the data */
1202 
1203  /* passed by reference, but fixed length (name, tid, ...) */
1204  if (info[dim].typlen > 0)
1205  {
1206  for (i = 0; i < info[dim].nvalues; i++)
1207  {
1208  memcpy(dataptr, ptr, info[dim].typlen);
1209  ptr += info[dim].typlen;
1210 
1211  /* just point into the array */
1212  map[dim][i] = PointerGetDatum(dataptr);
1213  dataptr += MAXALIGN(info[dim].typlen);
1214  }
1215  }
1216  else if (info[dim].typlen == -1)
1217  {
1218  /* varlena */
1219  for (i = 0; i < info[dim].nvalues; i++)
1220  {
1221  uint32 len;
1222 
1223  /* read the uint32 length */
1224  memcpy(&len, ptr, sizeof(uint32));
1225  ptr += sizeof(uint32);
1226 
1227  /* the length is data-only */
1228  SET_VARSIZE(dataptr, len + VARHDRSZ);
1229  memcpy(VARDATA(dataptr), ptr, len);
1230  ptr += len;
1231 
1232  /* just point into the array */
1233  map[dim][i] = PointerGetDatum(dataptr);
1234 
1235  /* skip to place of the next deserialized value */
1236  dataptr += MAXALIGN(len + VARHDRSZ);
1237  }
1238  }
1239  else if (info[dim].typlen == -2)
1240  {
1241  /* cstring */
1242  for (i = 0; i < info[dim].nvalues; i++)
1243  {
1244  uint32 len;
1245 
1246  memcpy(&len, ptr, sizeof(uint32));
1247  ptr += sizeof(uint32);
1248 
1249  memcpy(dataptr, ptr, len);
1250  ptr += len;
1251 
1252  /* just point into the array */
1253  map[dim][i] = PointerGetDatum(dataptr);
1254  dataptr += MAXALIGN(len);
1255  }
1256  }
1257 
1258  /* no under/overflow of input array */
1259  Assert(ptr <= (start + info[dim].nbytes));
1260 
1261  /* no overflow of the output mcv value */
1262  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1263  }
1264 
1265  /* check we consumed input data for this dimension exactly */
1266  Assert(ptr == (start + info[dim].nbytes));
1267  }
1268 
1269  /* we should have also filled the MCV list exactly */
1270  Assert(dataptr == ((char *) mcvlist + mcvlen));
1271 
1272  /* deserialize the MCV items and translate the indexes to Datums */
1273  for (i = 0; i < nitems; i++)
1274  {
1275  MCVItem *item = &mcvlist->items[i];
1276 
1277  item->values = (Datum *) valuesptr;
1278  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1279 
1280  item->isnull = (bool *) isnullptr;
1281  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1282 
1283  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1284  ptr += sizeof(bool) * ndims;
1285 
1286  memcpy(&item->frequency, ptr, sizeof(double));
1287  ptr += sizeof(double);
1288 
1289  memcpy(&item->base_frequency, ptr, sizeof(double));
1290  ptr += sizeof(double);
1291 
1292  /* finally translate the indexes (for non-NULL only) */
1293  for (dim = 0; dim < ndims; dim++)
1294  {
1295  uint16 index;
1296 
1297  memcpy(&index, ptr, sizeof(uint16));
1298  ptr += sizeof(uint16);
1299 
1300  if (item->isnull[dim])
1301  continue;
1302 
1303  item->values[dim] = map[dim][index];
1304  }
1305 
1306  /* check we're not overflowing the input */
1307  Assert(ptr <= endptr);
1308  }
1309 
1310  /* check that we processed all the data */
1311  Assert(ptr == endptr);
1312 
1313  /* release the buffers used for mapping */
1314  for (dim = 0; dim < ndims; dim++)
1315  pfree(map[dim]);
1316 
1317  pfree(map);
1318 
1319  return mcvlist;
1320 }
uint32 nitems
Definition: statistics.h:90
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:92
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:561
#define MinSizeOfMCVList
Definition: mcv.c:62
struct DimensionInfo DimensionInfo
uint32 magic
Definition: statistics.h:88
AttrNumber ndimensions
Definition: statistics.h:91
struct MCVItem MCVItem
Datum * values
Definition: statistics.h:82
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:93
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:366
void pfree(void *pointer)
Definition: mcxt.c:1056
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:367
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:71
uint32 type
Definition: statistics.h:89
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:66
void * palloc0(Size size)
Definition: mcxt.c:980
uintptr_t Datum
Definition: postgres.h:367
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:69
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
#define Assert(condition)
Definition: c.h:738
double base_frequency
Definition: statistics.h:80
#define STATS_MCV_MAGIC
Definition: statistics.h:65
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:691
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
bool * isnull
Definition: statistics.h:81
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
void * palloc(Size size)
Definition: mcxt.c:949
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:75
#define elog(elevel,...)
Definition: elog.h:214
int i
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:661
double frequency
Definition: statistics.h:79
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:121
unsigned char bool
Definition: c.h:317

◆ statext_mcv_serialize()

bytea* statext_mcv_serialize ( MCVList mcv,
VacAttrStats **  stats 
)

Definition at line 618 of file mcv.c.

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString, DatumGetPointer, MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum, PrepareSortSupportFromOrderingOp(), qsort_arg(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, store_att_byval, DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, MCVItem::values, values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

619 {
620  int i;
621  int dim;
622  int ndims = mcvlist->ndimensions;
623 
624  SortSupport ssup;
625  DimensionInfo *info;
626 
627  Size total_length;
628 
629  /* serialized items (indexes into arrays, etc.) */
630  bytea *raw;
631  char *ptr;
632  char *endptr PG_USED_FOR_ASSERTS_ONLY;
633 
634  /* values per dimension (and number of non-NULL values) */
635  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
636  int *counts = (int *) palloc0(sizeof(int) * ndims);
637 
638  /*
639  * We'll include some rudimentary information about the attribute types
640  * (length, by-val flag), so that we don't have to look them up while
641  * deserializating the MCV list (we already have the type OID in the
642  * header). This is safe, because when changing type of the attribute the
643  * statistics gets dropped automatically. We need to store the info about
644  * the arrays of deduplicated values anyway.
645  */
646  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
647 
648  /* sort support data for all attributes included in the MCV list */
649  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
650 
651  /* collect and deduplicate values for each dimension (attribute) */
652  for (dim = 0; dim < ndims; dim++)
653  {
654  int ndistinct;
655  TypeCacheEntry *typentry;
656 
657  /*
658  * Lookup the LT operator (can't get it from stats extra_data, as we
659  * don't know how to interpret that - scalar vs. array etc.).
660  */
661  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
662 
663  /* copy important info about the data type (length, by-value) */
664  info[dim].typlen = stats[dim]->attrtype->typlen;
665  info[dim].typbyval = stats[dim]->attrtype->typbyval;
666 
667  /* allocate space for values in the attribute and collect them */
668  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
669 
670  for (i = 0; i < mcvlist->nitems; i++)
671  {
672  /* skip NULL values - we don't need to deduplicate those */
673  if (mcvlist->items[i].isnull[dim])
674  continue;
675 
676  /* append the value at the end */
677  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
678  counts[dim] += 1;
679  }
680 
681  /* if there are just NULL values in this dimension, we're done */
682  if (counts[dim] == 0)
683  continue;
684 
685  /* sort and deduplicate the data */
686  ssup[dim].ssup_cxt = CurrentMemoryContext;
687  ssup[dim].ssup_collation = stats[dim]->attrcollid;
688  ssup[dim].ssup_nulls_first = false;
689 
690  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
691 
692  qsort_arg(values[dim], counts[dim], sizeof(Datum),
693  compare_scalars_simple, &ssup[dim]);
694 
695  /*
696  * Walk through the array and eliminate duplicate values, but keep the
697  * ordering (so that we can do bsearch later). We know there's at
698  * least one item as (counts[dim] != 0), so we can skip the first
699  * element.
700  */
701  ndistinct = 1; /* number of distinct values */
702  for (i = 1; i < counts[dim]; i++)
703  {
704  /* expect sorted array */
705  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
706 
707  /* if the value is the same as the previous one, we can skip it */
708  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
709  continue;
710 
711  values[dim][ndistinct] = values[dim][i];
712  ndistinct += 1;
713  }
714 
715  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
716  Assert(ndistinct <= PG_UINT16_MAX);
717 
718  /*
719  * Store additional info about the attribute - number of deduplicated
720  * values, and also size of the serialized data. For fixed-length data
721  * types this is trivial to compute, for varwidth types we need to
722  * actually walk the array and sum the sizes.
723  */
724  info[dim].nvalues = ndistinct;
725 
726  if (info[dim].typbyval) /* by-value data types */
727  {
728  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
729 
730  /*
731  * We copy the data into the MCV item during deserialization, so
732  * we don't need to allocate any extra space.
733  */
734  info[dim].nbytes_aligned = 0;
735  }
736  else if (info[dim].typlen > 0) /* fixed-length by-ref */
737  {
738  /*
739  * We don't care about alignment in the serialized data, so we
740  * pack the data as much as possible. But we also track how much
741  * data will be needed after deserialization, and in that case we
742  * need to account for alignment of each item.
743  *
744  * Note: As the items are fixed-length, we could easily compute
745  * this during deserialization, but we do it here anyway.
746  */
747  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
748  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
749  }
750  else if (info[dim].typlen == -1) /* varlena */
751  {
752  info[dim].nbytes = 0;
753  info[dim].nbytes_aligned = 0;
754  for (i = 0; i < info[dim].nvalues; i++)
755  {
756  Size len;
757 
758  /*
759  * For varlena values, we detoast the values and store the
760  * length and data separately. We don't bother with alignment
761  * here, which means that during deserialization we need to
762  * copy the fields and only access the copies.
763  */
764  values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
765 
766  /* serialized length (uint32 length + data) */
767  len = VARSIZE_ANY_EXHDR(values[dim][i]);
768  info[dim].nbytes += sizeof(uint32); /* length */
769  info[dim].nbytes += len; /* value (no header) */
770 
771  /*
772  * During deserialization we'll build regular varlena values
773  * with full headers, and we need to align them properly.
774  */
775  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
776  }
777  }
778  else if (info[dim].typlen == -2) /* cstring */
779  {
780  info[dim].nbytes = 0;
781  info[dim].nbytes_aligned = 0;
782  for (i = 0; i < info[dim].nvalues; i++)
783  {
784  Size len;
785 
786  /*
787  * For cstring, we do similar thing as for varlena - first we
788  * store the length as uint32 and then the data. We don't care
789  * about alignment, which means that during deserialization we
790  * need to copy the fields and only access the copies.
791  */
792 
793  /* c-strings include terminator, so +1 byte */
794  len = strlen(DatumGetCString(values[dim][i])) + 1;
795  info[dim].nbytes += sizeof(uint32); /* length */
796  info[dim].nbytes += len; /* value */
797 
798  /* space needed for properly aligned deserialized copies */
799  info[dim].nbytes_aligned += MAXALIGN(len);
800  }
801  }
802 
803  /* we know (count>0) so there must be some data */
804  Assert(info[dim].nbytes > 0);
805  }
806 
807  /*
808  * Now we can finally compute how much space we'll actually need for the
809  * whole serialized MCV list (varlena header, MCV header, dimension info
810  * for each attribute, deduplicated values and items).
811  */
812  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
813  + sizeof(AttrNumber) /* ndimensions */
814  + (ndims * sizeof(Oid)); /* attribute types */
815 
816  /* dimension info */
817  total_length += ndims * sizeof(DimensionInfo);
818 
819  /* add space for the arrays of deduplicated values */
820  for (i = 0; i < ndims; i++)
821  total_length += info[i].nbytes;
822 
823  /*
824  * And finally account for the items (those are fixed-length, thanks to
825  * replacing values with uint16 indexes into the deduplicated arrays).
826  */
827  total_length += mcvlist->nitems * ITEM_SIZE(dim);
828 
829  /*
830  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
831  * we set them to zero to make the result more compressible).
832  */
833  raw = (bytea *) palloc0(VARHDRSZ + total_length);
834  SET_VARSIZE(raw, VARHDRSZ + total_length);
835 
836  ptr = VARDATA(raw);
837  endptr = ptr + total_length;
838 
839  /* copy the MCV list header fields, one by one */
840  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
841  ptr += sizeof(uint32);
842 
843  memcpy(ptr, &mcvlist->type, sizeof(uint32));
844  ptr += sizeof(uint32);
845 
846  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
850  ptr += sizeof(AttrNumber);
851 
852  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
853  ptr += (sizeof(Oid) * ndims);
854 
855  /* store information about the attributes (data amounts, ...) */
856  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
857  ptr += sizeof(DimensionInfo) * ndims;
858 
859  /* Copy the deduplicated values for all attributes to the output. */
860  for (dim = 0; dim < ndims; dim++)
861  {
862  /* remember the starting point for Asserts later */
863  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
864 
865  for (i = 0; i < info[dim].nvalues; i++)
866  {
867  Datum value = values[dim][i];
868 
869  if (info[dim].typbyval) /* passed by value */
870  {
871  Datum tmp;
872 
873  /*
874  * For values passed by value, we need to copy just the
875  * significant bytes - we can't use memcpy directly, as that
876  * assumes little endian behavior. store_att_byval does
877  * almost what we need, but it requires properly aligned
878  * buffer - the output buffer does not guarantee that. So we
879  * simply use a local Datum variable (which guarantees proper
880  * alignment), and then copy the value from it.
881  */
882  store_att_byval(&tmp, value, info[dim].typlen);
883 
884  memcpy(ptr, &tmp, info[dim].typlen);
885  ptr += info[dim].typlen;
886  }
887  else if (info[dim].typlen > 0) /* passed by reference */
888  {
889  /* no special alignment needed, treated as char array */
890  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
891  ptr += info[dim].typlen;
892  }
893  else if (info[dim].typlen == -1) /* varlena */
894  {
896 
897  /* copy the length */
898  memcpy(ptr, &len, sizeof(uint32));
899  ptr += sizeof(uint32);
900 
901  /* data from the varlena value (without the header) */
902  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
903  ptr += len;
904  }
905  else if (info[dim].typlen == -2) /* cstring */
906  {
907  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
908 
909  /* copy the length */
910  memcpy(ptr, &len, sizeof(uint32));
911  ptr += sizeof(uint32);
912 
913  /* value */
914  memcpy(ptr, DatumGetCString(value), len);
915  ptr += len;
916  }
917 
918  /* no underflows or overflows */
919  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
920  }
921 
922  /* we should get exactly nbytes of data for this dimension */
923  Assert((ptr - start) == info[dim].nbytes);
924  }
925 
926  /* Serialize the items, with uint16 indexes instead of the values. */
927  for (i = 0; i < mcvlist->nitems; i++)
928  {
929  MCVItem *mcvitem = &mcvlist->items[i];
930 
931  /* don't write beyond the allocated space */
932  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
933 
934  /* copy NULL and frequency flags into the serialized MCV */
935  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
936  ptr += sizeof(bool) * ndims;
937 
938  memcpy(ptr, &mcvitem->frequency, sizeof(double));
939  ptr += sizeof(double);
940 
941  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
942  ptr += sizeof(double);
943 
944  /* store the indexes last */
945  for (dim = 0; dim < ndims; dim++)
946  {
947  uint16 index = 0;
948  Datum *value;
949 
950  /* do the lookup only for non-NULL values */
951  if (!mcvitem->isnull[dim])
952  {
953  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
954  info[dim].nvalues, sizeof(Datum),
955  compare_scalars_simple, &ssup[dim]);
956 
957  Assert(value != NULL); /* serialization or deduplication
958  * error */
959 
960  /* compute index within the deduplicated array */
961  index = (uint16) (value - values[dim]);
962 
963  /* check the index is within expected bounds */
964  Assert(index < info[dim].nvalues);
965  }
966 
967  /* copy the index into the serialized MCV */
968  memcpy(ptr, &index, sizeof(uint16));
969  ptr += sizeof(uint16);
970  }
971 
972  /* make sure we don't overflow the allocated value */
973  Assert(ptr <= endptr);
974  }
975 
976  /* at this point we expect to match the total_length exactly */
977  Assert(ptr == endptr);
978 
979  pfree(values);
980  pfree(counts);
981 
982  return raw;
983 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
bool ssup_nulls_first
Definition: sortsupport.h:75
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
#define PointerGetDatum(X)
Definition: postgres.h:556
#define VARHDRSZ
Definition: c.h:561
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
struct DimensionInfo DimensionInfo
Datum * values
Definition: statistics.h:82
unsigned int Oid
Definition: postgres_ext.h:31
Definition: type.h:89
unsigned short uint16
Definition: c.h:366
void pfree(void *pointer)
Definition: mcxt.c:1056
#define PG_UINT16_MAX
Definition: c.h:448
#define DatumGetCString(X)
Definition: postgres.h:566
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int compare_scalars_simple(const void *a, const void *b, void *arg)
unsigned int uint32
Definition: c.h:367
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
#define store_att_byval(T, newdatum, attlen)
Definition: tupmacs.h:226
void * palloc0(Size size)
Definition: mcxt.c:980
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:331
#define ITEM_SIZE(ndims)
Definition: mcv.c:56
static struct @143 value
#define Assert(condition)
Definition: c.h:738
double base_frequency
Definition: statistics.h:80
size_t Size
Definition: c.h:466
#define MAXALIGN(LEN)
Definition: c.h:691
Oid attrcollid
Definition: vacuum.h:127
#define DatumGetPointer(X)
Definition: postgres.h:549
bool * isnull
Definition: statistics.h:81
static Datum values[MAXATTR]
Definition: bootstrap.c:167
Form_pg_type attrtype
Definition: vacuum.h:126
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:131
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
Definition: c.h:555
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21
double frequency
Definition: statistics.h:79
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:121
unsigned char bool
Definition: c.h:317

◆ statext_ndistinct_build()

MVNDistinct* statext_ndistinct_build ( double  totalrows,
int  numrows,
HeapTuple rows,
Bitmapset attrs,
VacAttrStats **  stats 
)

Definition at line 86 of file mvdistinct.c.

References Assert, VacAttrStats::attr, MVNDistinctItem::attrs, bms_add_member(), bms_num_members(), generator_free(), generator_init(), generator_next(), MVNDistinct::items, CombinationGenerator::k, MVNDistinct::magic, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, num_combinations(), offsetof, palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

88 {
89  MVNDistinct *result;
90  int k;
91  int itemcnt;
92  int numattrs = bms_num_members(attrs);
93  int numcombs = num_combinations(numattrs);
94 
95  result = palloc(offsetof(MVNDistinct, items) +
96  numcombs * sizeof(MVNDistinctItem));
97  result->magic = STATS_NDISTINCT_MAGIC;
99  result->nitems = numcombs;
100 
101  itemcnt = 0;
102  for (k = 2; k <= numattrs; k++)
103  {
104  int *combination;
106 
107  /* generate combinations of K out of N elements */
108  generator = generator_init(numattrs, k);
109 
110  while ((combination = generator_next(generator)))
111  {
112  MVNDistinctItem *item = &result->items[itemcnt];
113  int j;
114 
115  item->attrs = NULL;
116  for (j = 0; j < k; j++)
117  item->attrs = bms_add_member(item->attrs,
118  stats[combination[j]]->attr->attnum);
119  item->ndistinct =
120  ndistinct_for_combination(totalrows, numrows, rows,
121  stats, k, combination);
122 
123  itemcnt++;
124  Assert(itemcnt <= result->nitems);
125  }
126 
127  generator_free(generator);
128  }
129 
130  /* must consume exactly the whole output array */
131  Assert(itemcnt == result->nitems);
132 
133  return result;
134 }
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:38
static double ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, VacAttrStats **stats, int k, int *combination)
Definition: mvdistinct.c:430
double ndistinct
Definition: statistics.h:28
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:629
Form_pg_attribute attr
Definition: vacuum.h:123
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
uint32 nitems
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:35
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:644
uint32 type
Definition: statistics.h:36
#define Assert(condition)
Definition: c.h:738
Bitmapset * attrs
Definition: statistics.h:29
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
void * palloc(Size size)
Definition: mcxt.c:949
static int num_combinations(int n)
Definition: mvdistinct.c:577
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:591
#define offsetof(type, field)
Definition: c.h:661

◆ statext_ndistinct_deserialize()

MVNDistinct* statext_ndistinct_deserialize ( bytea data)

Definition at line 249 of file mvdistinct.c.

References Assert, MVNDistinctItem::attrs, bms_add_member(), elog, ERROR, i, MVNDistinct::items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::ndistinct, MVNDistinct::nitems, offsetof, palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

250 {
251  int i;
252  Size minimum_size;
253  MVNDistinct ndist;
254  MVNDistinct *ndistinct;
255  char *tmp;
256 
257  if (data == NULL)
258  return NULL;
259 
260  /* we expect at least the basic fields of MVNDistinct struct */
261  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
262  elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
264 
265  /* initialize pointer to the data part (skip the varlena header) */
266  tmp = VARDATA_ANY(data);
267 
268  /* read the header fields and perform basic sanity checks */
269  memcpy(&ndist.magic, tmp, sizeof(uint32));
270  tmp += sizeof(uint32);
271  memcpy(&ndist.type, tmp, sizeof(uint32));
272  tmp += sizeof(uint32);
273  memcpy(&ndist.nitems, tmp, sizeof(uint32));
274  tmp += sizeof(uint32);
275 
276  if (ndist.magic != STATS_NDISTINCT_MAGIC)
277  elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
279  if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
280  elog(ERROR, "invalid ndistinct type %d (expected %d)",
282  if (ndist.nitems == 0)
283  elog(ERROR, "invalid zero-length item array in MVNDistinct");
284 
285  /* what minimum bytea size do we expect for those parameters */
286  minimum_size = MinSizeOfItems(ndist.nitems);
287  if (VARSIZE_ANY_EXHDR(data) < minimum_size)
288  elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
289  VARSIZE_ANY_EXHDR(data), minimum_size);
290 
291  /*
292  * Allocate space for the ndistinct items (no space for each item's
293  * attnos: those live in bitmapsets allocated separately)
294  */
295  ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
296  (ndist.nitems * sizeof(MVNDistinctItem)));
297  ndistinct->magic = ndist.magic;
298  ndistinct->type = ndist.type;
299  ndistinct->nitems = ndist.nitems;
300 
301  for (i = 0; i < ndistinct->nitems; i++)
302  {
303  MVNDistinctItem *item = &ndistinct->items[i];
304  int nelems;
305 
306  item->attrs = NULL;
307 
308  /* ndistinct value */
309  memcpy(&item->ndistinct, tmp, sizeof(double));
310  tmp += sizeof(double);
311 
312  /* number of attributes */
313  memcpy(&nelems, tmp, sizeof(int));
314  tmp += sizeof(int);
315  Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
316 
317  while (nelems-- > 0)
318  {
319  AttrNumber attno;
320 
321  memcpy(&attno, tmp, sizeof(AttrNumber));
322  tmp += sizeof(AttrNumber);
323  item->attrs = bms_add_member(item->attrs, attno);
324  }
325 
326  /* still within the bytea */
327  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
328  }
329 
330  /* we should have consumed the whole bytea exactly */
331  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
332 
333  return ndistinct;
334 }
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:57
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:38
double ndistinct
Definition: statistics.h:28
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
#define ERROR
Definition: elog.h:43
unsigned int uint32
Definition: c.h:367
uint32 nitems
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:35
void * palloc0(Size size)
Definition: mcxt.c:980
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
uint32 type
Definition: statistics.h:36
#define SizeOfHeader
Definition: mvdistinct.c:47
#define Assert(condition)
Definition: c.h:738
size_t Size
Definition: c.h:466
Bitmapset * attrs
Definition: statistics.h:29
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define MAXALIGN(LEN)
Definition: c.h:691
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
#define elog(elevel,...)
Definition: elog.h:214
int i
int16 AttrNumber
Definition: attnum.h:21
#define offsetof(type, field)
Definition: c.h:661

◆ statext_ndistinct_serialize()

bytea* statext_ndistinct_serialize ( MVNDistinct ndistinct)

Definition at line 171 of file mvdistinct.c.

References Assert, MVNDistinctItem::attrs, bms_next_member(), bms_num_members(), i, MVNDistinct::items, MVNDistinct::magic, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output(), palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, value, VARDATA, and VARHDRSZ.

Referenced by statext_store().

172 {
173  int i;
174  bytea *output;
175  char *tmp;
176  Size len;
177 
178  Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
179  Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
180 
181  /*
182  * Base size is size of scalar fields in the struct, plus one base struct
183  * for each item, including number of items for each.
184  */
185  len = VARHDRSZ + SizeOfHeader;
186 
187  /* and also include space for the actual attribute numbers */
188  for (i = 0; i < ndistinct->nitems; i++)
189  {
190  int nmembers;
191 
192  nmembers = bms_num_members(ndistinct->items[i].attrs);
193  Assert(nmembers >= 2);
194 
195  len += SizeOfItem(nmembers);
196  }
197 
198  output = (bytea *) palloc(len);
199  SET_VARSIZE(output, len);
200 
201  tmp = VARDATA(output);
202 
203  /* Store the base struct values (magic, type, nitems) */
204  memcpy(tmp, &ndistinct->magic, sizeof(uint32));
205  tmp += sizeof(uint32);
206  memcpy(tmp, &ndistinct->type, sizeof(uint32));
207  tmp += sizeof(uint32);
208  memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
209  tmp += sizeof(uint32);
210 
211  /*
212  * store number of attributes and attribute numbers for each entry
213  */
214  for (i = 0; i < ndistinct->nitems; i++)
215  {
216  MVNDistinctItem item = ndistinct->items[i];
217  int nmembers = bms_num_members(item.attrs);
218  int x;
219 
220  memcpy(tmp, &item.ndistinct, sizeof(double));
221  tmp += sizeof(double);
222  memcpy(tmp, &nmembers, sizeof(int));
223  tmp += sizeof(int);
224 
225  x = -1;
226  while ((x = bms_next_member(item.attrs, x)) >= 0)
227  {
229 
230  memcpy(tmp, &value, sizeof(AttrNumber));
231  tmp += sizeof(AttrNumber);
232  }
233 
234  /* protect against overflows */
235  Assert(tmp <= ((char *) output + len));
236  }
237 
238  /* check we used exactly the expected space */
239  Assert(tmp == ((char *) output + len));
240 
241  return output;
242 }
#define VARDATA(PTR)
Definition: postgres.h:302
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:38
static void output(uint64 loop_count)
#define VARHDRSZ
Definition: c.h:561
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1043
double ndistinct
Definition: statistics.h:28
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
#define SizeOfItem(natts)
Definition: mvdistinct.c:50
unsigned int uint32
Definition: c.h:367
uint32 nitems
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:35
static struct @143 value
uint32 type
Definition: statistics.h:36
#define SizeOfHeader
Definition: mvdistinct.c:47
#define Assert(condition)
Definition: c.h:738
size_t Size
Definition: c.h:466
Bitmapset * attrs
Definition: statistics.h:29
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
void * palloc(Size size)
Definition: mcxt.c:949
int i
Definition: c.h:555
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
int16 AttrNumber
Definition: attnum.h:21