PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 
struct  StatsBuildData
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportData * MultiSortSupport
 
typedef struct SortItem SortItem
 
typedef struct StatsBuildData StatsBuildData
 

Functions

MVNDistinct * statext_ndistinct_build (double totalrows, StatsBuildData *data)
 
bytea * statext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinct * statext_ndistinct_deserialize (bytea *data)
 
MVDependencies * statext_dependencies_build (StatsBuildData *data)
 
bytea * statext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependencies * statext_dependencies_deserialize (bytea *data)
 
MCVList * statext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
bytea * statext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVList * statext_mcv_deserialize (bytea *data)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
AttrNumber * build_attnums_array (Bitmapset *attrs, int nexprs, int *numattrs)
 
SortItem * build_sorted_items (StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_opclause_args (List *args, Node **exprp, Const **cstp, bool *expronleftp)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

typedef struct DimensionInfo DimensionInfo

◆ MultiSortSupport

typedef MultiSortSupportData * MultiSortSupport

Definition at line 51 of file extended_stats_internal.h.

◆ MultiSortSupportData

typedef struct MultiSortSupportData MultiSortSupportData

◆ SortItem

typedef struct SortItem SortItem

◆ StatsBuildData

typedef struct StatsBuildData StatsBuildData

Function Documentation

◆ build_attnums_array()

AttrNumber * build_attnums_array ( Bitmapset * attrs,
int  nexprs,
int *  numattrs 
)

Definition at line 938 of file extended_stats.c.

939{
940 int i,
941 j;
942 AttrNumber *attnums;
943 int num = bms_num_members(attrs);
944
945 if (numattrs)
946 *numattrs = num;
947
948 /* build attnums from the bitmapset */
949 attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
950 i = 0;
951 j = -1;
952 while ((j = bms_next_member(attrs, j)) >= 0)
953 {
954 int attnum = (j - nexprs);
955
956 /*
957 * Make sure the bitmap contains only user-defined attributes. As
958 * bitmaps can't contain negative values, this can be violated in two
959 * ways. Firstly, the bitmap might contain 0 as a member, and secondly
960 * the integer value might be larger than MaxAttrNumber.
961 */
962 Assert(AttributeNumberIsValid(attnum));
963 Assert(attnum <= MaxAttrNumber);
964 Assert(attnum >= (-nexprs));
965
966 attnums[i++] = (AttrNumber) attnum;
967
968 /* protect against overflows */
969 Assert(i <= num);
970 }
971
972 return attnums;
973}
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define MaxAttrNumber
Definition: attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1306
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
#define Assert(condition)
Definition: c.h:812
int j
Definition: isn.c:73
int i
Definition: isn.c:72
void * palloc(Size size)
Definition: mcxt.c:1317
int16 attnum
Definition: pg_attribute.h:74

References Assert, attnum, AttributeNumberIsValid, bms_next_member(), bms_num_members(), i, j, MaxAttrNumber, and palloc().

◆ build_sorted_items()

SortItem * build_sorted_items ( StatsBuildData * data,
int *  nitems,
MultiSortSupport  mss,
int  numattrs,
AttrNumber * attnums
)

Definition at line 983 of file extended_stats.c.

986{
987 int i,
988 j,
989 len,
990 nrows;
991 int nvalues = data->numrows * numattrs;
992
993 SortItem *items;
994 Datum *values;
995 bool *isnull;
996 char *ptr;
997 int *typlen;
998
999 /* Compute the total amount of memory we need (both items and values). */
1000 len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
1001
1002 /* Allocate the memory and split it into the pieces. */
1003 ptr = palloc0(len);
1004
1005 /* items to sort */
1006 items = (SortItem *) ptr;
1007 ptr += data->numrows * sizeof(SortItem);
1008
1009 /* values and null flags */
1010 values = (Datum *) ptr;
1011 ptr += nvalues * sizeof(Datum);
1012
1013 isnull = (bool *) ptr;
1014 ptr += nvalues * sizeof(bool);
1015
1016 /* make sure we consumed the whole buffer exactly */
1017 Assert((ptr - (char *) items) == len);
1018
1019 /* fix the pointers to Datum and bool arrays */
1020 nrows = 0;
1021 for (i = 0; i < data->numrows; i++)
1022 {
1023 items[nrows].values = &values[nrows * numattrs];
1024 items[nrows].isnull = &isnull[nrows * numattrs];
1025
1026 nrows++;
1027 }
1028
1029 /* build a local cache of typlen for all attributes */
1030 typlen = (int *) palloc(sizeof(int) * data->nattnums);
1031 for (i = 0; i < data->nattnums; i++)
1032 typlen[i] = get_typlen(data->stats[i]->attrtypid);
1033
1034 nrows = 0;
1035 for (i = 0; i < data->numrows; i++)
1036 {
1037 bool toowide = false;
1038
1039 /* load the values/null flags from sample rows */
1040 for (j = 0; j < numattrs; j++)
1041 {
1042 Datum value;
1043 bool isnull;
1044 int attlen;
1045 AttrNumber attnum = attnums[j];
1046
1047 int idx;
1048
1049 /* match attnum to the pre-calculated data */
1050 for (idx = 0; idx < data->nattnums; idx++)
1051 {
1052 if (attnum == data->attnums[idx])
1053 break;
1054 }
1055
1056 Assert(idx < data->nattnums);
1057
1058 value = data->values[idx][i];
1059 isnull = data->nulls[idx][i];
1060 attlen = typlen[idx];
1061
1062 /*
1063 * If this is a varlena value, check if it's too wide and if yes
1064 * then skip the whole item. Otherwise detoast the value.
1065 *
1066 * XXX It may happen that we've already detoasted some preceding
1067 * values for the current item. We don't bother to cleanup those
1068 * on the assumption that those are small (below WIDTH_THRESHOLD)
1069 * and will be discarded at the end of analyze.
1070 */
1071 if ((!isnull) && (attlen == -1))
1072 {
1073 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
1074 {
1075 toowide = true;
1076 break;
1077 }
1078
1079 value = PointerGetDatum(PG_DETOAST_DATUM(value));
1080 }
1081
1082 items[nrows].values[j] = value;
1083 items[nrows].isnull[j] = isnull;
1084 }
1085
1086 if (toowide)
1087 continue;
1088
1089 nrows++;
1090 }
1091
1092 /* store the actual number of items (ignoring the too-wide ones) */
1093 *nitems = nrows;
1094
1095 /* all items were too wide */
1096 if (nrows == 0)
1097 {
1098 /* everything is allocated as a single chunk */
1099 pfree(items);
1100 return NULL;
1101 }
1102
1103 /* do the sort, using the multi-sort */
1104 qsort_interruptible(items, nrows, sizeof(SortItem),
1105 multi_sort_compare, mss);
1106
1107 return items;
1108}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
static Datum values[MAXATTR]
Definition: bootstrap.c:151
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
#define WIDTH_THRESHOLD
int multi_sort_compare(const void *a, const void *b, void *arg)
struct SortItem SortItem
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define nitems(x)
Definition: indent.h:31
static struct @161 value
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2197
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
int16 attlen
Definition: pg_attribute.h:59
const void size_t len
const void * data
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static ItemArray items
Definition: test_tidstore.c:48

References Assert, attlen, attnum, data, get_typlen(), i, idx(), items, j, len, multi_sort_compare(), nitems, palloc(), palloc0(), pfree(), PG_DETOAST_DATUM, PointerGetDatum(), qsort_interruptible(), toast_raw_datum_size(), value, values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)

Definition at line 924 of file extended_stats.c.

925{
926 return ApplySortComparator(a, false, b, false, ssup);
927}
int b
Definition: isn.c:69
int a
Definition: isn.c:68
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200

References a, ApplySortComparator(), and b.

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

◆ compare_scalars_simple()

int compare_scalars_simple ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 916 of file extended_stats.c.

917{
918 return compare_datums_simple(*(Datum *) a,
919 *(Datum *) b,
920 (SortSupport) arg);
921}
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
void * arg

References a, arg, b, and compare_datums_simple().

Referenced by statext_mcv_serialize().

◆ examine_opclause_args()

bool examine_opclause_args ( List * args,
Node **  exprp,
Const **  cstp,
bool *  expronleftp 
)

Definition at line 2052 of file extended_stats.c.

2054{
2055 Node *expr;
2056 Const *cst;
2057 bool expronleft;
2058 Node *leftop,
2059 *rightop;
2060
2061 /* enforced by statext_is_compatible_clause_internal */
2062 Assert(list_length(args) == 2);
2063
2064 leftop = linitial(args);
2065 rightop = lsecond(args);
2066
2067 /* strip RelabelType from either side of the expression */
2068 if (IsA(leftop, RelabelType))
2069 leftop = (Node *) ((RelabelType *) leftop)->arg;
2070
2071 if (IsA(rightop, RelabelType))
2072 rightop = (Node *) ((RelabelType *) rightop)->arg;
2073
2074 if (IsA(rightop, Const))
2075 {
2076 expr = (Node *) leftop;
2077 cst = (Const *) rightop;
2078 expronleft = true;
2079 }
2080 else if (IsA(leftop, Const))
2081 {
2082 expr = (Node *) rightop;
2083 cst = (Const *) leftop;
2084 expronleft = false;
2085 }
2086 else
2087 return false;
2088
2089 /* return pointers to the extracted parts if requested */
2090 if (exprp)
2091 *exprp = expr;
2092
2093 if (cstp)
2094 *cstp = cst;
2095
2096 if (expronleftp)
2097 *expronleftp = expronleft;
2098
2099 return true;
2100}
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
Definition: nodes.h:129

References arg, generate_unaccent_rules::args, Assert, IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo * root,
StatisticExtInfo * stat,
MCVList * mcv,
Node * clause,
bool **  or_matches,
Selectivity * basesel,
Selectivity * overlap_mcvsel,
Selectivity * overlap_basesel,
Selectivity * totalsel
)

Definition at line 2126 of file mcv.c.

2130{
2131 Selectivity s = 0.0;
2132 bool *new_matches;
2133 int i;
2134
2135 /* build the OR-matches bitmap, if not built already */
2136 if (*or_matches == NULL)
2137 *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2138
2139 /* build the match bitmap for the new clause */
2140 new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2141 stat->exprs, mcv, false);
2142
2143 /*
2144 * Sum the frequencies for all the MCV items matching this clause and also
2145 * those matching the overlap between this clause and any of the preceding
2146 * clauses as described above.
2147 */
2148 *basesel = 0.0;
2149 *overlap_mcvsel = 0.0;
2150 *overlap_basesel = 0.0;
2151 *totalsel = 0.0;
2152 for (i = 0; i < mcv->nitems; i++)
2153 {
2154 *totalsel += mcv->items[i].frequency;
2155
2156 if (new_matches[i])
2157 {
2158 s += mcv->items[i].frequency;
2159 *basesel += mcv->items[i].base_frequency;
2160
2161 if ((*or_matches)[i])
2162 {
2163 *overlap_mcvsel += mcv->items[i].frequency;
2164 *overlap_basesel += mcv->items[i].base_frequency;
2165 }
2166 }
2167
2168 /* update the OR-matches bitmap for the next clause */
2169 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2170 }
2171
2172 pfree(new_matches);
2173
2174 return s;
2175}
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1599
double Selectivity
Definition: nodes.h:250
#define list_make1(x1)
Definition: pg_list.h:212
tree ctl root
Definition: radixtree.h:1857
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94

References MCVItem::base_frequency, MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0(), pfree(), and root.

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo * root,
StatisticExtInfo * stat,
List * clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo * sjinfo,
RelOptInfo * rel,
Selectivity * basesel,
Selectivity * totalsel
)

Definition at line 2048 of file mcv.c.

2053{
2054 int i;
2055 MCVList *mcv;
2056 Selectivity s = 0.0;
2057 RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2058
2059 /* match/mismatch bitmap for each MCV item */
2060 bool *matches = NULL;
2061
2062 /* load the MCV list stored in the statistics object */
2063 mcv = statext_mcv_load(stat->statOid, rte->inh);
2064
2065 /* build a match bitmap for the clauses */
2066 matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2067 mcv, false);
2068
2069 /* sum frequencies for all the matching MCV items */
2070 *basesel = 0.0;
2071 *totalsel = 0.0;
2072 for (i = 0; i < mcv->nitems; i++)
2073 {
2074 *totalsel += mcv->items[i].frequency;
2075
2076 if (matches[i] != false)
2077 {
2078 *basesel += mcv->items[i].base_frequency;
2079 s += mcv->items[i].frequency;
2080 }
2081 }
2082
2083 return s;
2084}
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition: mcv.c:558
Index relid
Definition: pathnodes.h:918

References MCVItem::base_frequency, MCVItem::frequency, i, RangeTblEntry::inh, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, RelOptInfo::relid, root, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)

Definition at line 2006 of file mcv.c.

2010{
2011 Selectivity other_sel;
2012 Selectivity sel;
2013
2014 /* estimated selectivity of values not covered by MCV matches */
2015 other_sel = simple_sel - mcv_basesel;
2016 CLAMP_PROBABILITY(other_sel);
2017
2018 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2019 if (other_sel > 1.0 - mcv_totalsel)
2020 other_sel = 1.0 - mcv_totalsel;
2021
2022 /* overall selectivity is the sum of the MCV and non-MCV parts */
2023 sel = mcv_sel + other_sel;
2024 CLAMP_PROBABILITY(sel);
2025
2026 return sel;
2027}
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63

References CLAMP_PROBABILITY.

Referenced by statext_mcv_clauselist_selectivity().

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)

Definition at line 848 of file extended_stats.c.

850{
851 SortSupport ssup = &mss->ssup[sortdim];
852
853 ssup->ssup_cxt = CurrentMemoryContext;
854 ssup->ssup_collation = collation;
855 ssup->ssup_nulls_first = false;
856
857 PrepareSortSupportFromOrderingOp(oper, ssup);
858}
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:370
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66

References CurrentMemoryContext, oper(), PrepareSortSupportFromOrderingOp(), MultiSortSupportData::ssup, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ multi_sort_compare()

int multi_sort_compare ( const void *  a,
const void *  b,
void *  arg 
)

Definition at line 862 of file extended_stats.c.

863{
864 MultiSortSupport mss = (MultiSortSupport) arg;
865 SortItem *ia = (SortItem *) a;
866 SortItem *ib = (SortItem *) b;
867 int i;
868
869 for (i = 0; i < mss->ndims; i++)
870 {
871 int compare;
872
873 compare = ApplySortComparator(ia->values[i], ia->isnull[i],
874 ib->values[i], ib->isnull[i],
875 &mss->ssup[i]);
876
877 if (compare != 0)
878 return compare;
879 }
880
881 /* equal by default */
882 return 0;
883}
MultiSortSupportData * MultiSortSupport
static int compare(const void *arg1, const void *arg2)
Definition: geqo_pool.c:145

References a, ApplySortComparator(), arg, b, compare(), i, SortItem::isnull, MultiSortSupportData::ndims, MultiSortSupportData::ssup, and SortItem::values.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem * a,
const SortItem * b,
MultiSortSupport  mss 
)

Definition at line 887 of file extended_stats.c.

889{
890 return ApplySortComparator(a->values[dim], a->isnull[dim],
891 b->values[dim], b->isnull[dim],
892 &mss->ssup[dim]);
893}

References a, ApplySortComparator(), b, and MultiSortSupportData::ssup.

Referenced by dependency_degree().

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem * a,
const SortItem * b,
MultiSortSupport  mss 
)

Definition at line 896 of file extended_stats.c.

899{
900 int dim;
901
902 for (dim = start; dim <= end; dim++)
903 {
904 int r = ApplySortComparator(a->values[dim], a->isnull[dim],
905 b->values[dim], b->isnull[dim],
906 &mss->ssup[dim]);
907
908 if (r != 0)
909 return r;
910 }
911
912 return 0;
913}
return str start

References a, ApplySortComparator(), b, MultiSortSupportData::ssup, and start.

Referenced by dependency_degree().

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)

Definition at line 829 of file extended_stats.c.

830{
831 MultiSortSupport mss;
832
833 Assert(ndims >= 2);
834
835 mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
836 + sizeof(SortSupportData) * ndims);
837
838 mss->ndims = ndims;
839
840 return mss;
841}
struct SortSupportData SortSupportData

References Assert, MultiSortSupportData::ndims, and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ statext_dependencies_build()

MVDependencies * statext_dependencies_build ( StatsBuildData * data)

Definition at line 348 of file dependencies.c.

349{
350 int i,
351 k;
352
353 /* result */
354 MVDependencies *dependencies = NULL;
355 MemoryContext cxt;
356
357 Assert(data->nattnums >= 2);
358
359 /* tracks memory allocated by dependency_degree calls */
360 cxt = AllocSetContextCreate(CurrentMemoryContext,
361 "dependency_degree cxt",
362 ALLOCSET_DEFAULT_SIZES);
363
364 /*
365 * We'll try build functional dependencies starting from the smallest ones
366 * covering just 2 columns, to the largest ones, covering all columns
367 * included in the statistics object. We start from the smallest ones
368 * because we want to be able to skip already implied ones.
369 */
370 for (k = 2; k <= data->nattnums; k++)
371 {
372 AttrNumber *dependency; /* array with k elements */
373
374 /* prepare a DependencyGenerator of variation */
375 DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
376
377 /* generate all possible variations of k values (out of n) */
378 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
379 {
380 double degree;
381 MVDependency *d;
382 MemoryContext oldcxt;
383
384 /* release memory used by dependency degree calculation */
385 oldcxt = MemoryContextSwitchTo(cxt);
386
387 /* compute how valid the dependency seems */
388 degree = dependency_degree(data, k, dependency);
389
390 MemoryContextSwitchTo(oldcxt);
391 MemoryContextReset(cxt);
392
393 /*
394 * if the dependency seems entirely invalid, don't store it
395 */
396 if (degree == 0.0)
397 continue;
398
399 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
400 + k * sizeof(AttrNumber));
401
402 /* copy the dependency (and keep the indexes into stxkeys) */
403 d->degree = degree;
404 d->nattributes = k;
405 for (i = 0; i < k; i++)
406 d->attributes[i] = data->attnums[dependency[i]];
407
408 /* initialize the list of dependencies */
409 if (dependencies == NULL)
410 {
411 dependencies
412 = (MVDependencies *) palloc0(sizeof(MVDependencies));
413
414 dependencies->magic = STATS_DEPS_MAGIC;
415 dependencies->type = STATS_DEPS_TYPE_BASIC;
416 dependencies->ndeps = 0;
417 }
418
419 dependencies->ndeps++;
420 dependencies = (MVDependencies *) repalloc(dependencies,
421 offsetof(MVDependencies, deps)
422 + dependencies->ndeps * sizeof(MVDependency *));
423
424 dependencies->deps[dependencies->ndeps - 1] = d;
425 }
426
427 /*
428 * we're done with variations of k elements, so free the
429 * DependencyGenerator
430 */
431 DependencyGenerator_free(DependencyGenerator);
432 }
433
434 MemoryContextDelete(cxt);
435
436 return dependencies;
437}
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:204
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:196
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:173
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:221
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, i, if(), MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies * statext_dependencies_deserialize ( bytea * data)

Definition at line 499 of file dependencies.c.

500{
501 int i;
502 Size min_expected_size;
503 MVDependencies *dependencies;
504 char *tmp;
505
506 if (data == NULL)
507 return NULL;
508
509 if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
510 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
511 VARSIZE_ANY_EXHDR(data), SizeOfHeader);
512
513 /* read the MVDependencies header */
514 dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
515
516 /* initialize pointer to the data part (skip the varlena header) */
517 tmp = VARDATA_ANY(data);
518
519 /* read the header fields and perform basic sanity checks */
520 memcpy(&dependencies->magic, tmp, sizeof(uint32));
521 tmp += sizeof(uint32);
522 memcpy(&dependencies->type, tmp, sizeof(uint32));
523 tmp += sizeof(uint32);
524 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
525 tmp += sizeof(uint32);
526
527 if (dependencies->magic != STATS_DEPS_MAGIC)
528 elog(ERROR, "invalid dependency magic %d (expected %d)",
529 dependencies->magic, STATS_DEPS_MAGIC);
530
531 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
532 elog(ERROR, "invalid dependency type %d (expected %d)",
533 dependencies->type, STATS_DEPS_TYPE_BASIC);
534
535 if (dependencies->ndeps == 0)
536 elog(ERROR, "invalid zero-length item array in MVDependencies");
537
538 /* what minimum bytea size do we expect for those parameters */
539 min_expected_size = SizeOfItem(dependencies->ndeps);
540
541 if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
542 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
543 VARSIZE_ANY_EXHDR(data), min_expected_size);
544
545 /* allocate space for the MCV items */
546 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
547 + (dependencies->ndeps * sizeof(MVDependency *)));
548
549 for (i = 0; i < dependencies->ndeps; i++)
550 {
551 double degree;
552 AttrNumber k;
553 MVDependency *d;
554
555 /* degree of validity */
556 memcpy(&degree, tmp, sizeof(double));
557 tmp += sizeof(double);
558
559 /* number of attributes */
560 memcpy(&k, tmp, sizeof(AttrNumber));
561 tmp += sizeof(AttrNumber);
562
563 /* is the number of attributes valid? */
564 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
565
566 /* now that we know the number of attributes, allocate the dependency */
567 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
568 + (k * sizeof(AttrNumber)));
569
570 d->degree = degree;
571 d->nattributes = k;
572
573 /* copy attribute numbers */
574 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
575 tmp += sizeof(AttrNumber) * d->nattributes;
576
577 dependencies->deps[i] = d;
578
579 /* still within the bytea */
580 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
581 }
582
583 /* we should have consumed the whole bytea exactly */
584 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
585
586 return dependencies;
587}
uint32_t uint32
Definition: c.h:485
size_t Size
Definition: c.h:559
#define SizeOfHeader
Definition: dependencies.c:38
#define SizeOfItem(natts)
Definition: dependencies.c:41
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317

References Assert, MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog, ERROR, i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, palloc0(), repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_serialize()

bytea * statext_dependencies_serialize ( MVDependencies * dependencies)

Definition at line 444 of file dependencies.c.

445{
446 int i;
447 bytea *output;
448 char *tmp;
449 Size len;
450
451 /* we need to store ndeps, with a number of attributes for each one */
452 len = VARHDRSZ + SizeOfHeader;
453
454 /* and also include space for the actual attribute numbers and degrees */
455 for (i = 0; i < dependencies->ndeps; i++)
456 len += SizeOfItem(dependencies->deps[i]->nattributes);
457
458 output = (bytea *) palloc0(len);
459 SET_VARSIZE(output, len);
460
461 tmp = VARDATA(output);
462
463 /* Store the base struct values (magic, type, ndeps) */
464 memcpy(tmp, &dependencies->magic, sizeof(uint32));
465 tmp += sizeof(uint32);
466 memcpy(tmp, &dependencies->type, sizeof(uint32));
467 tmp += sizeof(uint32);
468 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
469 tmp += sizeof(uint32);
470
471 /* store number of attributes and attribute numbers for each dependency */
472 for (i = 0; i < dependencies->ndeps; i++)
473 {
474 MVDependency *d = dependencies->deps[i];
475
476 memcpy(tmp, &d->degree, sizeof(double));
477 tmp += sizeof(double);
478
479 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
480 tmp += sizeof(AttrNumber);
481
482 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
483 tmp += sizeof(AttrNumber) * d->nattributes;
484
485 /* protect against overflow */
486 Assert(tmp <= ((char *) output + len));
487 }
488
489 /* make sure we've produced exactly the right amount of data */
490 Assert(tmp == ((char *) output + len));
491
492 return output;
493}
#define VARHDRSZ
Definition: c.h:646
FILE * output
Definition: c.h:641
#define VARDATA(PTR)
Definition: varatt.h:278
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305

References Assert, MVDependency::attributes, MVDependency::degree, MVDependencies::deps, i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output, palloc0(), SET_VARSIZE, SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().

◆ statext_mcv_build()

MCVList * statext_mcv_build ( StatsBuildData * data,
double  totalrows,
int  stattarget 
)

Definition at line 180 of file mcv.c.

181{
182 int i,
183 numattrs,
184 numrows,
185 ngroups,
186 nitems;
187 double mincount;
188 SortItem *items;
189 SortItem *groups;
190 MCVList *mcvlist = NULL;
191 MultiSortSupport mss;
192
193 /* comparator for all the columns */
194 mss = build_mss(data);
195
196 /* sort the rows */
197 items = build_sorted_items(data, &nitems, mss,
198 data->nattnums, data->attnums);
199
200 if (!items)
201 return NULL;
202
203 /* for convenience */
204 numattrs = data->nattnums;
205 numrows = data->numrows;
206
207 /* transform the sorted rows into groups (sorted by frequency) */
208 groups = build_distinct_groups(nitems, items, mss, &ngroups);
209
210 /*
211 * The maximum number of MCV items to store, based on the statistics
212 * target we computed for the statistics object (from the target set for
213 * the object itself, attributes and the system default). In any case, we
214 * can't keep more groups than we have available.
215 */
216 nitems = stattarget;
217 if (nitems > ngroups)
218 nitems = ngroups;
219
220 /*
221 * Decide how many items to keep in the MCV list. We can't use the same
222 * algorithm as per-column MCV lists, because that only considers the
223 * actual group frequency - but we're primarily interested in how the
224 * actual frequency differs from the base frequency (product of simple
225 * per-column frequencies, as if the columns were independent).
226 *
227 * Using the same algorithm might exclude items that are close to the
228 * "average" frequency of the sample. But that does not say whether the
229 * observed frequency is close to the base frequency or not. We also need
230 * to consider unexpectedly uncommon items (again, compared to the base
231 * frequency), and the single-column algorithm does not have to.
232 *
233 * We simply decide how many items to keep by computing the minimum count
234 * using get_mincount_for_mcv_list() and then keep all items that seem to
235 * be more common than that.
236 */
237 mincount = get_mincount_for_mcv_list(numrows, totalrows);
238
239 /*
240 * Walk the groups until we find the first group with a count below the
241 * mincount threshold (the index of that group is the number of groups we
242 * want to keep).
243 */
244 for (i = 0; i < nitems; i++)
245 {
246 if (groups[i].count < mincount)
247 {
248 nitems = i;
249 break;
250 }
251 }
252
253 /*
254 * At this point, we know the number of items for the MCV list. There
255 * might be none (for uniform distribution with many groups), and in that
256 * case, there will be no MCV list. Otherwise, construct the MCV list.
257 */
258 if (nitems > 0)
259 {
260 int j;
261 SortItem key;
262 MultiSortSupport tmp;
263
264 /* frequencies for values in each attribute */
265 SortItem **freqs;
266 int *nfreqs;
267
268 /* used to search values */
269 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
270 + sizeof(SortSupportData));
271
272 /* compute frequencies for values in each column */
273 nfreqs = (int *) palloc0(sizeof(int) * numattrs);
274 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
275
276 /*
277 * Allocate the MCV list structure, set the global parameters.
278 */
279 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
280 sizeof(MCVItem) * nitems);
281
282 mcvlist->magic = STATS_MCV_MAGIC;
283 mcvlist->type = STATS_MCV_TYPE_BASIC;
284 mcvlist->ndimensions = numattrs;
285 mcvlist->nitems = nitems;
286
287 /* store info about data type OIDs */
288 for (i = 0; i < numattrs; i++)
289 mcvlist->types[i] = data->stats[i]->attrtypid;
290
291 /* Copy the first chunk of groups into the result. */
292 for (i = 0; i < nitems; i++)
293 {
294 /* just point to the proper place in the list */
295 MCVItem *item = &mcvlist->items[i];
296
297 item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
298 item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
299
300 /* copy values for the group */
301 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
302 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
303
304 /* groups should be sorted by frequency in descending order */
305 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
306
307 /* group frequency */
308 item->frequency = (double) groups[i].count / numrows;
309
310 /* base frequency, if the attributes were independent */
311 item->base_frequency = 1.0;
312 for (j = 0; j < numattrs; j++)
313 {
314 SortItem *freq;
315
316 /* single dimension */
317 tmp->ndims = 1;
318 tmp->ssup[0] = mss->ssup[j];
319
320 /* fill search key */
321 key.values = &groups[i].values[j];
322 key.isnull = &groups[i].isnull[j];
323
324 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
325 sizeof(SortItem),
326 multi_sort_compare, tmp);
327
328 item->base_frequency *= ((double) freq->count) / numrows;
329 }
330 }
331
332 pfree(nfreqs);
333 pfree(freqs);
334 }
335
336 pfree(items);
337 pfree(groups);
338
339 return mcvlist;
340}
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
for(;;)
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:347
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:148
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:490
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:424
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
struct MCVItem MCVItem
bool * isnull
Definition: statistics.h:82
Datum * values
Definition: statistics.h:83
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93

References Assert, MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), SortItem::count, data, for(), MCVItem::frequency, get_mincount_for_mcv_list(), i, SortItem::isnull, MCVItem::isnull, MCVList::items, items, j, sort-test::key, MCVList::magic, multi_sort_compare(), MCVList::ndimensions, MultiSortSupportData::ndims, MCVList::nitems, nitems, palloc(), palloc0(), pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, MCVList::type, MCVList::types, SortItem::values, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList * statext_mcv_deserialize ( bytea * data)

Definition at line 996 of file mcv.c.

997{
998 int dim,
999 i;
1000 Size expected_size;
1001 MCVList *mcvlist;
1002 char *raw;
1003 char *ptr;
1004 char *endptr PG_USED_FOR_ASSERTS_ONLY;
1005
1006 int ndims,
1007 nitems;
1008 DimensionInfo *info = NULL;
1009
1010 /* local allocation buffer (used only for deserialization) */
1011 Datum **map = NULL;
1012
1013 /* MCV list */
1014 Size mcvlen;
1015
1016 /* buffer used for the result */
1017 Size datalen;
1018 char *dataptr;
1019 char *valuesptr;
1020 char *isnullptr;
1021
1022 if (data == NULL)
1023 return NULL;
1024
1025 /*
1026 * We can't possibly deserialize a MCV list if there's not even a complete
1027 * header. We need an explicit formula here, because we serialize the
1028 * header fields one by one, so we need to ignore struct alignment.
1029 */
1030 if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1031 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1032 VARSIZE_ANY(data), MinSizeOfMCVList);
1033
1034 /* read the MCV list header */
1035 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1036
1037 /* pointer to the data part (skip the varlena header) */
1038 raw = (char *) data;
1039 ptr = VARDATA_ANY(raw);
1040 endptr = (char *) raw + VARSIZE_ANY(data);
1041
1042 /* get the header and perform further sanity checks */
1043 memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1044 ptr += sizeof(uint32);
1045
1046 memcpy(&mcvlist->type, ptr, sizeof(uint32));
1047 ptr += sizeof(uint32);
1048
1049 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1050 ptr += sizeof(uint32);
1051
1052 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1053 ptr += sizeof(AttrNumber);
1054
1055 if (mcvlist->magic != STATS_MCV_MAGIC)
1056 elog(ERROR, "invalid MCV magic %u (expected %u)",
1057 mcvlist->magic, STATS_MCV_MAGIC);
1058
1059 if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1060 elog(ERROR, "invalid MCV type %u (expected %u)",
1061 mcvlist->type, STATS_MCV_TYPE_BASIC);
1062
1063 if (mcvlist->ndimensions == 0)
1064 elog(ERROR, "invalid zero-length dimension array in MCVList");
1065 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1066 (mcvlist->ndimensions < 0))
1067 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1068 mcvlist->ndimensions);
1069
1070 if (mcvlist->nitems == 0)
1071 elog(ERROR, "invalid zero-length item array in MCVList");
1072 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1073 elog(ERROR, "invalid length (%u) item array in MCVList",
1074 mcvlist->nitems);
1075
1076 nitems = mcvlist->nitems;
1077 ndims = mcvlist->ndimensions;
1078
1079 /*
1080 * Check amount of data including DimensionInfo for all dimensions and
1081 * also the serialized items (including uint16 indexes). Also, walk
1082 * through the dimension information and add it to the sum.
1083 */
1084 expected_size = SizeOfMCVList(ndims, nitems);
1085
1086 /*
1087 * Check that we have at least the dimension and info records, along with
1088 * the items. We don't know the size of the serialized values yet. We need
1089 * to do this check first, before accessing the dimension info.
1090 */
1091 if (VARSIZE_ANY(data) < expected_size)
1092 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1093 VARSIZE_ANY(data), expected_size);
1094
1095 /* Now copy the array of type Oids. */
1096 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1097 ptr += (sizeof(Oid) * ndims);
1098
1099 /* Now it's safe to access the dimension info. */
1100 info = palloc(ndims * sizeof(DimensionInfo));
1101
1102 memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1103 ptr += (ndims * sizeof(DimensionInfo));
1104
1105 /* account for the value arrays */
1106 for (dim = 0; dim < ndims; dim++)
1107 {
1108 /*
1109 * XXX I wonder if we can/should rely on asserts here. Maybe those
1110 * checks should be done every time?
1111 */
1112 Assert(info[dim].nvalues >= 0);
1113 Assert(info[dim].nbytes >= 0);
1114
1115 expected_size += info[dim].nbytes;
1116 }
1117
1118 /*
1119 * Now we know the total expected MCV size, including all the pieces
1120 * (header, dimension info. items and deduplicated data). So do the final
1121 * check on size.
1122 */
1123 if (VARSIZE_ANY(data) != expected_size)
1124 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1125 VARSIZE_ANY(data), expected_size);
1126
1127 /*
1128 * We need an array of Datum values for each dimension, so that we can
1129 * easily translate the uint16 indexes later. We also need a top-level
1130 * array of pointers to those per-dimension arrays.
1131 *
1132 * While allocating the arrays for dimensions, compute how much space we
1133 * need for a copy of the by-ref data, as we can't simply point to the
1134 * original values (it might go away).
1135 */
1136 datalen = 0; /* space for by-ref data */
1137 map = (Datum **) palloc(ndims * sizeof(Datum *));
1138
1139 for (dim = 0; dim < ndims; dim++)
1140 {
1141 map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1142
1143 /* space needed for a copy of data for by-ref types */
1144 datalen += info[dim].nbytes_aligned;
1145 }
1146
1147 /*
1148 * Now resize the MCV list so that the allocation includes all the data.
1149 *
1150 * Allocate space for a copy of the data, as we can't simply reference the
1151 * serialized data - it's not aligned properly, and it may disappear while
1152 * we're still using the MCV list, e.g. due to catcache release.
1153 *
1154 * We do care about alignment here, because we will allocate all the
1155 * pieces at once, but then use pointers to different parts.
1156 */
1157 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1158
1159 /* arrays of values and isnull flags for all MCV items */
1160 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1161 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1162
1163 /* we don't quite need to align this, but it makes some asserts easier */
1164 mcvlen += MAXALIGN(datalen);
1165
1166 /* now resize the deserialized MCV list, and compute pointers to parts */
1167 mcvlist = repalloc(mcvlist, mcvlen);
1168
1169 /* pointer to the beginning of values/isnull arrays */
1170 valuesptr = (char *) mcvlist
1171 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1172
1173 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1174
1175 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1176
1177 /*
1178 * Build mapping (index => value) for translating the serialized data into
1179 * the in-memory representation.
1180 */
1181 for (dim = 0; dim < ndims; dim++)
1182 {
1183 /* remember start position in the input array */
1184 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1185
1186 if (info[dim].typbyval)
1187 {
1188 /* for by-val types we simply copy data into the mapping */
1189 for (i = 0; i < info[dim].nvalues; i++)
1190 {
1191 Datum v = 0;
1192
1193 memcpy(&v, ptr, info[dim].typlen);
1194 ptr += info[dim].typlen;
1195
1196 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1197
1198 /* no under/overflow of input array */
1199 Assert(ptr <= (start + info[dim].nbytes));
1200 }
1201 }
1202 else
1203 {
1204 /* for by-ref types we need to also make a copy of the data */
1205
1206 /* passed by reference, but fixed length (name, tid, ...) */
1207 if (info[dim].typlen > 0)
1208 {
1209 for (i = 0; i < info[dim].nvalues; i++)
1210 {
1211 memcpy(dataptr, ptr, info[dim].typlen);
1212 ptr += info[dim].typlen;
1213
1214 /* just point into the array */
1215 map[dim][i] = PointerGetDatum(dataptr);
1216 dataptr += MAXALIGN(info[dim].typlen);
1217 }
1218 }
1219 else if (info[dim].typlen == -1)
1220 {
1221 /* varlena */
1222 for (i = 0; i < info[dim].nvalues; i++)
1223 {
1224 uint32 len;
1225
1226 /* read the uint32 length */
1227 memcpy(&len, ptr, sizeof(uint32));
1228 ptr += sizeof(uint32);
1229
1230 /* the length is data-only */
1231 SET_VARSIZE(dataptr, len + VARHDRSZ);
1232 memcpy(VARDATA(dataptr), ptr, len);
1233 ptr += len;
1234
1235 /* just point into the array */
1236 map[dim][i] = PointerGetDatum(dataptr);
1237
1238 /* skip to place of the next deserialized value */
1239 dataptr += MAXALIGN(len + VARHDRSZ);
1240 }
1241 }
1242 else if (info[dim].typlen == -2)
1243 {
1244 /* cstring */
1245 for (i = 0; i < info[dim].nvalues; i++)
1246 {
1247 uint32 len;
1248
1249 memcpy(&len, ptr, sizeof(uint32));
1250 ptr += sizeof(uint32);
1251
1252 memcpy(dataptr, ptr, len);
1253 ptr += len;
1254
1255 /* just point into the array */
1256 map[dim][i] = PointerGetDatum(dataptr);
1257 dataptr += MAXALIGN(len);
1258 }
1259 }
1260
1261 /* no under/overflow of input array */
1262 Assert(ptr <= (start + info[dim].nbytes));
1263
1264 /* no overflow of the output mcv value */
1265 Assert(dataptr <= ((char *) mcvlist + mcvlen));
1266 }
1267
1268 /* check we consumed input data for this dimension exactly */
1269 Assert(ptr == (start + info[dim].nbytes));
1270 }
1271
1272 /* we should have also filled the MCV list exactly */
1273 Assert(dataptr == ((char *) mcvlist + mcvlen));
1274
1275 /* deserialize the MCV items and translate the indexes to Datums */
1276 for (i = 0; i < nitems; i++)
1277 {
1278 MCVItem *item = &mcvlist->items[i];
1279
1280 item->values = (Datum *) valuesptr;
1281 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1282
1283 item->isnull = (bool *) isnullptr;
1284 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1285
1286 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1287 ptr += sizeof(bool) * ndims;
1288
1289 memcpy(&item->frequency, ptr, sizeof(double));
1290 ptr += sizeof(double);
1291
1292 memcpy(&item->base_frequency, ptr, sizeof(double));
1293 ptr += sizeof(double);
1294
1295 /* finally translate the indexes (for non-NULL only) */
1296 for (dim = 0; dim < ndims; dim++)
1297 {
1298 uint16 index;
1299
1300 memcpy(&index, ptr, sizeof(uint16));
1301 ptr += sizeof(uint16);
1302
1303 if (item->isnull[dim])
1304 continue;
1305
1306 item->values[dim] = map[dim][index];
1307 }
1308
1309 /* check we're not overflowing the input */
1310 Assert(ptr <= endptr);
1311 }
1312
1313 /* check that we processed all the data */
1314 Assert(ptr == endptr);
1315
1316 /* release the buffers used for mapping */
1317 for (dim = 0; dim < ndims; dim++)
1318 pfree(map[dim]);
1319
1320 pfree(map);
1321
1322 return mcvlist;
1323}
#define MAXALIGN(LEN)
Definition: c.h:765
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:201
uint16_t uint16
Definition: c.h:484
struct DimensionInfo DimensionInfo
#define MinSizeOfMCVList
Definition: mcv.c:59
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:68
unsigned int Oid
Definition: postgres_ext.h:31
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
Definition: type.h:96
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:53

References Assert, MCVItem::base_frequency, data, elog, ERROR, fetch_att(), MCVItem::frequency, i, MCVItem::isnull, MCVList::items, items, len, MCVList::magic, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, nitems, DimensionInfo::nvalues, palloc(), palloc0(), pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), repalloc(), SET_VARSIZE, SizeOfMCVList, start, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, MCVList::type, MCVList::types, DimensionInfo::typlen, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY.

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_serialize()

bytea * statext_mcv_serialize ( MCVList * mcvlist,
VacAttrStats **  stats 
)

Definition at line 621 of file mcv.c.

622{
623 int i;
624 int dim;
625 int ndims = mcvlist->ndimensions;
626
627 SortSupport ssup;
628 DimensionInfo *info;
629
630 Size total_length;
631
632 /* serialized items (indexes into arrays, etc.) */
633 bytea *raw;
634 char *ptr;
635 char *endptr PG_USED_FOR_ASSERTS_ONLY;
636
637 /* values per dimension (and number of non-NULL values) */
638 Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
639 int *counts = (int *) palloc0(sizeof(int) * ndims);
640
641 /*
642 * We'll include some rudimentary information about the attribute types
643 * (length, by-val flag), so that we don't have to look them up while
644 * deserializing the MCV list (we already have the type OID in the
645 * header). This is safe because when changing the type of the attribute
646 * the statistics gets dropped automatically. We need to store the info
647 * about the arrays of deduplicated values anyway.
648 */
649 info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
650
651 /* sort support data for all attributes included in the MCV list */
652 ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
653
654 /* collect and deduplicate values for each dimension (attribute) */
655 for (dim = 0; dim < ndims; dim++)
656 {
657 int ndistinct;
658 TypeCacheEntry *typentry;
659
660 /*
661 * Lookup the LT operator (can't get it from stats extra_data, as we
662 * don't know how to interpret that - scalar vs. array etc.).
663 */
664 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
665
666 /* copy important info about the data type (length, by-value) */
667 info[dim].typlen = stats[dim]->attrtype->typlen;
668 info[dim].typbyval = stats[dim]->attrtype->typbyval;
669
670 /* allocate space for values in the attribute and collect them */
671 values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
672
673 for (i = 0; i < mcvlist->nitems; i++)
674 {
675 /* skip NULL values - we don't need to deduplicate those */
676 if (mcvlist->items[i].isnull[dim])
677 continue;
678
679 /* append the value at the end */
680 values[dim][counts[dim]] = mcvlist->items[i].values[dim];
681 counts[dim] += 1;
682 }
683
684 /* if there are just NULL values in this dimension, we're done */
685 if (counts[dim] == 0)
686 continue;
687
688 /* sort and deduplicate the data */
689 ssup[dim].ssup_cxt = CurrentMemoryContext;
690 ssup[dim].ssup_collation = stats[dim]->attrcollid;
691 ssup[dim].ssup_nulls_first = false;
692
693 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
694
695 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
696 compare_scalars_simple, &ssup[dim]);
697
698 /*
699 * Walk through the array and eliminate duplicate values, but keep the
700 * ordering (so that we can do a binary search later). We know there's
701 * at least one item as (counts[dim] != 0), so we can skip the first
702 * element.
703 */
704 ndistinct = 1; /* number of distinct values */
705 for (i = 1; i < counts[dim]; i++)
706 {
707 /* expect sorted array */
708 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
709
710 /* if the value is the same as the previous one, we can skip it */
711 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
712 continue;
713
714 values[dim][ndistinct] = values[dim][i];
715 ndistinct += 1;
716 }
717
718 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
719 Assert(ndistinct <= PG_UINT16_MAX);
720
721 /*
722 * Store additional info about the attribute - number of deduplicated
723 * values, and also size of the serialized data. For fixed-length data
724 * types this is trivial to compute, for varwidth types we need to
725 * actually walk the array and sum the sizes.
726 */
727 info[dim].nvalues = ndistinct;
728
729 if (info[dim].typbyval) /* by-value data types */
730 {
731 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
732
733 /*
734 * We copy the data into the MCV item during deserialization, so
735 * we don't need to allocate any extra space.
736 */
737 info[dim].nbytes_aligned = 0;
738 }
739 else if (info[dim].typlen > 0) /* fixed-length by-ref */
740 {
741 /*
742 * We don't care about alignment in the serialized data, so we
743 * pack the data as much as possible. But we also track how much
744 * data will be needed after deserialization, and in that case we
745 * need to account for alignment of each item.
746 *
747 * Note: As the items are fixed-length, we could easily compute
748 * this during deserialization, but we do it here anyway.
749 */
750 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
751 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
752 }
753 else if (info[dim].typlen == -1) /* varlena */
754 {
755 info[dim].nbytes = 0;
756 info[dim].nbytes_aligned = 0;
757 for (i = 0; i < info[dim].nvalues; i++)
758 {
759 Size len;
760
761 /*
762 * For varlena values, we detoast the values and store the
763 * length and data separately. We don't bother with alignment
764 * here, which means that during deserialization we need to
765 * copy the fields and only access the copies.
766 */
767 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
768
769 /* serialized length (uint32 length + data) */
770 len = VARSIZE_ANY_EXHDR(values[dim][i]);
771 info[dim].nbytes += sizeof(uint32); /* length */
772 info[dim].nbytes += len; /* value (no header) */
773
774 /*
775 * During deserialization we'll build regular varlena values
776 * with full headers, and we need to align them properly.
777 */
778 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
779 }
780 }
781 else if (info[dim].typlen == -2) /* cstring */
782 {
783 info[dim].nbytes = 0;
784 info[dim].nbytes_aligned = 0;
785 for (i = 0; i < info[dim].nvalues; i++)
786 {
787 Size len;
788
789 /*
790 * cstring is handled similar to varlena - first we store the
791 * length as uint32 and then the data. We don't care about
792 * alignment, which means that during deserialization we need
793 * to copy the fields and only access the copies.
794 */
795
796 /* c-strings include terminator, so +1 byte */
797 len = strlen(DatumGetCString(values[dim][i])) + 1;
798 info[dim].nbytes += sizeof(uint32); /* length */
799 info[dim].nbytes += len; /* value */
800
801 /* space needed for properly aligned deserialized copies */
802 info[dim].nbytes_aligned += MAXALIGN(len);
803 }
804 }
805
806 /* we know (count>0) so there must be some data */
807 Assert(info[dim].nbytes > 0);
808 }
809
810 /*
811 * Now we can finally compute how much space we'll actually need for the
812 * whole serialized MCV list (varlena header, MCV header, dimension info
813 * for each attribute, deduplicated values and items).
814 */
815 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
816 + sizeof(AttrNumber) /* ndimensions */
817 + (ndims * sizeof(Oid)); /* attribute types */
818
819 /* dimension info */
820 total_length += ndims * sizeof(DimensionInfo);
821
822 /* add space for the arrays of deduplicated values */
823 for (i = 0; i < ndims; i++)
824 total_length += info[i].nbytes;
825
826 /*
827 * And finally account for the items (those are fixed-length, thanks to
828 * replacing values with uint16 indexes into the deduplicated arrays).
829 */
830 total_length += mcvlist->nitems * ITEM_SIZE(dim);
831
832 /*
833 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
834 * we set them to zero to make the result more compressible).
835 */
836 raw = (bytea *) palloc0(VARHDRSZ + total_length);
837 SET_VARSIZE(raw, VARHDRSZ + total_length);
838
839 ptr = VARDATA(raw);
840 endptr = ptr + total_length;
841
842 /* copy the MCV list header fields, one by one */
843 memcpy(ptr, &mcvlist->magic, sizeof(uint32));
844 ptr += sizeof(uint32);
845
846 memcpy(ptr, &mcvlist->type, sizeof(uint32));
847 ptr += sizeof(uint32);
848
849 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
850 ptr += sizeof(uint32);
851
852 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
853 ptr += sizeof(AttrNumber);
854
855 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
856 ptr += (sizeof(Oid) * ndims);
857
858 /* store information about the attributes (data amounts, ...) */
859 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
860 ptr += sizeof(DimensionInfo) * ndims;
861
862 /* Copy the deduplicated values for all attributes to the output. */
863 for (dim = 0; dim < ndims; dim++)
864 {
865 /* remember the starting point for Asserts later */
866 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
867
868 for (i = 0; i < info[dim].nvalues; i++)
869 {
870 Datum value = values[dim][i];
871
872 if (info[dim].typbyval) /* passed by value */
873 {
874 Datum tmp;
875
876 /*
877 * For byval types, we need to copy just the significant bytes
878 * - we can't use memcpy directly, as that assumes
879 * little-endian behavior. store_att_byval does almost what
880 * we need, but it requires a properly aligned buffer - the
881 * output buffer does not guarantee that. So we simply use a
882 * local Datum variable (which guarantees proper alignment),
883 * and then copy the value from it.
884 */
885 store_att_byval(&tmp, value, info[dim].typlen);
886
887 memcpy(ptr, &tmp, info[dim].typlen);
888 ptr += info[dim].typlen;
889 }
890 else if (info[dim].typlen > 0) /* passed by reference */
891 {
892 /* no special alignment needed, treated as char array */
893 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
894 ptr += info[dim].typlen;
895 }
896 else if (info[dim].typlen == -1) /* varlena */
897 {
898 uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
899
900 /* copy the length */
901 memcpy(ptr, &len, sizeof(uint32));
902 ptr += sizeof(uint32);
903
904 /* data from the varlena value (without the header) */
905 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
906 ptr += len;
907 }
908 else if (info[dim].typlen == -2) /* cstring */
909 {
910 uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
911
912 /* copy the length */
913 memcpy(ptr, &len, sizeof(uint32));
914 ptr += sizeof(uint32);
915
916 /* value */
917 memcpy(ptr, DatumGetCString(value), len);
918 ptr += len;
919 }
920
921 /* no underflows or overflows */
922 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
923 }
924
925 /* we should get exactly nbytes of data for this dimension */
926 Assert((ptr - start) == info[dim].nbytes);
927 }
928
929 /* Serialize the items, with uint16 indexes instead of the values. */
930 for (i = 0; i < mcvlist->nitems; i++)
931 {
932 MCVItem *mcvitem = &mcvlist->items[i];
933
934 /* don't write beyond the allocated space */
935 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
936
937 /* copy NULL and frequency flags into the serialized MCV */
938 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
939 ptr += sizeof(bool) * ndims;
940
941 memcpy(ptr, &mcvitem->frequency, sizeof(double));
942 ptr += sizeof(double);
943
944 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
945 ptr += sizeof(double);
946
947 /* store the indexes last */
948 for (dim = 0; dim < ndims; dim++)
949 {
950 uint16 index = 0;
951 Datum *value;
952
953 /* do the lookup only for non-NULL values */
954 if (!mcvitem->isnull[dim])
955 {
956 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
957 info[dim].nvalues, sizeof(Datum),
958 compare_scalars_simple, &ssup[dim]);
959
960 Assert(value != NULL); /* serialization or deduplication
961 * error */
962
963 /* compute index within the deduplicated array */
964 index = (uint16) (value - values[dim]);
965
966 /* check the index is within expected bounds */
967 Assert(index < info[dim].nvalues);
968 }
969
970 /* copy the index into the serialized MCV */
971 memcpy(ptr, &index, sizeof(uint16));
972 ptr += sizeof(uint16);
973 }
974
975 /* make sure we don't overflow the allocated value */
976 Assert(ptr <= endptr);
977 }
978
979 /* at this point we expect to match the total_length exactly */
980 Assert(ptr == endptr);
981
982 pfree(values);
983 pfree(counts);
984
985 return raw;
986}
#define PG_UINT16_MAX
Definition: c.h:541
int compare_scalars_simple(const void *a, const void *b, void *arg)
#define ITEM_SIZE(ndims)
Definition: mcv.c:53
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
Form_pg_type attrtype
Definition: vacuum.h:128
Oid attrcollid
Definition: vacuum.h:129
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition: tupmacs.h:211
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_LT_OPR
Definition: typcache.h:138

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, MCVItem::base_frequency, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString(), DatumGetPointer(), MCVItem::frequency, i, MCVItem::isnull, ITEM_SIZE, MCVList::items, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MCVList::magic, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, MCVList::ndimensions, MCVList::nitems, DimensionInfo::nvalues, palloc0(), pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), PrepareSortSupportFromOrderingOp(), qsort_interruptible(), SET_VARSIZE, SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, start, store_att_byval(), DimensionInfo::typbyval, MCVList::type, TYPECACHE_LT_OPR, MCVList::types, DimensionInfo::typlen, value, values, MCVItem::values, VARDATA, VARDATA_ANY, VARHDRSZ, and VARSIZE_ANY_EXHDR.

Referenced by statext_store().

◆ statext_ndistinct_build()

MVNDistinct * statext_ndistinct_build ( double  totalrows,
StatsBuildData *  data 
)

Definition at line 88 of file mvdistinct.c.

89{
90 MVNDistinct *result;
91 int k;
92 int itemcnt;
93 int numattrs = data->nattnums;
94 int numcombs = num_combinations(numattrs);
95
96 result = palloc(offsetof(MVNDistinct, items) +
97 numcombs * sizeof(MVNDistinctItem));
98 result->magic = STATS_NDISTINCT_MAGIC;
99 result->type = STATS_NDISTINCT_TYPE_BASIC;
100 result->nitems = numcombs;
101
102 itemcnt = 0;
103 for (k = 2; k <= numattrs; k++)
104 {
105 int *combination;
106 CombinationGenerator *generator;
107
108 /* generate combinations of K out of N elements */
109 generator = generator_init(numattrs, k);
110
111 while ((combination = generator_next(generator)))
112 {
113 MVNDistinctItem *item = &result->items[itemcnt];
114 int j;
115
116 item->attributes = palloc(sizeof(AttrNumber) * k);
117 item->nattributes = k;
118
119 /* translate the indexes to attnums */
120 for (j = 0; j < k; j++)
121 {
122 item->attributes[j] = data->attnums[combination[j]];
123
124 Assert(AttributeNumberIsValid(item->attributes[j]));
125 }
126
127 item->ndistinct =
128 ndistinct_for_combination(totalrows, data, k, combination);
129
130 itemcnt++;
131 Assert(itemcnt <= result->nitems);
132 }
133
134 generator_free(generator);
135 }
136
137 /* must consume exactly the whole output array */
138 Assert(itemcnt == result->nitems);
139
140 return result;
141}
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition: mvdistinct.c:425
static int num_combinations(int n)
Definition: mvdistinct.c:575
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:642
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:589
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:627
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
uint32 type
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39

References Assert, AttributeNumberIsValid, MVNDistinctItem::attributes, data, generator_free(), generator_init(), generator_next(), MVNDistinct::items, items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct * statext_ndistinct_deserialize ( bytea *  data )

Definition at line 250 of file mvdistinct.c.

251{
252 int i;
253 Size minimum_size;
254 MVNDistinct ndist;
255 MVNDistinct *ndistinct;
256 char *tmp;
257
258 if (data == NULL)
259 return NULL;
260
261 /* we expect at least the basic fields of MVNDistinct struct */
262 if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
263 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
264 VARSIZE_ANY_EXHDR(data), SizeOfHeader);
265
266 /* initialize pointer to the data part (skip the varlena header) */
267 tmp = VARDATA_ANY(data);
268
269 /* read the header fields and perform basic sanity checks */
270 memcpy(&ndist.magic, tmp, sizeof(uint32));
271 tmp += sizeof(uint32);
272 memcpy(&ndist.type, tmp, sizeof(uint32));
273 tmp += sizeof(uint32);
274 memcpy(&ndist.nitems, tmp, sizeof(uint32));
275 tmp += sizeof(uint32);
276
277 if (ndist.magic != STATS_NDISTINCT_MAGIC)
278 elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
279 ndist.magic, STATS_NDISTINCT_MAGIC);
280 if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
281 elog(ERROR, "invalid ndistinct type %d (expected %d)",
282 ndist.type, STATS_NDISTINCT_TYPE_BASIC);
283 if (ndist.nitems == 0)
284 elog(ERROR, "invalid zero-length item array in MVNDistinct");
285
286 /* what minimum bytea size do we expect for those parameters */
287 minimum_size = MinSizeOfItems(ndist.nitems);
288 if (VARSIZE_ANY_EXHDR(data) < minimum_size)
289 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
290 VARSIZE_ANY_EXHDR(data), minimum_size);
291
292 /*
293 * Allocate space for the ndistinct items (no space for each item's
294 * attnos: those live in bitmapsets allocated separately)
295 */
296 ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
297 (ndist.nitems * sizeof(MVNDistinctItem)));
298 ndistinct->magic = ndist.magic;
299 ndistinct->type = ndist.type;
300 ndistinct->nitems = ndist.nitems;
301
302 for (i = 0; i < ndistinct->nitems; i++)
303 {
304 MVNDistinctItem *item = &ndistinct->items[i];
305
306 /* ndistinct value */
307 memcpy(&item->ndistinct, tmp, sizeof(double));
308 tmp += sizeof(double);
309
310 /* number of attributes */
311 memcpy(&item->nattributes, tmp, sizeof(int));
312 tmp += sizeof(int);
313 Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
314
315 item->attributes
316 = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
317
318 memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
319 tmp += sizeof(AttrNumber) * item->nattributes;
320
321 /* still within the bytea */
322 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
323 }
324
325 /* we should have consumed the whole bytea exactly */
326 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
327
328 return ndistinct;
329}
#define SizeOfHeader
Definition: mvdistinct.c:45
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:55

References Assert, MVNDistinctItem::attributes, data, elog, ERROR, i, MVNDistinct::items, items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY, VARSIZE_ANY, and VARSIZE_ANY_EXHDR.

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_serialize()

bytea * statext_ndistinct_serialize ( MVNDistinct *  ndistinct )

Definition at line 179 of file mvdistinct.c.

180{
181 int i;
182 bytea *output;
183 char *tmp;
184 Size len;
185
186 Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
187 Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
188
189 /*
190 * Base size is size of scalar fields in the struct, plus one base struct
191 * for each item, including number of items for each.
192 */
193 len = VARHDRSZ + SizeOfHeader;
194
195 /* and also include space for the actual attribute numbers */
196 for (i = 0; i < ndistinct->nitems; i++)
197 {
198 int nmembers;
199
200 nmembers = ndistinct->items[i].nattributes;
201 Assert(nmembers >= 2);
202
203 len += SizeOfItem(nmembers);
204 }
205
206 output = (bytea *) palloc(len);
207 SET_VARSIZE(output, len);
208
209 tmp = VARDATA(output);
210
211 /* Store the base struct values (magic, type, nitems) */
212 memcpy(tmp, &ndistinct->magic, sizeof(uint32));
213 tmp += sizeof(uint32);
214 memcpy(tmp, &ndistinct->type, sizeof(uint32));
215 tmp += sizeof(uint32);
216 memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
217 tmp += sizeof(uint32);
218
219 /*
220 * store number of attributes and attribute numbers for each entry
221 */
222 for (i = 0; i < ndistinct->nitems; i++)
223 {
224 MVNDistinctItem item = ndistinct->items[i];
225 int nmembers = item.nattributes;
226
227 memcpy(tmp, &item.ndistinct, sizeof(double));
228 tmp += sizeof(double);
229 memcpy(tmp, &nmembers, sizeof(int));
230 tmp += sizeof(int);
231
232 memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
233 tmp += nmembers * sizeof(AttrNumber);
234
235 /* protect against overflows */
236 Assert(tmp <= ((char *) output + len));
237 }
238
239 /* check we used exactly the expected space */
240 Assert(tmp == ((char *) output + len));
241
242 return output;
243}
#define SizeOfItem(natts)
Definition: mvdistinct.c:48

References Assert, MVNDistinctItem::attributes, i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE, SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA, and VARHDRSZ.

Referenced by statext_store().