PostgreSQL Source Code git master
Loading...
Searching...
No Matches
extended_stats_internal.h File Reference
Include dependency graph for extended_stats_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  StdAnalyzeData
 
struct  ScalarItem
 
struct  DimensionInfo
 
struct  MultiSortSupportData
 
struct  SortItem
 
struct  StatsBuildData
 

Typedefs

typedef struct DimensionInfo DimensionInfo
 
typedef struct MultiSortSupportData MultiSortSupportData
 
typedef MultiSortSupportData * MultiSortSupport
 
typedef struct SortItem SortItem
 
typedef struct StatsBuildData StatsBuildData
 

Functions

MVNDistinct * statext_ndistinct_build (double totalrows, StatsBuildData *data)
 
bytea * statext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinct * statext_ndistinct_deserialize (bytea *data)
 
bool statext_ndistinct_validate (const MVNDistinct *ndistinct, const int2vector *stxkeys, int numexprs, int elevel)
 
void statext_ndistinct_free (MVNDistinct *ndistinct)
 
MVDependencies * statext_dependencies_build (StatsBuildData *data)
 
bytea * statext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependencies * statext_dependencies_deserialize (bytea *data)
 
bool statext_dependencies_validate (const MVDependencies *dependencies, const int2vector *stxkeys, int numexprs, int elevel)
 
void statext_dependencies_free (MVDependencies *dependencies)
 
MCVList * statext_mcv_build (StatsBuildData *data, double totalrows, int stattarget)
 
bytea * statext_mcv_serialize (MCVList *mcvlist, VacAttrStats **stats)
 
MCVList * statext_mcv_deserialize (bytea *data)
 
void statext_mcv_free (MCVList *mcvlist)
 
Datum statext_mcv_import (int elevel, int numattrs, Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, int nitems, Datum *mcv_elems, bool *mcv_nulls, float8 *freqs, float8 *base_freqs)
 
MultiSortSupport multi_sort_init (int ndims)
 
void multi_sort_add_dimension (MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
 
int multi_sort_compare (const void *a, const void *b, void *arg)
 
int multi_sort_compare_dim (int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int multi_sort_compare_dims (int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
 
int compare_scalars_simple (const void *a, const void *b, void *arg)
 
int compare_datums_simple (Datum a, Datum b, SortSupport ssup)
 
AttrNumber * build_attnums_array (Bitmapset *attrs, int nexprs, int *numattrs)
 
SortItem * build_sorted_items (StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
 
bool examine_opclause_args (List *args, Node **exprp, Const **cstp, bool *expronleftp)
 
Selectivity mcv_combine_selectivities (Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
 
Selectivity mcv_clauselist_selectivity (PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
 
Selectivity mcv_clause_selectivity_or (PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
 

Typedef Documentation

◆ DimensionInfo

◆ MultiSortSupport

◆ MultiSortSupportData

◆ SortItem

◆ StatsBuildData

Function Documentation

◆ build_attnums_array()

AttrNumber * build_attnums_array ( Bitmapset * attrs,
int  nexprs,
int * numattrs 
)
extern

Definition at line 938 of file extended_stats.c.

939{
940 int i,
941 j;
942 AttrNumber *attnums;
943 int num = bms_num_members(attrs);
944
945 if (numattrs)
946 *numattrs = num;
947
948 /* build attnums from the bitmapset */
949 attnums = palloc_array(AttrNumber, num);
950 i = 0;
951 j = -1;
952 while ((j = bms_next_member(attrs, j)) >= 0)
953 {
954 int attnum = (j - nexprs);
955
956 /*
957 * Make sure the bitmap contains only user-defined attributes. As
958 * bitmaps can't contain negative values, this can be violated in two
959 * ways. Firstly, the bitmap might contain 0 as a member, and secondly
960 * the integer value might be larger than MaxAttrNumber.
961 */
962 Assert(AttributeNumberIsValid(attnum));
963 Assert(attnum <= MaxAttrNumber);
964 Assert(attnum >= (-nexprs));
965
966 attnums[i++] = (AttrNumber) attnum;
967
968 /* protect against overflows */
969 Assert(i <= num);
970 }
971
972 return attnums;
973}
int16 AttrNumber
Definition attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition attnum.h:34
#define MaxAttrNumber
Definition attnum.h:24
int bms_next_member(const Bitmapset *a, int prevbit)
Definition bitmapset.c:1305
int bms_num_members(const Bitmapset *a)
Definition bitmapset.c:750
#define Assert(condition)
Definition c.h:873
#define palloc_array(type, count)
Definition fe_memutils.h:76
int j
Definition isn.c:78
int i
Definition isn.c:77
int16 attnum
static int fb(int x)

References Assert, attnum, AttributeNumberIsValid, bms_next_member(), bms_num_members(), fb(), i, j, MaxAttrNumber, and palloc_array.

◆ build_sorted_items()

SortItem * build_sorted_items ( StatsBuildData * data,
int * nitems,
MultiSortSupport  mss,
int  numattrs,
AttrNumber * attnums 
)
extern

Definition at line 983 of file extended_stats.c.

986{
987 int i,
988 j,
989 nrows;
990 int nvalues = data->numrows * numattrs;
991 Size len;
992 SortItem *items;
993 Datum *values;
994 bool *isnull;
995 char *ptr;
996 int *typlen;
997
998 /* Compute the total amount of memory we need (both items and values). */
999 len = MAXALIGN(data->numrows * sizeof(SortItem)) +
1000 nvalues * (sizeof(Datum) + sizeof(bool));
1001
1002 /* Allocate the memory and split it into the pieces. */
1003 ptr = palloc0(len);
1004
1005 /* items to sort */
1006 items = (SortItem *) ptr;
1007 /* MAXALIGN ensures that the following Datums are suitably aligned */
1008 ptr += MAXALIGN(data->numrows * sizeof(SortItem));
1009
1010 /* values and null flags */
1011 values = (Datum *) ptr;
1012 ptr += nvalues * sizeof(Datum);
1013
1014 isnull = (bool *) ptr;
1015 ptr += nvalues * sizeof(bool);
1016
1017 /* make sure we consumed the whole buffer exactly */
1018 Assert((ptr - (char *) items) == len);
1019
1020 /* fix the pointers to Datum and bool arrays */
1021 nrows = 0;
1022 for (i = 0; i < data->numrows; i++)
1023 {
1024 items[nrows].values = &values[nrows * numattrs];
1025 items[nrows].isnull = &isnull[nrows * numattrs];
1026
1027 nrows++;
1028 }
1029
1030 /* build a local cache of typlen for all attributes */
1031 typlen = palloc_array(int, data->nattnums);
1032 for (i = 0; i < data->nattnums; i++)
1033 typlen[i] = get_typlen(data->stats[i]->attrtypid);
1034
1035 nrows = 0;
1036 for (i = 0; i < data->numrows; i++)
1037 {
1038 bool toowide = false;
1039
1040 /* load the values/null flags from sample rows */
1041 for (j = 0; j < numattrs; j++)
1042 {
1043 Datum value;
1044 bool isnull;
1045 int attlen;
1046 AttrNumber attnum = attnums[j];
1047
1048 int idx;
1049
1050 /* match attnum to the pre-calculated data */
1051 for (idx = 0; idx < data->nattnums; idx++)
1052 {
1053 if (attnum == data->attnums[idx])
1054 break;
1055 }
1056
1057 Assert(idx < data->nattnums);
1058
1059 value = data->values[idx][i];
1060 isnull = data->nulls[idx][i];
1061 attlen = typlen[idx];
1062
1063 /*
1064 * If this is a varlena value, check if it's too wide and if yes
1065 * then skip the whole item. Otherwise detoast the value.
1066 *
1067 * XXX It may happen that we've already detoasted some preceding
1068 * values for the current item. We don't bother to cleanup those
1069 * on the assumption that those are small (below WIDTH_THRESHOLD)
1070 * and will be discarded at the end of analyze.
1071 */
1072 if ((!isnull) && (attlen == -1))
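1073 {
1074 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
1075 {
1076 toowide = true;
1077 break;
1078 }
1079
1080 value = PointerGetDatum(PG_DETOAST_DATUM(value));
1081 }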
1082
1083 items[nrows].values[j] = value;
1084 items[nrows].isnull[j] = isnull;
1085 }
1086
1087 if (toowide)
1088 continue;
1089
1090 nrows++;
1091 }
1092
1093 /* store the actual number of items (ignoring the too-wide ones) */
1094 *nitems = nrows;
1095
1096 /* all items were too wide */
1097 if (nrows == 0)
1098 {
1099 /* everything is allocated as a single chunk */
1100 pfree(items);
1101 return NULL;
1102 }
1103
1104 /* do the sort, using the multi-sort */
1105 qsort_interruptible(items, nrows, sizeof(SortItem),
1106 multi_sort_compare, mss);
1107
1108 return items;
1109}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
static Datum values[MAXATTR]
Definition bootstrap.c:155
#define MAXALIGN(LEN)
Definition c.h:826
size_t Size
Definition c.h:619
Size toast_raw_datum_size(Datum value)
Definition detoast.c:545
#define WIDTH_THRESHOLD
int multi_sort_compare(const void *a, const void *b, void *arg)
#define PG_DETOAST_DATUM(datum)
Definition fmgr.h:240
#define nitems(x)
Definition indent.h:31
static struct @172 value
int16 get_typlen(Oid typid)
Definition lsyscache.c:2347
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
int16 attlen
const void size_t len
const void * data
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
uint64_t Datum
Definition postgres.h:70
static ItemArray items

References Assert, attlen, attnum, data, fb(), get_typlen(), i, idx(), items, j, len, MAXALIGN, multi_sort_compare(), nitems, palloc0(), palloc_array, pfree(), PG_DETOAST_DATUM, PointerGetDatum(), qsort_interruptible(), toast_raw_datum_size(), value, values, and WIDTH_THRESHOLD.

Referenced by dependency_degree(), and statext_mcv_build().

◆ compare_datums_simple()

int compare_datums_simple ( Datum  a,
Datum  b,
SortSupport  ssup 
)
extern

Definition at line 924 of file extended_stats.c.

925{
926 return ApplySortComparator(a, false, b, false, ssup);
927}
int b
Definition isn.c:74
int a
Definition isn.c:73
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)

References a, ApplySortComparator(), and b.

Referenced by compare_scalars_simple(), and statext_mcv_serialize().

◆ compare_scalars_simple()

int compare_scalars_simple ( const void * a,
const void * b,
void * arg 
)
extern

Definition at line 916 of file extended_stats.c.

917{
918 return compare_datums_simple(*(const Datum *) a,
919 *(const Datum *) b,
920 (SortSupport) arg);
921}
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
void * arg

References a, arg, b, and compare_datums_simple().

Referenced by statext_mcv_serialize().

◆ examine_opclause_args()

bool examine_opclause_args ( List * args,
Node **  exprp,
Const **  cstp,
bool * expronleftp 
)
extern

Definition at line 2032 of file extended_stats.c.

2034{
2035 Node *expr;
2036 Const *cst;
2037 bool expronleft;
2038 Node *leftop,
2039 *rightop;
2040
2041 /* enforced by statext_is_compatible_clause_internal */
2042 Assert(list_length(args) == 2);
2043
2044 leftop = linitial(args);
2045 rightop = lsecond(args);
2046
2047 /* strip RelabelType from either side of the expression */
2048 if (IsA(leftop, RelabelType))
2049 leftop = (Node *) ((RelabelType *) leftop)->arg;
2050
2051 if (IsA(rightop, RelabelType))
2052 rightop = (Node *) ((RelabelType *) rightop)->arg;
2053
2054 if (IsA(rightop, Const))
2055 {
2056 expr = leftop;
2057 cst = (Const *) rightop;
2058 expronleft = true;
2059 }
2060 else if (IsA(leftop, Const))
2061 {
2062 expr = rightop;
2063 cst = (Const *) leftop;
2064 expronleft = false;
2065 }
2066 else
2067 return false;
2068
2069 /* return pointers to the extracted parts if requested */
2070 if (exprp)
2071 *exprp = expr;
2072
2073 if (cstp)
2074 *cstp = cst;
2075
2076 if (expronleftp)
2077 *expronleftp = expronleft;
2078
2079 return true;
2080}
#define IsA(nodeptr, _type_)
Definition nodes.h:164
static int list_length(const List *l)
Definition pg_list.h:152
#define linitial(l)
Definition pg_list.h:178
#define lsecond(l)
Definition pg_list.h:183
Definition nodes.h:135

References arg, Assert, fb(), IsA, linitial, list_length(), and lsecond.

Referenced by mcv_get_match_bitmap(), and statext_is_compatible_clause_internal().

◆ mcv_clause_selectivity_or()

Selectivity mcv_clause_selectivity_or ( PlannerInfo * root,
StatisticExtInfo * stat,
MCVList * mcv,
Node * clause,
bool **  or_matches,
Selectivity * basesel,
Selectivity * overlap_mcvsel,
Selectivity * overlap_basesel,
Selectivity * totalsel 
)
extern

Definition at line 2124 of file mcv.c.

2128{
2129 Selectivity s = 0.0;
2130 bool *new_matches;
2131 int i;
2132
2133 /* build the OR-matches bitmap, if not built already */
2134 if (*or_matches == NULL)
2135 *or_matches = palloc0_array(bool, mcv->nitems);
2136
2137 /* build the match bitmap for the new clause */
2138 new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2139 stat->exprs, mcv, false);
2140
2141 /*
2142 * Sum the frequencies for all the MCV items matching this clause and also
2143 * those matching the overlap between this clause and any of the preceding
2144 * clauses as described above.
2145 */
2146 *basesel = 0.0;
2147 *overlap_mcvsel = 0.0;
2148 *overlap_basesel = 0.0;
2149 *totalsel = 0.0;
2150 for (i = 0; i < mcv->nitems; i++)
2151 {
2152 *totalsel += mcv->items[i].frequency;
2153
2154 if (new_matches[i])
2155 {
2156 s += mcv->items[i].frequency;
2157 *basesel += mcv->items[i].base_frequency;
2158
2159 if ((*or_matches)[i])
2160 {
2161 *overlap_mcvsel += mcv->items[i].frequency;
2162 *overlap_basesel += mcv->items[i].base_frequency;
2163 }
2164 }
2165
2166 /* update the OR-matches bitmap for the next clause */
2167 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2168 }
2169
2170 pfree(new_matches);
2171
2172 return s;
2173}
#define palloc0_array(type, count)
Definition fe_memutils.h:77
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition mcv.c:1597
double Selectivity
Definition nodes.h:260
#define list_make1(x1)
Definition pg_list.h:212
tree ctl root
Definition radixtree.h:1857
double frequency
Definition statistics.h:80
double base_frequency
Definition statistics.h:81
uint32 nitems
Definition statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:94

References MCVItem::base_frequency, fb(), MCVItem::frequency, i, MCVList::items, list_make1, mcv_get_match_bitmap(), MCVList::nitems, palloc0_array, pfree(), and root.

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_clauselist_selectivity()

Selectivity mcv_clauselist_selectivity ( PlannerInfo * root,
StatisticExtInfo * stat,
List * clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo * sjinfo,
RelOptInfo * rel,
Selectivity * basesel,
Selectivity * totalsel 
)
extern

Definition at line 2046 of file mcv.c.

2051{
2052 int i;
2053 MCVList *mcv;
2054 Selectivity s = 0.0;
2055 RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2056
2057 /* match/mismatch bitmap for each MCV item */
2058 bool *matches = NULL;
2059
2060 /* load the MCV list stored in the statistics object */
2061 mcv = statext_mcv_load(stat->statOid, rte->inh);
2062
2063 /* build a match bitmap for the clauses */
2064 matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2065 mcv, false);
2066
2067 /* sum frequencies for all the matching MCV items */
2068 *basesel = 0.0;
2069 *totalsel = 0.0;
2070 for (i = 0; i < mcv->nitems; i++)
2071 {
2072 *totalsel += mcv->items[i].frequency;
2073
2074 if (matches[i] != false)
2075 {
2076 *basesel += mcv->items[i].base_frequency;
2077 s += mcv->items[i].frequency;
2078 }
2079 }
2080
2081 return s;
2082}
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition mcv.c:556
Index relid
Definition pathnodes.h:1051

References MCVItem::base_frequency, fb(), MCVItem::frequency, i, MCVList::items, mcv_get_match_bitmap(), MCVList::nitems, RelOptInfo::relid, root, and statext_mcv_load().

Referenced by statext_mcv_clauselist_selectivity().

◆ mcv_combine_selectivities()

Selectivity mcv_combine_selectivities ( Selectivity  simple_sel,
Selectivity  mcv_sel,
Selectivity  mcv_basesel,
Selectivity  mcv_totalsel 
)
extern

Definition at line 2004 of file mcv.c.

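2008{
2009 Selectivity other_sel;
2010 Selectivity sel;
2011
2012 /* estimated selectivity of values not covered by MCV matches */
2013 other_sel = simple_sel - mcv_basesel;
2014 CLAMP_PROBABILITY(other_sel);
2015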
2016 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2017 if (other_sel > 1.0 - mcv_totalsel)
2018 other_sel = 1.0 - mcv_totalsel;
2019
2020 /* overall selectivity is the sum of the MCV and non-MCV parts */
2021 sel = mcv_sel + other_sel;
2022 CLAMP_PROBABILITY(sel);
2023
2024 return sel;
2025}
#define CLAMP_PROBABILITY(p)
Definition selfuncs.h:63

References CLAMP_PROBABILITY, and fb().

Referenced by statext_mcv_clauselist_selectivity().

◆ multi_sort_add_dimension()

void multi_sort_add_dimension ( MultiSortSupport  mss,
int  sortdim,
Oid  oper,
Oid  collation 
)
extern

Definition at line 848 of file extended_stats.c.

850{
851 SortSupport ssup = &mss->ssup[sortdim];
852
853 ssup->ssup_cxt = CurrentMemoryContext;
854 ssup->ssup_collation = collation;
855 ssup->ssup_nulls_first = false;
856
857 PrepareSortSupportFromOrderingOp(oper, ssup);
858}
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition parse_oper.c:371
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
MemoryContext ssup_cxt
Definition sortsupport.h:66

References CurrentMemoryContext, fb(), oper(), PrepareSortSupportFromOrderingOp(), SortSupportData::ssup_collation, SortSupportData::ssup_cxt, and SortSupportData::ssup_nulls_first.

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ multi_sort_compare()

int multi_sort_compare ( const void * a,
const void * b,
void * arg 
)
extern

Definition at line 862 of file extended_stats.c.

863{
864 MultiSortSupport mss = (MultiSortSupport) arg;
865 const SortItem *ia = a;
866 const SortItem *ib = b;
867 int i;
868
869 for (i = 0; i < mss->ndims; i++)
870 {
871 int compare;
872
873 compare = ApplySortComparator(ia->values[i], ia->isnull[i],
874 ib->values[i], ib->isnull[i],
875 &mss->ssup[i]);
876
877 if (compare != 0)
878 return compare;
879 }
880
881 /* equal by default */
882 return 0;
883}
MultiSortSupportData * MultiSortSupport
static int compare(const void *arg1, const void *arg2)
Definition geqo_pool.c:144

References a, ApplySortComparator(), arg, b, compare(), fb(), and i.

Referenced by build_distinct_groups(), build_sorted_items(), count_distinct_groups(), ndistinct_for_combination(), and statext_mcv_build().

◆ multi_sort_compare_dim()

int multi_sort_compare_dim ( int  dim,
const SortItem * a,
const SortItem * b,
MultiSortSupport  mss 
)
extern

Definition at line 887 of file extended_stats.c.

889{
890 return ApplySortComparator(a->values[dim], a->isnull[dim],
891 b->values[dim], b->isnull[dim],
892 &mss->ssup[dim]);
893}

References a, ApplySortComparator(), b, and fb().

Referenced by dependency_degree().

◆ multi_sort_compare_dims()

int multi_sort_compare_dims ( int  start,
int  end,
const SortItem * a,
const SortItem * b,
MultiSortSupport  mss 
)
extern

Definition at line 896 of file extended_stats.c.

899{
900 int dim;
901
902 for (dim = start; dim <= end; dim++)
903 {
904 int r = ApplySortComparator(a->values[dim], a->isnull[dim],
905 b->values[dim], b->isnull[dim],
906 &mss->ssup[dim]);
907
908 if (r != 0)
909 return r;
910 }
911
912 return 0;
913}
return str start

References a, ApplySortComparator(), b, fb(), and start.

Referenced by dependency_degree().

◆ multi_sort_init()

MultiSortSupport multi_sort_init ( int  ndims)
extern

Definition at line 829 of file extended_stats.c.

830{
831 MultiSortSupport mss;
832
833 Assert(ndims >= 2);
834
835 mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
836 + sizeof(SortSupportData) * ndims);
837
838 mss->ndims = ndims;
839
840 return mss;
841}

References Assert, fb(), and palloc0().

Referenced by build_mss(), dependency_degree(), and ndistinct_for_combination().

◆ statext_dependencies_build()

MVDependencies * statext_dependencies_build ( StatsBuildData * data)
extern

Definition at line 342 of file dependencies.c.

343{
344 int i,
345 k;
346
347 /* result */
348 MVDependencies *dependencies = NULL;
349 MemoryContext cxt;
350
351 Assert(data->nattnums >= 2);
352
353 /* tracks memory allocated by dependency_degree calls */
355 "dependency_degree cxt",
357
358 /*
359 * We'll try build functional dependencies starting from the smallest ones
360 * covering just 2 columns, to the largest ones, covering all columns
361 * included in the statistics object. We start from the smallest ones
362 * because we want to be able to skip already implied ones.
363 */
364 for (k = 2; k <= data->nattnums; k++)
365 {
366 AttrNumber *dependency; /* array with k elements */
367
368 /* prepare a DependencyGenerator of variation */
370
371 /* generate all possible variations of k values (out of n) */
372 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 {
374 double degree;
375 MVDependency *d;
377
378 /* release memory used by dependency degree calculation */
380
381 /* compute how valid the dependency seems */
382 degree = dependency_degree(data, k, dependency);
383
386
387 /*
388 * if the dependency seems entirely invalid, don't store it
389 */
390 if (degree == 0.0)
391 continue;
392
393 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 + k * sizeof(AttrNumber));
395
396 /* copy the dependency (and keep the indexes into stxkeys) */
397 d->degree = degree;
398 d->nattributes = k;
399 for (i = 0; i < k; i++)
400 d->attributes[i] = data->attnums[dependency[i]];
401
402 /* initialize the list of dependencies */
403 if (dependencies == NULL)
404 {
405 dependencies = palloc0_object(MVDependencies);
406
407 dependencies->magic = STATS_DEPS_MAGIC;
408 dependencies->type = STATS_DEPS_TYPE_BASIC;
409 dependencies->ndeps = 0;
410 }
411
412 dependencies->ndeps++;
413 dependencies = (MVDependencies *) repalloc(dependencies,
415 + dependencies->ndeps * sizeof(MVDependency *));
416
417 dependencies->deps[dependencies->ndeps - 1] = d;
418 }
419
420 /*
421 * we're done with variations of k elements, so free the
422 * DependencyGenerator
423 */
425 }
426
428
429 return dependencies;
430}
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
static void DependencyGenerator_free(DependencyGenerator state)
static DependencyGenerator DependencyGenerator_init(int n, int k)
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
#define palloc0_object(type)
Definition fe_memutils.h:75
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define STATS_DEPS_MAGIC
Definition statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition statistics.h:44
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:62
AttrNumber nattributes
Definition statistics.h:53
double degree
Definition statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:54

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, fb(), i, MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, palloc0(), palloc0_object, repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies * statext_dependencies_deserialize ( bytea * data)
extern

Definition at line 492 of file dependencies.c.

493{
494 int i;
496 MVDependencies *dependencies;
497 char *tmp;
498
499 if (data == NULL)
500 return NULL;
501
503 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
505
506 /* read the MVDependencies header */
507 dependencies = palloc0_object(MVDependencies);
508
509 /* initialize pointer to the data part (skip the varlena header) */
510 tmp = VARDATA_ANY(data);
511
512 /* read the header fields and perform basic sanity checks */
513 memcpy(&dependencies->magic, tmp, sizeof(uint32));
514 tmp += sizeof(uint32);
515 memcpy(&dependencies->type, tmp, sizeof(uint32));
516 tmp += sizeof(uint32);
517 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
518 tmp += sizeof(uint32);
519
520 if (dependencies->magic != STATS_DEPS_MAGIC)
521 elog(ERROR, "invalid dependency magic %d (expected %d)",
522 dependencies->magic, STATS_DEPS_MAGIC);
523
524 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
525 elog(ERROR, "invalid dependency type %d (expected %d)",
526 dependencies->type, STATS_DEPS_TYPE_BASIC);
527
528 if (dependencies->ndeps == 0)
529 elog(ERROR, "invalid zero-length item array in MVDependencies");
530
531 /* what minimum bytea size do we expect for those parameters */
532 min_expected_size = SizeOfItem(dependencies->ndeps);
533
535 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
537
538 /* allocate space for the MCV items */
539 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
540 + (dependencies->ndeps * sizeof(MVDependency *)));
541
542 for (i = 0; i < dependencies->ndeps; i++)
543 {
544 double degree;
545 AttrNumber k;
546 MVDependency *d;
547
548 /* degree of validity */
549 memcpy(&degree, tmp, sizeof(double));
550 tmp += sizeof(double);
551
552 /* number of attributes */
553 memcpy(&k, tmp, sizeof(AttrNumber));
554 tmp += sizeof(AttrNumber);
555
556 /* is the number of attributes valid? */
557 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
558
559 /* now that we know the number of attributes, allocate the dependency */
560 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
561 + (k * sizeof(AttrNumber)));
562
563 d->degree = degree;
564 d->nattributes = k;
565
566 /* copy attribute numbers */
567 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
568 tmp += sizeof(AttrNumber) * d->nattributes;
569
570 dependencies->deps[i] = d;
571
572 /* still within the bytea */
573 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
574 }
575
576 /* we should have consumed the whole bytea exactly */
577 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
578
579 return dependencies;
580}
uint32_t uint32
Definition c.h:546
#define SizeOfHeader
#define SizeOfItem(natts)
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define STATS_MAX_DIMENSIONS
Definition statistics.h:19
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486

References Assert, MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog, ERROR, fb(), i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, palloc0(), palloc0_object, repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY(), VARSIZE_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by extended_statistics_update(), pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_free()

void statext_dependencies_free ( MVDependencies * dependencies)
extern

Definition at line 586 of file dependencies.c.

587{
588 for (int i = 0; i < dependencies->ndeps; i++)
589 pfree(dependencies->deps[i]);
590 pfree(dependencies);
591}

References MVDependencies::deps, i, MVDependencies::ndeps, and pfree().

Referenced by extended_statistics_update().

◆ statext_dependencies_serialize()

bytea * statext_dependencies_serialize ( MVDependencies * dependencies)
extern

Definition at line 437 of file dependencies.c.

438{
439 int i;
440 bytea *output;
441 char *tmp;
442 Size len;
443
444 /* we need to store ndeps, with a number of attributes for each one */
445 len = VARHDRSZ + SizeOfHeader;
446
447 /* and also include space for the actual attribute numbers and degrees */
448 for (i = 0; i < dependencies->ndeps; i++)
449 len += SizeOfItem(dependencies->deps[i]->nattributes);
450
451 output = (bytea *) palloc0(len);
452 SET_VARSIZE(output, len);
453
454 tmp = VARDATA(output);
455
456 /* Store the base struct values (magic, type, ndeps) */
457 memcpy(tmp, &dependencies->magic, sizeof(uint32));
458 tmp += sizeof(uint32);
459 memcpy(tmp, &dependencies->type, sizeof(uint32));
460 tmp += sizeof(uint32);
461 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
462 tmp += sizeof(uint32);
463
464 /* store number of attributes and attribute numbers for each dependency */
465 for (i = 0; i < dependencies->ndeps; i++)
466 {
467 MVDependency *d = dependencies->deps[i];
468
469 memcpy(tmp, &d->degree, sizeof(double));
470 tmp += sizeof(double);
471
472 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
473 tmp += sizeof(AttrNumber);
474
475 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
476 tmp += sizeof(AttrNumber) * d->nattributes;
477
478 /* protect against overflow */
479 Assert(tmp <= ((char *) output + len));
480 }
481
482 /* make sure we've produced exactly the right amount of data */
483 Assert(tmp == ((char *) output + len));
484
485 return output;
486}
#define VARHDRSZ
Definition c.h:711
FILE * output
Definition c.h:706
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432

References Assert, MVDependency::attributes, MVDependency::degree, MVDependencies::deps, fb(), i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output, palloc0(), SET_VARSIZE(), SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA(), and VARHDRSZ.

Referenced by build_mvdependencies(), and statext_store().

◆ statext_dependencies_validate()

bool statext_dependencies_validate ( const MVDependencies * dependencies,
const int2vector * stxkeys,
int  numexprs,
int  elevel 
)
extern

Definition at line 606 of file dependencies.c.

609{
611
612 /* Scan through each dependency entry */
613 for (int i = 0; i < dependencies->ndeps; i++)
614 {
615 const MVDependency *dep = dependencies->deps[i];
616
617 /*
618 * Cross-check each attribute in a dependency entry with the extended
619 * stats object definition.
620 */
621 for (int j = 0; j < dep->nattributes; j++)
622 {
624 bool ok = false;
625
626 if (attnum > 0)
627 {
628 /* attribute number in stxkeys */
629 for (int k = 0; k < stxkeys->dim1; k++)
630 {
631 if (attnum == stxkeys->values[k])
632 {
633 ok = true;
634 break;
635 }
636 }
637 }
638 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
639 {
640 /* attribute number for an expression */
641 ok = true;
642 }
643
644 if (!ok)
645 {
646 ereport(elevel,
648 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
649 "pg_dependencies", attnum)));
650 return false;
651 }
652 }
653 }
654
655 return true;
656}
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ereport(elevel,...)
Definition elog.h:150

References attnum, MVDependency::attributes, MVDependencies::deps, ereport, errcode(), errmsg(), fb(), i, j, and MVDependencies::ndeps.

Referenced by extended_statistics_update().

◆ statext_mcv_build()

MCVList * statext_mcv_build ( StatsBuildData *data,
double  totalrows,
int  stattarget 
)
extern

Definition at line 178 of file mcv.c.

179{
180 int i,
181 numattrs,
182 numrows,
183 ngroups,
184 nitems;
185 double mincount;
190
191 /* comparator for all the columns */
192 mss = build_mss(data);
193
194 /* sort the rows */
196 data->nattnums, data->attnums);
197
198 if (!items)
199 return NULL;
200
201 /* for convenience */
202 numattrs = data->nattnums;
203 numrows = data->numrows;
204
205 /* transform the sorted rows into groups (sorted by frequency) */
207
208 /*
209 * The maximum number of MCV items to store, based on the statistics
210 * target we computed for the statistics object (from the target set for
211 * the object itself, attributes and the system default). In any case, we
212 * can't keep more groups than we have available.
213 */
214 nitems = stattarget;
215 if (nitems > ngroups)
216 nitems = ngroups;
217
218 /*
219 * Decide how many items to keep in the MCV list. We can't use the same
220 * algorithm as per-column MCV lists, because that only considers the
221 * actual group frequency - but we're primarily interested in how the
222 * actual frequency differs from the base frequency (product of simple
223 * per-column frequencies, as if the columns were independent).
224 *
225 * Using the same algorithm might exclude items that are close to the
226 * "average" frequency of the sample. But that does not say whether the
227 * observed frequency is close to the base frequency or not. We also need
228 * to consider unexpectedly uncommon items (again, compared to the base
229 * frequency), and the single-column algorithm does not have to.
230 *
231 * We simply decide how many items to keep by computing the minimum count
232 * using get_mincount_for_mcv_list() and then keep all items that seem to
233 * be more common than that.
234 */
236
237 /*
238 * Walk the groups until we find the first group with a count below the
239 * mincount threshold (the index of that group is the number of groups we
240 * want to keep).
241 */
242 for (i = 0; i < nitems; i++)
243 {
244 if (groups[i].count < mincount)
245 {
246 nitems = i;
247 break;
248 }
249 }
250
251 /*
252 * At this point, we know the number of items for the MCV list. There
253 * might be none (for uniform distribution with many groups), and in that
254 * case, there will be no MCV list. Otherwise, construct the MCV list.
255 */
256 if (nitems > 0)
257 {
258 int j;
261
262 /* frequencies for values in each attribute */
263 SortItem **freqs;
264 int *nfreqs;
265
266 /* used to search values */
268 + sizeof(SortSupportData));
269
270 /* compute frequencies for values in each column */
273
274 /*
275 * Allocate the MCV list structure, set the global parameters.
276 */
278 sizeof(MCVItem) * nitems);
279
280 mcvlist->magic = STATS_MCV_MAGIC;
282 mcvlist->ndimensions = numattrs;
283 mcvlist->nitems = nitems;
284
285 /* store info about data type OIDs */
286 for (i = 0; i < numattrs; i++)
287 mcvlist->types[i] = data->stats[i]->attrtypid;
288
289 /* Copy the first chunk of groups into the result. */
290 for (i = 0; i < nitems; i++)
291 {
292 /* just point to the proper place in the list */
293 MCVItem *item = &mcvlist->items[i];
294
296 item->isnull = palloc_array(bool, numattrs);
297
298 /* copy values for the group */
299 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
300 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
301
302 /* groups should be sorted by frequency in descending order */
303 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
304
305 /* group frequency */
306 item->frequency = (double) groups[i].count / numrows;
307
308 /* base frequency, if the attributes were independent */
309 item->base_frequency = 1.0;
310 for (j = 0; j < numattrs; j++)
311 {
312 SortItem *freq;
313
314 /* single dimension */
315 tmp->ndims = 1;
316 tmp->ssup[0] = mss->ssup[j];
317
318 /* fill search key */
319 key.values = &groups[i].values[j];
320 key.isnull = &groups[i].isnull[j];
321
322 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
323 sizeof(SortItem),
324 multi_sort_compare, tmp);
325
326 item->base_frequency *= ((double) freq->count) / numrows;
327 }
328 }
329
330 pfree(nfreqs);
331 pfree(freqs);
332 }
333
334 pfree(items);
335 pfree(groups);
336
337 return mcvlist;
338}
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
static MultiSortSupport build_mss(StatsBuildData *data)
Definition mcv.c:345
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition mcv.c:146
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition mcv.c:488
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition mcv.c:422
void * palloc(Size size)
Definition mcxt.c:1387
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition bsearch_arg.c:55
#define STATS_MCV_TYPE_BASIC
Definition statistics.h:67
#define STATS_MCV_MAGIC
Definition statistics.h:66
bool * isnull
Definition statistics.h:82
Datum * values
Definition statistics.h:83
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]

References Assert, MCVItem::base_frequency, bsearch_arg(), build_column_frequencies(), build_distinct_groups(), build_mss(), build_sorted_items(), data, fb(), MCVItem::frequency, get_mincount_for_mcv_list(), i, MCVItem::isnull, items, j, multi_sort_compare(), MultiSortSupportData::ndims, nitems, palloc(), palloc0(), palloc0_array, palloc_array, pfree(), MultiSortSupportData::ssup, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, and MCVItem::values.

Referenced by BuildRelationExtStatistics().

◆ statext_mcv_deserialize()

MCVList * statext_mcv_deserialize ( bytea *data)
extern

Definition at line 994 of file mcv.c.

995{
996 int dim,
997 i;
1000 char *raw;
1001 char *ptr;
1002 char *endptr PG_USED_FOR_ASSERTS_ONLY;
1003
1004 int ndims,
1005 nitems;
1006 DimensionInfo *info = NULL;
1007
1008 /* local allocation buffer (used only for deserialization) */
1009 Datum **map = NULL;
1010
1011 /* MCV list */
1012 Size mcvlen;
1013
1014 /* buffer used for the result */
1015 Size datalen;
1016 char *dataptr;
1017 char *valuesptr;
1018 char *isnullptr;
1019
1020 if (data == NULL)
1021 return NULL;
1022
1023 /*
1024 * We can't possibly deserialize a MCV list if there's not even a complete
1025 * header. We need an explicit formula here, because we serialize the
1026 * header fields one by one, so we need to ignore struct alignment.
1027 */
1029 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1031
1032 /* read the MCV list header */
1034
1035 /* pointer to the data part (skip the varlena header) */
1036 raw = (char *) data;
1037 ptr = VARDATA_ANY(raw);
1038 endptr = raw + VARSIZE_ANY(data);
1039
1040 /* get the header and perform further sanity checks */
1041 memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1042 ptr += sizeof(uint32);
1043
1044 memcpy(&mcvlist->type, ptr, sizeof(uint32));
1045 ptr += sizeof(uint32);
1046
1047 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1048 ptr += sizeof(uint32);
1049
1050 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1051 ptr += sizeof(AttrNumber);
1052
1053 if (mcvlist->magic != STATS_MCV_MAGIC)
1054 elog(ERROR, "invalid MCV magic %u (expected %u)",
1055 mcvlist->magic, STATS_MCV_MAGIC);
1056
1057 if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1058 elog(ERROR, "invalid MCV type %u (expected %u)",
1060
1061 if (mcvlist->ndimensions == 0)
1062 elog(ERROR, "invalid zero-length dimension array in MCVList");
1063 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1064 (mcvlist->ndimensions < 0))
1065 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1066 mcvlist->ndimensions);
1067
1068 if (mcvlist->nitems == 0)
1069 elog(ERROR, "invalid zero-length item array in MCVList");
1070 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1071 elog(ERROR, "invalid length (%u) item array in MCVList",
1072 mcvlist->nitems);
1073
1074 nitems = mcvlist->nitems;
1075 ndims = mcvlist->ndimensions;
1076
1077 /*
1078 * Check amount of data including DimensionInfo for all dimensions and
1079 * also the serialized items (including uint16 indexes). Also, walk
1080 * through the dimension information and add it to the sum.
1081 */
1083
1084 /*
1085 * Check that we have at least the dimension and info records, along with
1086 * the items. We don't know the size of the serialized values yet. We need
1087 * to do this check first, before accessing the dimension info.
1088 */
1090 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1092
1093 /* Now copy the array of type Oids. */
1094 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1095 ptr += (sizeof(Oid) * ndims);
1096
1097 /* Now it's safe to access the dimension info. */
1098 info = palloc(ndims * sizeof(DimensionInfo));
1099
1100 memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1101 ptr += (ndims * sizeof(DimensionInfo));
1102
1103 /* account for the value arrays */
1104 for (dim = 0; dim < ndims; dim++)
1105 {
1106 /*
1107 * XXX I wonder if we can/should rely on asserts here. Maybe those
1108 * checks should be done every time?
1109 */
1110 Assert(info[dim].nvalues >= 0);
1111 Assert(info[dim].nbytes >= 0);
1112
1113 expected_size += info[dim].nbytes;
1114 }
1115
1116 /*
1117 * Now we know the total expected MCV size, including all the pieces
1118 * (header, dimension info, items and deduplicated data). So do the final
1119 * check on size.
1120 */
1122 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1124
1125 /*
1126 * We need an array of Datum values for each dimension, so that we can
1127 * easily translate the uint16 indexes later. We also need a top-level
1128 * array of pointers to those per-dimension arrays.
1129 *
1130 * While allocating the arrays for dimensions, compute how much space we
1131 * need for a copy of the by-ref data, as we can't simply point to the
1132 * original values (it might go away).
1133 */
1134 datalen = 0; /* space for by-ref data */
1135 map = palloc_array(Datum *, ndims);
1136
1137 for (dim = 0; dim < ndims; dim++)
1138 {
1139 map[dim] = palloc_array(Datum, info[dim].nvalues);
1140
1141 /* space needed for a copy of data for by-ref types */
1142 datalen += info[dim].nbytes_aligned;
1143 }
1144
1145 /*
1146 * Now resize the MCV list so that the allocation includes all the data.
1147 *
1148 * Allocate space for a copy of the data, as we can't simply reference the
1149 * serialized data - it's not aligned properly, and it may disappear while
1150 * we're still using the MCV list, e.g. due to catcache release.
1151 *
1152 * We do care about alignment here, because we will allocate all the
1153 * pieces at once, but then use pointers to different parts.
1154 */
1155 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1156
1157 /* arrays of values and isnull flags for all MCV items */
1158 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1159 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1160
1161 /* we don't quite need to align this, but it makes some asserts easier */
1162 mcvlen += MAXALIGN(datalen);
1163
1164 /* now resize the deserialized MCV list, and compute pointers to parts */
1166
1167 /* pointer to the beginning of values/isnull arrays */
1168 valuesptr = (char *) mcvlist
1169 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1170
1171 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1172
1173 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1174
1175 /*
1176 * Build mapping (index => value) for translating the serialized data into
1177 * the in-memory representation.
1178 */
1179 for (dim = 0; dim < ndims; dim++)
1180 {
1181 /* remember start position in the input array */
1182 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1183
1184 if (info[dim].typbyval)
1185 {
1186 /* for by-val types we simply copy data into the mapping */
1187 for (i = 0; i < info[dim].nvalues; i++)
1188 {
1189 Datum v = 0;
1190
1191 memcpy(&v, ptr, info[dim].typlen);
1192 ptr += info[dim].typlen;
1193
1194 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1195
1196 /* no under/overflow of input array */
1197 Assert(ptr <= (start + info[dim].nbytes));
1198 }
1199 }
1200 else
1201 {
1202 /* for by-ref types we need to also make a copy of the data */
1203
1204 /* passed by reference, but fixed length (name, tid, ...) */
1205 if (info[dim].typlen > 0)
1206 {
1207 for (i = 0; i < info[dim].nvalues; i++)
1208 {
1209 memcpy(dataptr, ptr, info[dim].typlen);
1210 ptr += info[dim].typlen;
1211
1212 /* just point into the array */
1213 map[dim][i] = PointerGetDatum(dataptr);
1214 dataptr += MAXALIGN(info[dim].typlen);
1215 }
1216 }
1217 else if (info[dim].typlen == -1)
1218 {
1219 /* varlena */
1220 for (i = 0; i < info[dim].nvalues; i++)
1221 {
1222 uint32 len;
1223
1224 /* read the uint32 length */
1225 memcpy(&len, ptr, sizeof(uint32));
1226 ptr += sizeof(uint32);
1227
1228 /* the length is data-only */
1229 SET_VARSIZE(dataptr, len + VARHDRSZ);
1230 memcpy(VARDATA(dataptr), ptr, len);
1231 ptr += len;
1232
1233 /* just point into the array */
1234 map[dim][i] = PointerGetDatum(dataptr);
1235
1236 /* skip to place of the next deserialized value */
1237 dataptr += MAXALIGN(len + VARHDRSZ);
1238 }
1239 }
1240 else if (info[dim].typlen == -2)
1241 {
1242 /* cstring */
1243 for (i = 0; i < info[dim].nvalues; i++)
1244 {
1245 uint32 len;
1246
1247 memcpy(&len, ptr, sizeof(uint32));
1248 ptr += sizeof(uint32);
1249
1250 memcpy(dataptr, ptr, len);
1251 ptr += len;
1252
1253 /* just point into the array */
1254 map[dim][i] = PointerGetDatum(dataptr);
1255 dataptr += MAXALIGN(len);
1256 }
1257 }
1258
1259 /* no under/overflow of input array */
1260 Assert(ptr <= (start + info[dim].nbytes));
1261
1262 /* no overflow of the output mcv value */
1263 Assert(dataptr <= ((char *) mcvlist + mcvlen));
1264 }
1265
1266 /* check we consumed input data for this dimension exactly */
1267 Assert(ptr == (start + info[dim].nbytes));
1268 }
1269
1270 /* we should have also filled the MCV list exactly */
1271 Assert(dataptr == ((char *) mcvlist + mcvlen));
1272
1273 /* deserialize the MCV items and translate the indexes to Datums */
1274 for (i = 0; i < nitems; i++)
1275 {
1276 MCVItem *item = &mcvlist->items[i];
1277
1278 item->values = (Datum *) valuesptr;
1279 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1280
1281 item->isnull = (bool *) isnullptr;
1282 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1283
1284 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1285 ptr += sizeof(bool) * ndims;
1286
1287 memcpy(&item->frequency, ptr, sizeof(double));
1288 ptr += sizeof(double);
1289
1290 memcpy(&item->base_frequency, ptr, sizeof(double));
1291 ptr += sizeof(double);
1292
1293 /* finally translate the indexes (for non-NULL only) */
1294 for (dim = 0; dim < ndims; dim++)
1295 {
1296 uint16 index;
1297
1298 memcpy(&index, ptr, sizeof(uint16));
1299 ptr += sizeof(uint16);
1300
1301 if (item->isnull[dim])
1302 continue;
1303
1304 item->values[dim] = map[dim][index];
1305 }
1306
1307 /* check we're not overflowing the input */
1308 Assert(ptr <= endptr);
1309 }
1310
1311 /* check that we processed all the data */
1312 Assert(ptr == endptr);
1313
1314 /* release the buffers used for mapping */
1315 for (dim = 0; dim < ndims; dim++)
1316 pfree(map[dim]);
1317
1318 pfree(map);
1319
1320 return mcvlist;
1321}
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
uint16_t uint16
Definition c.h:545
#define MinSizeOfMCVList
Definition mcv.c:57
#define SizeOfMCVList(ndims, nitems)
Definition mcv.c:66
unsigned int Oid
#define STATS_MCVLIST_MAX_ITEMS
Definition statistics.h:70
Definition type.h:96
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition tupmacs.h:50

References Assert, MCVItem::base_frequency, data, elog, ERROR, fb(), fetch_att(), MCVItem::frequency, i, MCVItem::isnull, items, len, MAXALIGN, MinSizeOfMCVList, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, nitems, DimensionInfo::nvalues, palloc(), palloc0(), palloc_array, pfree(), PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), repalloc(), SET_VARSIZE(), SizeOfMCVList, start, STATS_MAX_DIMENSIONS, STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, STATS_MCVLIST_MAX_ITEMS, DimensionInfo::typlen, MCVItem::values, VARDATA(), VARDATA_ANY(), VARHDRSZ, and VARSIZE_ANY().

Referenced by pg_stats_ext_mcvlist_items(), and statext_mcv_load().

◆ statext_mcv_free()

void statext_mcv_free ( MCVList *mcvlist)
extern

Definition at line 2179 of file mcv.c.

2180{
2181 for (int i = 0; i < mcvlist->nitems; i++)
2182 {
2183 MCVItem *item = &mcvlist->items[i];
2184
2185 pfree(item->values);
2186 pfree(item->isnull);
2187 }
2188 pfree(mcvlist);
2189}

References fb(), i, MCVItem::isnull, pfree(), and MCVItem::values.

Referenced by statext_mcv_import().

◆ statext_mcv_import()

Datum statext_mcv_import ( int  elevel,
int  numattrs,
Oid *atttypids,
int32 *atttypmods,
Oid *atttypcolls,
int  nitems,
Datum *mcv_elems,
bool *mcv_nulls,
float8 *freqs,
float8 *base_freqs
)
extern

Definition at line 2204 of file mcv.c.

2208{
2210 bytea *bytes;
2212
2213 /*
2214 * Allocate the MCV list structure, set the global parameters.
2215 */
2217 (sizeof(MCVItem) * nitems));
2218
2219 mcvlist->magic = STATS_MCV_MAGIC;
2221 mcvlist->ndimensions = numattrs;
2222 mcvlist->nitems = nitems;
2223
2224 /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */
2225 for (int i = 0; i < nitems; i++)
2226 {
2227 MCVItem *item = &mcvlist->items[i];
2228
2229 item->frequency = freqs[i];
2230 item->base_frequency = base_freqs[i];
2231 item->values = (Datum *) palloc0_array(Datum, numattrs);
2232 item->isnull = (bool *) palloc0_array(bool, numattrs);
2233 }
2234
2235 /*
2236 * Walk through each dimension, determine the input function for that
2237 * type, and then attempt to convert all values in that column via that
2238 * function. We approach this column-wise because it is simpler to deal
2239 * with one input function at a time, and possibly more cache-friendly.
2240 */
2241 for (int j = 0; j < numattrs; j++)
2242 {
2243 FmgrInfo finfo;
2244 Oid ioparam;
2245 Oid infunc;
2246 int index = j;
2247
2249 fmgr_info(infunc, &finfo);
2250
2251 /* store info about data type OIDs */
2252 mcvlist->types[j] = atttypids[j];
2253
2254 for (int i = 0; i < nitems; i++)
2255 {
2256 MCVItem *item = &mcvlist->items[i];
2257
2258 if (mcv_nulls[index])
2259 {
2260 /* NULL value detected, hence no input to process */
2261 item->values[j] = (Datum) 0;
2262 item->isnull[j] = true;
2263 }
2264 else
2265 {
2268
2269 if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j],
2270 (Node *) &escontext, &item->values[j]))
2271 {
2272 ereport(elevel,
2274 errmsg("could not parse MCV element \"%s\": incorrect value", s)));
2275 pfree(s);
2276 goto error;
2277 }
2278
2279 pfree(s);
2280 }
2281
2282 index += numattrs;
2283 }
2284 }
2285
2286 /*
2287 * The function statext_mcv_serialize() requires an array of pointers to
2288 * VacAttrStats records, but only a few fields within those records have
2289 * to be filled out.
2290 */
2292
2293 for (int i = 0; i < numattrs; i++)
2294 {
2295 Oid typid = atttypids[i];
2297
2299
2301 elog(ERROR, "cache lookup failed for type %u", typid);
2302
2304
2305 vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
2306 vastats[i]->attrtypid = typid;
2307 vastats[i]->attrcollid = atttypcolls[i];
2308 }
2309
2311
2312 for (int i = 0; i < numattrs; i++)
2313 {
2314 pfree(vastats[i]);
2315 }
2316 pfree((void *) vastats);
2317
2320
2321 if (bytes == NULL)
2322 {
2323 ereport(elevel,
2325 errmsg("could not import MCV list")));
2326 goto error;
2327 }
2328
2329 return PointerGetDatum(bytes);
2330
2331error:
2333 return (Datum) 0;
2334}
#define TextDatumGetCString(d)
Definition builtins.h:98
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition fmgr.c:128
bool InputFunctionCallSafe(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod, Node *escontext, Datum *result)
Definition fmgr.c:1585
#define HeapTupleIsValid(tuple)
Definition htup.h:78
static void * GETSTRUCT(const HeapTupleData *tuple)
void getTypeInputInfo(Oid type, Oid *typInput, Oid *typIOParam)
Definition lsyscache.c:3024
void statext_mcv_free(MCVList *mcvlist)
Definition mcv.c:2179
bytea * statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
Definition mcv.c:619
FormData_pg_type * Form_pg_type
Definition pg_type.h:261
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:262
static void error(void)
#define SearchSysCacheCopy1(cacheId, key1)
Definition syscache.h:91

References MCVItem::base_frequency, elog, ereport, errcode(), errmsg(), ERROR, error(), fb(), fmgr_info(), MCVItem::frequency, GETSTRUCT(), getTypeInputInfo(), HeapTupleIsValid, i, InputFunctionCallSafe(), MCVItem::isnull, items, j, nitems, ObjectIdGetDatum(), palloc0(), palloc0_array, palloc0_object, pfree(), PointerGetDatum(), SearchSysCacheCopy1, statext_mcv_free(), statext_mcv_serialize(), STATS_MCV_MAGIC, STATS_MCV_TYPE_BASIC, TextDatumGetCString, and MCVItem::values.

Referenced by import_mcv().

◆ statext_mcv_serialize()

bytea * statext_mcv_serialize ( MCVList *mcvlist,
VacAttrStats **  stats 
)
extern

Definition at line 619 of file mcv.c.

620{
621 int i;
622 int dim;
623 int ndims = mcvlist->ndimensions;
624
625 SortSupport ssup;
626 DimensionInfo *info;
627
629
630 /* serialized items (indexes into arrays, etc.) */
631 bytea *raw;
632 char *ptr;
633 char *endptr PG_USED_FOR_ASSERTS_ONLY;
634
635 /* values per dimension (and number of non-NULL values) */
636 Datum **values = palloc0_array(Datum *, ndims);
637 int *counts = palloc0_array(int, ndims);
638
639 /*
640 * We'll include some rudimentary information about the attribute types
641 * (length, by-val flag), so that we don't have to look them up while
642 * deserializing the MCV list (we already have the type OID in the
643 * header). This is safe because when changing the type of the attribute
644 * the statistics gets dropped automatically. We need to store the info
645 * about the arrays of deduplicated values anyway.
646 */
647 info = palloc0_array(DimensionInfo, ndims);
648
649 /* sort support data for all attributes included in the MCV list */
650 ssup = palloc0_array(SortSupportData, ndims);
651
652 /* collect and deduplicate values for each dimension (attribute) */
653 for (dim = 0; dim < ndims; dim++)
654 {
655 int ndistinct;
656 TypeCacheEntry *typentry;
657
658 /*
659 * Lookup the LT operator (can't get it from stats extra_data, as we
660 * don't know how to interpret that - scalar vs. array etc.).
661 */
662 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
663
664 /* copy important info about the data type (length, by-value) */
665 info[dim].typlen = stats[dim]->attrtype->typlen;
666 info[dim].typbyval = stats[dim]->attrtype->typbyval;
667
668 /* allocate space for values in the attribute and collect them */
669 values[dim] = palloc0_array(Datum, mcvlist->nitems);
670
671 for (i = 0; i < mcvlist->nitems; i++)
672 {
673 /* skip NULL values - we don't need to deduplicate those */
674 if (mcvlist->items[i].isnull[dim])
675 continue;
676
677 /* append the value at the end */
678 values[dim][counts[dim]] = mcvlist->items[i].values[dim];
679 counts[dim] += 1;
680 }
681
682 /* if there are just NULL values in this dimension, we're done */
683 if (counts[dim] == 0)
684 continue;
685
686 /* sort and deduplicate the data */
687 ssup[dim].ssup_cxt = CurrentMemoryContext;
688 ssup[dim].ssup_collation = stats[dim]->attrcollid;
689 ssup[dim].ssup_nulls_first = false;
690
691 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
692
693 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
694 compare_scalars_simple, &ssup[dim]);
695
696 /*
697 * Walk through the array and eliminate duplicate values, but keep the
698 * ordering (so that we can do a binary search later). We know there's
699 * at least one item as (counts[dim] != 0), so we can skip the first
700 * element.
701 */
702 ndistinct = 1; /* number of distinct values */
703 for (i = 1; i < counts[dim]; i++)
704 {
705 /* expect sorted array */
706 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
707
708 /* if the value is the same as the previous one, we can skip it */
709 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
710 continue;
711
712 values[dim][ndistinct] = values[dim][i];
713 ndistinct += 1;
714 }
715
716 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
717 Assert(ndistinct <= PG_UINT16_MAX);
718
719 /*
720 * Store additional info about the attribute - number of deduplicated
721 * values, and also size of the serialized data. For fixed-length data
722 * types this is trivial to compute, for varwidth types we need to
723 * actually walk the array and sum the sizes.
724 */
725 info[dim].nvalues = ndistinct;
726
727 if (info[dim].typbyval) /* by-value data types */
728 {
729 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
730
731 /*
732 * We copy the data into the MCV item during deserialization, so
733 * we don't need to allocate any extra space.
734 */
735 info[dim].nbytes_aligned = 0;
736 }
737 else if (info[dim].typlen > 0) /* fixed-length by-ref */
738 {
739 /*
740 * We don't care about alignment in the serialized data, so we
741 * pack the data as much as possible. But we also track how much
742 * data will be needed after deserialization, and in that case we
743 * need to account for alignment of each item.
744 *
745 * Note: As the items are fixed-length, we could easily compute
746 * this during deserialization, but we do it here anyway.
747 */
748 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
749 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
750 }
751 else if (info[dim].typlen == -1) /* varlena */
752 {
753 info[dim].nbytes = 0;
754 info[dim].nbytes_aligned = 0;
755 for (i = 0; i < info[dim].nvalues; i++)
756 {
757 Size len;
758
759 /*
760 * For varlena values, we detoast the values and store the
761 * length and data separately. We don't bother with alignment
762 * here, which means that during deserialization we need to
763 * copy the fields and only access the copies.
764 */
766
767 /* serialized length (uint32 length + data) */
769 info[dim].nbytes += sizeof(uint32); /* length */
770 info[dim].nbytes += len; /* value (no header) */
771
772 /*
773 * During deserialization we'll build regular varlena values
774 * with full headers, and we need to align them properly.
775 */
776 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
777 }
778 }
779 else if (info[dim].typlen == -2) /* cstring */
780 {
781 info[dim].nbytes = 0;
782 info[dim].nbytes_aligned = 0;
783 for (i = 0; i < info[dim].nvalues; i++)
784 {
785 Size len;
786
787 /*
788 * cstring is handled similar to varlena - first we store the
789 * length as uint32 and then the data. We don't care about
790 * alignment, which means that during deserialization we need
791 * to copy the fields and only access the copies.
792 */
793
794 /* c-strings include terminator, so +1 byte */
795 len = strlen(DatumGetCString(values[dim][i])) + 1;
796 info[dim].nbytes += sizeof(uint32); /* length */
797 info[dim].nbytes += len; /* value */
798
799 /* space needed for properly aligned deserialized copies */
800 info[dim].nbytes_aligned += MAXALIGN(len);
801 }
802 }
803
804 /* we know (count>0) so there must be some data */
805 Assert(info[dim].nbytes > 0);
806 }
807
808 /*
809 * Now we can finally compute how much space we'll actually need for the
810 * whole serialized MCV list (varlena header, MCV header, dimension info
811 * for each attribute, deduplicated values and items).
812 */
813 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
814 + sizeof(AttrNumber) /* ndimensions */
815 + (ndims * sizeof(Oid)); /* attribute types */
816
817 /* dimension info */
818 total_length += ndims * sizeof(DimensionInfo);
819
820 /* add space for the arrays of deduplicated values */
821 for (i = 0; i < ndims; i++)
822 total_length += info[i].nbytes;
823
824 /*
825 * And finally account for the items (those are fixed-length, thanks to
826 * replacing values with uint16 indexes into the deduplicated arrays).
827 */
828 total_length += mcvlist->nitems * ITEM_SIZE(dim);
829
830 /*
831 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
832 * we set them to zero to make the result more compressible).
833 */
836
837 ptr = VARDATA(raw);
838 endptr = ptr + total_length;
839
840 /* copy the MCV list header fields, one by one */
841 memcpy(ptr, &mcvlist->magic, sizeof(uint32));
842 ptr += sizeof(uint32);
843
844 memcpy(ptr, &mcvlist->type, sizeof(uint32));
845 ptr += sizeof(uint32);
846
847 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
848 ptr += sizeof(uint32);
849
850 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
851 ptr += sizeof(AttrNumber);
852
853 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
854 ptr += (sizeof(Oid) * ndims);
855
856 /* store information about the attributes (data amounts, ...) */
857 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
858 ptr += sizeof(DimensionInfo) * ndims;
859
860 /* Copy the deduplicated values for all attributes to the output. */
861 for (dim = 0; dim < ndims; dim++)
862 {
863 /* remember the starting point for Asserts later */
865
866 for (i = 0; i < info[dim].nvalues; i++)
867 {
868 Datum value = values[dim][i];
869
870 if (info[dim].typbyval) /* passed by value */
871 {
872 Datum tmp;
873
874 /*
875 * For byval types, we need to copy just the significant bytes
876 * - we can't use memcpy directly, as that assumes
877 * little-endian behavior. store_att_byval does almost what
878 * we need, but it requires a properly aligned buffer - the
879 * output buffer does not guarantee that. So we simply use a
880 * local Datum variable (which guarantees proper alignment),
881 * and then copy the value from it.
882 */
883 store_att_byval(&tmp, value, info[dim].typlen);
884
885 memcpy(ptr, &tmp, info[dim].typlen);
886 ptr += info[dim].typlen;
887 }
888 else if (info[dim].typlen > 0) /* passed by reference */
889 {
890 /* no special alignment needed, treated as char array */
891 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
892 ptr += info[dim].typlen;
893 }
894 else if (info[dim].typlen == -1) /* varlena */
895 {
897
898 /* copy the length */
899 memcpy(ptr, &len, sizeof(uint32));
900 ptr += sizeof(uint32);
901
902 /* data from the varlena value (without the header) */
904 ptr += len;
905 }
906 else if (info[dim].typlen == -2) /* cstring */
907 {
909
910 /* copy the length */
911 memcpy(ptr, &len, sizeof(uint32));
912 ptr += sizeof(uint32);
913
914 /* value */
916 ptr += len;
917 }
918
919 /* no underflows or overflows */
920 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
921 }
922
923 /* we should get exactly nbytes of data for this dimension */
924 Assert((ptr - start) == info[dim].nbytes);
925 }
926
927 /* Serialize the items, with uint16 indexes instead of the values. */
928 for (i = 0; i < mcvlist->nitems; i++)
929 {
930 MCVItem *mcvitem = &mcvlist->items[i];
931
932 /* don't write beyond the allocated space */
933 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
934
935 /* copy NULL flags and frequencies into the serialized MCV */
936 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
937 ptr += sizeof(bool) * ndims;
938
939 memcpy(ptr, &mcvitem->frequency, sizeof(double));
940 ptr += sizeof(double);
941
942 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
943 ptr += sizeof(double);
944
945 /* store the indexes last */
946 for (dim = 0; dim < ndims; dim++)
947 {
948 uint16 index = 0;
949 Datum *value;
950
951 /* do the lookup only for non-NULL values */
952 if (!mcvitem->isnull[dim])
953 {
954 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
955 info[dim].nvalues, sizeof(Datum),
956 compare_scalars_simple, &ssup[dim]);
957
958 Assert(value != NULL); /* serialization or deduplication
959 * error */
960
961 /* compute index within the deduplicated array */
962 index = (uint16) (value - values[dim]);
963
964 /* check the index is within expected bounds */
965 Assert(index < info[dim].nvalues);
966 }
967
968 /* copy the index into the serialized MCV */
969 memcpy(ptr, &index, sizeof(uint16));
970 ptr += sizeof(uint16);
971 }
972
973 /* make sure we don't overflow the allocated value */
974 Assert(ptr <= endptr);
975 }
976
977 /* at this point we expect to match the total_length exactly */
978 Assert(ptr == endptr);
979
980 pfree(values);
981 pfree(counts);
982
983 return raw;
984}
#define PG_UINT16_MAX
Definition c.h:601
int compare_scalars_simple(const void *a, const void *b, void *arg)
#define ITEM_SIZE(ndims)
Definition mcv.c:51
static char * DatumGetCString(Datum X)
Definition postgres.h:365
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
Form_pg_type attrtype
Definition vacuum.h:128
Oid attrcollid
Definition vacuum.h:129
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition tupmacs.h:206
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:386
#define TYPECACHE_LT_OPR
Definition typcache.h:139

References Assert, VacAttrStats::attrcollid, VacAttrStats::attrtype, bsearch_arg(), compare_datums_simple(), compare_scalars_simple(), CurrentMemoryContext, DatumGetCString(), DatumGetPointer(), fb(), i, ITEM_SIZE, len, lookup_type_cache(), TypeCacheEntry::lt_opr, MAXALIGN, DimensionInfo::nbytes, DimensionInfo::nbytes_aligned, DimensionInfo::nvalues, palloc0(), palloc0_array, pfree(), PG_DETOAST_DATUM, PG_UINT16_MAX, PG_USED_FOR_ASSERTS_ONLY, PointerGetDatum(), PrepareSortSupportFromOrderingOp(), qsort_interruptible(), SET_VARSIZE(), SortSupportData::ssup_collation, SortSupportData::ssup_cxt, SortSupportData::ssup_nulls_first, start, store_att_byval(), DimensionInfo::typbyval, TYPECACHE_LT_OPR, DimensionInfo::typlen, value, values, VARDATA(), VARDATA_ANY(), VARHDRSZ, and VARSIZE_ANY_EXHDR().

Referenced by statext_mcv_import(), and statext_store().

◆ statext_ndistinct_build()

MVNDistinct * statext_ndistinct_build ( double  totalrows,
StatsBuildData * data 
)
extern

Definition at line 85 of file mvdistinct.c.

/*
 * Build an MVNDistinct holding one MVNDistinctItem for each combination of
 * k attributes, for k = 2 .. data->nattnums.
 *
 * NOTE(review): this rendered listing omits source lines 91, 95-96, 103,
 * 106, 108, 113, 121, 125, 128 and 131, so the declaration of numcombs, the
 * combination-generator calls and the right-hand side of the ndistinct
 * assignment are not visible here.  Per the "References" list that follows
 * the listing they presumably involve num_combinations(), generator_init(),
 * generator_next(), generator_free() and ndistinct_for_combination() --
 * confirm against mvdistinct.c.
 */
86{
87 MVNDistinct *result;
88 int k;
89 int itemcnt;
90 int numattrs = data->nattnums;
92
/* header up to the flexible items[] array, plus numcombs items */
93 result = palloc(offsetof(MVNDistinct, items) +
94 numcombs * sizeof(MVNDistinctItem));
97 result->nitems = numcombs;
98
/* itemcnt is the next free slot in result->items[] */
99 itemcnt = 0;
/* combinations start at size 2; single attributes are not covered here */
100 for (k = 2; k <= numattrs; k++)
101 {
102 int *combination;
104
105 /* generate combinations of K out of N elements */
107
109 {
110 MVNDistinctItem *item = &result->items[itemcnt];
111 int j;
112
114 item->nattributes = k;
115
116 /* translate the indexes to attnums */
/* combination[] holds indexes into data->attnums[] */
117 for (j = 0; j < k; j++)
118 {
119 item->attributes[j] = data->attnums[combination[j]];
120
122 }
123
/* NOTE(review): the value assigned here (line 125) is missing from the listing */
124 item->ndistinct =
126
127 itemcnt++;
129 }
130
132 }
133
134 /* must consume exactly the whole output array */
135 Assert(itemcnt == result->nitems);
136
137 return result;
138}
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition mvdistinct.c:416
static int num_combinations(int n)
Definition mvdistinct.c:566
static void generator_free(CombinationGenerator *state)
Definition mvdistinct.c:633
static CombinationGenerator * generator_init(int n, int k)
Definition mvdistinct.c:580
static int * generator_next(CombinationGenerator *state)
Definition mvdistinct.c:618
#define STATS_NDISTINCT_MAGIC
Definition statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition statistics.h:23
AttrNumber * attributes
Definition statistics.h:30
uint32 nitems
Definition statistics.h:38
uint32 type
Definition statistics.h:37
uint32 magic
Definition statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:39

References Assert, AttributeNumberIsValid, MVNDistinctItem::attributes, data, fb(), generator_free(), generator_init(), generator_next(), MVNDistinct::items, items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), palloc_array, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct * statext_ndistinct_deserialize ( bytea * data)
extern

Definition at line 247 of file mvdistinct.c.

/*
 * Decode a serialized MVNDistinct from a bytea.
 *
 * Returns NULL when data is NULL.  Otherwise reads the magic/type/nitems
 * header followed by nitems entries, each laid out as: double ndistinct,
 * int nattributes, then nattributes AttrNumber values.  The result and each
 * item's attributes array are separately allocated.
 *
 * Raises ERROR for a wrong magic number or a zero item count (and, judging
 * by the visible error messages, for a wrong type or a too-short bytea).
 *
 * NOTE(review): this rendered listing omits source lines 250-251, 259, 261,
 * 276-277, 279, 284-285 and 287, so the declaration of ndist and the
 * conditions guarding some elog() calls are not visible here.
 */
248{
249 int i;
252 MVNDistinct *ndistinct;
253 char *tmp;
254
255 if (data == NULL)
256 return NULL;
257
258 /* we expect at least the basic fields of MVNDistinct struct */
260 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
262
263 /* initialize pointer to the data part (skip the varlena header) */
264 tmp = VARDATA_ANY(data);
265
266 /* read the header fields and perform basic sanity checks */
/* memcpy is used for every field, so tmp need not be aligned */
267 memcpy(&ndist.magic, tmp, sizeof(uint32));
268 tmp += sizeof(uint32);
269 memcpy(&ndist.type, tmp, sizeof(uint32));
270 tmp += sizeof(uint32);
271 memcpy(&ndist.nitems, tmp, sizeof(uint32));
272 tmp += sizeof(uint32);
273
274 if (ndist.magic != STATS_NDISTINCT_MAGIC)
275 elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
278 elog(ERROR, "invalid ndistinct type %d (expected %d)",
280 if (ndist.nitems == 0)
281 elog(ERROR, "invalid zero-length item array in MVNDistinct");
282
283 /* what minimum bytea size do we expect for those parameters */
286 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
288
289 /*
290 * Allocate space for the ndistinct items (no space for each item's
291 * attnos: those live in AttrNumber arrays allocated separately, below)
292 */
293 ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
294 (ndist.nitems * sizeof(MVNDistinctItem)));
295 ndistinct->magic = ndist.magic;
296 ndistinct->type = ndist.type;
297 ndistinct->nitems = ndist.nitems;
298
299 for (i = 0; i < ndistinct->nitems; i++)
300 {
301 MVNDistinctItem *item = &ndistinct->items[i];
302
303 /* ndistinct value */
304 memcpy(&item->ndistinct, tmp, sizeof(double));
305 tmp += sizeof(double);
306
307 /* number of attributes */
308 memcpy(&item->nattributes, tmp, sizeof(int));
309 tmp += sizeof(int);
/*
 * NOTE(review): the attribute count read from the bytea is range-checked
 * only by this Assert before being used as a palloc/memcpy size; presumably
 * nothing else bounds it in builds without assertions -- confirm.
 */
310 Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
311
/* each item owns its own attributes array */
312 item->attributes
313 = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
314
315 memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
316 tmp += sizeof(AttrNumber) * item->nattributes;
317
318 /* still within the bytea */
319 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
320 }
321
322 /* we should have consumed the whole bytea exactly */
323 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
324
325 return ndistinct;
326}
#define SizeOfHeader
Definition mvdistinct.c:42
#define MinSizeOfItems(nitems)
Definition mvdistinct.c:52

References Assert, MVNDistinctItem::attributes, data, elog, ERROR, fb(), i, MVNDistinct::items, items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY(), VARSIZE_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by extended_statistics_update(), pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_free()

void statext_ndistinct_free ( MVNDistinct * ndistinct)
extern

Definition at line 332 of file mvdistinct.c.

/*
 * Release an MVNDistinct: pfree each item's attributes array, then the
 * MVNDistinct struct itself.
 *
 * ndistinct is dereferenced unconditionally, so callers must not pass NULL.
 */
333{
/* the per-item arrays must go first: they are reached through ndistinct */
334 for (int i = 0; i < ndistinct->nitems; i++)
335 pfree(ndistinct->items[i].attributes);
336 pfree(ndistinct);
337}

References MVNDistinctItem::attributes, i, MVNDistinct::items, MVNDistinct::nitems, and pfree().

Referenced by extended_statistics_update().

◆ statext_ndistinct_serialize()

bytea * statext_ndistinct_serialize ( MVNDistinct * ndistinct)
extern

Definition at line 176 of file mvdistinct.c.

/*
 * Serialize an MVNDistinct into a newly palloc'd bytea.
 *
 * Output layout after the varlena header: magic, type and nitems (uint32
 * each), then for every item a double ndistinct, an int attribute count and
 * that many AttrNumber values.  All fields are written with memcpy.
 *
 * NOTE(review): this rendered listing omits source lines 184, 190 and 204,
 * so the initial value of len and the varlena size assignment are not
 * visible here; the "References" list that follows the listing suggests
 * SizeOfHeader, VARHDRSZ and SET_VARSIZE() are involved -- confirm against
 * mvdistinct.c.
 */
177{
178 int i;
179 bytea *output;
180 char *tmp;
181 Size len;
182
183 Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
185
186 /*
187 * Base size is size of scalar fields in the struct, plus one base struct
188 * for each item, including number of items for each.
189 */
191
192 /* and also include space for the actual attribute numbers */
193 for (i = 0; i < ndistinct->nitems; i++)
194 {
195 int nmembers;
196
197 nmembers = ndistinct->items[i].nattributes;
198 Assert(nmembers >= 2);
199
200 len += SizeOfItem(nmembers);
201 }
202
203 output = (bytea *) palloc(len);
205
/* write position: start of the data area, just past the varlena header */
206 tmp = VARDATA(output);
207
208 /* Store the base struct values (magic, type, nitems) */
209 memcpy(tmp, &ndistinct->magic, sizeof(uint32));
210 tmp += sizeof(uint32);
211 memcpy(tmp, &ndistinct->type, sizeof(uint32));
212 tmp += sizeof(uint32);
213 memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
214 tmp += sizeof(uint32);
215
216 /*
217 * store the ndistinct estimate, number of attributes and attribute numbers for each entry
218 */
219 for (i = 0; i < ndistinct->nitems; i++)
220 {
/* struct copy; item.attributes still points at the original array */
221 MVNDistinctItem item = ndistinct->items[i];
222 int nmembers = item.nattributes;
223
224 memcpy(tmp, &item.ndistinct, sizeof(double));
225 tmp += sizeof(double);
226 memcpy(tmp, &nmembers, sizeof(int));
227 tmp += sizeof(int);
228
229 memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
230 tmp += nmembers * sizeof(AttrNumber);
231
232 /* protect against overflows */
233 Assert(tmp <= ((char *) output + len));
234 }
235
236 /* check we used exactly the expected space */
237 Assert(tmp == ((char *) output + len));
238
239 return output;
240}
#define SizeOfItem(natts)
Definition mvdistinct.c:45

References Assert, MVNDistinctItem::attributes, fb(), i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE(), SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA(), and VARHDRSZ.

Referenced by build_mvndistinct(), and statext_store().

◆ statext_ndistinct_validate()

bool statext_ndistinct_validate ( const MVNDistinct * ndistinct,
const int2vector * stxkeys,
int  numexprs,
int  elevel 
)
extern

Definition at line 352 of file mvdistinct.c.

/*
 * Check every attribute number stored in an MVNDistinct against the
 * extended statistics object definition.
 *
 * A positive attnum is accepted only if it appears in stxkeys->values[];
 * a negative attnum is accepted only if it is not below
 * attnum_expr_lowbound; zero is always rejected.
 *
 * Returns true when all attributes pass.  On the first invalid attribute
 * the problem is reported through ereport() at the given elevel and, if
 * ereport() returns (presumably when elevel is below ERROR), false is
 * returned.
 *
 * NOTE(review): this rendered listing omits source lines 356, 369 and 393,
 * so the definitions of attnum_expr_lowbound and attnum and the errcode are
 * not visible here; attnum_expr_lowbound is presumably derived from
 * numexprs -- confirm against mvdistinct.c.
 */
355{
357
358 /* Scan through each MVNDistinct entry */
359 for (int i = 0; i < ndistinct->nitems; i++)
360 {
361 MVNDistinctItem item = ndistinct->items[i];
362
363 /*
364 * Cross-check each attribute in a MVNDistinct entry with the extended
365 * stats object definition.
366 */
367 for (int j = 0; j < item.nattributes; j++)
368 {
/* stays false unless one of the branches below recognizes attnum */
370 bool ok = false;
371
372 if (attnum > 0)
373 {
374 /* attribute number in stxkeys */
375 for (int k = 0; k < stxkeys->dim1; k++)
376 {
377 if (attnum == stxkeys->values[k])
378 {
379 ok = true;
380 break;
381 }
382 }
383 }
384 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
385 {
386 /* attribute number for an expression */
387 ok = true;
388 }
389
/* also reached for attnum == 0, which matches neither branch above */
390 if (!ok)
391 {
392 ereport(elevel,
394 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
395 "pg_ndistinct", attnum)));
396 return false;
397 }
398 }
399 }
400
401 return true;
402}

References attnum, MVNDistinctItem::attributes, ereport, errcode(), errmsg(), fb(), i, MVNDistinct::items, j, MVNDistinctItem::nattributes, and MVNDistinct::nitems.

Referenced by extended_statistics_update().