PostgreSQL Source Code git master
Loading...
Searching...
No Matches
mvdistinct.c File Reference
#include "postgres.h"
#include <math.h>
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "statistics/extended_stats_internal.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
#include "varatt.h"
Include dependency graph for mvdistinct.c:

Go to the source code of this file.

Data Structures

struct  CombinationGenerator
 

Macros

#define SizeOfHeader   (3 * sizeof(uint32))
 
#define SizeOfItem(natts)    (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
 
#define MinSizeOfItem   SizeOfItem(2)
 
#define MinSizeOfItems(nitems)    (SizeOfHeader + (nitems) * MinSizeOfItem)
 

Typedefs

typedef struct CombinationGenerator CombinationGenerator
 

Functions

static double ndistinct_for_combination (double totalrows, StatsBuildData *data, int k, int *combination)
 
static double estimate_ndistinct (double totalrows, int numrows, int d, int f1)
 
static int n_choose_k (int n, int k)
 
static int num_combinations (int n)
 
static CombinationGeneratorgenerator_init (int n, int k)
 
static void generator_free (CombinationGenerator *state)
 
static intgenerator_next (CombinationGenerator *state)
 
static void generate_combinations (CombinationGenerator *state)
 
MVNDistinctstatext_ndistinct_build (double totalrows, StatsBuildData *data)
 
MVNDistinctstatext_ndistinct_load (Oid mvoid, bool inh)
 
byteastatext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinctstatext_ndistinct_deserialize (bytea *data)
 
void statext_ndistinct_free (MVNDistinct *ndistinct)
 
bool statext_ndistinct_validate (const MVNDistinct *ndistinct, const int2vector *stxkeys, int numexprs, int elevel)
 
static void generate_combinations_recurse (CombinationGenerator *state, int index, int start, int *current)
 

Macro Definition Documentation

◆ MinSizeOfItem

#define MinSizeOfItem   SizeOfItem(2)

Definition at line 49 of file mvdistinct.c.

◆ MinSizeOfItems

#define MinSizeOfItems (   nitems)     (SizeOfHeader + (nitems) * MinSizeOfItem)

Definition at line 52 of file mvdistinct.c.

58{
59 int k; /* size of the combination */
60 int n; /* total number of elements */
61 int current; /* index of the next combination to return */
62 int ncombinations; /* number of combinations (size of array) */
63 int *combinations; /* array of pre-built combinations */
65
66static CombinationGenerator *generator_init(int n, int k);
70
71
72/*
73 * statext_ndistinct_build
74 * Compute ndistinct coefficient for the combination of attributes.
75 *
76 * This computes the ndistinct estimate using the same estimator used
77 * in analyze.c and then computes the coefficient.
78 *
79 * To handle expressions easily, we treat them as system attributes with
80 * negative attnums, and offset everything by number of expressions to
81 * allow using Bitmapsets.
82 */
85{
86 MVNDistinct *result;
87 int k;
88 int itemcnt;
89 int numattrs = data->nattnums;
91
92 result = palloc(offsetof(MVNDistinct, items) +
93 numcombs * sizeof(MVNDistinctItem));
96 result->nitems = numcombs;
97
98 itemcnt = 0;
99 for (k = 2; k <= numattrs; k++)
100 {
101 int *combination;
103
104 /* generate combinations of K out of N elements */
106
108 {
109 MVNDistinctItem *item = &result->items[itemcnt];
110 int j;
111
113 item->nattributes = k;
114
115 /* translate the indexes to attnums */
116 for (j = 0; j < k; j++)
117 {
118 item->attributes[j] = data->attnums[combination[j]];
119
121 }
122
123 item->ndistinct =
125
126 itemcnt++;
128 }
129
131 }
132
133 /* must consume exactly the whole output array */
134 Assert(itemcnt == result->nitems);
135
136 return result;
137}
138
139/*
140 * statext_ndistinct_load
141 * Load the ndistinct value for the indicated pg_statistic_ext tuple
142 */
145{
146 MVNDistinct *result;
147 bool isnull;
148 Datum ndist;
149 HeapTuple htup;
150
153 if (!HeapTupleIsValid(htup))
154 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
155
158 if (isnull)
159 elog(ERROR,
160 "requested statistics kind \"%c\" is not yet built for statistics object %u",
162
164
165 ReleaseSysCache(htup);
166
167 return result;
168}
169
170/*
171 * statext_ndistinct_serialize
172 * serialize ndistinct to the on-disk bytea format
173 */
174bytea *
176{
177 int i;
178 bytea *output;
179 char *tmp;
180 Size len;
181
182 Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
184
185 /*
186 * Base size is size of scalar fields in the struct, plus one base struct
187 * for each item, including number of items for each.
188 */
190
191 /* and also include space for the actual attribute numbers */
192 for (i = 0; i < ndistinct->nitems; i++)
193 {
194 int nmembers;
195
196 nmembers = ndistinct->items[i].nattributes;
197 Assert(nmembers >= 2);
198
199 len += SizeOfItem(nmembers);
200 }
201
202 output = (bytea *) palloc(len);
204
205 tmp = VARDATA(output);
206
207 /* Store the base struct values (magic, type, nitems) */
208 memcpy(tmp, &ndistinct->magic, sizeof(uint32));
209 tmp += sizeof(uint32);
210 memcpy(tmp, &ndistinct->type, sizeof(uint32));
211 tmp += sizeof(uint32);
212 memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
213 tmp += sizeof(uint32);
214
215 /*
216 * store number of attributes and attribute numbers for each entry
217 */
218 for (i = 0; i < ndistinct->nitems; i++)
219 {
220 MVNDistinctItem item = ndistinct->items[i];
221 int nmembers = item.nattributes;
222
223 memcpy(tmp, &item.ndistinct, sizeof(double));
224 tmp += sizeof(double);
225 memcpy(tmp, &nmembers, sizeof(int));
226 tmp += sizeof(int);
227
228 memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
229 tmp += nmembers * sizeof(AttrNumber);
230
231 /* protect against overflows */
232 Assert(tmp <= ((char *) output + len));
233 }
234
235 /* check we used exactly the expected space */
236 Assert(tmp == ((char *) output + len));
237
238 return output;
239}
240
241/*
242 * statext_ndistinct_deserialize
243 * Read an on-disk bytea format MVNDistinct to in-memory format
244 */
247{
248 int i;
251 MVNDistinct *ndistinct;
252 char *tmp;
253
254 if (data == NULL)
255 return NULL;
256
257 /* we expect at least the basic fields of MVNDistinct struct */
259 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
261
262 /* initialize pointer to the data part (skip the varlena header) */
263 tmp = VARDATA_ANY(data);
264
265 /* read the header fields and perform basic sanity checks */
266 memcpy(&ndist.magic, tmp, sizeof(uint32));
267 tmp += sizeof(uint32);
268 memcpy(&ndist.type, tmp, sizeof(uint32));
269 tmp += sizeof(uint32);
270 memcpy(&ndist.nitems, tmp, sizeof(uint32));
271 tmp += sizeof(uint32);
272
273 if (ndist.magic != STATS_NDISTINCT_MAGIC)
274 elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
277 elog(ERROR, "invalid ndistinct type %d (expected %d)",
279 if (ndist.nitems == 0)
280 elog(ERROR, "invalid zero-length item array in MVNDistinct");
281
282 /* what minimum bytea size do we expect for those parameters */
285 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
287
288 /*
289 * Allocate space for the ndistinct items (no space for each item's
290 * attnos: those live in bitmapsets allocated separately)
291 */
292 ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
293 (ndist.nitems * sizeof(MVNDistinctItem)));
294 ndistinct->magic = ndist.magic;
295 ndistinct->type = ndist.type;
296 ndistinct->nitems = ndist.nitems;
297
298 for (i = 0; i < ndistinct->nitems; i++)
299 {
300 MVNDistinctItem *item = &ndistinct->items[i];
301
302 /* ndistinct value */
303 memcpy(&item->ndistinct, tmp, sizeof(double));
304 tmp += sizeof(double);
305
306 /* number of attributes */
307 memcpy(&item->nattributes, tmp, sizeof(int));
308 tmp += sizeof(int);
309 Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
310
311 item->attributes
312 = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
313
314 memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
315 tmp += sizeof(AttrNumber) * item->nattributes;
316
317 /* still within the bytea */
318 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
319 }
320
321 /* we should have consumed the whole bytea exactly */
322 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
323
324 return ndistinct;
325}
326
327/*
328 * Free allocations of a MVNDistinct.
329 */
330void
332{
333 for (int i = 0; i < ndistinct->nitems; i++)
334 pfree(ndistinct->items[i].attributes);
335 pfree(ndistinct);
336}
337
338/*
339 * Validate a set of MVNDistincts against the extended statistics object
340 * definition.
341 *
342 * Every MVNDistinctItem must be checked to ensure that the attnums in the
343 * attributes list correspond to attnums/expressions defined by the extended
344 * statistics object.
345 *
346 * Positive attnums are attributes which must be found in the stxkeys,
347 * while negative attnums correspond to an expression number, no attribute
348 * number can be below (0 - numexprs).
349 */
350bool
352 const int2vector *stxkeys,
353 int numexprs, int elevel)
354{
356
357 /* Scan through each MVNDistinct entry */
358 for (int i = 0; i < ndistinct->nitems; i++)
359 {
360 MVNDistinctItem item = ndistinct->items[i];
361
362 /*
363 * Cross-check each attribute in a MVNDistinct entry with the extended
364 * stats object definition.
365 */
366 for (int j = 0; j < item.nattributes; j++)
367 {
369 bool ok = false;
370
371 if (attnum > 0)
372 {
373 /* attribute number in stxkeys */
374 for (int k = 0; k < stxkeys->dim1; k++)
375 {
376 if (attnum == stxkeys->values[k])
377 {
378 ok = true;
379 break;
380 }
381 }
382 }
383 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
384 {
385 /* attribute number for an expression */
386 ok = true;
387 }
388
389 if (!ok)
390 {
391 ereport(elevel,
393 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
394 "pg_ndistinct", attnum)));
395 return false;
396 }
397 }
398 }
399
400 return true;
401}
402
403/*
404 * ndistinct_for_combination
405 * Estimates number of distinct values in a combination of columns.
406 *
407 * This uses the same ndistinct estimator as compute_scalar_stats() in
408 * ANALYZE, i.e.,
409 * n*d / (n - f1 + f1*n/N)
410 *
411 * except that instead of values in a single column we are dealing with
412 * combination of multiple columns.
413 */
414static double
416 int k, int *combination)
417{
418 int i,
419 j;
420 int f1,
421 cnt,
422 d;
423 bool *isnull;
424 Datum *values;
427 int numrows = data->numrows;
428
429 mss = multi_sort_init(k);
430
431 /*
432 * In order to determine the number of distinct elements, create separate
433 * values[]/isnull[] arrays with all the data we have, then sort them
434 * using the specified column combination as dimensions. We could try to
435 * sort in place, but it'd probably be more complex and bug-prone.
436 */
437 items = palloc_array(SortItem, numrows);
438 values = palloc0_array(Datum, numrows * k);
439 isnull = palloc0_array(bool, numrows * k);
440
441 for (i = 0; i < numrows; i++)
442 {
443 items[i].values = &values[i * k];
444 items[i].isnull = &isnull[i * k];
445 }
446
447 /*
448 * For each dimension, set up sort-support and fill in the values from the
449 * sample data.
450 *
451 * We use the column data types' default sort operators and collations;
452 * perhaps at some point it'd be worth using column-specific collations?
453 */
454 for (i = 0; i < k; i++)
455 {
456 Oid typid;
460
461 typid = colstat->attrtypid;
462 collid = colstat->attrcollid;
463
465 if (type->lt_opr == InvalidOid) /* shouldn't happen */
466 elog(ERROR, "cache lookup failed for ordering operator for type %u",
467 typid);
468
469 /* prepare the sort function for this dimension */
471
472 /* accumulate all the data for this dimension into the arrays */
473 for (j = 0; j < numrows; j++)
474 {
475 items[j].values[i] = data->values[combination[i]][j];
476 items[j].isnull[i] = data->nulls[combination[i]][j];
477 }
478 }
479
480 /* We can sort the array now ... */
481 qsort_interruptible(items, numrows, sizeof(SortItem),
483
484 /* ... and count the number of distinct combinations */
485
486 f1 = 0;
487 cnt = 1;
488 d = 1;
489 for (i = 1; i < numrows; i++)
490 {
491 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
492 {
493 if (cnt == 1)
494 f1 += 1;
495
496 d++;
497 cnt = 0;
498 }
499
500 cnt += 1;
501 }
502
503 if (cnt == 1)
504 f1 += 1;
505
506 return estimate_ndistinct(totalrows, numrows, d, f1);
507}
508
509/* The Duj1 estimator (already used in analyze.c). */
510static double
511estimate_ndistinct(double totalrows, int numrows, int d, int f1)
512{
513 double numer,
514 denom,
515 ndistinct;
516
517 numer = (double) numrows * (double) d;
518
519 denom = (double) (numrows - f1) +
520 (double) f1 * (double) numrows / totalrows;
521
522 ndistinct = numer / denom;
523
524 /* Clamp to sane range in case of roundoff error */
525 if (ndistinct < (double) d)
526 ndistinct = (double) d;
527
528 if (ndistinct > totalrows)
529 ndistinct = totalrows;
530
531 return floor(ndistinct + 0.5);
532}
533
534/*
535 * n_choose_k
536 * computes binomial coefficients using an algorithm that is both
537 * efficient and prevents overflows
538 */
539static int
540n_choose_k(int n, int k)
541{
542 int d,
543 r;
544
545 Assert((k > 0) && (n >= k));
546
547 /* use symmetry of the binomial coefficients */
548 k = Min(k, n - k);
549
550 r = 1;
551 for (d = 1; d <= k; ++d)
552 {
553 r *= n--;
554 r /= d;
555 }
556
557 return r;
558}
559
560/*
561 * num_combinations
562 * number of combinations, excluding single-value combinations
563 */
564static int
565num_combinations(int n)
566{
567 return (1 << n) - (n + 1);
568}
569
570/*
571 * generator_init
572 * initialize the generator of combinations
573 *
574 * The generator produces combinations of K elements in the interval (0..N).
575 * We prebuild all the combinations in this method, which is simpler than
576 * generating them on the fly.
577 */
579generator_init(int n, int k)
580{
582
583 Assert((n >= k) && (k > 0));
584
585 /* allocate the generator state as a single chunk of memory */
587
588 state->ncombinations = n_choose_k(n, k);
589
590 /* pre-allocate space for all combinations */
591 state->combinations = palloc_array(int, k * state->ncombinations);
592
593 state->current = 0;
594 state->k = k;
595 state->n = n;
596
597 /* now actually pre-generate all the combinations of K elements */
599
600 /* make sure we got the expected number of combinations */
601 Assert(state->current == state->ncombinations);
602
603 /* reset the number, so we start with the first one */
604 state->current = 0;
605
606 return state;
607}
608
609/*
610 * generator_next
611 * returns the next combination from the prebuilt list
612 *
613 * Returns a combination of K array indexes (0 .. N), as specified to
614 * generator_init), or NULL when there are no more combination.
615 */
616static int *
618{
619 if (state->current == state->ncombinations)
620 return NULL;
621
622 return &state->combinations[state->k * state->current++];
623}
624
625/*
626 * generator_free
627 * free the internal state of the generator
628 *
629 * Releases the generator internal state (pre-built combinations).
630 */
631static void
633{
634 pfree(state->combinations);
635 pfree(state);
636}
637
638/*
639 * generate_combinations_recurse
640 * given a prefix, generate all possible combinations
641 *
642 * Given a prefix (first few elements of the combination), generate following
643 * elements recursively. We generate the combinations in lexicographic order,
644 * which eliminates permutations of the same combination.
645 */
646static void
648 int index, int start, int *current)
649{
650 /* If we haven't filled all the elements, simply recurse. */
651 if (index < state->k)
652 {
653 int i;
654
655 /*
656 * The values have to be in ascending order, so make sure we start
657 * with the value passed by parameter.
658 */
659
660 for (i = start; i < state->n; i++)
661 {
662 current[index] = i;
663 generate_combinations_recurse(state, (index + 1), (i + 1), current);
664 }
665
666 return;
667 }
668 else
669 {
670 /* we got a valid combination, add it to the array */
671 memcpy(&state->combinations[(state->k * state->current)],
672 current, state->k * sizeof(int));
673 state->current++;
674 }
675}
676
677/*
678 * generate_combinations
679 * generate all k-combinations of N elements
680 */
681static void
683{
684 int *current = palloc0_array(int, state->k);
685
686 generate_combinations_recurse(state, 0, 0, current);
687
688 pfree(current);
689}
int16 AttrNumber
Definition attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition attnum.h:34
static Datum values[MAXATTR]
Definition bootstrap.c:155
#define Min(x, y)
Definition c.h:997
#define MAXALIGN(LEN)
Definition c.h:826
#define VARHDRSZ
Definition c.h:711
#define Assert(condition)
Definition c.h:873
uint32_t uint32
Definition c.h:546
size_t Size
Definition c.h:619
Oid collid
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
int multi_sort_compare(const void *a, const void *b, void *arg)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define DatumGetByteaPP(X)
Definition fmgr.h:292
return str start
#define HeapTupleIsValid(tuple)
Definition htup.h:78
#define nitems(x)
Definition indent.h:31
FILE * output
int j
Definition isn.c:78
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
static int n_choose_k(int n, int k)
Definition mvdistinct.c:541
#define SizeOfHeader
Definition mvdistinct.c:42
void statext_ndistinct_free(MVNDistinct *ndistinct)
Definition mvdistinct.c:332
static double estimate_ndistinct(double totalrows, int numrows, int d, int f1)
Definition mvdistinct.c:512
static void generate_combinations_recurse(CombinationGenerator *state, int index, int start, int *current)
Definition mvdistinct.c:648
MVNDistinct * statext_ndistinct_deserialize(bytea *data)
Definition mvdistinct.c:247
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition mvdistinct.c:416
bytea * statext_ndistinct_serialize(MVNDistinct *ndistinct)
Definition mvdistinct.c:176
static void generate_combinations(CombinationGenerator *state)
Definition mvdistinct.c:683
MVNDistinct * statext_ndistinct_load(Oid mvoid, bool inh)
Definition mvdistinct.c:145
static int num_combinations(int n)
Definition mvdistinct.c:566
MVNDistinct * statext_ndistinct_build(double totalrows, StatsBuildData *data)
Definition mvdistinct.c:85
#define SizeOfItem(natts)
Definition mvdistinct.c:45
static void generator_free(CombinationGenerator *state)
Definition mvdistinct.c:633
static CombinationGenerator * generator_init(int n, int k)
Definition mvdistinct.c:580
#define MinSizeOfItems(nitems)
Definition mvdistinct.c:52
bool statext_ndistinct_validate(const MVNDistinct *ndistinct, const int2vector *stxkeys, int numexprs, int elevel)
Definition mvdistinct.c:352
static int * generator_next(CombinationGenerator *state)
Definition mvdistinct.c:618
int16 attnum
const void size_t len
const void * data
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:262
uint64_t Datum
Definition postgres.h:70
#define InvalidOid
unsigned int Oid
static int fb(int x)
int f1[ARRAY_SIZE]
#define STATS_NDISTINCT_MAGIC
Definition statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition statistics.h:23
#define STATS_MAX_DIMENSIONS
Definition statistics.h:19
AttrNumber * attributes
Definition statistics.h:30
uint32 nitems
Definition statistics.h:38
uint32 type
Definition statistics.h:37
uint32 magic
Definition statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:39
Oid attrtypid
Definition vacuum.h:126
Definition type.h:96
Definition c.h:706
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition syscache.c:230
static ItemArray items
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:386
#define TYPECACHE_LT_OPR
Definition typcache.h:139
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
const char * type

◆ SizeOfHeader

#define SizeOfHeader   (3 * sizeof(uint32))

Definition at line 42 of file mvdistinct.c.

◆ SizeOfItem

#define SizeOfItem (   natts)     (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))

Definition at line 45 of file mvdistinct.c.

Typedef Documentation

◆ CombinationGenerator

Function Documentation

◆ estimate_ndistinct()

static double estimate_ndistinct ( double  totalrows,
int  numrows,
int  d,
int  f1 
)
static

Definition at line 512 of file mvdistinct.c.

513{
514 double numer,
515 denom,
516 ndistinct;
517
518 numer = (double) numrows * (double) d;
519
520 denom = (double) (numrows - f1) +
521 (double) f1 * (double) numrows / totalrows;
522
523 ndistinct = numer / denom;
524
525 /* Clamp to sane range in case of roundoff error */
526 if (ndistinct < (double) d)
527 ndistinct = (double) d;
528
529 if (ndistinct > totalrows)
530 ndistinct = totalrows;
531
532 return floor(ndistinct + 0.5);
533}

References f1, and fb().

Referenced by ndistinct_for_combination().

◆ generate_combinations()

static void generate_combinations ( CombinationGenerator state)
static

Definition at line 683 of file mvdistinct.c.

684{
685 int *current = palloc0_array(int, state->k);
686
687 generate_combinations_recurse(state, 0, 0, current);
688
689 pfree(current);
690}

References generate_combinations_recurse(), palloc0_array, and pfree().

Referenced by generator_init().

◆ generate_combinations_recurse()

static void generate_combinations_recurse ( CombinationGenerator state,
int  index,
int  start,
int current 
)
static

Definition at line 648 of file mvdistinct.c.

650{
651 /* If we haven't filled all the elements, simply recurse. */
652 if (index < state->k)
653 {
654 int i;
655
656 /*
657 * The values have to be in ascending order, so make sure we start
658 * with the value passed by parameter.
659 */
660
661 for (i = start; i < state->n; i++)
662 {
663 current[index] = i;
664 generate_combinations_recurse(state, (index + 1), (i + 1), current);
665 }
666
667 return;
668 }
669 else
670 {
671 /* we got a valid combination, add it to the array */
672 memcpy(&state->combinations[(state->k * state->current)],
673 current, state->k * sizeof(int));
674 state->current++;
675 }
676}

References fb(), generate_combinations_recurse(), i, and start.

Referenced by generate_combinations(), and generate_combinations_recurse().

◆ generator_free()

static void generator_free ( CombinationGenerator state)
static

Definition at line 633 of file mvdistinct.c.

634{
635 pfree(state->combinations);
636 pfree(state);
637}

References pfree().

Referenced by statext_ndistinct_build().

◆ generator_init()

static CombinationGenerator * generator_init ( int  n,
int  k 
)
static

Definition at line 580 of file mvdistinct.c.

581{
583
584 Assert((n >= k) && (k > 0));
585
586 /* allocate the generator state as a single chunk of memory */
588
589 state->ncombinations = n_choose_k(n, k);
590
591 /* pre-allocate space for all combinations */
592 state->combinations = palloc_array(int, k * state->ncombinations);
593
594 state->current = 0;
595 state->k = k;
596 state->n = n;
597
598 /* now actually pre-generate all the combinations of K elements */
600
601 /* make sure we got the expected number of combinations */
602 Assert(state->current == state->ncombinations);
603
604 /* reset the number, so we start with the first one */
605 state->current = 0;
606
607 return state;
608}

References Assert, generate_combinations(), n_choose_k(), palloc_array, and palloc_object.

Referenced by statext_ndistinct_build().

◆ generator_next()

static int * generator_next ( CombinationGenerator state)
static

Definition at line 618 of file mvdistinct.c.

619{
620 if (state->current == state->ncombinations)
621 return NULL;
622
623 return &state->combinations[state->k * state->current++];
624}

References fb().

Referenced by statext_ndistinct_build().

◆ n_choose_k()

static int n_choose_k ( int  n,
int  k 
)
static

Definition at line 541 of file mvdistinct.c.

542{
543 int d,
544 r;
545
546 Assert((k > 0) && (n >= k));
547
548 /* use symmetry of the binomial coefficients */
549 k = Min(k, n - k);
550
551 r = 1;
552 for (d = 1; d <= k; ++d)
553 {
554 r *= n--;
555 r /= d;
556 }
557
558 return r;
559}

References Assert, and Min.

Referenced by generator_init().

◆ ndistinct_for_combination()

static double ndistinct_for_combination ( double  totalrows,
StatsBuildData data,
int  k,
int combination 
)
static

Definition at line 416 of file mvdistinct.c.

418{
419 int i,
420 j;
421 int f1,
422 cnt,
423 d;
424 bool *isnull;
425 Datum *values;
428 int numrows = data->numrows;
429
430 mss = multi_sort_init(k);
431
432 /*
433 * In order to determine the number of distinct elements, create separate
434 * values[]/isnull[] arrays with all the data we have, then sort them
435 * using the specified column combination as dimensions. We could try to
436 * sort in place, but it'd probably be more complex and bug-prone.
437 */
438 items = palloc_array(SortItem, numrows);
439 values = palloc0_array(Datum, numrows * k);
440 isnull = palloc0_array(bool, numrows * k);
441
442 for (i = 0; i < numrows; i++)
443 {
444 items[i].values = &values[i * k];
445 items[i].isnull = &isnull[i * k];
446 }
447
448 /*
449 * For each dimension, set up sort-support and fill in the values from the
450 * sample data.
451 *
452 * We use the column data types' default sort operators and collations;
453 * perhaps at some point it'd be worth using column-specific collations?
454 */
455 for (i = 0; i < k; i++)
456 {
457 Oid typid;
461
462 typid = colstat->attrtypid;
463 collid = colstat->attrcollid;
464
466 if (type->lt_opr == InvalidOid) /* shouldn't happen */
467 elog(ERROR, "cache lookup failed for ordering operator for type %u",
468 typid);
469
470 /* prepare the sort function for this dimension */
472
473 /* accumulate all the data for this dimension into the arrays */
474 for (j = 0; j < numrows; j++)
475 {
476 items[j].values[i] = data->values[combination[i]][j];
477 items[j].isnull[i] = data->nulls[combination[i]][j];
478 }
479 }
480
481 /* We can sort the array now ... */
482 qsort_interruptible(items, numrows, sizeof(SortItem),
484
485 /* ... and count the number of distinct combinations */
486
487 f1 = 0;
488 cnt = 1;
489 d = 1;
490 for (i = 1; i < numrows; i++)
491 {
492 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
493 {
494 if (cnt == 1)
495 f1 += 1;
496
497 d++;
498 cnt = 0;
499 }
500
501 cnt += 1;
502 }
503
504 if (cnt == 1)
505 f1 += 1;
506
507 return estimate_ndistinct(totalrows, numrows, d, f1);
508}

References VacAttrStats::attrtypid, collid, data, elog, ERROR, estimate_ndistinct(), f1, fb(), i, InvalidOid, items, j, lookup_type_cache(), multi_sort_add_dimension(), multi_sort_compare(), multi_sort_init(), palloc0_array, palloc_array, qsort_interruptible(), type, TYPECACHE_LT_OPR, and values.

Referenced by statext_ndistinct_build().

◆ num_combinations()

static int num_combinations ( int  n)
static

Definition at line 566 of file mvdistinct.c.

567{
568 return (1 << n) - (n + 1);
569}

Referenced by statext_ndistinct_build().

◆ statext_ndistinct_build()

MVNDistinct * statext_ndistinct_build ( double  totalrows,
StatsBuildData data 
)

Definition at line 85 of file mvdistinct.c.

86{
87 MVNDistinct *result;
88 int k;
89 int itemcnt;
90 int numattrs = data->nattnums;
92
93 result = palloc(offsetof(MVNDistinct, items) +
94 numcombs * sizeof(MVNDistinctItem));
97 result->nitems = numcombs;
98
99 itemcnt = 0;
100 for (k = 2; k <= numattrs; k++)
101 {
102 int *combination;
104
105 /* generate combinations of K out of N elements */
107
109 {
110 MVNDistinctItem *item = &result->items[itemcnt];
111 int j;
112
114 item->nattributes = k;
115
116 /* translate the indexes to attnums */
117 for (j = 0; j < k; j++)
118 {
119 item->attributes[j] = data->attnums[combination[j]];
120
122 }
123
124 item->ndistinct =
126
127 itemcnt++;
129 }
130
132 }
133
134 /* must consume exactly the whole output array */
135 Assert(itemcnt == result->nitems);
136
137 return result;
138}

References Assert, AttributeNumberIsValid, MVNDistinctItem::attributes, data, fb(), generator_free(), generator_init(), generator_next(), MVNDistinct::items, items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), palloc_array, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct * statext_ndistinct_deserialize ( bytea data)

Definition at line 247 of file mvdistinct.c.

248{
249 int i;
252 MVNDistinct *ndistinct;
253 char *tmp;
254
255 if (data == NULL)
256 return NULL;
257
258 /* we expect at least the basic fields of MVNDistinct struct */
260 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
262
263 /* initialize pointer to the data part (skip the varlena header) */
264 tmp = VARDATA_ANY(data);
265
266 /* read the header fields and perform basic sanity checks */
267 memcpy(&ndist.magic, tmp, sizeof(uint32));
268 tmp += sizeof(uint32);
269 memcpy(&ndist.type, tmp, sizeof(uint32));
270 tmp += sizeof(uint32);
271 memcpy(&ndist.nitems, tmp, sizeof(uint32));
272 tmp += sizeof(uint32);
273
274 if (ndist.magic != STATS_NDISTINCT_MAGIC)
275 elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
278 elog(ERROR, "invalid ndistinct type %d (expected %d)",
280 if (ndist.nitems == 0)
281 elog(ERROR, "invalid zero-length item array in MVNDistinct");
282
283 /* what minimum bytea size do we expect for those parameters */
286 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
288
289 /*
290 * Allocate space for the ndistinct items (no space for each item's
291 * attnos: those live in bitmapsets allocated separately)
292 */
293 ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
294 (ndist.nitems * sizeof(MVNDistinctItem)));
295 ndistinct->magic = ndist.magic;
296 ndistinct->type = ndist.type;
297 ndistinct->nitems = ndist.nitems;
298
299 for (i = 0; i < ndistinct->nitems; i++)
300 {
301 MVNDistinctItem *item = &ndistinct->items[i];
302
303 /* ndistinct value */
304 memcpy(&item->ndistinct, tmp, sizeof(double));
305 tmp += sizeof(double);
306
307 /* number of attributes */
308 memcpy(&item->nattributes, tmp, sizeof(int));
309 tmp += sizeof(int);
310 Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
311
312 item->attributes
313 = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
314
315 memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
316 tmp += sizeof(AttrNumber) * item->nattributes;
317
318 /* still within the bytea */
319 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
320 }
321
322 /* we should have consumed the whole bytea exactly */
323 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
324
325 return ndistinct;
326}

References Assert, MVNDistinctItem::attributes, data, elog, ERROR, fb(), i, MVNDistinct::items, items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY(), VARSIZE_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by extended_statistics_update(), pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_free()

void statext_ndistinct_free ( MVNDistinct ndistinct)

Definition at line 332 of file mvdistinct.c.

333{
334 for (int i = 0; i < ndistinct->nitems; i++)
335 pfree(ndistinct->items[i].attributes);
336 pfree(ndistinct);
337}

References MVNDistinctItem::attributes, i, MVNDistinct::items, MVNDistinct::nitems, and pfree().

Referenced by extended_statistics_update().

◆ statext_ndistinct_load()

MVNDistinct * statext_ndistinct_load ( Oid  mvoid,
bool  inh 
)

Definition at line 145 of file mvdistinct.c.

146{
147 MVNDistinct *result;
148 bool isnull;
149 Datum ndist;
150 HeapTuple htup;
151
154 if (!HeapTupleIsValid(htup))
155 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
156
159 if (isnull)
160 elog(ERROR,
161 "requested statistics kind \"%c\" is not yet built for statistics object %u",
163
165
166 ReleaseSysCache(htup);
167
168 return result;
169}

References BoolGetDatum(), DatumGetByteaPP, elog, ERROR, fb(), HeapTupleIsValid, ObjectIdGetDatum(), ReleaseSysCache(), SearchSysCache2(), statext_ndistinct_deserialize(), and SysCacheGetAttr().

Referenced by estimate_multivariate_ndistinct().

◆ statext_ndistinct_serialize()

bytea * statext_ndistinct_serialize ( MVNDistinct ndistinct)

Definition at line 176 of file mvdistinct.c.

177{
178 int i;
179 bytea *output;
180 char *tmp;
181 Size len;
182
183 Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
185
186 /*
187 * Base size is size of scalar fields in the struct, plus one base struct
188 * for each item, including number of items for each.
189 */
191
192 /* and also include space for the actual attribute numbers */
193 for (i = 0; i < ndistinct->nitems; i++)
194 {
195 int nmembers;
196
197 nmembers = ndistinct->items[i].nattributes;
198 Assert(nmembers >= 2);
199
200 len += SizeOfItem(nmembers);
201 }
202
203 output = (bytea *) palloc(len);
205
206 tmp = VARDATA(output);
207
208 /* Store the base struct values (magic, type, nitems) */
209 memcpy(tmp, &ndistinct->magic, sizeof(uint32));
210 tmp += sizeof(uint32);
211 memcpy(tmp, &ndistinct->type, sizeof(uint32));
212 tmp += sizeof(uint32);
213 memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
214 tmp += sizeof(uint32);
215
216 /*
217 * store number of attributes and attribute numbers for each entry
218 */
219 for (i = 0; i < ndistinct->nitems; i++)
220 {
221 MVNDistinctItem item = ndistinct->items[i];
222 int nmembers = item.nattributes;
223
224 memcpy(tmp, &item.ndistinct, sizeof(double));
225 tmp += sizeof(double);
226 memcpy(tmp, &nmembers, sizeof(int));
227 tmp += sizeof(int);
228
229 memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
230 tmp += nmembers * sizeof(AttrNumber);
231
232 /* protect against overflows */
233 Assert(tmp <= ((char *) output + len));
234 }
235
236 /* check we used exactly the expected space */
237 Assert(tmp == ((char *) output + len));
238
239 return output;
240}

References Assert, MVNDistinctItem::attributes, fb(), i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE(), SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA(), and VARHDRSZ.

Referenced by build_mvndistinct(), and statext_store().

◆ statext_ndistinct_validate()

bool statext_ndistinct_validate ( const MVNDistinct ndistinct,
const int2vector stxkeys,
int  numexprs,
int  elevel 
)

Definition at line 352 of file mvdistinct.c.

355{
357
358 /* Scan through each MVNDistinct entry */
359 for (int i = 0; i < ndistinct->nitems; i++)
360 {
361 MVNDistinctItem item = ndistinct->items[i];
362
363 /*
364 * Cross-check each attribute in a MVNDistinct entry with the extended
365 * stats object definition.
366 */
367 for (int j = 0; j < item.nattributes; j++)
368 {
370 bool ok = false;
371
372 if (attnum > 0)
373 {
374 /* attribute number in stxkeys */
375 for (int k = 0; k < stxkeys->dim1; k++)
376 {
377 if (attnum == stxkeys->values[k])
378 {
379 ok = true;
380 break;
381 }
382 }
383 }
384 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
385 {
386 /* attribute number for an expression */
387 ok = true;
388 }
389
390 if (!ok)
391 {
392 ereport(elevel,
394 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
395 "pg_ndistinct", attnum)));
396 return false;
397 }
398 }
399 }
400
401 return true;
402}

References attnum, MVNDistinctItem::attributes, ereport, errcode(), errmsg(), fb(), i, MVNDistinct::items, j, MVNDistinctItem::nattributes, and MVNDistinct::nitems.

Referenced by extended_statistics_update().