PostgreSQL Source Code git master
mvdistinct.c File Reference
#include "postgres.h"
#include <math.h>
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "statistics/extended_stats_internal.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
#include "varatt.h"
Include dependency graph for mvdistinct.c:

Go to the source code of this file.

Data Structures

struct  CombinationGenerator
 

Macros

#define SizeOfHeader   (3 * sizeof(uint32))
 
#define SizeOfItem(natts)    (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
 
#define MinSizeOfItem   SizeOfItem(2)
 
#define MinSizeOfItems(nitems)    (SizeOfHeader + (nitems) * MinSizeOfItem)
 

Typedefs

typedef struct CombinationGenerator CombinationGenerator
 

Functions

static double ndistinct_for_combination (double totalrows, StatsBuildData *data, int k, int *combination)
 
static double estimate_ndistinct (double totalrows, int numrows, int d, int f1)
 
static int n_choose_k (int n, int k)
 
static int num_combinations (int n)
 
static CombinationGeneratorgenerator_init (int n, int k)
 
static void generator_free (CombinationGenerator *state)
 
static int * generator_next (CombinationGenerator *state)
 
static void generate_combinations (CombinationGenerator *state)
 
MVNDistinctstatext_ndistinct_build (double totalrows, StatsBuildData *data)
 
MVNDistinctstatext_ndistinct_load (Oid mvoid, bool inh)
 
byteastatext_ndistinct_serialize (MVNDistinct *ndistinct)
 
MVNDistinctstatext_ndistinct_deserialize (bytea *data)
 
static void generate_combinations_recurse (CombinationGenerator *state, int index, int start, int *current)
 

Macro Definition Documentation

◆ MinSizeOfItem

#define MinSizeOfItem   SizeOfItem(2)

Definition at line 49 of file mvdistinct.c.

◆ MinSizeOfItems

#define MinSizeOfItems (   nitems)     (SizeOfHeader + (nitems) * MinSizeOfItem)

Definition at line 52 of file mvdistinct.c.

◆ SizeOfHeader

#define SizeOfHeader   (3 * sizeof(uint32))

Definition at line 42 of file mvdistinct.c.

◆ SizeOfItem

#define SizeOfItem (   natts)     (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))

Definition at line 45 of file mvdistinct.c.

Typedef Documentation

◆ CombinationGenerator

Function Documentation

◆ estimate_ndistinct()

static double estimate_ndistinct ( double  totalrows,
int  numrows,
int  d,
int  f1 
)
static

Definition at line 436 of file mvdistinct.c.

437{
438 double numer,
439 denom,
440 ndistinct;
441
442 numer = (double) numrows * (double) d;
443
444 denom = (double) (numrows - f1) +
445 (double) f1 * (double) numrows / totalrows;
446
447 ndistinct = numer / denom;
448
449 /* Clamp to sane range in case of roundoff error */
450 if (ndistinct < (double) d)
451 ndistinct = (double) d;
452
453 if (ndistinct > totalrows)
454 ndistinct = totalrows;
455
456 return floor(ndistinct + 0.5);
457}
int f1[ARRAY_SIZE]
Definition: sql-declare.c:113

References f1.

Referenced by ndistinct_for_combination().

◆ generate_combinations()

static void generate_combinations ( CombinationGenerator state)
static

Definition at line 607 of file mvdistinct.c.

608{
609 int *current = (int *) palloc0(sizeof(int) * state->k);
610
611 generate_combinations_recurse(state, 0, 0, current);
612
613 pfree(current);
614}
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
static void generate_combinations_recurse(CombinationGenerator *state, int index, int start, int *current)
Definition: mvdistinct.c:572
Definition: regguts.h:323

References generate_combinations_recurse(), palloc0(), and pfree().

Referenced by generator_init().

◆ generate_combinations_recurse()

static void generate_combinations_recurse ( CombinationGenerator state,
int  index,
int  start,
int *  current 
)
static

Definition at line 572 of file mvdistinct.c.

574{
575 /* If we haven't filled all the elements, simply recurse. */
576 if (index < state->k)
577 {
578 int i;
579
580 /*
581 * The values have to be in ascending order, so make sure we start
582 * with the value passed by parameter.
583 */
584
585 for (i = start; i < state->n; i++)
586 {
587 current[index] = i;
588 generate_combinations_recurse(state, (index + 1), (i + 1), current);
589 }
590
591 return;
592 }
593 else
594 {
595 /* we got a valid combination, add it to the array */
596 memcpy(&state->combinations[(state->k * state->current)],
597 current, state->k * sizeof(int));
598 state->current++;
599 }
600}
return str start
int i
Definition: isn.c:77
Definition: type.h:96

References generate_combinations_recurse(), i, and start.

Referenced by generate_combinations(), and generate_combinations_recurse().

◆ generator_free()

static void generator_free ( CombinationGenerator state)
static

Definition at line 557 of file mvdistinct.c.

558{
559 pfree(state->combinations);
560 pfree(state);
561}

References pfree().

Referenced by statext_ndistinct_build().

◆ generator_init()

static CombinationGenerator * generator_init ( int  n,
int  k 
)
static

Definition at line 504 of file mvdistinct.c.

505{
507
508 Assert((n >= k) && (k > 0));
509
510 /* allocate the generator state as a single chunk of memory */
512
513 state->ncombinations = n_choose_k(n, k);
514
515 /* pre-allocate space for all combinations */
516 state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
517
518 state->current = 0;
519 state->k = k;
520 state->n = n;
521
522 /* now actually pre-generate all the combinations of K elements */
524
525 /* make sure we got the expected number of combinations */
526 Assert(state->current == state->ncombinations);
527
528 /* reset the number, so we start with the first one */
529 state->current = 0;
530
531 return state;
532}
Assert(PointerIsAligned(start, uint64))
void * palloc(Size size)
Definition: mcxt.c:1365
static int n_choose_k(int n, int k)
Definition: mvdistinct.c:465
static void generate_combinations(CombinationGenerator *state)
Definition: mvdistinct.c:607

References Assert(), generate_combinations(), n_choose_k(), and palloc().

Referenced by statext_ndistinct_build().

◆ generator_next()

static int * generator_next ( CombinationGenerator state)
static

Definition at line 542 of file mvdistinct.c.

543{
544 if (state->current == state->ncombinations)
545 return NULL;
546
547 return &state->combinations[state->k * state->current++];
548}

Referenced by statext_ndistinct_build().

◆ n_choose_k()

static int n_choose_k ( int  n,
int  k 
)
static

Definition at line 465 of file mvdistinct.c.

466{
467 int d,
468 r;
469
470 Assert((k > 0) && (n >= k));
471
472 /* use symmetry of the binomial coefficients */
473 k = Min(k, n - k);
474
475 r = 1;
476 for (d = 1; d <= k; ++d)
477 {
478 r *= n--;
479 r /= d;
480 }
481
482 return r;
483}
#define Min(x, y)
Definition: c.h:1006

References Assert(), and Min.

Referenced by generator_init().

◆ ndistinct_for_combination()

static double ndistinct_for_combination ( double  totalrows,
StatsBuildData data,
int  k,
int *  combination 
)
static

Definition at line 340 of file mvdistinct.c.

342{
343 int i,
344 j;
345 int f1,
346 cnt,
347 d;
348 bool *isnull;
349 Datum *values;
352 int numrows = data->numrows;
353
354 mss = multi_sort_init(k);
355
356 /*
357 * In order to determine the number of distinct elements, create separate
358 * values[]/isnull[] arrays with all the data we have, then sort them
359 * using the specified column combination as dimensions. We could try to
360 * sort in place, but it'd probably be more complex and bug-prone.
361 */
362 items = (SortItem *) palloc(numrows * sizeof(SortItem));
363 values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
364 isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
365
366 for (i = 0; i < numrows; i++)
367 {
368 items[i].values = &values[i * k];
369 items[i].isnull = &isnull[i * k];
370 }
371
372 /*
373 * For each dimension, set up sort-support and fill in the values from the
374 * sample data.
375 *
376 * We use the column data types' default sort operators and collations;
377 * perhaps at some point it'd be worth using column-specific collations?
378 */
379 for (i = 0; i < k; i++)
380 {
381 Oid typid;
384 VacAttrStats *colstat = data->stats[combination[i]];
385
386 typid = colstat->attrtypid;
387 collid = colstat->attrcollid;
388
390 if (type->lt_opr == InvalidOid) /* shouldn't happen */
391 elog(ERROR, "cache lookup failed for ordering operator for type %u",
392 typid);
393
394 /* prepare the sort function for this dimension */
395 multi_sort_add_dimension(mss, i, type->lt_opr, collid);
396
397 /* accumulate all the data for this dimension into the arrays */
398 for (j = 0; j < numrows; j++)
399 {
400 items[j].values[i] = data->values[combination[i]][j];
401 items[j].isnull[i] = data->nulls[combination[i]][j];
402 }
403 }
404
405 /* We can sort the array now ... */
406 qsort_interruptible(items, numrows, sizeof(SortItem),
407 multi_sort_compare, mss);
408
409 /* ... and count the number of distinct combinations */
410
411 f1 = 0;
412 cnt = 1;
413 d = 1;
414 for (i = 1; i < numrows; i++)
415 {
416 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
417 {
418 if (cnt == 1)
419 f1 += 1;
420
421 d++;
422 cnt = 0;
423 }
424
425 cnt += 1;
426 }
427
428 if (cnt == 1)
429 f1 += 1;
430
431 return estimate_ndistinct(totalrows, numrows, d, f1);
432}
static Datum values[MAXATTR]
Definition: bootstrap.c:153
Oid collid
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
int multi_sort_compare(const void *a, const void *b, void *arg)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
int j
Definition: isn.c:78
static double estimate_ndistinct(double totalrows, int numrows, int d, int f1)
Definition: mvdistinct.c:436
const void * data
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
uint64_t Datum
Definition: postgres.h:70
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
Oid attrtypid
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:129
static ItemArray items
Definition: test_tidstore.c:48
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_LT_OPR
Definition: typcache.h:139
const char * type

References VacAttrStats::attrcollid, VacAttrStats::attrtypid, collid, data, elog, ERROR, estimate_ndistinct(), f1, i, InvalidOid, items, j, lookup_type_cache(), multi_sort_add_dimension(), multi_sort_compare(), multi_sort_init(), palloc(), palloc0(), qsort_interruptible(), type, TYPECACHE_LT_OPR, and values.

Referenced by statext_ndistinct_build().

◆ num_combinations()

static int num_combinations ( int  n)
static

Definition at line 490 of file mvdistinct.c.

491{
492 return (1 << n) - (n + 1);
493}

Referenced by statext_ndistinct_build().

◆ statext_ndistinct_build()

MVNDistinct * statext_ndistinct_build ( double  totalrows,
StatsBuildData data 
)

Definition at line 85 of file mvdistinct.c.

86{
87 MVNDistinct *result;
88 int k;
89 int itemcnt;
90 int numattrs = data->nattnums;
91 int numcombs = num_combinations(numattrs);
92
93 result = palloc(offsetof(MVNDistinct, items) +
94 numcombs * sizeof(MVNDistinctItem));
97 result->nitems = numcombs;
98
99 itemcnt = 0;
100 for (k = 2; k <= numattrs; k++)
101 {
102 int *combination;
104
105 /* generate combinations of K out of N elements */
106 generator = generator_init(numattrs, k);
107
108 while ((combination = generator_next(generator)))
109 {
110 MVNDistinctItem *item = &result->items[itemcnt];
111 int j;
112
113 item->attributes = palloc(sizeof(AttrNumber) * k);
114 item->nattributes = k;
115
116 /* translate the indexes to attnums */
117 for (j = 0; j < k; j++)
118 {
119 item->attributes[j] = data->attnums[combination[j]];
120
122 }
123
124 item->ndistinct =
125 ndistinct_for_combination(totalrows, data, k, combination);
126
127 itemcnt++;
128 Assert(itemcnt <= result->nitems);
129 }
130
132 }
133
134 /* must consume exactly the whole output array */
135 Assert(itemcnt == result->nitems);
136
137 return result;
138}
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define nitems(x)
Definition: indent.h:31
static double ndistinct_for_combination(double totalrows, StatsBuildData *data, int k, int *combination)
Definition: mvdistinct.c:340
static int num_combinations(int n)
Definition: mvdistinct.c:490
static void generator_free(CombinationGenerator *state)
Definition: mvdistinct.c:557
static CombinationGenerator * generator_init(int n, int k)
Definition: mvdistinct.c:504
static int * generator_next(CombinationGenerator *state)
Definition: mvdistinct.c:542
#define STATS_NDISTINCT_MAGIC
Definition: statistics.h:22
#define STATS_NDISTINCT_TYPE_BASIC
Definition: statistics.h:23
double ndistinct
Definition: statistics.h:28
AttrNumber * attributes
Definition: statistics.h:30
uint32 nitems
Definition: statistics.h:38
uint32 type
Definition: statistics.h:37
uint32 magic
Definition: statistics.h:36
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:39

References Assert(), AttributeNumberIsValid, MVNDistinctItem::attributes, data, generator_free(), generator_init(), generator_next(), MVNDistinct::items, items, j, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, ndistinct_for_combination(), MVNDistinct::nitems, nitems, num_combinations(), palloc(), STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, and MVNDistinct::type.

Referenced by BuildRelationExtStatistics().

◆ statext_ndistinct_deserialize()

MVNDistinct * statext_ndistinct_deserialize ( bytea data)

Definition at line 247 of file mvdistinct.c.

248{
249 int i;
250 Size minimum_size;
251 MVNDistinct ndist;
252 MVNDistinct *ndistinct;
253 char *tmp;
254
255 if (data == NULL)
256 return NULL;
257
258 /* we expect at least the basic fields of MVNDistinct struct */
260 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
262
263 /* initialize pointer to the data part (skip the varlena header) */
264 tmp = VARDATA_ANY(data);
265
266 /* read the header fields and perform basic sanity checks */
267 memcpy(&ndist.magic, tmp, sizeof(uint32));
268 tmp += sizeof(uint32);
269 memcpy(&ndist.type, tmp, sizeof(uint32));
270 tmp += sizeof(uint32);
271 memcpy(&ndist.nitems, tmp, sizeof(uint32));
272 tmp += sizeof(uint32);
273
274 if (ndist.magic != STATS_NDISTINCT_MAGIC)
275 elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
277 if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
278 elog(ERROR, "invalid ndistinct type %d (expected %d)",
280 if (ndist.nitems == 0)
281 elog(ERROR, "invalid zero-length item array in MVNDistinct");
282
283 /* what minimum bytea size do we expect for those parameters */
284 minimum_size = MinSizeOfItems(ndist.nitems);
285 if (VARSIZE_ANY_EXHDR(data) < minimum_size)
286 elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
287 VARSIZE_ANY_EXHDR(data), minimum_size);
288
289 /*
290 * Allocate space for the ndistinct items (no space for each item's
291 * attnos: those live in bitmapsets allocated separately)
292 */
293 ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
294 (ndist.nitems * sizeof(MVNDistinctItem)));
295 ndistinct->magic = ndist.magic;
296 ndistinct->type = ndist.type;
297 ndistinct->nitems = ndist.nitems;
298
299 for (i = 0; i < ndistinct->nitems; i++)
300 {
301 MVNDistinctItem *item = &ndistinct->items[i];
302
303 /* ndistinct value */
304 memcpy(&item->ndistinct, tmp, sizeof(double));
305 tmp += sizeof(double);
306
307 /* number of attributes */
308 memcpy(&item->nattributes, tmp, sizeof(int));
309 tmp += sizeof(int);
310 Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
311
312 item->attributes
313 = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
314
315 memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
316 tmp += sizeof(AttrNumber) * item->nattributes;
317
318 /* still within the bytea */
319 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
320 }
321
322 /* we should have consumed the whole bytea exactly */
323 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
324
325 return ndistinct;
326}
#define MAXALIGN(LEN)
Definition: c.h:813
uint32_t uint32
Definition: c.h:541
size_t Size
Definition: c.h:613
#define SizeOfHeader
Definition: mvdistinct.c:42
#define MinSizeOfItems(nitems)
Definition: mvdistinct.c:52
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
static Size VARSIZE_ANY(const void *PTR)
Definition: varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486

References Assert(), MVNDistinctItem::attributes, data, elog, ERROR, i, MVNDistinct::items, items, MVNDistinct::magic, MAXALIGN, MinSizeOfItems, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, palloc(), palloc0(), SizeOfHeader, STATS_MAX_DIMENSIONS, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA_ANY(), VARSIZE_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by pg_ndistinct_out(), and statext_ndistinct_load().

◆ statext_ndistinct_load()

MVNDistinct * statext_ndistinct_load ( Oid  mvoid,
bool  inh 
)

Definition at line 145 of file mvdistinct.c.

146{
147 MVNDistinct *result;
148 bool isnull;
149 Datum ndist;
150 HeapTuple htup;
151
152 htup = SearchSysCache2(STATEXTDATASTXOID,
153 ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
154 if (!HeapTupleIsValid(htup))
155 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
156
157 ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
158 Anum_pg_statistic_ext_data_stxdndistinct, &isnull);
159 if (isnull)
160 elog(ERROR,
161 "requested statistics kind \"%c\" is not yet built for statistics object %u",
162 STATS_EXT_NDISTINCT, mvoid);
163
165
166 ReleaseSysCache(htup);
167
168 return result;
169}
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
MVNDistinct * statext_ndistinct_deserialize(bytea *data)
Definition: mvdistinct.c:247
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:230

References BoolGetDatum(), DatumGetByteaPP, elog, ERROR, HeapTupleIsValid, ObjectIdGetDatum(), ReleaseSysCache(), SearchSysCache2(), statext_ndistinct_deserialize(), and SysCacheGetAttr().

Referenced by estimate_multivariate_ndistinct().

◆ statext_ndistinct_serialize()

bytea * statext_ndistinct_serialize ( MVNDistinct ndistinct)

Definition at line 176 of file mvdistinct.c.

177{
178 int i;
179 bytea *output;
180 char *tmp;
181 Size len;
182
183 Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
185
186 /*
187 * Base size is size of scalar fields in the struct, plus one base struct
188 * for each item, including number of items for each.
189 */
191
192 /* and also include space for the actual attribute numbers */
193 for (i = 0; i < ndistinct->nitems; i++)
194 {
195 int nmembers;
196
197 nmembers = ndistinct->items[i].nattributes;
198 Assert(nmembers >= 2);
199
200 len += SizeOfItem(nmembers);
201 }
202
203 output = (bytea *) palloc(len);
205
206 tmp = VARDATA(output);
207
208 /* Store the base struct values (magic, type, nitems) */
209 memcpy(tmp, &ndistinct->magic, sizeof(uint32));
210 tmp += sizeof(uint32);
211 memcpy(tmp, &ndistinct->type, sizeof(uint32));
212 tmp += sizeof(uint32);
213 memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
214 tmp += sizeof(uint32);
215
216 /*
217 * store number of attributes and attribute numbers for each entry
218 */
219 for (i = 0; i < ndistinct->nitems; i++)
220 {
221 MVNDistinctItem item = ndistinct->items[i];
222 int nmembers = item.nattributes;
223
224 memcpy(tmp, &item.ndistinct, sizeof(double));
225 tmp += sizeof(double);
226 memcpy(tmp, &nmembers, sizeof(int));
227 tmp += sizeof(int);
228
229 memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
230 tmp += nmembers * sizeof(AttrNumber);
231
232 /* protect against overflows */
233 Assert(tmp <= ((char *) output + len));
234 }
235
236 /* check we used exactly the expected space */
237 Assert(tmp == ((char *) output + len));
238
239 return output;
240}
#define VARHDRSZ
Definition: c.h:700
FILE * output
#define SizeOfItem(natts)
Definition: mvdistinct.c:45
const void size_t len
Definition: c.h:695
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432

References Assert(), MVNDistinctItem::attributes, i, MVNDistinct::items, len, MVNDistinct::magic, MVNDistinctItem::nattributes, MVNDistinctItem::ndistinct, MVNDistinct::nitems, output, palloc(), SET_VARSIZE(), SizeOfHeader, SizeOfItem, STATS_NDISTINCT_MAGIC, STATS_NDISTINCT_TYPE_BASIC, MVNDistinct::type, VARDATA(), and VARHDRSZ.

Referenced by build_mvndistinct(), and statext_store().