PostgreSQL Source Code git master
Loading...
Searching...
No Matches
mcv.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * mcv.c
4 * POSTGRES multivariate MCV lists
5 *
6 *
7 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/statistics/mcv.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/htup_details.h"
20#include "fmgr.h"
21#include "funcapi.h"
22#include "nodes/nodeFuncs.h"
25#include "utils/array.h"
26#include "utils/builtins.h"
27#include "utils/fmgrprotos.h"
28#include "utils/lsyscache.h"
29#include "utils/selfuncs.h"
30#include "utils/syscache.h"
31#include "utils/typcache.h"
32
33/*
34 * Computes size of a serialized MCV item, depending on the number of
35 * dimensions (columns) the statistic is defined on. The datum values are
36 * stored in a separate array (deduplicated, to minimize the size), and
37 * so the serialized items only store uint16 indexes into that array.
38 *
39 * Each serialized item stores (in this order):
40 *
41 * - indexes to values (ndim * sizeof(uint16))
42 * - null flags (ndim * sizeof(bool))
43 * - frequency (sizeof(double))
44 * - base_frequency (sizeof(double))
45 *
46 * There is no alignment padding within an MCV item.
47 * So in total each MCV item requires this many bytes:
48 *
49 * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
50 */
51#define ITEM_SIZE(ndims) \
52 ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
53
54/*
55 * Used to compute size of serialized MCV list representation.
56 */
57#define MinSizeOfMCVList \
58 (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
59
60/*
61 * Size of the serialized MCV list, excluding the space needed for
62 * deduplicated per-dimension values. The macro is meant to be used
63 * when it's not yet safe to access the serialized info about amount
64 * of data for each column.
65 */
66#define SizeOfMCVList(ndims,nitems) \
67 ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
68 ((ndims) * sizeof(DimensionInfo)) + \
69 ((nitems) * ITEM_SIZE(ndims)))
70
72
73static SortItem *build_distinct_groups(int numrows, SortItem *items,
74 MultiSortSupport mss, int *ndistinct);
75
78
79static int count_distinct_groups(int numrows, SortItem *items,
81
82/*
83 * Compute new value for bitmap item, considering whether it's used for
84 * clauses connected by AND/OR.
85 */
86#define RESULT_MERGE(value, is_or, match) \
87 ((is_or) ? ((value) || (match)) : ((value) && (match)))
88
89/*
90 * When processing a list of clauses, the bitmap item may get set to a value
91 * such that additional clauses can't change it. For example, when processing
92 * a list of clauses connected to AND, as soon as the item gets set to 'false'
93 * then it'll remain like that. Similarly clauses connected by OR and 'true'.
94 *
95 * Returns true when the value in the bitmap can't change no matter how the
96 * remaining clauses are evaluated.
97 */
98#define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
99
100/*
101 * get_mincount_for_mcv_list
102 * Determine the minimum number of times a value needs to appear in
103 * the sample for it to be included in the MCV list.
104 *
105 * We want to keep only values that appear sufficiently often in the
106 * sample that it is reasonable to extrapolate their sample frequencies to
107 * the entire table. We do this by placing an upper bound on the relative
108 * standard error of the sample frequency, so that any estimates the
109 * planner generates from the MCV statistics can be expected to be
110 * reasonably accurate.
111 *
112 * Since we are sampling without replacement, the sample frequency of a
113 * particular value is described by a hypergeometric distribution. A
114 * common rule of thumb when estimating errors in this situation is to
115 * require at least 10 instances of the value in the sample, in which case
116 * the distribution can be approximated by a normal distribution, and
117 * standard error analysis techniques can be applied. Given a sample size
118 * of n, a population size of N, and a sample frequency of p=cnt/n, the
119 * standard error of the proportion p is given by
120 * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
121 * where the second term is the finite population correction. To get
122 * reasonably accurate planner estimates, we impose an upper bound on the
123 * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
124 * error bound is fairly arbitrary, but has been found empirically to work
125 * well. Rearranging this formula gives a lower bound on the number of
126 * instances of the value seen:
127 * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
128 * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
129 * case where n approaches 0 cannot happen in practice, since the sample
130 * size is at least 300. The case where n approaches N corresponds to
131 * sampling the whole table, in which case it is reasonable to keep
132 * the whole MCV list (have no lower bound), so it makes sense to apply
133 * this formula for all inputs, even though the above derivation is
134 * technically only valid when the right hand side is at least around 10.
135 *
136 * An alternative way to look at this formula is as follows -- assume that
137 * the number of instances of the value seen scales up to the entire
138 * table, so that the population count is K=N*cnt/n. Then the distribution
139 * in the sample is a hypergeometric distribution parameterised by N, n
140 * and K, and the bound above is mathematically equivalent to demanding
141 * that the standard deviation of that distribution is less than 20% of
142 * its mean. Thus the relative errors in any planner estimates produced
143 * from the MCV statistics are likely to be not too large.
144 */
145static double
147{
148 double n = samplerows;
149 double N = totalrows;
150 double numer,
151 denom;
152
153 numer = n * (N - n);
154 denom = N - n + 0.04 * n * (N - 1);
155
156 /* Guard against division by zero (possible if n = N = 1) */
157 if (denom == 0.0)
158 return 0.0;
159
160 return numer / denom;
161}
162
163/*
164 * Builds MCV list from the set of sampled rows.
165 *
166 * The algorithm is quite simple:
167 *
168 * (1) sort the data (default collation, '<' for the data type)
169 *
170 * (2) count distinct groups, decide how many to keep
171 *
172 * (3) build the MCV list using the threshold determined in (2)
173 *
174 * (4) remove rows represented by the MCV from the sample
175 *
176 */
177MCVList *
179{
180 int i,
181 numattrs,
182 numrows,
183 ngroups,
184 nitems;
185 double mincount;
190
191 /* comparator for all the columns */
192 mss = build_mss(data);
193
194 /* sort the rows */
196 data->nattnums, data->attnums);
197
198 if (!items)
199 return NULL;
200
201 /* for convenience */
202 numattrs = data->nattnums;
203 numrows = data->numrows;
204
205 /* transform the sorted rows into groups (sorted by frequency) */
207
208 /*
209 * The maximum number of MCV items to store, based on the statistics
210 * target we computed for the statistics object (from the target set for
211 * the object itself, attributes and the system default). In any case, we
212 * can't keep more groups than we have available.
213 */
214 nitems = stattarget;
215 if (nitems > ngroups)
216 nitems = ngroups;
217
218 /*
219 * Decide how many items to keep in the MCV list. We can't use the same
220 * algorithm as per-column MCV lists, because that only considers the
221 * actual group frequency - but we're primarily interested in how the
222 * actual frequency differs from the base frequency (product of simple
223 * per-column frequencies, as if the columns were independent).
224 *
225 * Using the same algorithm might exclude items that are close to the
226 * "average" frequency of the sample. But that does not say whether the
227 * observed frequency is close to the base frequency or not. We also need
228 * to consider unexpectedly uncommon items (again, compared to the base
229 * frequency), and the single-column algorithm does not have to.
230 *
231 * We simply decide how many items to keep by computing the minimum count
232 * using get_mincount_for_mcv_list() and then keep all items that seem to
233 * be more common than that.
234 */
236
237 /*
238 * Walk the groups until we find the first group with a count below the
239 * mincount threshold (the index of that group is the number of groups we
240 * want to keep).
241 */
242 for (i = 0; i < nitems; i++)
243 {
244 if (groups[i].count < mincount)
245 {
246 nitems = i;
247 break;
248 }
249 }
250
251 /*
252 * At this point, we know the number of items for the MCV list. There
253 * might be none (for uniform distribution with many groups), and in that
254 * case, there will be no MCV list. Otherwise, construct the MCV list.
255 */
256 if (nitems > 0)
257 {
258 int j;
259 SortItem key;
261
262 /* frequencies for values in each attribute */
263 SortItem **freqs;
264 int *nfreqs;
265
266 /* used to search values */
268 + sizeof(SortSupportData));
269
270 /* compute frequencies for values in each column */
273
274 /*
275 * Allocate the MCV list structure, set the global parameters.
276 */
278 sizeof(MCVItem) * nitems);
279
280 mcvlist->magic = STATS_MCV_MAGIC;
282 mcvlist->ndimensions = numattrs;
283 mcvlist->nitems = nitems;
284
285 /* store info about data type OIDs */
286 for (i = 0; i < numattrs; i++)
287 mcvlist->types[i] = data->stats[i]->attrtypid;
288
289 /* Copy the first chunk of groups into the result. */
290 for (i = 0; i < nitems; i++)
291 {
292 /* just point to the proper place in the list */
293 MCVItem *item = &mcvlist->items[i];
294
296 item->isnull = palloc_array(bool, numattrs);
297
298 /* copy values for the group */
299 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
300 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
301
302 /* groups should be sorted by frequency in descending order */
303 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
304
305 /* group frequency */
306 item->frequency = (double) groups[i].count / numrows;
307
308 /* base frequency, if the attributes were independent */
309 item->base_frequency = 1.0;
310 for (j = 0; j < numattrs; j++)
311 {
312 SortItem *freq;
313
314 /* single dimension */
315 tmp->ndims = 1;
316 tmp->ssup[0] = mss->ssup[j];
317
318 /* fill search key */
319 key.values = &groups[i].values[j];
320 key.isnull = &groups[i].isnull[j];
321
322 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
323 sizeof(SortItem),
324 multi_sort_compare, tmp);
325
326 item->base_frequency *= ((double) freq->count) / numrows;
327 }
328 }
329
330 pfree(nfreqs);
331 pfree(freqs);
332 }
333
334 pfree(items);
335 pfree(groups);
336
337 return mcvlist;
338}
339
340/*
341 * build_mss
342 * Build a MultiSortSupport for the given StatsBuildData.
343 */
344static MultiSortSupport
346{
347 int i;
348 int numattrs = data->nattnums;
349
350 /* Sort by multiple columns (using array of SortSupport) */
352
353 /* prepare the sort functions for all the attributes */
354 for (i = 0; i < numattrs; i++)
355 {
356 VacAttrStats *colstat = data->stats[i];
358
360 if (type->lt_opr == InvalidOid) /* shouldn't happen */
361 elog(ERROR, "cache lookup failed for ordering operator for type %u",
362 colstat->attrtypid);
363
364 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
365 }
366
367 return mss;
368}
369
370/*
371 * count_distinct_groups
372 * Count distinct combinations of SortItems in the array.
373 *
374 * The array is assumed to be sorted according to the MultiSortSupport.
375 */
376static int
378{
379 int i;
380 int ndistinct;
381
382 ndistinct = 1;
383 for (i = 1; i < numrows; i++)
384 {
385 /* make sure the array really is sorted */
386 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
387
388 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
389 ndistinct += 1;
390 }
391
392 return ndistinct;
393}
394
395/*
396 * compare_sort_item_count
397 * Comparator for sorting items by count (frequencies) in descending
398 * order.
399 */
400static int
401compare_sort_item_count(const void *a, const void *b, void *arg)
402{
403 const SortItem *ia = a;
404 const SortItem *ib = b;
405
406 if (ia->count == ib->count)
407 return 0;
408 else if (ia->count > ib->count)
409 return -1;
410
411 return 1;
412}
413
414/*
415 * build_distinct_groups
416 * Build an array of SortItems for distinct groups and counts matching
417 * items.
418 *
419 * The 'items' array is assumed to be sorted.
420 */
421static SortItem *
423 int *ndistinct)
424{
425 int i,
426 j;
427 int ngroups = count_distinct_groups(numrows, items, mss);
428
429 SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
430
431 j = 0;
432 groups[0] = items[0];
433 groups[0].count = 1;
434
435 for (i = 1; i < numrows; i++)
436 {
437 /* Assume sorted in ascending order. */
438 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
439
440 /* New distinct group detected. */
441 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
442 {
443 groups[++j] = items[i];
444 groups[j].count = 0;
445 }
446
447 groups[j].count++;
448 }
449
450 /* ensure we filled the expected number of distinct groups */
451 Assert(j + 1 == ngroups);
452
453 /* Sort the distinct groups by frequency (in descending order). */
456
457 *ndistinct = ngroups;
458 return groups;
459}
460
461/* compare sort items (single dimension) */
462static int
463sort_item_compare(const void *a, const void *b, void *arg)
464{
465 SortSupport ssup = (SortSupport) arg;
466 const SortItem *ia = a;
467 const SortItem *ib = b;
468
469 return ApplySortComparator(ia->values[0], ia->isnull[0],
470 ib->values[0], ib->isnull[0],
471 ssup);
472}
473
474/*
475 * build_column_frequencies
476 * Compute frequencies of values in each column.
477 *
478 * This returns an array of SortItems for each attribute the MCV is built
479 * on, with a frequency (number of occurrences) for each value. This is
480 * then used to compute "base" frequency of MCV items.
481 *
482 * All the memory is allocated in a single chunk, so that a single pfree
483 * is enough to release it. We do not allocate space for values/isnull
484 * arrays in the SortItems, because we can simply point into the input
485 * groups directly.
486 */
487static SortItem **
490{
491 int i,
492 dim;
493 SortItem **result;
494 char *ptr;
495
496 Assert(groups);
498
499 /* allocate arrays for all columns as a single chunk */
500 ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
501 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
502
503 /* initial array of pointers */
504 result = (SortItem **) ptr;
505 ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
506
507 for (dim = 0; dim < mss->ndims; dim++)
508 {
509 SortSupport ssup = &mss->ssup[dim];
510
511 /* array of values for a single column */
512 result[dim] = (SortItem *) ptr;
513 ptr += MAXALIGN(sizeof(SortItem) * ngroups);
514
515 /* extract data for the dimension */
516 for (i = 0; i < ngroups; i++)
517 {
518 /* point into the input groups */
519 result[dim][i].values = &groups[i].values[dim];
520 result[dim][i].isnull = &groups[i].isnull[dim];
521 result[dim][i].count = groups[i].count;
522 }
523
524 /* sort the values, deduplicate */
525 qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
526 sort_item_compare, ssup);
527
528 /*
529 * Identify distinct values, compute frequency (there might be
530 * multiple MCV items containing this value, so we need to sum counts
531 * from all of them.
532 */
533 ncounts[dim] = 1;
534 for (i = 1; i < ngroups; i++)
535 {
536 if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
537 {
538 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
539 continue;
540 }
541
542 result[dim][ncounts[dim]] = result[dim][i];
543
544 ncounts[dim]++;
545 }
546 }
547
548 return result;
549}
550
551/*
552 * statext_mcv_load
553 * Load the MCV list for the indicated pg_statistic_ext_data tuple.
554 */
555MCVList *
557{
558 MCVList *result;
559 bool isnull;
563
564 if (!HeapTupleIsValid(htup))
565 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
566
569
570 if (isnull)
571 elog(ERROR,
572 "requested statistics kind \"%c\" is not yet built for statistics object %u",
574
576
577 ReleaseSysCache(htup);
578
579 return result;
580}
581
582
583/*
584 * statext_mcv_serialize
585 * Serialize MCV list into a pg_mcv_list value.
586 *
587 * The MCV items may include values of various data types, and it's reasonable
588 * to expect redundancy (values for a given attribute, repeated for multiple
589 * MCV list items). So we deduplicate the values into arrays, and then replace
590 * the values by indexes into those arrays.
591 *
592 * The overall structure of the serialized representation looks like this:
593 *
594 * +---------------+----------------+---------------------+-------+
595 * | header fields | dimension info | deduplicated values | items |
596 * +---------------+----------------+---------------------+-------+
597 *
598 * Where dimension info stores information about the type of the K-th
599 * attribute (e.g. typlen, typbyval and length of deduplicated values).
600 * Deduplicated values store deduplicated values for each attribute. And
601 * items store the actual MCV list items, with values replaced by indexes into
602 * the arrays.
603 *
604 * When serializing the items, we use uint16 indexes. The number of MCV items
605 * is limited by the statistics target (which is capped to 10k at the moment).
606 * We might increase this to 65k and still fit into uint16, so there's a bit of
607 * slack. Furthermore, this limit is on the number of distinct values per column,
608 * and we usually have few of those (and various combinations of them for the
609 * those MCV list). So uint16 seems fine for now.
610 *
611 * We don't really expect the serialization to save as much space as for
612 * histograms, as we are not doing any bucket splits (which is the source
613 * of high redundancy in histograms).
614 *
615 * TODO: Consider packing boolean flags (NULL) for each item into a single char
616 * (or a longer type) instead of using an array of bool items.
617 */
618bytea *
620{
621 int i;
622 int dim;
623 int ndims = mcvlist->ndimensions;
624
625 SortSupport ssup;
626 DimensionInfo *info;
627
629
630 /* serialized items (indexes into arrays, etc.) */
631 bytea *raw;
632 char *ptr;
633 char *endptr PG_USED_FOR_ASSERTS_ONLY;
634
635 /* values per dimension (and number of non-NULL values) */
636 Datum **values = palloc0_array(Datum *, ndims);
637 int *counts = palloc0_array(int, ndims);
638
639 /*
640 * We'll include some rudimentary information about the attribute types
641 * (length, by-val flag), so that we don't have to look them up while
642 * deserializing the MCV list (we already have the type OID in the
643 * header). This is safe because when changing the type of the attribute
644 * the statistics gets dropped automatically. We need to store the info
645 * about the arrays of deduplicated values anyway.
646 */
647 info = palloc0_array(DimensionInfo, ndims);
648
649 /* sort support data for all attributes included in the MCV list */
650 ssup = palloc0_array(SortSupportData, ndims);
651
652 /* collect and deduplicate values for each dimension (attribute) */
653 for (dim = 0; dim < ndims; dim++)
654 {
655 int ndistinct;
656 TypeCacheEntry *typentry;
657
658 /*
659 * Lookup the LT operator (can't get it from stats extra_data, as we
660 * don't know how to interpret that - scalar vs. array etc.).
661 */
662 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
663
664 /* copy important info about the data type (length, by-value) */
665 info[dim].typlen = stats[dim]->attrtype->typlen;
666 info[dim].typbyval = stats[dim]->attrtype->typbyval;
667
668 /* allocate space for values in the attribute and collect them */
669 values[dim] = palloc0_array(Datum, mcvlist->nitems);
670
671 for (i = 0; i < mcvlist->nitems; i++)
672 {
673 /* skip NULL values - we don't need to deduplicate those */
674 if (mcvlist->items[i].isnull[dim])
675 continue;
676
677 /* append the value at the end */
678 values[dim][counts[dim]] = mcvlist->items[i].values[dim];
679 counts[dim] += 1;
680 }
681
682 /* if there are just NULL values in this dimension, we're done */
683 if (counts[dim] == 0)
684 continue;
685
686 /* sort and deduplicate the data */
687 ssup[dim].ssup_cxt = CurrentMemoryContext;
688 ssup[dim].ssup_collation = stats[dim]->attrcollid;
689 ssup[dim].ssup_nulls_first = false;
690
691 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
692
693 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
694 compare_scalars_simple, &ssup[dim]);
695
696 /*
697 * Walk through the array and eliminate duplicate values, but keep the
698 * ordering (so that we can do a binary search later). We know there's
699 * at least one item as (counts[dim] != 0), so we can skip the first
700 * element.
701 */
702 ndistinct = 1; /* number of distinct values */
703 for (i = 1; i < counts[dim]; i++)
704 {
705 /* expect sorted array */
706 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
707
708 /* if the value is the same as the previous one, we can skip it */
709 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
710 continue;
711
712 values[dim][ndistinct] = values[dim][i];
713 ndistinct += 1;
714 }
715
716 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
717 Assert(ndistinct <= PG_UINT16_MAX);
718
719 /*
720 * Store additional info about the attribute - number of deduplicated
721 * values, and also size of the serialized data. For fixed-length data
722 * types this is trivial to compute, for varwidth types we need to
723 * actually walk the array and sum the sizes.
724 */
725 info[dim].nvalues = ndistinct;
726
727 if (info[dim].typbyval) /* by-value data types */
728 {
729 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
730
731 /*
732 * We copy the data into the MCV item during deserialization, so
733 * we don't need to allocate any extra space.
734 */
735 info[dim].nbytes_aligned = 0;
736 }
737 else if (info[dim].typlen > 0) /* fixed-length by-ref */
738 {
739 /*
740 * We don't care about alignment in the serialized data, so we
741 * pack the data as much as possible. But we also track how much
742 * data will be needed after deserialization, and in that case we
743 * need to account for alignment of each item.
744 *
745 * Note: As the items are fixed-length, we could easily compute
746 * this during deserialization, but we do it here anyway.
747 */
748 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
749 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
750 }
751 else if (info[dim].typlen == -1) /* varlena */
752 {
753 info[dim].nbytes = 0;
754 info[dim].nbytes_aligned = 0;
755 for (i = 0; i < info[dim].nvalues; i++)
756 {
757 Size len;
758
759 /*
760 * For varlena values, we detoast the values and store the
761 * length and data separately. We don't bother with alignment
762 * here, which means that during deserialization we need to
763 * copy the fields and only access the copies.
764 */
766
767 /* serialized length (uint32 length + data) */
769 info[dim].nbytes += sizeof(uint32); /* length */
770 info[dim].nbytes += len; /* value (no header) */
771
772 /*
773 * During deserialization we'll build regular varlena values
774 * with full headers, and we need to align them properly.
775 */
776 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
777 }
778 }
779 else if (info[dim].typlen == -2) /* cstring */
780 {
781 info[dim].nbytes = 0;
782 info[dim].nbytes_aligned = 0;
783 for (i = 0; i < info[dim].nvalues; i++)
784 {
785 Size len;
786
787 /*
788 * cstring is handled similar to varlena - first we store the
789 * length as uint32 and then the data. We don't care about
790 * alignment, which means that during deserialization we need
791 * to copy the fields and only access the copies.
792 */
793
794 /* c-strings include terminator, so +1 byte */
795 len = strlen(DatumGetCString(values[dim][i])) + 1;
796 info[dim].nbytes += sizeof(uint32); /* length */
797 info[dim].nbytes += len; /* value */
798
799 /* space needed for properly aligned deserialized copies */
800 info[dim].nbytes_aligned += MAXALIGN(len);
801 }
802 }
803
804 /* we know (count>0) so there must be some data */
805 Assert(info[dim].nbytes > 0);
806 }
807
808 /*
809 * Now we can finally compute how much space we'll actually need for the
810 * whole serialized MCV list (varlena header, MCV header, dimension info
811 * for each attribute, deduplicated values and items).
812 */
813 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
814 + sizeof(AttrNumber) /* ndimensions */
815 + (ndims * sizeof(Oid)); /* attribute types */
816
817 /* dimension info */
818 total_length += ndims * sizeof(DimensionInfo);
819
820 /* add space for the arrays of deduplicated values */
821 for (i = 0; i < ndims; i++)
822 total_length += info[i].nbytes;
823
824 /*
825 * And finally account for the items (those are fixed-length, thanks to
826 * replacing values with uint16 indexes into the deduplicated arrays).
827 */
828 total_length += mcvlist->nitems * ITEM_SIZE(dim);
829
830 /*
831 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
832 * we set them to zero to make the result more compressible).
833 */
836
837 ptr = VARDATA(raw);
838 endptr = ptr + total_length;
839
840 /* copy the MCV list header fields, one by one */
841 memcpy(ptr, &mcvlist->magic, sizeof(uint32));
842 ptr += sizeof(uint32);
843
844 memcpy(ptr, &mcvlist->type, sizeof(uint32));
845 ptr += sizeof(uint32);
846
847 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
848 ptr += sizeof(uint32);
849
850 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
851 ptr += sizeof(AttrNumber);
852
853 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
854 ptr += (sizeof(Oid) * ndims);
855
856 /* store information about the attributes (data amounts, ...) */
857 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
858 ptr += sizeof(DimensionInfo) * ndims;
859
860 /* Copy the deduplicated values for all attributes to the output. */
861 for (dim = 0; dim < ndims; dim++)
862 {
863 /* remember the starting point for Asserts later */
865
866 for (i = 0; i < info[dim].nvalues; i++)
867 {
868 Datum value = values[dim][i];
869
870 if (info[dim].typbyval) /* passed by value */
871 {
872 Datum tmp;
873
874 /*
875 * For byval types, we need to copy just the significant bytes
876 * - we can't use memcpy directly, as that assumes
877 * little-endian behavior. store_att_byval does almost what
878 * we need, but it requires a properly aligned buffer - the
879 * output buffer does not guarantee that. So we simply use a
880 * local Datum variable (which guarantees proper alignment),
881 * and then copy the value from it.
882 */
883 store_att_byval(&tmp, value, info[dim].typlen);
884
885 memcpy(ptr, &tmp, info[dim].typlen);
886 ptr += info[dim].typlen;
887 }
888 else if (info[dim].typlen > 0) /* passed by reference */
889 {
890 /* no special alignment needed, treated as char array */
891 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
892 ptr += info[dim].typlen;
893 }
894 else if (info[dim].typlen == -1) /* varlena */
895 {
897
898 /* copy the length */
899 memcpy(ptr, &len, sizeof(uint32));
900 ptr += sizeof(uint32);
901
902 /* data from the varlena value (without the header) */
904 ptr += len;
905 }
906 else if (info[dim].typlen == -2) /* cstring */
907 {
909
910 /* copy the length */
911 memcpy(ptr, &len, sizeof(uint32));
912 ptr += sizeof(uint32);
913
914 /* value */
916 ptr += len;
917 }
918
919 /* no underflows or overflows */
920 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
921 }
922
923 /* we should get exactly nbytes of data for this dimension */
924 Assert((ptr - start) == info[dim].nbytes);
925 }
926
927 /* Serialize the items, with uint16 indexes instead of the values. */
928 for (i = 0; i < mcvlist->nitems; i++)
929 {
930 MCVItem *mcvitem = &mcvlist->items[i];
931
932 /* don't write beyond the allocated space */
933 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
934
935 /* copy NULL and frequency flags into the serialized MCV */
936 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
937 ptr += sizeof(bool) * ndims;
938
939 memcpy(ptr, &mcvitem->frequency, sizeof(double));
940 ptr += sizeof(double);
941
942 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
943 ptr += sizeof(double);
944
945 /* store the indexes last */
946 for (dim = 0; dim < ndims; dim++)
947 {
948 uint16 index = 0;
949 Datum *value;
950
951 /* do the lookup only for non-NULL values */
952 if (!mcvitem->isnull[dim])
953 {
954 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
955 info[dim].nvalues, sizeof(Datum),
956 compare_scalars_simple, &ssup[dim]);
957
958 Assert(value != NULL); /* serialization or deduplication
959 * error */
960
961 /* compute index within the deduplicated array */
962 index = (uint16) (value - values[dim]);
963
964 /* check the index is within expected bounds */
965 Assert(index < info[dim].nvalues);
966 }
967
968 /* copy the index into the serialized MCV */
969 memcpy(ptr, &index, sizeof(uint16));
970 ptr += sizeof(uint16);
971 }
972
973 /* make sure we don't overflow the allocated value */
974 Assert(ptr <= endptr);
975 }
976
977 /* at this point we expect to match the total_length exactly */
978 Assert(ptr == endptr);
979
980 pfree(values);
981 pfree(counts);
982
983 return raw;
984}
985
986/*
987 * statext_mcv_deserialize
988 * Reads serialized MCV list into MCVList structure.
989 *
990 * All the memory needed by the MCV list is allocated as a single chunk, so
991 * it's possible to simply pfree() it at once.
992 */
993MCVList *
995{
996 int dim,
997 i;
1000 char *raw;
1001 char *ptr;
1002 char *endptr PG_USED_FOR_ASSERTS_ONLY;
1003
1004 int ndims,
1005 nitems;
1006 DimensionInfo *info = NULL;
1007
1008 /* local allocation buffer (used only for deserialization) */
1009 Datum **map = NULL;
1010
1011 /* MCV list */
1012 Size mcvlen;
1013
1014 /* buffer used for the result */
1015 Size datalen;
1016 char *dataptr;
1017 char *valuesptr;
1018 char *isnullptr;
1019
1020 if (data == NULL)
1021 return NULL;
1022
1023 /*
1024 * We can't possibly deserialize a MCV list if there's not even a complete
1025 * header. We need an explicit formula here, because we serialize the
1026 * header fields one by one, so we need to ignore struct alignment.
1027 */
1029 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1031
1032 /* read the MCV list header */
1034
1035 /* pointer to the data part (skip the varlena header) */
1036 raw = (char *) data;
1037 ptr = VARDATA_ANY(raw);
1038 endptr = raw + VARSIZE_ANY(data);
1039
1040 /* get the header and perform further sanity checks */
1041 memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1042 ptr += sizeof(uint32);
1043
1044 memcpy(&mcvlist->type, ptr, sizeof(uint32));
1045 ptr += sizeof(uint32);
1046
1047 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1048 ptr += sizeof(uint32);
1049
1050 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1051 ptr += sizeof(AttrNumber);
1052
1053 if (mcvlist->magic != STATS_MCV_MAGIC)
1054 elog(ERROR, "invalid MCV magic %u (expected %u)",
1055 mcvlist->magic, STATS_MCV_MAGIC);
1056
1057 if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1058 elog(ERROR, "invalid MCV type %u (expected %u)",
1060
1061 if (mcvlist->ndimensions == 0)
1062 elog(ERROR, "invalid zero-length dimension array in MCVList");
1063 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1064 (mcvlist->ndimensions < 0))
1065 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1066 mcvlist->ndimensions);
1067
1068 if (mcvlist->nitems == 0)
1069 elog(ERROR, "invalid zero-length item array in MCVList");
1070 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1071 elog(ERROR, "invalid length (%u) item array in MCVList",
1072 mcvlist->nitems);
1073
1074 nitems = mcvlist->nitems;
1075 ndims = mcvlist->ndimensions;
1076
1077 /*
1078 * Check amount of data including DimensionInfo for all dimensions and
1079 * also the serialized items (including uint16 indexes). Also, walk
1080 * through the dimension information and add it to the sum.
1081 */
1083
1084 /*
1085 * Check that we have at least the dimension and info records, along with
1086 * the items. We don't know the size of the serialized values yet. We need
1087 * to do this check first, before accessing the dimension info.
1088 */
1090 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1092
1093 /* Now copy the array of type Oids. */
1094 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1095 ptr += (sizeof(Oid) * ndims);
1096
1097 /* Now it's safe to access the dimension info. */
1098 info = palloc(ndims * sizeof(DimensionInfo));
1099
1100 memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1101 ptr += (ndims * sizeof(DimensionInfo));
1102
1103 /* account for the value arrays */
1104 for (dim = 0; dim < ndims; dim++)
1105 {
1106 /*
1107 * XXX I wonder if we can/should rely on asserts here. Maybe those
1108 * checks should be done every time?
1109 */
1110 Assert(info[dim].nvalues >= 0);
1111 Assert(info[dim].nbytes >= 0);
1112
1113 expected_size += info[dim].nbytes;
1114 }
1115
1116 /*
1117 * Now we know the total expected MCV size, including all the pieces
1118 * (header, dimension info. items and deduplicated data). So do the final
1119 * check on size.
1120 */
1122 elog(ERROR, "invalid MCV size %zu (expected %zu)",
1124
1125 /*
1126 * We need an array of Datum values for each dimension, so that we can
1127 * easily translate the uint16 indexes later. We also need a top-level
1128 * array of pointers to those per-dimension arrays.
1129 *
1130 * While allocating the arrays for dimensions, compute how much space we
1131 * need for a copy of the by-ref data, as we can't simply point to the
1132 * original values (it might go away).
1133 */
1134 datalen = 0; /* space for by-ref data */
1135 map = palloc_array(Datum *, ndims);
1136
1137 for (dim = 0; dim < ndims; dim++)
1138 {
1139 map[dim] = palloc_array(Datum, info[dim].nvalues);
1140
1141 /* space needed for a copy of data for by-ref types */
1142 datalen += info[dim].nbytes_aligned;
1143 }
1144
1145 /*
1146 * Now resize the MCV list so that the allocation includes all the data.
1147 *
1148 * Allocate space for a copy of the data, as we can't simply reference the
1149 * serialized data - it's not aligned properly, and it may disappear while
1150 * we're still using the MCV list, e.g. due to catcache release.
1151 *
1152 * We do care about alignment here, because we will allocate all the
1153 * pieces at once, but then use pointers to different parts.
1154 */
1155 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1156
1157 /* arrays of values and isnull flags for all MCV items */
1158 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1159 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1160
1161 /* we don't quite need to align this, but it makes some asserts easier */
1162 mcvlen += MAXALIGN(datalen);
1163
1164 /* now resize the deserialized MCV list, and compute pointers to parts */
1166
1167 /* pointer to the beginning of values/isnull arrays */
1168 valuesptr = (char *) mcvlist
1169 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1170
1171 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1172
1173 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1174
1175 /*
1176 * Build mapping (index => value) for translating the serialized data into
1177 * the in-memory representation.
1178 */
1179 for (dim = 0; dim < ndims; dim++)
1180 {
1181 /* remember start position in the input array */
1182 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1183
1184 if (info[dim].typbyval)
1185 {
1186 /* for by-val types we simply copy data into the mapping */
1187 for (i = 0; i < info[dim].nvalues; i++)
1188 {
1189 Datum v = 0;
1190
1191 memcpy(&v, ptr, info[dim].typlen);
1192 ptr += info[dim].typlen;
1193
1194 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1195
1196 /* no under/overflow of input array */
1197 Assert(ptr <= (start + info[dim].nbytes));
1198 }
1199 }
1200 else
1201 {
1202 /* for by-ref types we need to also make a copy of the data */
1203
1204 /* passed by reference, but fixed length (name, tid, ...) */
1205 if (info[dim].typlen > 0)
1206 {
1207 for (i = 0; i < info[dim].nvalues; i++)
1208 {
1209 memcpy(dataptr, ptr, info[dim].typlen);
1210 ptr += info[dim].typlen;
1211
1212 /* just point into the array */
1213 map[dim][i] = PointerGetDatum(dataptr);
1214 dataptr += MAXALIGN(info[dim].typlen);
1215 }
1216 }
1217 else if (info[dim].typlen == -1)
1218 {
1219 /* varlena */
1220 for (i = 0; i < info[dim].nvalues; i++)
1221 {
1222 uint32 len;
1223
1224 /* read the uint32 length */
1225 memcpy(&len, ptr, sizeof(uint32));
1226 ptr += sizeof(uint32);
1227
1228 /* the length is data-only */
1229 SET_VARSIZE(dataptr, len + VARHDRSZ);
1230 memcpy(VARDATA(dataptr), ptr, len);
1231 ptr += len;
1232
1233 /* just point into the array */
1234 map[dim][i] = PointerGetDatum(dataptr);
1235
1236 /* skip to place of the next deserialized value */
1237 dataptr += MAXALIGN(len + VARHDRSZ);
1238 }
1239 }
1240 else if (info[dim].typlen == -2)
1241 {
1242 /* cstring */
1243 for (i = 0; i < info[dim].nvalues; i++)
1244 {
1245 uint32 len;
1246
1247 memcpy(&len, ptr, sizeof(uint32));
1248 ptr += sizeof(uint32);
1249
1250 memcpy(dataptr, ptr, len);
1251 ptr += len;
1252
1253 /* just point into the array */
1254 map[dim][i] = PointerGetDatum(dataptr);
1255 dataptr += MAXALIGN(len);
1256 }
1257 }
1258
1259 /* no under/overflow of input array */
1260 Assert(ptr <= (start + info[dim].nbytes));
1261
1262 /* no overflow of the output mcv value */
1263 Assert(dataptr <= ((char *) mcvlist + mcvlen));
1264 }
1265
1266 /* check we consumed input data for this dimension exactly */
1267 Assert(ptr == (start + info[dim].nbytes));
1268 }
1269
1270 /* we should have also filled the MCV list exactly */
1271 Assert(dataptr == ((char *) mcvlist + mcvlen));
1272
1273 /* deserialize the MCV items and translate the indexes to Datums */
1274 for (i = 0; i < nitems; i++)
1275 {
1276 MCVItem *item = &mcvlist->items[i];
1277
1278 item->values = (Datum *) valuesptr;
1279 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1280
1281 item->isnull = (bool *) isnullptr;
1282 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1283
1284 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1285 ptr += sizeof(bool) * ndims;
1286
1287 memcpy(&item->frequency, ptr, sizeof(double));
1288 ptr += sizeof(double);
1289
1290 memcpy(&item->base_frequency, ptr, sizeof(double));
1291 ptr += sizeof(double);
1292
1293 /* finally translate the indexes (for non-NULL only) */
1294 for (dim = 0; dim < ndims; dim++)
1295 {
1296 uint16 index;
1297
1298 memcpy(&index, ptr, sizeof(uint16));
1299 ptr += sizeof(uint16);
1300
1301 if (item->isnull[dim])
1302 continue;
1303
1304 item->values[dim] = map[dim][index];
1305 }
1306
1307 /* check we're not overflowing the input */
1308 Assert(ptr <= endptr);
1309 }
1310
1311 /* check that we processed all the data */
1312 Assert(ptr == endptr);
1313
1314 /* release the buffers used for mapping */
1315 for (dim = 0; dim < ndims; dim++)
1316 pfree(map[dim]);
1317
1318 pfree(map);
1319
1320 return mcvlist;
1321}
1322
1323/*
1324 * SRF with details about buckets of a histogram:
1325 *
1326 * - item ID (0...nitems)
1327 * - values (string array)
1328 * - nulls only (boolean array)
1329 * - frequency (double precision)
1330 * - base_frequency (double precision)
1331 *
1332 * The input is the OID of the statistics, and there are no rows returned if
1333 * the statistics contains no histogram.
1334 */
1335Datum
1337{
1339
1340 /* stuff done only on the first call of the function */
1341 if (SRF_IS_FIRSTCALL())
1342 {
1343 MemoryContext oldcontext;
1345 TupleDesc tupdesc;
1346
1347 /* create a function context for cross-call persistence */
1349
1350 /* switch to memory context appropriate for multiple function calls */
1351 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1352
1354
1355 funcctx->user_fctx = mcvlist;
1356
1357 /* total number of tuples to be returned */
1358 funcctx->max_calls = 0;
1359 if (funcctx->user_fctx != NULL)
1360 funcctx->max_calls = mcvlist->nitems;
1361
1362 /* Build a tuple descriptor for our result type */
1363 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1364 ereport(ERROR,
1366 errmsg("function returning record called in context "
1367 "that cannot accept type record")));
1368 tupdesc = BlessTupleDesc(tupdesc);
1369
1370 /*
1371 * generate attribute metadata needed later to produce tuples from raw
1372 * C strings
1373 */
1374 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1375
1376 MemoryContextSwitchTo(oldcontext);
1377 }
1378
1379 /* stuff done on every call of the function */
1381
1382 if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1383 * left to send */
1384 {
1385 Datum values[5];
1386 bool nulls[5];
1387 HeapTuple tuple;
1388 Datum result;
1391
1392 int i;
1394 MCVItem *item;
1395
1396 mcvlist = (MCVList *) funcctx->user_fctx;
1397
1398 Assert(funcctx->call_cntr < mcvlist->nitems);
1399
1400 item = &mcvlist->items[funcctx->call_cntr];
1401
1402 for (i = 0; i < mcvlist->ndimensions; i++)
1403 {
1404
1406 BoolGetDatum(item->isnull[i]),
1407 false,
1408 BOOLOID,
1410
1411 if (!item->isnull[i])
1412 {
1413 bool isvarlena;
1414 Oid outfunc;
1416 Datum val;
1417 text *txt;
1418
1419 /* lookup output func for the type */
1422
1423 val = FunctionCall1(&fmgrinfo, item->values[i]);
1425
1428 false,
1429 TEXTOID,
1431 }
1432 else
1434 (Datum) 0,
1435 true,
1436 TEXTOID,
1438 }
1439
1440 values[0] = Int32GetDatum(funcctx->call_cntr);
1443 values[3] = Float8GetDatum(item->frequency);
1445
1446 /* no NULLs in the tuple */
1447 memset(nulls, 0, sizeof(nulls));
1448
1449 /* build a tuple */
1450 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1451
1452 /* make the tuple into a datum */
1453 result = HeapTupleGetDatum(tuple);
1454
1455 SRF_RETURN_NEXT(funcctx, result);
1456 }
1457 else /* do when there is no more left */
1458 {
1460 }
1461}
1462
1463/*
1464 * pg_mcv_list_in - input routine for type pg_mcv_list.
1465 *
1466 * pg_mcv_list is real enough to be a table column, but it has no operations
1467 * of its own, and disallows input too
1468 */
1469Datum
1471{
1472 /*
1473 * pg_mcv_list stores the data in binary form and parsing text input is
1474 * not needed, so disallow this.
1475 */
1476 ereport(ERROR,
1478 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1479
1480 PG_RETURN_VOID(); /* keep compiler quiet */
1481}
1482
1483
1484/*
1485 * pg_mcv_list_out - output routine for type pg_mcv_list.
1486 *
1487 * MCV lists are serialized into a bytea value, so we simply call byteaout()
1488 * to serialize the value into text. But it'd be nice to serialize that into
1489 * a meaningful representation (e.g. for inspection by people).
1490 *
1491 * XXX This should probably return something meaningful, similar to what
1492 * pg_dependencies_out does. Not sure how to deal with the deduplicated
1493 * values, though - do we want to expand that or not?
1494 */
1495Datum
1497{
1498 return byteaout(fcinfo);
1499}
1500
1501/*
1502 * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1503 */
1504Datum
1506{
1507 ereport(ERROR,
1509 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1510
1511 PG_RETURN_VOID(); /* keep compiler quiet */
1512}
1513
1514/*
1515 * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1516 *
1517 * MCV lists are serialized in a bytea value (although the type is named
1518 * differently), so let's just send that.
1519 */
1520Datum
1522{
1523 return byteasend(fcinfo);
1524}
1525
1526/*
1527 * match the attribute/expression to a dimension of the statistic
1528 *
1529 * Returns the zero-based index of the matching statistics dimension.
1530 * Optionally determines the collation.
1531 */
1532static int
1534{
1535 int idx;
1536
1537 if (IsA(expr, Var))
1538 {
1539 /* simple Var, so just lookup using varattno */
1540 Var *var = (Var *) expr;
1541
1542 if (collid)
1543 *collid = var->varcollid;
1544
1545 idx = bms_member_index(keys, var->varattno);
1546
1547 if (idx < 0)
1548 elog(ERROR, "variable not found in statistics object");
1549 }
1550 else
1551 {
1552 /* expression - lookup in stats expressions */
1553 ListCell *lc;
1554
1555 if (collid)
1556 *collid = exprCollation(expr);
1557
1558 /* expressions are stored after the simple columns */
1559 idx = bms_num_members(keys);
1560 foreach(lc, exprs)
1561 {
1562 Node *stat_expr = (Node *) lfirst(lc);
1563
1564 if (equal(expr, stat_expr))
1565 break;
1566
1567 idx++;
1568 }
1569
1570 if (lc == NULL)
1571 elog(ERROR, "expression not found in statistics object");
1572 }
1573
1574 return idx;
1575}
1576
1577/*
1578 * mcv_get_match_bitmap
1579 * Evaluate clauses using the MCV list, and update the match bitmap.
1580 *
1581 * A match bitmap keeps match/mismatch status for each MCV item, and we
1582 * update it based on additional clauses. We also use it to skip items
1583 * that can't possibly match (e.g. item marked as "mismatch" can't change
1584 * to "match" when evaluating AND clause list).
1585 *
1586 * The function also returns a flag indicating whether there was an
1587 * equality condition for all attributes, the minimum frequency in the MCV
1588 * list, and a total MCV frequency (sum of frequencies for all items).
1589 *
1590 * XXX Currently the match bitmap uses a bool for each MCV item, which is
1591 * somewhat wasteful as we could do with just a single bit, thus reducing
1592 * the size to ~1/8. It would also allow us to combine bitmaps simply using
1593 * & and |, which should be faster than min/max. The bitmaps are fairly
1594 * small, though (thanks to the cap on the MCV list size).
1595 */
1596static bool *
1598 Bitmapset *keys, List *exprs,
1599 MCVList *mcvlist, bool is_or)
1600{
1601 ListCell *l;
1602 bool *matches;
1603
1604 /* The bitmap may be partially built. */
1605 Assert(clauses != NIL);
1606 Assert(mcvlist != NULL);
1607 Assert(mcvlist->nitems > 0);
1609
1610 matches = palloc_array(bool, mcvlist->nitems);
1611 memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
1612
1613 /*
1614 * Loop through the list of clauses, and for each of them evaluate all the
1615 * MCV items not yet eliminated by the preceding clauses.
1616 */
1617 foreach(l, clauses)
1618 {
1619 Node *clause = (Node *) lfirst(l);
1620
1621 /* if it's a RestrictInfo, then extract the clause */
1622 if (IsA(clause, RestrictInfo))
1623 clause = (Node *) ((RestrictInfo *) clause)->clause;
1624
1625 /*
1626 * Handle the various types of clauses - OpClause, NullTest and
1627 * AND/OR/NOT
1628 */
1629 if (is_opclause(clause))
1630 {
1631 OpExpr *expr = (OpExpr *) clause;
1633
1634 /* valid only after examine_opclause_args returns true */
1636 Const *cst;
1637 bool expronleft;
1638 int idx;
1639 Oid collid;
1640
1641 fmgr_info(get_opcode(expr->opno), &opproc);
1642
1643 /* extract the var/expr and const from the expression */
1645 elog(ERROR, "incompatible clause");
1646
1647 /* match the attribute/expression to a dimension of the statistic */
1648 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1649
1650 /*
1651 * Walk through the MCV items and evaluate the current clause. We
1652 * can skip items that were already ruled out, and terminate if
1653 * there are no remaining MCV items that might possibly match.
1654 */
1655 for (int i = 0; i < mcvlist->nitems; i++)
1656 {
1657 bool match = true;
1658 MCVItem *item = &mcvlist->items[i];
1659
1660 Assert(idx >= 0);
1661
1662 /*
1663 * When the MCV item or the Const value is NULL we can treat
1664 * this as a mismatch. We must not call the operator because
1665 * of strictness.
1666 */
1667 if (item->isnull[idx] || cst->constisnull)
1668 {
1669 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1670 continue;
1671 }
1672
1673 /*
1674 * Skip MCV items that can't change result in the bitmap. Once
1675 * the value gets false for AND-lists, or true for OR-lists,
1676 * we don't need to look at more clauses.
1677 */
1679 continue;
1680
1681 /*
1682 * First check whether the constant is below the lower
1683 * boundary (in that case we can skip the bucket, because
1684 * there's no overlap).
1685 *
1686 * We don't store collations used to build the statistics, but
1687 * we can use the collation for the attribute itself, as
1688 * stored in varcollid. We do reset the statistics after a
1689 * type change (including collation change), so this is OK.
1690 * For expressions, we use the collation extracted from the
1691 * expression itself.
1692 */
1693 if (expronleft)
1695 collid,
1696 item->values[idx],
1697 cst->constvalue));
1698 else
1700 collid,
1701 cst->constvalue,
1702 item->values[idx]));
1703
1704 /* update the match bitmap with the result */
1705 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1706 }
1707 }
1708 else if (IsA(clause, ScalarArrayOpExpr))
1709 {
1710 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1712
1713 /* valid only after examine_opclause_args returns true */
1715 Const *cst;
1716 bool expronleft;
1717 Oid collid;
1718 int idx;
1719
1720 /* array evaluation */
1722 int16 elmlen;
1723 bool elmbyval;
1724 char elmalign;
1725 int num_elems;
1726 Datum *elem_values;
1727 bool *elem_nulls;
1728
1729 fmgr_info(get_opcode(expr->opno), &opproc);
1730
1731 /* extract the var/expr and const from the expression */
1733 elog(ERROR, "incompatible clause");
1734
1735 /* We expect Var on left */
1736 if (!expronleft)
1737 elog(ERROR, "incompatible clause");
1738
1739 /*
1740 * Deconstruct the array constant, unless it's NULL (we'll cover
1741 * that case below)
1742 */
1743 if (!cst->constisnull)
1744 {
1745 arrayval = DatumGetArrayTypeP(cst->constvalue);
1747 &elmlen, &elmbyval, &elmalign);
1751 &elem_values, &elem_nulls, &num_elems);
1752 }
1753
1754 /* match the attribute/expression to a dimension of the statistic */
1755 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1756
1757 /*
1758 * Walk through the MCV items and evaluate the current clause. We
1759 * can skip items that were already ruled out, and terminate if
1760 * there are no remaining MCV items that might possibly match.
1761 */
1762 for (int i = 0; i < mcvlist->nitems; i++)
1763 {
1764 int j;
1765 bool match = !expr->useOr;
1766 MCVItem *item = &mcvlist->items[i];
1767
1768 /*
1769 * When the MCV item or the Const value is NULL we can treat
1770 * this as a mismatch. We must not call the operator because
1771 * of strictness.
1772 */
1773 if (item->isnull[idx] || cst->constisnull)
1774 {
1775 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1776 continue;
1777 }
1778
1779 /*
1780 * Skip MCV items that can't change result in the bitmap. Once
1781 * the value gets false for AND-lists, or true for OR-lists,
1782 * we don't need to look at more clauses.
1783 */
1785 continue;
1786
1787 for (j = 0; j < num_elems; j++)
1788 {
1789 Datum elem_value = elem_values[j];
1790 bool elem_isnull = elem_nulls[j];
1791 bool elem_match;
1792
1793 /* NULL values always evaluate as not matching. */
1794 if (elem_isnull)
1795 {
1796 match = RESULT_MERGE(match, expr->useOr, false);
1797 continue;
1798 }
1799
1800 /*
1801 * Stop evaluating the array elements once we reach a
1802 * matching value that can't change - ALL() is the same as
1803 * AND-list, ANY() is the same as OR-list.
1804 */
1805 if (RESULT_IS_FINAL(match, expr->useOr))
1806 break;
1807
1809 collid,
1810 item->values[idx],
1811 elem_value));
1812
1813 match = RESULT_MERGE(match, expr->useOr, elem_match);
1814 }
1815
1816 /* update the match bitmap with the result */
1817 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1818 }
1819 }
1820 else if (IsA(clause, NullTest))
1821 {
1822 NullTest *expr = (NullTest *) clause;
1823 Node *clause_expr = (Node *) (expr->arg);
1824
1825 /* match the attribute/expression to a dimension of the statistic */
1826 int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1827
1828 /*
1829 * Walk through the MCV items and evaluate the current clause. We
1830 * can skip items that were already ruled out, and terminate if
1831 * there are no remaining MCV items that might possibly match.
1832 */
1833 for (int i = 0; i < mcvlist->nitems; i++)
1834 {
1835 bool match = false; /* assume mismatch */
1836 MCVItem *item = &mcvlist->items[i];
1837
1838 /* if the clause mismatches the MCV item, update the bitmap */
1839 switch (expr->nulltesttype)
1840 {
1841 case IS_NULL:
1842 match = (item->isnull[idx]) ? true : match;
1843 break;
1844
1845 case IS_NOT_NULL:
1846 match = (!item->isnull[idx]) ? true : match;
1847 break;
1848 }
1849
1850 /* now, update the match bitmap, depending on OR/AND type */
1851 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1852 }
1853 }
1854 else if (is_orclause(clause) || is_andclause(clause))
1855 {
1856 /* AND/OR clause, with all subclauses being compatible */
1857
1858 int i;
1859 BoolExpr *bool_clause = ((BoolExpr *) clause);
1860 List *bool_clauses = bool_clause->args;
1861
1862 /* match/mismatch bitmap for each MCV item */
1863 bool *bool_matches = NULL;
1864
1867
1868 /* build the match bitmap for the OR-clauses */
1870 mcvlist, is_orclause(clause));
1871
1872 /*
1873 * Merge the bitmap produced by mcv_get_match_bitmap into the
1874 * current one. We need to consider if we're evaluating AND or OR
1875 * condition when merging the results.
1876 */
1877 for (i = 0; i < mcvlist->nitems; i++)
1879
1881 }
1882 else if (is_notclause(clause))
1883 {
1884 /* NOT clause, with all subclauses compatible */
1885
1886 int i;
1887 BoolExpr *not_clause = ((BoolExpr *) clause);
1888 List *not_args = not_clause->args;
1889
1890 /* match/mismatch bitmap for each MCV item */
1891 bool *not_matches = NULL;
1892
1893 Assert(not_args != NIL);
1895
1896 /* build the match bitmap for the NOT-clause */
1898 mcvlist, false);
1899
1900 /*
1901 * Merge the bitmap produced by mcv_get_match_bitmap into the
1902 * current one. We're handling a NOT clause, so invert the result
1903 * before merging it into the global bitmap.
1904 */
1905 for (i = 0; i < mcvlist->nitems; i++)
1907
1909 }
1910 else if (IsA(clause, Var))
1911 {
1912 /* Var (has to be a boolean Var, possibly from below NOT) */
1913
1914 Var *var = (Var *) (clause);
1915
1916 /* match the attribute to a dimension of the statistic */
1917 int idx = bms_member_index(keys, var->varattno);
1918
1919 Assert(var->vartype == BOOLOID);
1920
1921 /*
1922 * Walk through the MCV items and evaluate the current clause. We
1923 * can skip items that were already ruled out, and terminate if
1924 * there are no remaining MCV items that might possibly match.
1925 */
1926 for (int i = 0; i < mcvlist->nitems; i++)
1927 {
1928 MCVItem *item = &mcvlist->items[i];
1929 bool match = false;
1930
1931 /* if the item is NULL, it's a mismatch */
1932 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1933 match = true;
1934
1935 /* update the result bitmap */
1936 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1937 }
1938 }
1939 else
1940 {
1941 /* Otherwise, it must be a bare boolean-returning expression */
1942 int idx;
1943
1944 /* match the expression to a dimension of the statistic */
1945 idx = mcv_match_expression(clause, keys, exprs, NULL);
1946
1947 /*
1948 * Walk through the MCV items and evaluate the current clause. We
1949 * can skip items that were already ruled out, and terminate if
1950 * there are no remaining MCV items that might possibly match.
1951 */
1952 for (int i = 0; i < mcvlist->nitems; i++)
1953 {
1954 bool match;
1955 MCVItem *item = &mcvlist->items[i];
1956
1957 /* "match" just means it's bool TRUE */
1958 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1959
1960 /* now, update the match bitmap, depending on OR/AND type */
1961 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1962 }
1963 }
1964 }
1965
1966 return matches;
1967}
1968
1969
1970/*
1971 * mcv_combine_selectivities
1972 * Combine per-column and multi-column MCV selectivity estimates.
1973 *
1974 * simple_sel is a "simple" selectivity estimate (produced without using any
1975 * extended statistics, essentially assuming independence of columns/clauses).
1976 *
1977 * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1978 * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then
1979 * essentially interpreted as a correction to be added to simple_sel, as
1980 * described below.
1981 *
1982 * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1983 * matching ones). This is used as an upper bound on the portion of the
1984 * selectivity estimates not covered by the MCV statistics.
1985 *
1986 * Note: While simple and base selectivities are defined in a quite similar
1987 * way, the values are computed differently and are not therefore equal. The
1988 * simple selectivity is computed as a product of per-clause estimates, while
1989 * the base selectivity is computed by adding up base frequencies of matching
1990 * items of the multi-column MCV list. So the values may differ for two main
1991 * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1992 * the MCV items did not match the estimated clauses.
1993 *
1994 * As both (a) and (b) reduce the base selectivity value, it generally holds
1995 * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1996 * values may be equal.
1997 *
1998 * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
1999 * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
2000 * correction for the part covered by the MCV list. Those two statements are
2001 * actually equivalent.
2002 */
2008{
2011
2012 /* estimated selectivity of values not covered by MCV matches */
2015
2016 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2017 if (other_sel > 1.0 - mcv_totalsel)
2018 other_sel = 1.0 - mcv_totalsel;
2019
2020 /* overall selectivity is the sum of the MCV and non-MCV parts */
2021 sel = mcv_sel + other_sel;
2023
2024 return sel;
2025}
2026
2027
2028/*
2029 * mcv_clauselist_selectivity
2030 * Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2031 * list of clauses.
2032 *
2033 * This determines which MCV items match every clause in the list and returns
2034 * the sum of the frequencies of those items.
2035 *
2036 * In addition, it returns the sum of the base frequencies of each of those
2037 * items (that is the sum of the selectivities that each item would have if
2038 * the columns were independent of one another), and the total selectivity of
2039 * all the MCV items (not just the matching ones). These are expected to be
2040 * used together with a "simple" selectivity estimate (one based only on
2041 * per-column statistics) to produce an overall selectivity estimate that
2042 * makes use of both per-column and multi-column statistics --- see
2043 * mcv_combine_selectivities().
2044 */
2047 List *clauses, int varRelid,
2048 JoinType jointype, SpecialJoinInfo *sjinfo,
2049 RelOptInfo *rel,
2051{
2052 int i;
2053 MCVList *mcv;
2054 Selectivity s = 0.0;
2055 RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2056
2057 /* match/mismatch bitmap for each MCV item */
2058 bool *matches = NULL;
2059
2060 /* load the MCV list stored in the statistics object */
2061 mcv = statext_mcv_load(stat->statOid, rte->inh);
2062
2063 /* build a match bitmap for the clauses */
2064 matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2065 mcv, false);
2066
2067 /* sum frequencies for all the matching MCV items */
2068 *basesel = 0.0;
2069 *totalsel = 0.0;
2070 for (i = 0; i < mcv->nitems; i++)
2071 {
2072 *totalsel += mcv->items[i].frequency;
2073
2074 if (matches[i] != false)
2075 {
2076 *basesel += mcv->items[i].base_frequency;
2077 s += mcv->items[i].frequency;
2078 }
2079 }
2080
2081 return s;
2082}
2083
2084
2085/*
2086 * mcv_clause_selectivity_or
2087 * Use MCV statistics to estimate the selectivity of a clause that
2088 * appears in an ORed list of clauses.
2089 *
2090 * As with mcv_clauselist_selectivity() this determines which MCV items match
2091 * the clause and returns both the sum of the frequencies and the sum of the
2092 * base frequencies of those items, as well as the sum of the frequencies of
2093 * all MCV items (not just the matching ones) so that this information can be
2094 * used by mcv_combine_selectivities() to produce a selectivity estimate that
2095 * makes use of both per-column and multi-column statistics.
2096 *
2097 * Additionally, we return information to help compute the overall selectivity
2098 * of the ORed list of clauses assumed to contain this clause. This function
2099 * is intended to be called for each clause in the ORed list of clauses,
2100 * allowing the overall selectivity to be computed using the following
2101 * algorithm:
2102 *
2103 * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2104 * of the first n clauses in the list. Then the combined selectivity taking
2105 * into account the next clause C[n+1] can be written as
2106 *
2107 * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2108 *
2109 * The final term above represents the overlap between the clauses examined so
2110 * far and the (n+1)'th clause. To estimate its selectivity, we track the
2111 * match bitmap for the ORed list of clauses examined so far and examine its
2112 * intersection with the match bitmap for the (n+1)'th clause.
2113 *
2114 * We then also return the sums of the MCV item frequencies and base
2115 * frequencies for the match bitmap intersection corresponding to the overlap
2116 * term above, so that they can be combined with a simple selectivity estimate
2117 * for that term.
2118 *
2119 * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2120 * for the clauses examined so far. The caller is expected to set it to NULL
2121 * the first time it calls this function.
2122 */
2125 MCVList *mcv, Node *clause, bool **or_matches,
2128{
2129 Selectivity s = 0.0;
2130 bool *new_matches;
2131 int i;
2132
2133 /* build the OR-matches bitmap, if not built already */
2134 if (*or_matches == NULL)
2135 *or_matches = palloc0_array(bool, mcv->nitems);
2136
2137 /* build the match bitmap for the new clause */
2139 stat->exprs, mcv, false);
2140
2141 /*
2142 * Sum the frequencies for all the MCV items matching this clause and also
2143 * those matching the overlap between this clause and any of the preceding
2144 * clauses as described above.
2145 */
2146 *basesel = 0.0;
2147 *overlap_mcvsel = 0.0;
2148 *overlap_basesel = 0.0;
2149 *totalsel = 0.0;
2150 for (i = 0; i < mcv->nitems; i++)
2151 {
2152 *totalsel += mcv->items[i].frequency;
2153
2154 if (new_matches[i])
2155 {
2156 s += mcv->items[i].frequency;
2157 *basesel += mcv->items[i].base_frequency;
2158
2159 if ((*or_matches)[i])
2160 {
2161 *overlap_mcvsel += mcv->items[i].frequency;
2163 }
2164 }
2165
2166 /* update the OR-matches bitmap for the next clause */
2167 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2168 }
2169
2171
2172 return s;
2173}
2174
2175/*
2176 * Free allocations of a MCVList.
2177 */
2178void
2180{
2181 for (int i = 0; i < mcvlist->nitems; i++)
2182 {
2183 MCVItem *item = &mcvlist->items[i];
2184
2185 pfree(item->values);
2186 pfree(item->isnull);
2187 }
2188 pfree(mcvlist);
2189}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
#define DatumGetArrayTypeP(X)
Definition array.h:261
#define ARR_ELEMTYPE(a)
Definition array.h:292
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
void deconstruct_array(const ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
int16 AttrNumber
Definition attnum.h:21
int bms_num_members(const Bitmapset *a)
Definition bitmapset.c:750
int bms_member_index(Bitmapset *a, int x)
Definition bitmapset.c:539
static Datum values[MAXATTR]
Definition bootstrap.c:155
Datum byteaout(PG_FUNCTION_ARGS)
Definition bytea.c:275
Datum byteasend(PG_FUNCTION_ARGS)
Definition bytea.c:377
#define MAXALIGN(LEN)
Definition c.h:826
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
#define VARHDRSZ
Definition c.h:711
#define Assert(condition)
Definition c.h:873
int16_t int16
Definition c.h:541
uint16_t uint16
Definition c.h:545
uint32_t uint32
Definition c.h:546
#define PG_UINT16_MAX
Definition c.h:601
size_t Size
Definition c.h:619
Oid collid
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
bool equal(const void *a, const void *b)
Definition equalfuncs.c:223
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
int compare_scalars_simple(const void *a, const void *b, void *arg)
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
int multi_sort_compare(const void *a, const void *b, void *arg)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
bool examine_opclause_args(List *args, Node **exprp, Const **cstp, bool *expronleftp)
MultiSortSupportData * MultiSortSupport
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition fmgr.c:1150
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition fmgr.c:128
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_DETOAST_DATUM(datum)
Definition fmgr.h:240
#define FunctionCall1(flinfo, arg1)
Definition fmgr.h:702
#define PG_GETARG_BYTEA_P(n)
Definition fmgr.h:336
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define DatumGetByteaP(X)
Definition fmgr.h:332
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition funcapi.h:328
return str start
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1117
#define HeapTupleIsValid(tuple)
Definition htup.h:78
#define nitems(x)
Definition indent.h:31
long val
Definition informix.c:689
static struct @172 value
int b
Definition isn.c:74
int a
Definition isn.c:73
int j
Definition isn.c:78
int i
Definition isn.c:77
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition lsyscache.c:3057
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition lsyscache.c:2421
RegProcedure get_opcode(Oid opno)
Definition lsyscache.c:1435
void statext_mcv_free(MCVList *mcvlist)
Definition mcv.c:2179
Datum pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
Definition mcv.c:1336
Datum pg_mcv_list_in(PG_FUNCTION_ARGS)
Definition mcv.c:1470
#define ITEM_SIZE(ndims)
Definition mcv.c:51
MCVList * statext_mcv_deserialize(bytea *data)
Definition mcv.c:994
Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
Definition mcv.c:2046
static MultiSortSupport build_mss(StatsBuildData *data)
Definition mcv.c:345
static int compare_sort_item_count(const void *a, const void *b, void *arg)
Definition mcv.c:401
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition mcv.c:556
Datum pg_mcv_list_out(PG_FUNCTION_ARGS)
Definition mcv.c:1496
static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
Definition mcv.c:377
#define MinSizeOfMCVList
Definition mcv.c:57
Datum pg_mcv_list_send(PG_FUNCTION_ARGS)
Definition mcv.c:1521
#define SizeOfMCVList(ndims, nitems)
Definition mcv.c:66
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition mcv.c:1597
#define RESULT_MERGE(value, is_or, match)
Definition mcv.c:86
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition mcv.c:146
Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
Definition mcv.c:2004
Selectivity mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
Definition mcv.c:2124
#define RESULT_IS_FINAL(value, is_or)
Definition mcv.c:98
static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
Definition mcv.c:1533
MCVList * statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
Definition mcv.c:178
Datum pg_mcv_list_recv(PG_FUNCTION_ARGS)
Definition mcv.c:1505
static int sort_item_compare(const void *a, const void *b, void *arg)
Definition mcv.c:463
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition mcv.c:488
bytea * statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
Definition mcv.c:619
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition mcv.c:422
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
Oid exprCollation(const Node *expr)
Definition nodeFuncs.c:821
static bool is_andclause(const void *clause)
Definition nodeFuncs.h:107
static bool is_orclause(const void *clause)
Definition nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition nodeFuncs.h:125
#define IsA(nodeptr, _type_)
Definition nodes.h:164
double Selectivity
Definition nodes.h:260
JoinType
Definition nodes.h:298
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
void * arg
const void size_t len
const void * data
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
#define list_make1(x1)
Definition pg_list.h:212
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition bsearch_arg.c:55
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:262
static char * DatumGetCString(Datum X)
Definition postgres.h:365
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
static Datum Float8GetDatum(float8 X)
Definition postgres.h:512
static Datum Int32GetDatum(int32 X)
Definition postgres.h:222
#define InvalidOid
unsigned int Oid
static int fb(int x)
@ IS_NULL
Definition primnodes.h:1977
@ IS_NOT_NULL
Definition primnodes.h:1977
tree ctl root
Definition radixtree.h:1857
#define CLAMP_PROBABILITY(p)
Definition selfuncs.h:63
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
struct SortSupportData * SortSupport
Definition sortsupport.h:58
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
#define STATS_MCV_TYPE_BASIC
Definition statistics.h:67
#define STATS_MCV_MAGIC
Definition statistics.h:66
#define STATS_MCVLIST_MAX_ITEMS
Definition statistics.h:70
#define STATS_MAX_DIMENSIONS
Definition statistics.h:19
Definition pg_list.h:54
bool * isnull
Definition statistics.h:82
double frequency
Definition statistics.h:80
double base_frequency
Definition statistics.h:81
Datum * values
Definition statistics.h:83
uint32 nitems
Definition statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:94
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
Definition nodes.h:135
NullTestType nulltesttype
Definition primnodes.h:1984
Expr * arg
Definition primnodes.h:1983
Oid opno
Definition primnodes.h:850
List * args
Definition primnodes.h:868
Index relid
Definition pathnodes.h:973
MemoryContext ssup_cxt
Definition sortsupport.h:66
Form_pg_type attrtype
Definition vacuum.h:128
Oid attrcollid
Definition vacuum.h:129
AttrNumber varattno
Definition primnodes.h:274
Definition type.h:96
Definition c.h:706
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition syscache.c:230
static ItemArray items
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition tupmacs.h:50
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition tupmacs.h:206
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:386
#define TYPECACHE_LT_OPR
Definition typcache.h:139
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
text * cstring_to_text(const char *s)
Definition varlena.c:181
const char * type