PostgreSQL Source Code  git master
mcv.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * mcv.c
4  * POSTGRES multivariate MCV lists
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/statistics/mcv.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "access/htup_details.h"
22 #include "fmgr.h"
23 #include "funcapi.h"
24 #include "nodes/nodeFuncs.h"
26 #include "statistics/statistics.h"
27 #include "utils/array.h"
28 #include "utils/builtins.h"
29 #include "utils/fmgrprotos.h"
30 #include "utils/lsyscache.h"
31 #include "utils/selfuncs.h"
32 #include "utils/syscache.h"
33 #include "utils/typcache.h"
34 
35 /*
36  * Computes size of a serialized MCV item, depending on the number of
37  * dimensions (columns) the statistic is defined on. The datum values are
38  * stored in a separate array (deduplicated, to minimize the size), and
39  * so the serialized items only store uint16 indexes into that array.
40  *
41  * Each serialized item stores (in this order):
42  *
43  * - indexes to values (ndim * sizeof(uint16))
44  * - null flags (ndim * sizeof(bool))
45  * - frequency (sizeof(double))
46  * - base_frequency (sizeof(double))
47  *
48  * There is no alignment padding within an MCV item.
49  * So in total each MCV item requires this many bytes:
50  *
51  * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
52  */
53 #define ITEM_SIZE(ndims) \
54  ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
55 
56 /*
57  * Used to compute size of serialized MCV list representation.
58  */
59 #define MinSizeOfMCVList \
60  (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
61 
62 /*
63  * Size of the serialized MCV list, excluding the space needed for
64  * deduplicated per-dimension values. The macro is meant to be used
65  * when it's not yet safe to access the serialized info about amount
66  * of data for each column.
67  */
68 #define SizeOfMCVList(ndims,nitems) \
69  ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
70  ((ndims) * sizeof(DimensionInfo)) + \
71  ((nitems) * ITEM_SIZE(ndims)))
72 
74 
75 static SortItem *build_distinct_groups(int numrows, SortItem *items,
76  MultiSortSupport mss, int *ndistinct);
77 
78 static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
79  MultiSortSupport mss, int *ncounts);
80 
81 static int count_distinct_groups(int numrows, SortItem *items,
82  MultiSortSupport mss);
83 
84 /*
85  * Compute new value for bitmap item, considering whether it's used for
86  * clauses connected by AND/OR.
87  */
88 #define RESULT_MERGE(value, is_or, match) \
89  ((is_or) ? ((value) || (match)) : ((value) && (match)))
90 
91 /*
92  * When processing a list of clauses, the bitmap item may get set to a value
93  * such that additional clauses can't change it. For example, when processing
94  * a list of clauses connected to AND, as soon as the item gets set to 'false'
95  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
96  *
97  * Returns true when the value in the bitmap can't change no matter how the
98  * remaining clauses are evaluated.
99  */
100 #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
101 
102 /*
103  * get_mincount_for_mcv_list
104  * Determine the minimum number of times a value needs to appear in
105  * the sample for it to be included in the MCV list.
106  *
107  * We want to keep only values that appear sufficiently often in the
108  * sample that it is reasonable to extrapolate their sample frequencies to
109  * the entire table. We do this by placing an upper bound on the relative
110  * standard error of the sample frequency, so that any estimates the
111  * planner generates from the MCV statistics can be expected to be
112  * reasonably accurate.
113  *
114  * Since we are sampling without replacement, the sample frequency of a
115  * particular value is described by a hypergeometric distribution. A
116  * common rule of thumb when estimating errors in this situation is to
117  * require at least 10 instances of the value in the sample, in which case
118  * the distribution can be approximated by a normal distribution, and
119  * standard error analysis techniques can be applied. Given a sample size
120  * of n, a population size of N, and a sample frequency of p=cnt/n, the
121  * standard error of the proportion p is given by
122  * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
123  * where the second term is the finite population correction. To get
124  * reasonably accurate planner estimates, we impose an upper bound on the
125  * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
126  * error bound is fairly arbitrary, but has been found empirically to work
127  * well. Rearranging this formula gives a lower bound on the number of
128  * instances of the value seen:
129  * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
130  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
131  * case where n approaches 0 cannot happen in practice, since the sample
132  * size is at least 300. The case where n approaches N corresponds to
133  * sampling the whole table, in which case it is reasonable to keep
134  * the whole MCV list (have no lower bound), so it makes sense to apply
135  * this formula for all inputs, even though the above derivation is
136  * technically only valid when the right hand side is at least around 10.
137  *
138  * An alternative way to look at this formula is as follows -- assume that
139  * the number of instances of the value seen scales up to the entire
140  * table, so that the population count is K=N*cnt/n. Then the distribution
141  * in the sample is a hypergeometric distribution parameterised by N, n
142  * and K, and the bound above is mathematically equivalent to demanding
143  * that the standard deviation of that distribution is less than 20% of
144  * its mean. Thus the relative errors in any planner estimates produced
145  * from the MCV statistics are likely to be not too large.
146  */
147 static double
148 get_mincount_for_mcv_list(int samplerows, double totalrows)
149 {
150  double n = samplerows;
151  double N = totalrows;
152  double numer,
153  denom;
154 
155  numer = n * (N - n);
156  denom = N - n + 0.04 * n * (N - 1);
157 
158  /* Guard against division by zero (possible if n = N = 1) */
159  if (denom == 0.0)
160  return 0.0;
161 
162  return numer / denom;
163 }
164 
165 /*
166  * Builds MCV list from the set of sampled rows.
167  *
168  * The algorithm is quite simple:
169  *
170  * (1) sort the data (default collation, '<' for the data type)
171  *
172  * (2) count distinct groups, decide how many to keep
173  *
174  * (3) build the MCV list using the threshold determined in (2)
175  *
176  * (4) remove rows represented by the MCV from the sample
177  *
178  */
179 MCVList *
180 statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
181 {
182  int i,
183  numattrs,
184  numrows,
185  ngroups,
186  nitems;
187  double mincount;
188  SortItem *items;
189  SortItem *groups;
190  MCVList *mcvlist = NULL;
191  MultiSortSupport mss;
192 
193  /* comparator for all the columns */
194  mss = build_mss(data);
195 
196  /* sort the rows */
198  data->nattnums, data->attnums);
199 
200  if (!items)
201  return NULL;
202 
203  /* for convenience */
204  numattrs = data->nattnums;
205  numrows = data->numrows;
206 
207  /* transform the sorted rows into groups (sorted by frequency) */
208  groups = build_distinct_groups(nitems, items, mss, &ngroups);
209 
210  /*
211  * The maximum number of MCV items to store, based on the statistics
212  * target we computed for the statistics object (from the target set for
213  * the object itself, attributes and the system default). In any case, we
214  * can't keep more groups than we have available.
215  */
216  nitems = stattarget;
217  if (nitems > ngroups)
218  nitems = ngroups;
219 
220  /*
221  * Decide how many items to keep in the MCV list. We can't use the same
222  * algorithm as per-column MCV lists, because that only considers the
223  * actual group frequency - but we're primarily interested in how the
224  * actual frequency differs from the base frequency (product of simple
225  * per-column frequencies, as if the columns were independent).
226  *
227  * Using the same algorithm might exclude items that are close to the
228  * "average" frequency of the sample. But that does not say whether the
229  * observed frequency is close to the base frequency or not. We also need
230  * to consider unexpectedly uncommon items (again, compared to the base
231  * frequency), and the single-column algorithm does not have to.
232  *
233  * We simply decide how many items to keep by computing the minimum count
234  * using get_mincount_for_mcv_list() and then keep all items that seem to
235  * be more common than that.
236  */
237  mincount = get_mincount_for_mcv_list(numrows, totalrows);
238 
239  /*
240  * Walk the groups until we find the first group with a count below the
241  * mincount threshold (the index of that group is the number of groups we
242  * want to keep).
243  */
244  for (i = 0; i < nitems; i++)
245  {
246  if (groups[i].count < mincount)
247  {
248  nitems = i;
249  break;
250  }
251  }
252 
253  /*
254  * At this point, we know the number of items for the MCV list. There
255  * might be none (for uniform distribution with many groups), and in that
256  * case, there will be no MCV list. Otherwise, construct the MCV list.
257  */
258  if (nitems > 0)
259  {
260  int j;
261  SortItem key;
262  MultiSortSupport tmp;
263 
264  /* frequencies for values in each attribute */
265  SortItem **freqs;
266  int *nfreqs;
267 
268  /* used to search values */
269  tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
270  + sizeof(SortSupportData));
271 
272  /* compute frequencies for values in each column */
273  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
274  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
275 
276  /*
277  * Allocate the MCV list structure, set the global parameters.
278  */
279  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
280  sizeof(MCVItem) * nitems);
281 
282  mcvlist->magic = STATS_MCV_MAGIC;
283  mcvlist->type = STATS_MCV_TYPE_BASIC;
284  mcvlist->ndimensions = numattrs;
285  mcvlist->nitems = nitems;
286 
287  /* store info about data type OIDs */
288  for (i = 0; i < numattrs; i++)
289  mcvlist->types[i] = data->stats[i]->attrtypid;
290 
291  /* Copy the first chunk of groups into the result. */
292  for (i = 0; i < nitems; i++)
293  {
294  /* just point to the proper place in the list */
295  MCVItem *item = &mcvlist->items[i];
296 
297  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
298  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
299 
300  /* copy values for the group */
301  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
302  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
303 
304  /* groups should be sorted by frequency in descending order */
305  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
306 
307  /* group frequency */
308  item->frequency = (double) groups[i].count / numrows;
309 
310  /* base frequency, if the attributes were independent */
311  item->base_frequency = 1.0;
312  for (j = 0; j < numattrs; j++)
313  {
314  SortItem *freq;
315 
316  /* single dimension */
317  tmp->ndims = 1;
318  tmp->ssup[0] = mss->ssup[j];
319 
320  /* fill search key */
321  key.values = &groups[i].values[j];
322  key.isnull = &groups[i].isnull[j];
323 
324  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
325  sizeof(SortItem),
326  multi_sort_compare, tmp);
327 
328  item->base_frequency *= ((double) freq->count) / numrows;
329  }
330  }
331 
332  pfree(nfreqs);
333  pfree(freqs);
334  }
335 
336  pfree(items);
337  pfree(groups);
338 
339  return mcvlist;
340 }
341 
342 /*
343  * build_mss
344  * Build a MultiSortSupport for the given StatsBuildData.
345  */
346 static MultiSortSupport
348 {
349  int i;
350  int numattrs = data->nattnums;
351 
352  /* Sort by multiple columns (using array of SortSupport) */
353  MultiSortSupport mss = multi_sort_init(numattrs);
354 
355  /* prepare the sort functions for all the attributes */
356  for (i = 0; i < numattrs; i++)
357  {
358  VacAttrStats *colstat = data->stats[i];
360 
362  if (type->lt_opr == InvalidOid) /* shouldn't happen */
363  elog(ERROR, "cache lookup failed for ordering operator for type %u",
364  colstat->attrtypid);
365 
366  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
367  }
368 
369  return mss;
370 }
371 
372 /*
373  * count_distinct_groups
374  * Count distinct combinations of SortItems in the array.
375  *
376  * The array is assumed to be sorted according to the MultiSortSupport.
377  */
378 static int
380 {
381  int i;
382  int ndistinct;
383 
384  ndistinct = 1;
385  for (i = 1; i < numrows; i++)
386  {
387  /* make sure the array really is sorted */
388  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
389 
390  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
391  ndistinct += 1;
392  }
393 
394  return ndistinct;
395 }
396 
397 /*
398  * compare_sort_item_count
399  * Comparator for sorting items by count (frequencies) in descending
400  * order.
401  */
402 static int
403 compare_sort_item_count(const void *a, const void *b, void *arg)
404 {
405  SortItem *ia = (SortItem *) a;
406  SortItem *ib = (SortItem *) b;
407 
408  if (ia->count == ib->count)
409  return 0;
410  else if (ia->count > ib->count)
411  return -1;
412 
413  return 1;
414 }
415 
416 /*
417  * build_distinct_groups
418  * Build an array of SortItems for distinct groups and counts matching
419  * items.
420  *
421  * The 'items' array is assumed to be sorted.
422  */
423 static SortItem *
425  int *ndistinct)
426 {
427  int i,
428  j;
429  int ngroups = count_distinct_groups(numrows, items, mss);
430 
431  SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
432 
433  j = 0;
434  groups[0] = items[0];
435  groups[0].count = 1;
436 
437  for (i = 1; i < numrows; i++)
438  {
439  /* Assume sorted in ascending order. */
440  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
441 
442  /* New distinct group detected. */
443  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
444  {
445  groups[++j] = items[i];
446  groups[j].count = 0;
447  }
448 
449  groups[j].count++;
450  }
451 
452  /* ensure we filled the expected number of distinct groups */
453  Assert(j + 1 == ngroups);
454 
455  /* Sort the distinct groups by frequency (in descending order). */
456  qsort_interruptible(groups, ngroups, sizeof(SortItem),
458 
459  *ndistinct = ngroups;
460  return groups;
461 }
462 
463 /* compare sort items (single dimension) */
464 static int
465 sort_item_compare(const void *a, const void *b, void *arg)
466 {
467  SortSupport ssup = (SortSupport) arg;
468  SortItem *ia = (SortItem *) a;
469  SortItem *ib = (SortItem *) b;
470 
471  return ApplySortComparator(ia->values[0], ia->isnull[0],
472  ib->values[0], ib->isnull[0],
473  ssup);
474 }
475 
476 /*
477  * build_column_frequencies
478  * Compute frequencies of values in each column.
479  *
480  * This returns an array of SortItems for each attribute the MCV is built
481  * on, with a frequency (number of occurrences) for each value. This is
482  * then used to compute "base" frequency of MCV items.
483  *
484  * All the memory is allocated in a single chunk, so that a single pfree
485  * is enough to release it. We do not allocate space for values/isnull
486  * arrays in the SortItems, because we can simply point into the input
487  * groups directly.
488  */
489 static SortItem **
490 build_column_frequencies(SortItem *groups, int ngroups,
491  MultiSortSupport mss, int *ncounts)
492 {
493  int i,
494  dim;
495  SortItem **result;
496  char *ptr;
497 
498  Assert(groups);
499  Assert(ncounts);
500 
501  /* allocate arrays for all columns as a single chunk */
502  ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
503  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
504 
505  /* initial array of pointers */
506  result = (SortItem **) ptr;
507  ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
508 
509  for (dim = 0; dim < mss->ndims; dim++)
510  {
511  SortSupport ssup = &mss->ssup[dim];
512 
513  /* array of values for a single column */
514  result[dim] = (SortItem *) ptr;
515  ptr += MAXALIGN(sizeof(SortItem) * ngroups);
516 
517  /* extract data for the dimension */
518  for (i = 0; i < ngroups; i++)
519  {
520  /* point into the input groups */
521  result[dim][i].values = &groups[i].values[dim];
522  result[dim][i].isnull = &groups[i].isnull[dim];
523  result[dim][i].count = groups[i].count;
524  }
525 
526  /* sort the values, deduplicate */
527  qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
528  sort_item_compare, ssup);
529 
530  /*
531  * Identify distinct values, compute frequency (there might be
532  * multiple MCV items containing this value, so we need to sum counts
533  * from all of them.
534  */
535  ncounts[dim] = 1;
536  for (i = 1; i < ngroups; i++)
537  {
538  if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
539  {
540  result[dim][ncounts[dim] - 1].count += result[dim][i].count;
541  continue;
542  }
543 
544  result[dim][ncounts[dim]] = result[dim][i];
545 
546  ncounts[dim]++;
547  }
548  }
549 
550  return result;
551 }
552 
553 /*
554  * statext_mcv_load
555  * Load the MCV list for the indicated pg_statistic_ext_data tuple.
556  */
557 MCVList *
558 statext_mcv_load(Oid mvoid, bool inh)
559 {
560  MCVList *result;
561  bool isnull;
562  Datum mcvlist;
563  HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID,
564  ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
565 
566  if (!HeapTupleIsValid(htup))
567  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
568 
569  mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
570  Anum_pg_statistic_ext_data_stxdmcv, &isnull);
571 
572  if (isnull)
573  elog(ERROR,
574  "requested statistics kind \"%c\" is not yet built for statistics object %u",
575  STATS_EXT_MCV, mvoid);
576 
577  result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
578 
579  ReleaseSysCache(htup);
580 
581  return result;
582 }
583 
584 
585 /*
586  * statext_mcv_serialize
587  * Serialize MCV list into a pg_mcv_list value.
588  *
589  * The MCV items may include values of various data types, and it's reasonable
590  * to expect redundancy (values for a given attribute, repeated for multiple
591  * MCV list items). So we deduplicate the values into arrays, and then replace
592  * the values by indexes into those arrays.
593  *
594  * The overall structure of the serialized representation looks like this:
595  *
596  * +---------------+----------------+---------------------+-------+
597  * | header fields | dimension info | deduplicated values | items |
598  * +---------------+----------------+---------------------+-------+
599  *
600  * Where dimension info stores information about the type of the K-th
601  * attribute (e.g. typlen, typbyval and length of deduplicated values).
602  * Deduplicated values store deduplicated values for each attribute. And
603  * items store the actual MCV list items, with values replaced by indexes into
604  * the arrays.
605  *
606  * When serializing the items, we use uint16 indexes. The number of MCV items
607  * is limited by the statistics target (which is capped to 10k at the moment).
608  * We might increase this to 65k and still fit into uint16, so there's a bit of
609  * slack. Furthermore, this limit is on the number of distinct values per column,
610  * and we usually have few of those (and various combinations of them for the
611  * those MCV list). So uint16 seems fine for now.
612  *
613  * We don't really expect the serialization to save as much space as for
614  * histograms, as we are not doing any bucket splits (which is the source
615  * of high redundancy in histograms).
616  *
617  * TODO: Consider packing boolean flags (NULL) for each item into a single char
618  * (or a longer type) instead of using an array of bool items.
619  */
620 bytea *
622 {
623  int i;
624  int dim;
625  int ndims = mcvlist->ndimensions;
626 
627  SortSupport ssup;
628  DimensionInfo *info;
629 
630  Size total_length;
631 
632  /* serialized items (indexes into arrays, etc.) */
633  bytea *raw;
634  char *ptr;
635  char *endptr PG_USED_FOR_ASSERTS_ONLY;
636 
637  /* values per dimension (and number of non-NULL values) */
638  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
639  int *counts = (int *) palloc0(sizeof(int) * ndims);
640 
641  /*
642  * We'll include some rudimentary information about the attribute types
643  * (length, by-val flag), so that we don't have to look them up while
644  * deserializing the MCV list (we already have the type OID in the
645  * header). This is safe because when changing the type of the attribute
646  * the statistics gets dropped automatically. We need to store the info
647  * about the arrays of deduplicated values anyway.
648  */
649  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
650 
651  /* sort support data for all attributes included in the MCV list */
652  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
653 
654  /* collect and deduplicate values for each dimension (attribute) */
655  for (dim = 0; dim < ndims; dim++)
656  {
657  int ndistinct;
658  TypeCacheEntry *typentry;
659 
660  /*
661  * Lookup the LT operator (can't get it from stats extra_data, as we
662  * don't know how to interpret that - scalar vs. array etc.).
663  */
664  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
665 
666  /* copy important info about the data type (length, by-value) */
667  info[dim].typlen = stats[dim]->attrtype->typlen;
668  info[dim].typbyval = stats[dim]->attrtype->typbyval;
669 
670  /* allocate space for values in the attribute and collect them */
671  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
672 
673  for (i = 0; i < mcvlist->nitems; i++)
674  {
675  /* skip NULL values - we don't need to deduplicate those */
676  if (mcvlist->items[i].isnull[dim])
677  continue;
678 
679  /* append the value at the end */
680  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
681  counts[dim] += 1;
682  }
683 
684  /* if there are just NULL values in this dimension, we're done */
685  if (counts[dim] == 0)
686  continue;
687 
688  /* sort and deduplicate the data */
689  ssup[dim].ssup_cxt = CurrentMemoryContext;
690  ssup[dim].ssup_collation = stats[dim]->attrcollid;
691  ssup[dim].ssup_nulls_first = false;
692 
693  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
694 
695  qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
696  compare_scalars_simple, &ssup[dim]);
697 
698  /*
699  * Walk through the array and eliminate duplicate values, but keep the
700  * ordering (so that we can do a binary search later). We know there's
701  * at least one item as (counts[dim] != 0), so we can skip the first
702  * element.
703  */
704  ndistinct = 1; /* number of distinct values */
705  for (i = 1; i < counts[dim]; i++)
706  {
707  /* expect sorted array */
708  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
709 
710  /* if the value is the same as the previous one, we can skip it */
711  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
712  continue;
713 
714  values[dim][ndistinct] = values[dim][i];
715  ndistinct += 1;
716  }
717 
718  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
719  Assert(ndistinct <= PG_UINT16_MAX);
720 
721  /*
722  * Store additional info about the attribute - number of deduplicated
723  * values, and also size of the serialized data. For fixed-length data
724  * types this is trivial to compute, for varwidth types we need to
725  * actually walk the array and sum the sizes.
726  */
727  info[dim].nvalues = ndistinct;
728 
729  if (info[dim].typbyval) /* by-value data types */
730  {
731  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
732 
733  /*
734  * We copy the data into the MCV item during deserialization, so
735  * we don't need to allocate any extra space.
736  */
737  info[dim].nbytes_aligned = 0;
738  }
739  else if (info[dim].typlen > 0) /* fixed-length by-ref */
740  {
741  /*
742  * We don't care about alignment in the serialized data, so we
743  * pack the data as much as possible. But we also track how much
744  * data will be needed after deserialization, and in that case we
745  * need to account for alignment of each item.
746  *
747  * Note: As the items are fixed-length, we could easily compute
748  * this during deserialization, but we do it here anyway.
749  */
750  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
751  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
752  }
753  else if (info[dim].typlen == -1) /* varlena */
754  {
755  info[dim].nbytes = 0;
756  info[dim].nbytes_aligned = 0;
757  for (i = 0; i < info[dim].nvalues; i++)
758  {
759  Size len;
760 
761  /*
762  * For varlena values, we detoast the values and store the
763  * length and data separately. We don't bother with alignment
764  * here, which means that during deserialization we need to
765  * copy the fields and only access the copies.
766  */
768 
769  /* serialized length (uint32 length + data) */
770  len = VARSIZE_ANY_EXHDR(values[dim][i]);
771  info[dim].nbytes += sizeof(uint32); /* length */
772  info[dim].nbytes += len; /* value (no header) */
773 
774  /*
775  * During deserialization we'll build regular varlena values
776  * with full headers, and we need to align them properly.
777  */
778  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
779  }
780  }
781  else if (info[dim].typlen == -2) /* cstring */
782  {
783  info[dim].nbytes = 0;
784  info[dim].nbytes_aligned = 0;
785  for (i = 0; i < info[dim].nvalues; i++)
786  {
787  Size len;
788 
789  /*
790  * cstring is handled similar to varlena - first we store the
791  * length as uint32 and then the data. We don't care about
792  * alignment, which means that during deserialization we need
793  * to copy the fields and only access the copies.
794  */
795 
796  /* c-strings include terminator, so +1 byte */
797  len = strlen(DatumGetCString(values[dim][i])) + 1;
798  info[dim].nbytes += sizeof(uint32); /* length */
799  info[dim].nbytes += len; /* value */
800 
801  /* space needed for properly aligned deserialized copies */
802  info[dim].nbytes_aligned += MAXALIGN(len);
803  }
804  }
805 
806  /* we know (count>0) so there must be some data */
807  Assert(info[dim].nbytes > 0);
808  }
809 
810  /*
811  * Now we can finally compute how much space we'll actually need for the
812  * whole serialized MCV list (varlena header, MCV header, dimension info
813  * for each attribute, deduplicated values and items).
814  */
815  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
816  + sizeof(AttrNumber) /* ndimensions */
817  + (ndims * sizeof(Oid)); /* attribute types */
818 
819  /* dimension info */
820  total_length += ndims * sizeof(DimensionInfo);
821 
822  /* add space for the arrays of deduplicated values */
823  for (i = 0; i < ndims; i++)
824  total_length += info[i].nbytes;
825 
826  /*
827  * And finally account for the items (those are fixed-length, thanks to
828  * replacing values with uint16 indexes into the deduplicated arrays).
829  */
830  total_length += mcvlist->nitems * ITEM_SIZE(dim);
831 
832  /*
833  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
834  * we set them to zero to make the result more compressible).
835  */
836  raw = (bytea *) palloc0(VARHDRSZ + total_length);
837  SET_VARSIZE(raw, VARHDRSZ + total_length);
838 
839  ptr = VARDATA(raw);
840  endptr = ptr + total_length;
841 
842  /* copy the MCV list header fields, one by one */
843  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
844  ptr += sizeof(uint32);
845 
846  memcpy(ptr, &mcvlist->type, sizeof(uint32));
847  ptr += sizeof(uint32);
848 
849  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
850  ptr += sizeof(uint32);
851 
852  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
853  ptr += sizeof(AttrNumber);
854 
855  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
856  ptr += (sizeof(Oid) * ndims);
857 
858  /* store information about the attributes (data amounts, ...) */
859  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
860  ptr += sizeof(DimensionInfo) * ndims;
861 
862  /* Copy the deduplicated values for all attributes to the output. */
863  for (dim = 0; dim < ndims; dim++)
864  {
865  /* remember the starting point for Asserts later */
866  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
867 
868  for (i = 0; i < info[dim].nvalues; i++)
869  {
870  Datum value = values[dim][i];
871 
872  if (info[dim].typbyval) /* passed by value */
873  {
874  Datum tmp;
875 
876  /*
877  * For byval types, we need to copy just the significant bytes
878  * - we can't use memcpy directly, as that assumes
879  * little-endian behavior. store_att_byval does almost what
880  * we need, but it requires a properly aligned buffer - the
881  * output buffer does not guarantee that. So we simply use a
882  * local Datum variable (which guarantees proper alignment),
883  * and then copy the value from it.
884  */
885  store_att_byval(&tmp, value, info[dim].typlen);
886 
887  memcpy(ptr, &tmp, info[dim].typlen);
888  ptr += info[dim].typlen;
889  }
890  else if (info[dim].typlen > 0) /* passed by reference */
891  {
892  /* no special alignment needed, treated as char array */
893  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
894  ptr += info[dim].typlen;
895  }
896  else if (info[dim].typlen == -1) /* varlena */
897  {
899 
900  /* copy the length */
901  memcpy(ptr, &len, sizeof(uint32));
902  ptr += sizeof(uint32);
903 
904  /* data from the varlena value (without the header) */
905  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
906  ptr += len;
907  }
908  else if (info[dim].typlen == -2) /* cstring */
909  {
910  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
911 
912  /* copy the length */
913  memcpy(ptr, &len, sizeof(uint32));
914  ptr += sizeof(uint32);
915 
916  /* value */
917  memcpy(ptr, DatumGetCString(value), len);
918  ptr += len;
919  }
920 
921  /* no underflows or overflows */
922  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
923  }
924 
925  /* we should get exactly nbytes of data for this dimension */
926  Assert((ptr - start) == info[dim].nbytes);
927  }
928 
929  /* Serialize the items, with uint16 indexes instead of the values. */
930  for (i = 0; i < mcvlist->nitems; i++)
931  {
932  MCVItem *mcvitem = &mcvlist->items[i];
933 
934  /* don't write beyond the allocated space */
935  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
936 
937  /* copy NULL and frequency flags into the serialized MCV */
938  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
939  ptr += sizeof(bool) * ndims;
940 
941  memcpy(ptr, &mcvitem->frequency, sizeof(double));
942  ptr += sizeof(double);
943 
944  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
945  ptr += sizeof(double);
946 
947  /* store the indexes last */
948  for (dim = 0; dim < ndims; dim++)
949  {
950  uint16 index = 0;
951  Datum *value;
952 
953  /* do the lookup only for non-NULL values */
954  if (!mcvitem->isnull[dim])
955  {
956  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
957  info[dim].nvalues, sizeof(Datum),
958  compare_scalars_simple, &ssup[dim]);
959 
960  Assert(value != NULL); /* serialization or deduplication
961  * error */
962 
963  /* compute index within the deduplicated array */
964  index = (uint16) (value - values[dim]);
965 
966  /* check the index is within expected bounds */
967  Assert(index < info[dim].nvalues);
968  }
969 
970  /* copy the index into the serialized MCV */
971  memcpy(ptr, &index, sizeof(uint16));
972  ptr += sizeof(uint16);
973  }
974 
975  /* make sure we don't overflow the allocated value */
976  Assert(ptr <= endptr);
977  }
978 
979  /* at this point we expect to match the total_length exactly */
980  Assert(ptr == endptr);
981 
982  pfree(values);
983  pfree(counts);
984 
985  return raw;
986 }
987 
988 /*
989  * statext_mcv_deserialize
990  * Reads serialized MCV list into MCVList structure.
991  *
992  * All the memory needed by the MCV list is allocated as a single chunk, so
993  * it's possible to simply pfree() it at once.
994  */
995 MCVList *
997 {
998  int dim,
999  i;
1000  Size expected_size;
1001  MCVList *mcvlist;
1002  char *raw;
1003  char *ptr;
1004  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1005 
1006  int ndims,
1007  nitems;
1008  DimensionInfo *info = NULL;
1009 
1010  /* local allocation buffer (used only for deserialization) */
1011  Datum **map = NULL;
1012 
1013  /* MCV list */
1014  Size mcvlen;
1015 
1016  /* buffer used for the result */
1017  Size datalen;
1018  char *dataptr;
1019  char *valuesptr;
1020  char *isnullptr;
1021 
1022  if (data == NULL)
1023  return NULL;
1024 
1025  /*
1026  * We can't possibly deserialize a MCV list if there's not even a complete
1027  * header. We need an explicit formula here, because we serialize the
1028  * header fields one by one, so we need to ignore struct alignment.
1029  */
1031  elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1033 
1034  /* read the MCV list header */
1035  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1036 
1037  /* pointer to the data part (skip the varlena header) */
1038  raw = (char *) data;
1039  ptr = VARDATA_ANY(raw);
1040  endptr = (char *) raw + VARSIZE_ANY(data);
1041 
1042  /* get the header and perform further sanity checks */
1043  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1044  ptr += sizeof(uint32);
1045 
1046  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1047  ptr += sizeof(uint32);
1048 
1049  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1050  ptr += sizeof(uint32);
1051 
1052  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1053  ptr += sizeof(AttrNumber);
1054 
1055  if (mcvlist->magic != STATS_MCV_MAGIC)
1056  elog(ERROR, "invalid MCV magic %u (expected %u)",
1057  mcvlist->magic, STATS_MCV_MAGIC);
1058 
1059  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1060  elog(ERROR, "invalid MCV type %u (expected %u)",
1061  mcvlist->type, STATS_MCV_TYPE_BASIC);
1062 
1063  if (mcvlist->ndimensions == 0)
1064  elog(ERROR, "invalid zero-length dimension array in MCVList");
1065  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1066  (mcvlist->ndimensions < 0))
1067  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1068  mcvlist->ndimensions);
1069 
1070  if (mcvlist->nitems == 0)
1071  elog(ERROR, "invalid zero-length item array in MCVList");
1072  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1073  elog(ERROR, "invalid length (%u) item array in MCVList",
1074  mcvlist->nitems);
1075 
1076  nitems = mcvlist->nitems;
1077  ndims = mcvlist->ndimensions;
1078 
1079  /*
1080  * Check amount of data including DimensionInfo for all dimensions and
1081  * also the serialized items (including uint16 indexes). Also, walk
1082  * through the dimension information and add it to the sum.
1083  */
1084  expected_size = SizeOfMCVList(ndims, nitems);
1085 
1086  /*
1087  * Check that we have at least the dimension and info records, along with
1088  * the items. We don't know the size of the serialized values yet. We need
1089  * to do this check first, before accessing the dimension info.
1090  */
1091  if (VARSIZE_ANY(data) < expected_size)
1092  elog(ERROR, "invalid MCV size %zu (expected %zu)",
1093  VARSIZE_ANY(data), expected_size);
1094 
1095  /* Now copy the array of type Oids. */
1096  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1097  ptr += (sizeof(Oid) * ndims);
1098 
1099  /* Now it's safe to access the dimension info. */
1100  info = palloc(ndims * sizeof(DimensionInfo));
1101 
1102  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1103  ptr += (ndims * sizeof(DimensionInfo));
1104 
1105  /* account for the value arrays */
1106  for (dim = 0; dim < ndims; dim++)
1107  {
1108  /*
1109  * XXX I wonder if we can/should rely on asserts here. Maybe those
1110  * checks should be done every time?
1111  */
1112  Assert(info[dim].nvalues >= 0);
1113  Assert(info[dim].nbytes >= 0);
1114 
1115  expected_size += info[dim].nbytes;
1116  }
1117 
1118  /*
1119  * Now we know the total expected MCV size, including all the pieces
1120  * (header, dimension info. items and deduplicated data). So do the final
1121  * check on size.
1122  */
1123  if (VARSIZE_ANY(data) != expected_size)
1124  elog(ERROR, "invalid MCV size %zu (expected %zu)",
1125  VARSIZE_ANY(data), expected_size);
1126 
1127  /*
1128  * We need an array of Datum values for each dimension, so that we can
1129  * easily translate the uint16 indexes later. We also need a top-level
1130  * array of pointers to those per-dimension arrays.
1131  *
1132  * While allocating the arrays for dimensions, compute how much space we
1133  * need for a copy of the by-ref data, as we can't simply point to the
1134  * original values (it might go away).
1135  */
1136  datalen = 0; /* space for by-ref data */
1137  map = (Datum **) palloc(ndims * sizeof(Datum *));
1138 
1139  for (dim = 0; dim < ndims; dim++)
1140  {
1141  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1142 
1143  /* space needed for a copy of data for by-ref types */
1144  datalen += info[dim].nbytes_aligned;
1145  }
1146 
1147  /*
1148  * Now resize the MCV list so that the allocation includes all the data.
1149  *
1150  * Allocate space for a copy of the data, as we can't simply reference the
1151  * serialized data - it's not aligned properly, and it may disappear while
1152  * we're still using the MCV list, e.g. due to catcache release.
1153  *
1154  * We do care about alignment here, because we will allocate all the
1155  * pieces at once, but then use pointers to different parts.
1156  */
1157  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1158 
1159  /* arrays of values and isnull flags for all MCV items */
1160  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1161  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1162 
1163  /* we don't quite need to align this, but it makes some asserts easier */
1164  mcvlen += MAXALIGN(datalen);
1165 
1166  /* now resize the deserialized MCV list, and compute pointers to parts */
1167  mcvlist = repalloc(mcvlist, mcvlen);
1168 
1169  /* pointer to the beginning of values/isnull arrays */
1170  valuesptr = (char *) mcvlist
1171  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1172 
1173  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1174 
1175  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1176 
1177  /*
1178  * Build mapping (index => value) for translating the serialized data into
1179  * the in-memory representation.
1180  */
1181  for (dim = 0; dim < ndims; dim++)
1182  {
1183  /* remember start position in the input array */
1184  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1185 
1186  if (info[dim].typbyval)
1187  {
1188  /* for by-val types we simply copy data into the mapping */
1189  for (i = 0; i < info[dim].nvalues; i++)
1190  {
1191  Datum v = 0;
1192 
1193  memcpy(&v, ptr, info[dim].typlen);
1194  ptr += info[dim].typlen;
1195 
1196  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1197 
1198  /* no under/overflow of input array */
1199  Assert(ptr <= (start + info[dim].nbytes));
1200  }
1201  }
1202  else
1203  {
1204  /* for by-ref types we need to also make a copy of the data */
1205 
1206  /* passed by reference, but fixed length (name, tid, ...) */
1207  if (info[dim].typlen > 0)
1208  {
1209  for (i = 0; i < info[dim].nvalues; i++)
1210  {
1211  memcpy(dataptr, ptr, info[dim].typlen);
1212  ptr += info[dim].typlen;
1213 
1214  /* just point into the array */
1215  map[dim][i] = PointerGetDatum(dataptr);
1216  dataptr += MAXALIGN(info[dim].typlen);
1217  }
1218  }
1219  else if (info[dim].typlen == -1)
1220  {
1221  /* varlena */
1222  for (i = 0; i < info[dim].nvalues; i++)
1223  {
1224  uint32 len;
1225 
1226  /* read the uint32 length */
1227  memcpy(&len, ptr, sizeof(uint32));
1228  ptr += sizeof(uint32);
1229 
1230  /* the length is data-only */
1231  SET_VARSIZE(dataptr, len + VARHDRSZ);
1232  memcpy(VARDATA(dataptr), ptr, len);
1233  ptr += len;
1234 
1235  /* just point into the array */
1236  map[dim][i] = PointerGetDatum(dataptr);
1237 
1238  /* skip to place of the next deserialized value */
1239  dataptr += MAXALIGN(len + VARHDRSZ);
1240  }
1241  }
1242  else if (info[dim].typlen == -2)
1243  {
1244  /* cstring */
1245  for (i = 0; i < info[dim].nvalues; i++)
1246  {
1247  uint32 len;
1248 
1249  memcpy(&len, ptr, sizeof(uint32));
1250  ptr += sizeof(uint32);
1251 
1252  memcpy(dataptr, ptr, len);
1253  ptr += len;
1254 
1255  /* just point into the array */
1256  map[dim][i] = PointerGetDatum(dataptr);
1257  dataptr += MAXALIGN(len);
1258  }
1259  }
1260 
1261  /* no under/overflow of input array */
1262  Assert(ptr <= (start + info[dim].nbytes));
1263 
1264  /* no overflow of the output mcv value */
1265  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1266  }
1267 
1268  /* check we consumed input data for this dimension exactly */
1269  Assert(ptr == (start + info[dim].nbytes));
1270  }
1271 
1272  /* we should have also filled the MCV list exactly */
1273  Assert(dataptr == ((char *) mcvlist + mcvlen));
1274 
1275  /* deserialize the MCV items and translate the indexes to Datums */
1276  for (i = 0; i < nitems; i++)
1277  {
1278  MCVItem *item = &mcvlist->items[i];
1279 
1280  item->values = (Datum *) valuesptr;
1281  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1282 
1283  item->isnull = (bool *) isnullptr;
1284  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1285 
1286  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1287  ptr += sizeof(bool) * ndims;
1288 
1289  memcpy(&item->frequency, ptr, sizeof(double));
1290  ptr += sizeof(double);
1291 
1292  memcpy(&item->base_frequency, ptr, sizeof(double));
1293  ptr += sizeof(double);
1294 
1295  /* finally translate the indexes (for non-NULL only) */
1296  for (dim = 0; dim < ndims; dim++)
1297  {
1298  uint16 index;
1299 
1300  memcpy(&index, ptr, sizeof(uint16));
1301  ptr += sizeof(uint16);
1302 
1303  if (item->isnull[dim])
1304  continue;
1305 
1306  item->values[dim] = map[dim][index];
1307  }
1308 
1309  /* check we're not overflowing the input */
1310  Assert(ptr <= endptr);
1311  }
1312 
1313  /* check that we processed all the data */
1314  Assert(ptr == endptr);
1315 
1316  /* release the buffers used for mapping */
1317  for (dim = 0; dim < ndims; dim++)
1318  pfree(map[dim]);
1319 
1320  pfree(map);
1321 
1322  return mcvlist;
1323 }
1324 
1325 /*
1326  * SRF with details about buckets of a histogram:
1327  *
1328  * - item ID (0...nitems)
1329  * - values (string array)
1330  * - nulls only (boolean array)
1331  * - frequency (double precision)
1332  * - base_frequency (double precision)
1333  *
1334  * The input is the OID of the statistics, and there are no rows returned if
1335  * the statistics contains no histogram.
1336  */
1337 Datum
1339 {
1340  FuncCallContext *funcctx;
1341 
1342  /* stuff done only on the first call of the function */
1343  if (SRF_IS_FIRSTCALL())
1344  {
1345  MemoryContext oldcontext;
1346  MCVList *mcvlist;
1347  TupleDesc tupdesc;
1348 
1349  /* create a function context for cross-call persistence */
1350  funcctx = SRF_FIRSTCALL_INIT();
1351 
1352  /* switch to memory context appropriate for multiple function calls */
1353  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1354 
1356 
1357  funcctx->user_fctx = mcvlist;
1358 
1359  /* total number of tuples to be returned */
1360  funcctx->max_calls = 0;
1361  if (funcctx->user_fctx != NULL)
1362  funcctx->max_calls = mcvlist->nitems;
1363 
1364  /* Build a tuple descriptor for our result type */
1365  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1366  ereport(ERROR,
1367  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1368  errmsg("function returning record called in context "
1369  "that cannot accept type record")));
1370  tupdesc = BlessTupleDesc(tupdesc);
1371 
1372  /*
1373  * generate attribute metadata needed later to produce tuples from raw
1374  * C strings
1375  */
1376  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1377 
1378  MemoryContextSwitchTo(oldcontext);
1379  }
1380 
1381  /* stuff done on every call of the function */
1382  funcctx = SRF_PERCALL_SETUP();
1383 
1384  if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1385  * left to send */
1386  {
1387  Datum values[5];
1388  bool nulls[5];
1389  HeapTuple tuple;
1390  Datum result;
1391  ArrayBuildState *astate_values = NULL;
1392  ArrayBuildState *astate_nulls = NULL;
1393 
1394  int i;
1395  MCVList *mcvlist;
1396  MCVItem *item;
1397 
1398  mcvlist = (MCVList *) funcctx->user_fctx;
1399 
1400  Assert(funcctx->call_cntr < mcvlist->nitems);
1401 
1402  item = &mcvlist->items[funcctx->call_cntr];
1403 
1404  for (i = 0; i < mcvlist->ndimensions; i++)
1405  {
1406 
1407  astate_nulls = accumArrayResult(astate_nulls,
1408  BoolGetDatum(item->isnull[i]),
1409  false,
1410  BOOLOID,
1412 
1413  if (!item->isnull[i])
1414  {
1415  bool isvarlena;
1416  Oid outfunc;
1417  FmgrInfo fmgrinfo;
1418  Datum val;
1419  text *txt;
1420 
1421  /* lookup output func for the type */
1422  getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1423  fmgr_info(outfunc, &fmgrinfo);
1424 
1425  val = FunctionCall1(&fmgrinfo, item->values[i]);
1427 
1428  astate_values = accumArrayResult(astate_values,
1429  PointerGetDatum(txt),
1430  false,
1431  TEXTOID,
1433  }
1434  else
1435  astate_values = accumArrayResult(astate_values,
1436  (Datum) 0,
1437  true,
1438  TEXTOID,
1440  }
1441 
1442  values[0] = Int32GetDatum(funcctx->call_cntr);
1443  values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
1444  values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
1445  values[3] = Float8GetDatum(item->frequency);
1446  values[4] = Float8GetDatum(item->base_frequency);
1447 
1448  /* no NULLs in the tuple */
1449  memset(nulls, 0, sizeof(nulls));
1450 
1451  /* build a tuple */
1452  tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1453 
1454  /* make the tuple into a datum */
1455  result = HeapTupleGetDatum(tuple);
1456 
1457  SRF_RETURN_NEXT(funcctx, result);
1458  }
1459  else /* do when there is no more left */
1460  {
1461  SRF_RETURN_DONE(funcctx);
1462  }
1463 }
1464 
1465 /*
1466  * pg_mcv_list_in - input routine for type pg_mcv_list.
1467  *
1468  * pg_mcv_list is real enough to be a table column, but it has no operations
1469  * of its own, and disallows input too
1470  */
1471 Datum
1473 {
1474  /*
1475  * pg_mcv_list stores the data in binary form and parsing text input is
1476  * not needed, so disallow this.
1477  */
1478  ereport(ERROR,
1479  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1480  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1481 
1482  PG_RETURN_VOID(); /* keep compiler quiet */
1483 }
1484 
1485 
1486 /*
1487  * pg_mcv_list_out - output routine for type pg_mcv_list.
1488  *
1489  * MCV lists are serialized into a bytea value, so we simply call byteaout()
1490  * to serialize the value into text. But it'd be nice to serialize that into
1491  * a meaningful representation (e.g. for inspection by people).
1492  *
1493  * XXX This should probably return something meaningful, similar to what
1494  * pg_dependencies_out does. Not sure how to deal with the deduplicated
1495  * values, though - do we want to expand that or not?
1496  */
1497 Datum
1499 {
1500  return byteaout(fcinfo);
1501 }
1502 
1503 /*
1504  * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1505  */
1506 Datum
1508 {
1509  ereport(ERROR,
1510  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1511  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1512 
1513  PG_RETURN_VOID(); /* keep compiler quiet */
1514 }
1515 
1516 /*
1517  * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1518  *
1519  * MCV lists are serialized in a bytea value (although the type is named
1520  * differently), so let's just send that.
1521  */
1522 Datum
1524 {
1525  return byteasend(fcinfo);
1526 }
1527 
1528 /*
1529  * match the attribute/expression to a dimension of the statistic
1530  *
1531  * Returns the zero-based index of the matching statistics dimension.
1532  * Optionally determines the collation.
1533  */
1534 static int
1536 {
1537  int idx;
1538 
1539  if (IsA(expr, Var))
1540  {
1541  /* simple Var, so just lookup using varattno */
1542  Var *var = (Var *) expr;
1543 
1544  if (collid)
1545  *collid = var->varcollid;
1546 
1547  idx = bms_member_index(keys, var->varattno);
1548 
1549  if (idx < 0)
1550  elog(ERROR, "variable not found in statistics object");
1551  }
1552  else
1553  {
1554  /* expression - lookup in stats expressions */
1555  ListCell *lc;
1556 
1557  if (collid)
1558  *collid = exprCollation(expr);
1559 
1560  /* expressions are stored after the simple columns */
1561  idx = bms_num_members(keys);
1562  foreach(lc, exprs)
1563  {
1564  Node *stat_expr = (Node *) lfirst(lc);
1565 
1566  if (equal(expr, stat_expr))
1567  break;
1568 
1569  idx++;
1570  }
1571 
1572  if (lc == NULL)
1573  elog(ERROR, "expression not found in statistics object");
1574  }
1575 
1576  return idx;
1577 }
1578 
1579 /*
1580  * mcv_get_match_bitmap
1581  * Evaluate clauses using the MCV list, and update the match bitmap.
1582  *
1583  * A match bitmap keeps match/mismatch status for each MCV item, and we
1584  * update it based on additional clauses. We also use it to skip items
1585  * that can't possibly match (e.g. item marked as "mismatch" can't change
1586  * to "match" when evaluating AND clause list).
1587  *
1588  * The function also returns a flag indicating whether there was an
1589  * equality condition for all attributes, the minimum frequency in the MCV
1590  * list, and a total MCV frequency (sum of frequencies for all items).
1591  *
1592  * XXX Currently the match bitmap uses a bool for each MCV item, which is
1593  * somewhat wasteful as we could do with just a single bit, thus reducing
1594  * the size to ~1/8. It would also allow us to combine bitmaps simply using
1595  * & and |, which should be faster than min/max. The bitmaps are fairly
1596  * small, though (thanks to the cap on the MCV list size).
1597  */
1598 static bool *
1600  Bitmapset *keys, List *exprs,
1601  MCVList *mcvlist, bool is_or)
1602 {
1603  ListCell *l;
1604  bool *matches;
1605 
1606  /* The bitmap may be partially built. */
1607  Assert(clauses != NIL);
1608  Assert(mcvlist != NULL);
1609  Assert(mcvlist->nitems > 0);
1610  Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1611 
1612  matches = palloc(sizeof(bool) * mcvlist->nitems);
1613  memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
1614 
1615  /*
1616  * Loop through the list of clauses, and for each of them evaluate all the
1617  * MCV items not yet eliminated by the preceding clauses.
1618  */
1619  foreach(l, clauses)
1620  {
1621  Node *clause = (Node *) lfirst(l);
1622 
1623  /* if it's a RestrictInfo, then extract the clause */
1624  if (IsA(clause, RestrictInfo))
1625  clause = (Node *) ((RestrictInfo *) clause)->clause;
1626 
1627  /*
1628  * Handle the various types of clauses - OpClause, NullTest and
1629  * AND/OR/NOT
1630  */
1631  if (is_opclause(clause))
1632  {
1633  OpExpr *expr = (OpExpr *) clause;
1634  FmgrInfo opproc;
1635 
1636  /* valid only after examine_opclause_args returns true */
1637  Node *clause_expr;
1638  Const *cst;
1639  bool expronleft;
1640  int idx;
1641  Oid collid;
1642 
1643  fmgr_info(get_opcode(expr->opno), &opproc);
1644 
1645  /* extract the var/expr and const from the expression */
1646  if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1647  elog(ERROR, "incompatible clause");
1648 
1649  /* match the attribute/expression to a dimension of the statistic */
1650  idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1651 
1652  /*
1653  * Walk through the MCV items and evaluate the current clause. We
1654  * can skip items that were already ruled out, and terminate if
1655  * there are no remaining MCV items that might possibly match.
1656  */
1657  for (int i = 0; i < mcvlist->nitems; i++)
1658  {
1659  bool match = true;
1660  MCVItem *item = &mcvlist->items[i];
1661 
1662  Assert(idx >= 0);
1663 
1664  /*
1665  * When the MCV item or the Const value is NULL we can treat
1666  * this as a mismatch. We must not call the operator because
1667  * of strictness.
1668  */
1669  if (item->isnull[idx] || cst->constisnull)
1670  {
1671  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1672  continue;
1673  }
1674 
1675  /*
1676  * Skip MCV items that can't change result in the bitmap. Once
1677  * the value gets false for AND-lists, or true for OR-lists,
1678  * we don't need to look at more clauses.
1679  */
1680  if (RESULT_IS_FINAL(matches[i], is_or))
1681  continue;
1682 
1683  /*
1684  * First check whether the constant is below the lower
1685  * boundary (in that case we can skip the bucket, because
1686  * there's no overlap).
1687  *
1688  * We don't store collations used to build the statistics, but
1689  * we can use the collation for the attribute itself, as
1690  * stored in varcollid. We do reset the statistics after a
1691  * type change (including collation change), so this is OK.
1692  * For expressions, we use the collation extracted from the
1693  * expression itself.
1694  */
1695  if (expronleft)
1696  match = DatumGetBool(FunctionCall2Coll(&opproc,
1697  collid,
1698  item->values[idx],
1699  cst->constvalue));
1700  else
1701  match = DatumGetBool(FunctionCall2Coll(&opproc,
1702  collid,
1703  cst->constvalue,
1704  item->values[idx]));
1705 
1706  /* update the match bitmap with the result */
1707  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1708  }
1709  }
1710  else if (IsA(clause, ScalarArrayOpExpr))
1711  {
1712  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1713  FmgrInfo opproc;
1714 
1715  /* valid only after examine_opclause_args returns true */
1716  Node *clause_expr;
1717  Const *cst;
1718  bool expronleft;
1719  Oid collid;
1720  int idx;
1721 
1722  /* array evaluation */
1723  ArrayType *arrayval;
1724  int16 elmlen;
1725  bool elmbyval;
1726  char elmalign;
1727  int num_elems;
1728  Datum *elem_values;
1729  bool *elem_nulls;
1730 
1731  fmgr_info(get_opcode(expr->opno), &opproc);
1732 
1733  /* extract the var/expr and const from the expression */
1734  if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1735  elog(ERROR, "incompatible clause");
1736 
1737  /* We expect Var on left */
1738  if (!expronleft)
1739  elog(ERROR, "incompatible clause");
1740 
1741  /*
1742  * Deconstruct the array constant, unless it's NULL (we'll cover
1743  * that case below)
1744  */
1745  if (!cst->constisnull)
1746  {
1747  arrayval = DatumGetArrayTypeP(cst->constvalue);
1749  &elmlen, &elmbyval, &elmalign);
1750  deconstruct_array(arrayval,
1751  ARR_ELEMTYPE(arrayval),
1752  elmlen, elmbyval, elmalign,
1753  &elem_values, &elem_nulls, &num_elems);
1754  }
1755 
1756  /* match the attribute/expression to a dimension of the statistic */
1757  idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1758 
1759  /*
1760  * Walk through the MCV items and evaluate the current clause. We
1761  * can skip items that were already ruled out, and terminate if
1762  * there are no remaining MCV items that might possibly match.
1763  */
1764  for (int i = 0; i < mcvlist->nitems; i++)
1765  {
1766  int j;
1767  bool match = !expr->useOr;
1768  MCVItem *item = &mcvlist->items[i];
1769 
1770  /*
1771  * When the MCV item or the Const value is NULL we can treat
1772  * this as a mismatch. We must not call the operator because
1773  * of strictness.
1774  */
1775  if (item->isnull[idx] || cst->constisnull)
1776  {
1777  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1778  continue;
1779  }
1780 
1781  /*
1782  * Skip MCV items that can't change result in the bitmap. Once
1783  * the value gets false for AND-lists, or true for OR-lists,
1784  * we don't need to look at more clauses.
1785  */
1786  if (RESULT_IS_FINAL(matches[i], is_or))
1787  continue;
1788 
1789  for (j = 0; j < num_elems; j++)
1790  {
1791  Datum elem_value = elem_values[j];
1792  bool elem_isnull = elem_nulls[j];
1793  bool elem_match;
1794 
1795  /* NULL values always evaluate as not matching. */
1796  if (elem_isnull)
1797  {
1798  match = RESULT_MERGE(match, expr->useOr, false);
1799  continue;
1800  }
1801 
1802  /*
1803  * Stop evaluating the array elements once we reach a
1804  * matching value that can't change - ALL() is the same as
1805  * AND-list, ANY() is the same as OR-list.
1806  */
1807  if (RESULT_IS_FINAL(match, expr->useOr))
1808  break;
1809 
1810  elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1811  collid,
1812  item->values[idx],
1813  elem_value));
1814 
1815  match = RESULT_MERGE(match, expr->useOr, elem_match);
1816  }
1817 
1818  /* update the match bitmap with the result */
1819  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1820  }
1821  }
1822  else if (IsA(clause, NullTest))
1823  {
1824  NullTest *expr = (NullTest *) clause;
1825  Node *clause_expr = (Node *) (expr->arg);
1826 
1827  /* match the attribute/expression to a dimension of the statistic */
1828  int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1829 
1830  /*
1831  * Walk through the MCV items and evaluate the current clause. We
1832  * can skip items that were already ruled out, and terminate if
1833  * there are no remaining MCV items that might possibly match.
1834  */
1835  for (int i = 0; i < mcvlist->nitems; i++)
1836  {
1837  bool match = false; /* assume mismatch */
1838  MCVItem *item = &mcvlist->items[i];
1839 
1840  /* if the clause mismatches the MCV item, update the bitmap */
1841  switch (expr->nulltesttype)
1842  {
1843  case IS_NULL:
1844  match = (item->isnull[idx]) ? true : match;
1845  break;
1846 
1847  case IS_NOT_NULL:
1848  match = (!item->isnull[idx]) ? true : match;
1849  break;
1850  }
1851 
1852  /* now, update the match bitmap, depending on OR/AND type */
1853  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1854  }
1855  }
1856  else if (is_orclause(clause) || is_andclause(clause))
1857  {
1858  /* AND/OR clause, with all subclauses being compatible */
1859 
1860  int i;
1861  BoolExpr *bool_clause = ((BoolExpr *) clause);
1862  List *bool_clauses = bool_clause->args;
1863 
1864  /* match/mismatch bitmap for each MCV item */
1865  bool *bool_matches = NULL;
1866 
1867  Assert(bool_clauses != NIL);
1868  Assert(list_length(bool_clauses) >= 2);
1869 
1870  /* build the match bitmap for the OR-clauses */
1871  bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1872  mcvlist, is_orclause(clause));
1873 
1874  /*
1875  * Merge the bitmap produced by mcv_get_match_bitmap into the
1876  * current one. We need to consider if we're evaluating AND or OR
1877  * condition when merging the results.
1878  */
1879  for (i = 0; i < mcvlist->nitems; i++)
1880  matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1881 
1882  pfree(bool_matches);
1883  }
1884  else if (is_notclause(clause))
1885  {
1886  /* NOT clause, with all subclauses compatible */
1887 
1888  int i;
1889  BoolExpr *not_clause = ((BoolExpr *) clause);
1890  List *not_args = not_clause->args;
1891 
1892  /* match/mismatch bitmap for each MCV item */
1893  bool *not_matches = NULL;
1894 
1895  Assert(not_args != NIL);
1896  Assert(list_length(not_args) == 1);
1897 
1898  /* build the match bitmap for the NOT-clause */
1899  not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1900  mcvlist, false);
1901 
1902  /*
1903  * Merge the bitmap produced by mcv_get_match_bitmap into the
1904  * current one. We're handling a NOT clause, so invert the result
1905  * before merging it into the global bitmap.
1906  */
1907  for (i = 0; i < mcvlist->nitems; i++)
1908  matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1909 
1910  pfree(not_matches);
1911  }
1912  else if (IsA(clause, Var))
1913  {
1914  /* Var (has to be a boolean Var, possibly from below NOT) */
1915 
1916  Var *var = (Var *) (clause);
1917 
1918  /* match the attribute to a dimension of the statistic */
1919  int idx = bms_member_index(keys, var->varattno);
1920 
1921  Assert(var->vartype == BOOLOID);
1922 
1923  /*
1924  * Walk through the MCV items and evaluate the current clause. We
1925  * can skip items that were already ruled out, and terminate if
1926  * there are no remaining MCV items that might possibly match.
1927  */
1928  for (int i = 0; i < mcvlist->nitems; i++)
1929  {
1930  MCVItem *item = &mcvlist->items[i];
1931  bool match = false;
1932 
1933  /* if the item is NULL, it's a mismatch */
1934  if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1935  match = true;
1936 
1937  /* update the result bitmap */
1938  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1939  }
1940  }
1941  else
1942  {
1943  /* Otherwise, it must be a bare boolean-returning expression */
1944  int idx;
1945 
1946  /* match the expression to a dimension of the statistic */
1947  idx = mcv_match_expression(clause, keys, exprs, NULL);
1948 
1949  /*
1950  * Walk through the MCV items and evaluate the current clause. We
1951  * can skip items that were already ruled out, and terminate if
1952  * there are no remaining MCV items that might possibly match.
1953  */
1954  for (int i = 0; i < mcvlist->nitems; i++)
1955  {
1956  bool match;
1957  MCVItem *item = &mcvlist->items[i];
1958 
1959  /* "match" just means it's bool TRUE */
1960  match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1961 
1962  /* now, update the match bitmap, depending on OR/AND type */
1963  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1964  }
1965  }
1966  }
1967 
1968  return matches;
1969 }
1970 
1971 
1972 /*
1973  * mcv_combine_selectivities
1974  * Combine per-column and multi-column MCV selectivity estimates.
1975  *
1976  * simple_sel is a "simple" selectivity estimate (produced without using any
1977  * extended statistics, essentially assuming independence of columns/clauses).
1978  *
1979  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1980  * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then
1981  * essentially interpreted as a correction to be added to simple_sel, as
1982  * described below.
1983  *
1984  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1985  * matching ones). This is used as an upper bound on the portion of the
1986  * selectivity estimates not covered by the MCV statistics.
1987  *
1988  * Note: While simple and base selectivities are defined in a quite similar
1989  * way, the values are computed differently and are not therefore equal. The
1990  * simple selectivity is computed as a product of per-clause estimates, while
1991  * the base selectivity is computed by adding up base frequencies of matching
1992  * items of the multi-column MCV list. So the values may differ for two main
1993  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1994  * the MCV items did not match the estimated clauses.
1995  *
1996  * As both (a) and (b) reduce the base selectivity value, it generally holds
1997  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1998  * values may be equal.
1999  *
2000  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
2001  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
2002  * correction for the part covered by the MCV list. Those two statements are
2003  * actually equivalent.
2004  */
2007  Selectivity mcv_sel,
2008  Selectivity mcv_basesel,
2009  Selectivity mcv_totalsel)
2010 {
2011  Selectivity other_sel;
2012  Selectivity sel;
2013 
2014  /* estimated selectivity of values not covered by MCV matches */
2015  other_sel = simple_sel - mcv_basesel;
2016  CLAMP_PROBABILITY(other_sel);
2017 
2018  /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2019  if (other_sel > 1.0 - mcv_totalsel)
2020  other_sel = 1.0 - mcv_totalsel;
2021 
2022  /* overall selectivity is the sum of the MCV and non-MCV parts */
2023  sel = mcv_sel + other_sel;
2024  CLAMP_PROBABILITY(sel);
2025 
2026  return sel;
2027 }
2028 
2029 
2030 /*
2031  * mcv_clauselist_selectivity
2032  * Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2033  * list of clauses.
2034  *
2035  * This determines which MCV items match every clause in the list and returns
2036  * the sum of the frequencies of those items.
2037  *
2038  * In addition, it returns the sum of the base frequencies of each of those
2039  * items (that is the sum of the selectivities that each item would have if
2040  * the columns were independent of one another), and the total selectivity of
2041  * all the MCV items (not just the matching ones). These are expected to be
2042  * used together with a "simple" selectivity estimate (one based only on
2043  * per-column statistics) to produce an overall selectivity estimate that
2044  * makes use of both per-column and multi-column statistics --- see
2045  * mcv_combine_selectivities().
2046  */
2049  List *clauses, int varRelid,
2050  JoinType jointype, SpecialJoinInfo *sjinfo,
2051  RelOptInfo *rel,
2052  Selectivity *basesel, Selectivity *totalsel)
2053 {
2054  int i;
2055  MCVList *mcv;
2056  Selectivity s = 0.0;
2057  RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2058 
2059  /* match/mismatch bitmap for each MCV item */
2060  bool *matches = NULL;
2061 
2062  /* load the MCV list stored in the statistics object */
2063  mcv = statext_mcv_load(stat->statOid, rte->inh);
2064 
2065  /* build a match bitmap for the clauses */
2066  matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2067  mcv, false);
2068 
2069  /* sum frequencies for all the matching MCV items */
2070  *basesel = 0.0;
2071  *totalsel = 0.0;
2072  for (i = 0; i < mcv->nitems; i++)
2073  {
2074  *totalsel += mcv->items[i].frequency;
2075 
2076  if (matches[i] != false)
2077  {
2078  *basesel += mcv->items[i].base_frequency;
2079  s += mcv->items[i].frequency;
2080  }
2081  }
2082 
2083  return s;
2084 }
2085 
2086 
2087 /*
2088  * mcv_clause_selectivity_or
2089  * Use MCV statistics to estimate the selectivity of a clause that
2090  * appears in an ORed list of clauses.
2091  *
2092  * As with mcv_clauselist_selectivity() this determines which MCV items match
2093  * the clause and returns both the sum of the frequencies and the sum of the
2094  * base frequencies of those items, as well as the sum of the frequencies of
2095  * all MCV items (not just the matching ones) so that this information can be
2096  * used by mcv_combine_selectivities() to produce a selectivity estimate that
2097  * makes use of both per-column and multi-column statistics.
2098  *
2099  * Additionally, we return information to help compute the overall selectivity
2100  * of the ORed list of clauses assumed to contain this clause. This function
2101  * is intended to be called for each clause in the ORed list of clauses,
2102  * allowing the overall selectivity to be computed using the following
2103  * algorithm:
2104  *
2105  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2106  * of the first n clauses in the list. Then the combined selectivity taking
2107  * into account the next clause C[n+1] can be written as
2108  *
2109  * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2110  *
2111  * The final term above represents the overlap between the clauses examined so
2112  * far and the (n+1)'th clause. To estimate its selectivity, we track the
2113  * match bitmap for the ORed list of clauses examined so far and examine its
2114  * intersection with the match bitmap for the (n+1)'th clause.
2115  *
2116  * We then also return the sums of the MCV item frequencies and base
2117  * frequencies for the match bitmap intersection corresponding to the overlap
2118  * term above, so that they can be combined with a simple selectivity estimate
2119  * for that term.
2120  *
2121  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2122  * for the clauses examined so far. The caller is expected to set it to NULL
2123  * the first time it calls this function.
2124  */
2127  MCVList *mcv, Node *clause, bool **or_matches,
2128  Selectivity *basesel, Selectivity *overlap_mcvsel,
2129  Selectivity *overlap_basesel, Selectivity *totalsel)
2130 {
2131  Selectivity s = 0.0;
2132  bool *new_matches;
2133  int i;
2134 
2135  /* build the OR-matches bitmap, if not built already */
2136  if (*or_matches == NULL)
2137  *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2138 
2139  /* build the match bitmap for the new clause */
2140  new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2141  stat->exprs, mcv, false);
2142 
2143  /*
2144  * Sum the frequencies for all the MCV items matching this clause and also
2145  * those matching the overlap between this clause and any of the preceding
2146  * clauses as described above.
2147  */
2148  *basesel = 0.0;
2149  *overlap_mcvsel = 0.0;
2150  *overlap_basesel = 0.0;
2151  *totalsel = 0.0;
2152  for (i = 0; i < mcv->nitems; i++)
2153  {
2154  *totalsel += mcv->items[i].frequency;
2155 
2156  if (new_matches[i])
2157  {
2158  s += mcv->items[i].frequency;
2159  *basesel += mcv->items[i].base_frequency;
2160 
2161  if ((*or_matches)[i])
2162  {
2163  *overlap_mcvsel += mcv->items[i].frequency;
2164  *overlap_basesel += mcv->items[i].base_frequency;
2165  }
2166  }
2167 
2168  /* update the OR-matches bitmap for the next clause */
2169  (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2170  }
2171 
2172  pfree(new_matches);
2173 
2174  return s;
2175 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define DatumGetArrayTypeP(X)
Definition: array.h:261
#define ARR_ELEMTYPE(a)
Definition: array.h:292
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5338
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3619
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5408
int16 AttrNumber
Definition: attnum.h:21
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:539
static Datum values[MAXATTR]
Definition: bootstrap.c:150
unsigned short uint16
Definition: c.h:505
unsigned int uint32
Definition: c.h:506
signed short int16
Definition: c.h:493
#define MAXALIGN(LEN)
Definition: c.h:811
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define VARHDRSZ
Definition: c.h:692
#define Assert(condition)
Definition: c.h:858
unsigned char bool
Definition: c.h:456
#define PG_UINT16_MAX
Definition: c.h:587
size_t Size
Definition: c.h:605
Oid collid
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2158
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2173
int compare_scalars_simple(const void *a, const void *b, void *arg)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
int multi_sort_compare(const void *a, const void *b, void *arg)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
bool examine_opclause_args(List *args, Node **exprp, Const **cstp, bool *expronleftp)
MultiSortSupportData * MultiSortSupport
struct DimensionInfo DimensionInfo
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1816
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:659
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:335
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define DatumGetByteaP(X)
Definition: fmgr.h:331
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
return str start
for(;;)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define nitems(x)
Definition: indent.h:31
long val
Definition: informix.c:689
static struct @157 value
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2907
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2271
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1285
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1599
Datum pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
Definition: mcv.c:1338
Datum pg_mcv_list_in(PG_FUNCTION_ARGS)
Definition: mcv.c:1472
#define ITEM_SIZE(ndims)
Definition: mcv.c:53
Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
Definition: mcv.c:2048
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:347
static int compare_sort_item_count(const void *a, const void *b, void *arg)
Definition: mcv.c:403
Datum pg_mcv_list_out(PG_FUNCTION_ARGS)
Definition: mcv.c:1498
static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
Definition: mcv.c:379
#define MinSizeOfMCVList
Definition: mcv.c:59
Datum pg_mcv_list_send(PG_FUNCTION_ARGS)
Definition: mcv.c:1523
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:996
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:68
#define RESULT_MERGE(value, is_or, match)
Definition: mcv.c:88
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:148
Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
Definition: mcv.c:2006
Selectivity mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
Definition: mcv.c:2126
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition: mcv.c:558
#define RESULT_IS_FINAL(value, is_or)
Definition: mcv.c:100
static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
Definition: mcv.c:1535
MCVList * statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
Definition: mcv.c:180
Datum pg_mcv_list_recv(PG_FUNCTION_ARGS)
Definition: mcv.c:1507
static int sort_item_compare(const void *a, const void *b, void *arg)
Definition: mcv.c:465
bytea * statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
Definition: mcv.c:621
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:490
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:424
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:107
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
JoinType
Definition: nodes.h:288
void * arg
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
uintptr_t Datum
Definition: postgres.h:64
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:212
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
@ IS_NULL
Definition: primnodes.h:1948
@ IS_NOT_NULL
Definition: primnodes.h:1948
MemoryContextSwitchTo(old_ctx)
tree ctl root
Definition: radixtree.h:1886
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:134
struct SortSupportData SortSupportData
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
struct MCVItem MCVItem
TupleDesc tupdesc
Definition: funcapi.h:38
List * args
Definition: primnodes.h:940
Definition: fmgr.h:57
void * user_fctx
Definition: funcapi.h:82
uint64 max_calls
Definition: funcapi.h:74
uint64 call_cntr
Definition: funcapi.h:65
AttInMetadata * attinmeta
Definition: funcapi.h:91
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
Definition: pg_list.h:54
bool * isnull
Definition: statistics.h:82
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
Datum * values
Definition: statistics.h:83
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
Definition: nodes.h:129
NullTestType nulltesttype
Definition: primnodes.h:1955
Expr * arg
Definition: primnodes.h:1954
Oid opno
Definition: primnodes.h:818
List * args
Definition: primnodes.h:836
Index relid
Definition: pathnodes.h:918
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66
Form_pg_type attrtype
Definition: vacuum.h:128
Oid attrtypid
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:129
Definition: primnodes.h:248
AttrNumber varattno
Definition: primnodes.h:260
Definition: type.h:95
Definition: c.h:687
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:266
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:479
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:229
static ItemArray items
Definition: test_tidstore.c:49
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:52
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition: tupmacs.h:183
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:356
#define TYPECACHE_LT_OPR
Definition: typcache.h:138
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
text * cstring_to_text(const char *s)
Definition: varlena.c:184
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:388
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:490
const char * type