PostgreSQL Source Code  git master
mcv.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * mcv.c
4  * POSTGRES multivariate MCV lists
5  *
6  *
7  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/statistics/mcv.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "access/htup_details.h"
20 #include "catalog/pg_collation.h"
23 #include "fmgr.h"
24 #include "funcapi.h"
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
28 #include "statistics/statistics.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/bytea.h"
32 #include "utils/fmgroids.h"
33 #include "utils/fmgrprotos.h"
34 #include "utils/lsyscache.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
38 
39 /*
40  * Computes size of a serialized MCV item, depending on the number of
41  * dimensions (columns) the statistic is defined on. The datum values are
42  * stored in a separate array (deduplicated, to minimize the size), and
43  * so the serialized items only store uint16 indexes into that array.
44  *
45  * Each serialized item stores (in this order):
46  *
47  * - indexes to values (ndim * sizeof(uint16))
48  * - null flags (ndim * sizeof(bool))
49  * - frequency (sizeof(double))
50  * - base_frequency (sizeof(double))
51  *
52  * There is no alignment padding within an MCV item.
53  * So in total each MCV item requires this many bytes:
54  *
55  * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
56  */
57 #define ITEM_SIZE(ndims) \
58  ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
59 
60 /*
61  * Used to compute size of serialized MCV list representation.
62  */
63 #define MinSizeOfMCVList \
64  (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
65 
66 /*
67  * Size of the serialized MCV list, excluding the space needed for
68  * deduplicated per-dimension values. The macro is meant to be used
69  * when it's not yet safe to access the serialized info about amount
70  * of data for each column.
71  */
72 #define SizeOfMCVList(ndims,nitems) \
73  ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
74  ((ndims) * sizeof(DimensionInfo)) + \
75  ((nitems) * ITEM_SIZE(ndims)))
76 
78 
79 static SortItem *build_distinct_groups(int numrows, SortItem *items,
80  MultiSortSupport mss, int *ndistinct);
81 
82 static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
83  MultiSortSupport mss, int *ncounts);
84 
85 static int count_distinct_groups(int numrows, SortItem *items,
86  MultiSortSupport mss);
87 
88 /*
89  * Compute new value for bitmap item, considering whether it's used for
90  * clauses connected by AND/OR.
91  */
92 #define RESULT_MERGE(value, is_or, match) \
93  ((is_or) ? ((value) || (match)) : ((value) && (match)))
94 
95 /*
96  * When processing a list of clauses, the bitmap item may get set to a value
97  * such that additional clauses can't change it. For example, when processing
98  * a list of clauses connected to AND, as soon as the item gets set to 'false'
99  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
100  *
101  * Returns true when the value in the bitmap can't change no matter how the
102  * remaining clauses are evaluated.
103  */
104 #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
105 
106 /*
107  * get_mincount_for_mcv_list
108  * Determine the minimum number of times a value needs to appear in
109  * the sample for it to be included in the MCV list.
110  *
111  * We want to keep only values that appear sufficiently often in the
112  * sample that it is reasonable to extrapolate their sample frequencies to
113  * the entire table. We do this by placing an upper bound on the relative
114  * standard error of the sample frequency, so that any estimates the
115  * planner generates from the MCV statistics can be expected to be
116  * reasonably accurate.
117  *
118  * Since we are sampling without replacement, the sample frequency of a
119  * particular value is described by a hypergeometric distribution. A
120  * common rule of thumb when estimating errors in this situation is to
121  * require at least 10 instances of the value in the sample, in which case
122  * the distribution can be approximated by a normal distribution, and
123  * standard error analysis techniques can be applied. Given a sample size
124  * of n, a population size of N, and a sample frequency of p=cnt/n, the
125  * standard error of the proportion p is given by
126  * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
127  * where the second term is the finite population correction. To get
128  * reasonably accurate planner estimates, we impose an upper bound on the
129  * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
130  * error bound is fairly arbitrary, but has been found empirically to work
131  * well. Rearranging this formula gives a lower bound on the number of
132  * instances of the value seen:
133  * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
134  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
135  * case where n approaches 0 cannot happen in practice, since the sample
136  * size is at least 300. The case where n approaches N corresponds to
137  * sampling the whole table, in which case it is reasonable to keep
138  * the whole MCV list (have no lower bound), so it makes sense to apply
139  * this formula for all inputs, even though the above derivation is
140  * technically only valid when the right hand side is at least around 10.
141  *
142  * An alternative way to look at this formula is as follows -- assume that
143  * the number of instances of the value seen scales up to the entire
144  * table, so that the population count is K=N*cnt/n. Then the distribution
145  * in the sample is a hypergeometric distribution parameterised by N, n
146  * and K, and the bound above is mathematically equivalent to demanding
147  * that the standard deviation of that distribution is less than 20% of
148  * its mean. Thus the relative errors in any planner estimates produced
149  * from the MCV statistics are likely to be not too large.
150  */
151 static double
152 get_mincount_for_mcv_list(int samplerows, double totalrows)
153 {
154  double n = samplerows;
155  double N = totalrows;
156  double numer,
157  denom;
158 
159  numer = n * (N - n);
160  denom = N - n + 0.04 * n * (N - 1);
161 
162  /* Guard against division by zero (possible if n = N = 1) */
163  if (denom == 0.0)
164  return 0.0;
165 
166  return numer / denom;
167 }
168 
169 /*
170  * Builds MCV list from the set of sampled rows.
171  *
172  * The algorithm is quite simple:
173  *
174  * (1) sort the data (default collation, '<' for the data type)
175  *
176  * (2) count distinct groups, decide how many to keep
177  *
178  * (3) build the MCV list using the threshold determined in (2)
179  *
180  * (4) remove rows represented by the MCV from the sample
181  *
182  */
183 MCVList *
184 statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
185 {
186  int i,
187  numattrs,
188  numrows,
189  ngroups,
190  nitems;
191  double mincount;
192  SortItem *items;
193  SortItem *groups;
194  MCVList *mcvlist = NULL;
195  MultiSortSupport mss;
196 
197  /* comparator for all the columns */
198  mss = build_mss(data);
199 
200  /* sort the rows */
201  items = build_sorted_items(data, &nitems, mss,
202  data->nattnums, data->attnums);
203 
204  if (!items)
205  return NULL;
206 
207  /* for convenience */
208  numattrs = data->nattnums;
209  numrows = data->numrows;
210 
211  /* transform the sorted rows into groups (sorted by frequency) */
212  groups = build_distinct_groups(nitems, items, mss, &ngroups);
213 
214  /*
215  * The maximum number of MCV items to store, based on the statistics
216  * target we computed for the statistics object (from the target set for
217  * the object itself, attributes and the system default). In any case, we
218  * can't keep more groups than we have available.
219  */
220  nitems = stattarget;
221  if (nitems > ngroups)
222  nitems = ngroups;
223 
224  /*
225  * Decide how many items to keep in the MCV list. We can't use the same
226  * algorithm as per-column MCV lists, because that only considers the
227  * actual group frequency - but we're primarily interested in how the
228  * actual frequency differs from the base frequency (product of simple
229  * per-column frequencies, as if the columns were independent).
230  *
231  * Using the same algorithm might exclude items that are close to the
232  * "average" frequency of the sample. But that does not say whether the
233  * observed frequency is close to the base frequency or not. We also need
234  * to consider unexpectedly uncommon items (again, compared to the base
235  * frequency), and the single-column algorithm does not have to.
236  *
237  * We simply decide how many items to keep by computing the minimum count
238  * using get_mincount_for_mcv_list() and then keep all items that seem to
239  * be more common than that.
240  */
241  mincount = get_mincount_for_mcv_list(numrows, totalrows);
242 
243  /*
244  * Walk the groups until we find the first group with a count below the
245  * mincount threshold (the index of that group is the number of groups we
246  * want to keep).
247  */
248  for (i = 0; i < nitems; i++)
249  {
250  if (groups[i].count < mincount)
251  {
252  nitems = i;
253  break;
254  }
255  }
256 
257  /*
258  * At this point, we know the number of items for the MCV list. There
259  * might be none (for uniform distribution with many groups), and in that
260  * case, there will be no MCV list. Otherwise, construct the MCV list.
261  */
262  if (nitems > 0)
263  {
264  int j;
265  SortItem key;
266  MultiSortSupport tmp;
267 
268  /* frequencies for values in each attribute */
269  SortItem **freqs;
270  int *nfreqs;
271 
272  /* used to search values */
274  + sizeof(SortSupportData));
275 
276  /* compute frequencies for values in each column */
277  nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278  freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279 
280  /*
281  * Allocate the MCV list structure, set the global parameters.
282  */
283  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284  sizeof(MCVItem) * nitems);
285 
286  mcvlist->magic = STATS_MCV_MAGIC;
287  mcvlist->type = STATS_MCV_TYPE_BASIC;
288  mcvlist->ndimensions = numattrs;
289  mcvlist->nitems = nitems;
290 
291  /* store info about data type OIDs */
292  for (i = 0; i < numattrs; i++)
293  mcvlist->types[i] = data->stats[i]->attrtypid;
294 
295  /* Copy the first chunk of groups into the result. */
296  for (i = 0; i < nitems; i++)
297  {
298  /* just point to the proper place in the list */
299  MCVItem *item = &mcvlist->items[i];
300 
301  item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302  item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303 
304  /* copy values for the group */
305  memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306  memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307 
308  /* groups should be sorted by frequency in descending order */
309  Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310 
311  /* group frequency */
312  item->frequency = (double) groups[i].count / numrows;
313 
314  /* base frequency, if the attributes were independent */
315  item->base_frequency = 1.0;
316  for (j = 0; j < numattrs; j++)
317  {
318  SortItem *freq;
319 
320  /* single dimension */
321  tmp->ndims = 1;
322  tmp->ssup[0] = mss->ssup[j];
323 
324  /* fill search key */
325  key.values = &groups[i].values[j];
326  key.isnull = &groups[i].isnull[j];
327 
328  freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329  sizeof(SortItem),
330  multi_sort_compare, tmp);
331 
332  item->base_frequency *= ((double) freq->count) / numrows;
333  }
334  }
335 
336  pfree(nfreqs);
337  pfree(freqs);
338  }
339 
340  pfree(items);
341  pfree(groups);
342 
343  return mcvlist;
344 }
345 
346 /*
347  * build_mss
348  * Build a MultiSortSupport for the given StatsBuildData.
349  */
350 static MultiSortSupport
352 {
353  int i;
354  int numattrs = data->nattnums;
355 
356  /* Sort by multiple columns (using array of SortSupport) */
357  MultiSortSupport mss = multi_sort_init(numattrs);
358 
359  /* prepare the sort functions for all the attributes */
360  for (i = 0; i < numattrs; i++)
361  {
362  VacAttrStats *colstat = data->stats[i];
364 
366  if (type->lt_opr == InvalidOid) /* shouldn't happen */
367  elog(ERROR, "cache lookup failed for ordering operator for type %u",
368  colstat->attrtypid);
369 
370  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
371  }
372 
373  return mss;
374 }
375 
376 /*
377  * count_distinct_groups
378  * Count distinct combinations of SortItems in the array.
379  *
380  * The array is assumed to be sorted according to the MultiSortSupport.
381  */
382 static int
384 {
385  int i;
386  int ndistinct;
387 
388  ndistinct = 1;
389  for (i = 1; i < numrows; i++)
390  {
391  /* make sure the array really is sorted */
392  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
393 
394  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
395  ndistinct += 1;
396  }
397 
398  return ndistinct;
399 }
400 
401 /*
402  * compare_sort_item_count
403  * Comparator for sorting items by count (frequencies) in descending
404  * order.
405  */
406 static int
407 compare_sort_item_count(const void *a, const void *b)
408 {
409  SortItem *ia = (SortItem *) a;
410  SortItem *ib = (SortItem *) b;
411 
412  if (ia->count == ib->count)
413  return 0;
414  else if (ia->count > ib->count)
415  return -1;
416 
417  return 1;
418 }
419 
420 /*
421  * build_distinct_groups
422  * Build an array of SortItems for distinct groups and counts matching
423  * items.
424  *
425  * The 'items' array is assumed to be sorted.
426  */
427 static SortItem *
429  int *ndistinct)
430 {
431  int i,
432  j;
433  int ngroups = count_distinct_groups(numrows, items, mss);
434 
435  SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
436 
437  j = 0;
438  groups[0] = items[0];
439  groups[0].count = 1;
440 
441  for (i = 1; i < numrows; i++)
442  {
443  /* Assume sorted in ascending order. */
444  Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
445 
446  /* New distinct group detected. */
447  if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
448  {
449  groups[++j] = items[i];
450  groups[j].count = 0;
451  }
452 
453  groups[j].count++;
454  }
455 
456  /* ensure we filled the expected number of distinct groups */
457  Assert(j + 1 == ngroups);
458 
459  /* Sort the distinct groups by frequency (in descending order). */
460  pg_qsort((void *) groups, ngroups, sizeof(SortItem),
462 
463  *ndistinct = ngroups;
464  return groups;
465 }
466 
467 /* compare sort items (single dimension) */
468 static int
469 sort_item_compare(const void *a, const void *b, void *arg)
470 {
471  SortSupport ssup = (SortSupport) arg;
472  SortItem *ia = (SortItem *) a;
473  SortItem *ib = (SortItem *) b;
474 
475  return ApplySortComparator(ia->values[0], ia->isnull[0],
476  ib->values[0], ib->isnull[0],
477  ssup);
478 }
479 
480 /*
481  * build_column_frequencies
482  * Compute frequencies of values in each column.
483  *
484  * This returns an array of SortItems for each attribute the MCV is built
485  * on, with a frequency (number of occurrences) for each value. This is
486  * then used to compute "base" frequency of MCV items.
487  *
488  * All the memory is allocated in a single chunk, so that a single pfree
489  * is enough to release it. We do not allocate space for values/isnull
490  * arrays in the SortItems, because we can simply point into the input
491  * groups directly.
492  */
493 static SortItem **
494 build_column_frequencies(SortItem *groups, int ngroups,
495  MultiSortSupport mss, int *ncounts)
496 {
497  int i,
498  dim;
499  SortItem **result;
500  char *ptr;
501 
502  Assert(groups);
503  Assert(ncounts);
504 
505  /* allocate arrays for all columns as a single chunk */
506  ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
507  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
508 
509  /* initial array of pointers */
510  result = (SortItem **) ptr;
511  ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
512 
513  for (dim = 0; dim < mss->ndims; dim++)
514  {
515  SortSupport ssup = &mss->ssup[dim];
516 
517  /* array of values for a single column */
518  result[dim] = (SortItem *) ptr;
519  ptr += MAXALIGN(sizeof(SortItem) * ngroups);
520 
521  /* extract data for the dimension */
522  for (i = 0; i < ngroups; i++)
523  {
524  /* point into the input groups */
525  result[dim][i].values = &groups[i].values[dim];
526  result[dim][i].isnull = &groups[i].isnull[dim];
527  result[dim][i].count = groups[i].count;
528  }
529 
530  /* sort the values, deduplicate */
531  qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
532  sort_item_compare, ssup);
533 
534  /*
535  * Identify distinct values, compute frequency (there might be
536  * multiple MCV items containing this value, so we need to sum counts
537  * from all of them.
538  */
539  ncounts[dim] = 1;
540  for (i = 1; i < ngroups; i++)
541  {
542  if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
543  {
544  result[dim][ncounts[dim] - 1].count += result[dim][i].count;
545  continue;
546  }
547 
548  result[dim][ncounts[dim]] = result[dim][i];
549 
550  ncounts[dim]++;
551  }
552  }
553 
554  return result;
555 }
556 
557 /*
558  * statext_mcv_load
559  * Load the MCV list for the indicated pg_statistic_ext_data tuple.
560  */
561 MCVList *
562 statext_mcv_load(Oid mvoid, bool inh)
563 {
564  MCVList *result;
565  bool isnull;
566  Datum mcvlist;
568  ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
569 
570  if (!HeapTupleIsValid(htup))
571  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
572 
573  mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
574  Anum_pg_statistic_ext_data_stxdmcv, &isnull);
575 
576  if (isnull)
577  elog(ERROR,
578  "requested statistics kind \"%c\" is not yet built for statistics object %u",
579  STATS_EXT_DEPENDENCIES, mvoid);
580 
581  result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
582 
583  ReleaseSysCache(htup);
584 
585  return result;
586 }
587 
588 
589 /*
590  * statext_mcv_serialize
591  * Serialize MCV list into a pg_mcv_list value.
592  *
593  * The MCV items may include values of various data types, and it's reasonable
594  * to expect redundancy (values for a given attribute, repeated for multiple
595  * MCV list items). So we deduplicate the values into arrays, and then replace
596  * the values by indexes into those arrays.
597  *
598  * The overall structure of the serialized representation looks like this:
599  *
600  * +---------------+----------------+---------------------+-------+
601  * | header fields | dimension info | deduplicated values | items |
602  * +---------------+----------------+---------------------+-------+
603  *
604  * Where dimension info stores information about the type of the K-th
605  * attribute (e.g. typlen, typbyval and length of deduplicated values).
606  * Deduplicated values store deduplicated values for each attribute. And
607  * items store the actual MCV list items, with values replaced by indexes into
608  * the arrays.
609  *
610  * When serializing the items, we use uint16 indexes. The number of MCV items
611  * is limited by the statistics target (which is capped to 10k at the moment).
612  * We might increase this to 65k and still fit into uint16, so there's a bit of
613  * slack. Furthermore, this limit is on the number of distinct values per column,
614  * and we usually have few of those (and various combinations of them for the
615  * those MCV list). So uint16 seems fine for now.
616  *
617  * We don't really expect the serialization to save as much space as for
618  * histograms, as we are not doing any bucket splits (which is the source
619  * of high redundancy in histograms).
620  *
621  * TODO: Consider packing boolean flags (NULL) for each item into a single char
622  * (or a longer type) instead of using an array of bool items.
623  */
624 bytea *
626 {
627  int i;
628  int dim;
629  int ndims = mcvlist->ndimensions;
630 
631  SortSupport ssup;
632  DimensionInfo *info;
633 
634  Size total_length;
635 
636  /* serialized items (indexes into arrays, etc.) */
637  bytea *raw;
638  char *ptr;
639  char *endptr PG_USED_FOR_ASSERTS_ONLY;
640 
641  /* values per dimension (and number of non-NULL values) */
642  Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
643  int *counts = (int *) palloc0(sizeof(int) * ndims);
644 
645  /*
646  * We'll include some rudimentary information about the attribute types
647  * (length, by-val flag), so that we don't have to look them up while
648  * deserializing the MCV list (we already have the type OID in the
649  * header). This is safe because when changing the type of the attribute
650  * the statistics gets dropped automatically. We need to store the info
651  * about the arrays of deduplicated values anyway.
652  */
653  info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
654 
655  /* sort support data for all attributes included in the MCV list */
656  ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
657 
658  /* collect and deduplicate values for each dimension (attribute) */
659  for (dim = 0; dim < ndims; dim++)
660  {
661  int ndistinct;
662  TypeCacheEntry *typentry;
663 
664  /*
665  * Lookup the LT operator (can't get it from stats extra_data, as we
666  * don't know how to interpret that - scalar vs. array etc.).
667  */
668  typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
669 
670  /* copy important info about the data type (length, by-value) */
671  info[dim].typlen = stats[dim]->attrtype->typlen;
672  info[dim].typbyval = stats[dim]->attrtype->typbyval;
673 
674  /* allocate space for values in the attribute and collect them */
675  values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
676 
677  for (i = 0; i < mcvlist->nitems; i++)
678  {
679  /* skip NULL values - we don't need to deduplicate those */
680  if (mcvlist->items[i].isnull[dim])
681  continue;
682 
683  /* append the value at the end */
684  values[dim][counts[dim]] = mcvlist->items[i].values[dim];
685  counts[dim] += 1;
686  }
687 
688  /* if there are just NULL values in this dimension, we're done */
689  if (counts[dim] == 0)
690  continue;
691 
692  /* sort and deduplicate the data */
693  ssup[dim].ssup_cxt = CurrentMemoryContext;
694  ssup[dim].ssup_collation = stats[dim]->attrcollid;
695  ssup[dim].ssup_nulls_first = false;
696 
697  PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
698 
699  qsort_arg(values[dim], counts[dim], sizeof(Datum),
700  compare_scalars_simple, &ssup[dim]);
701 
702  /*
703  * Walk through the array and eliminate duplicate values, but keep the
704  * ordering (so that we can do a binary search later). We know there's
705  * at least one item as (counts[dim] != 0), so we can skip the first
706  * element.
707  */
708  ndistinct = 1; /* number of distinct values */
709  for (i = 1; i < counts[dim]; i++)
710  {
711  /* expect sorted array */
712  Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
713 
714  /* if the value is the same as the previous one, we can skip it */
715  if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
716  continue;
717 
718  values[dim][ndistinct] = values[dim][i];
719  ndistinct += 1;
720  }
721 
722  /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
723  Assert(ndistinct <= PG_UINT16_MAX);
724 
725  /*
726  * Store additional info about the attribute - number of deduplicated
727  * values, and also size of the serialized data. For fixed-length data
728  * types this is trivial to compute, for varwidth types we need to
729  * actually walk the array and sum the sizes.
730  */
731  info[dim].nvalues = ndistinct;
732 
733  if (info[dim].typbyval) /* by-value data types */
734  {
735  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
736 
737  /*
738  * We copy the data into the MCV item during deserialization, so
739  * we don't need to allocate any extra space.
740  */
741  info[dim].nbytes_aligned = 0;
742  }
743  else if (info[dim].typlen > 0) /* fixed-length by-ref */
744  {
745  /*
746  * We don't care about alignment in the serialized data, so we
747  * pack the data as much as possible. But we also track how much
748  * data will be needed after deserialization, and in that case we
749  * need to account for alignment of each item.
750  *
751  * Note: As the items are fixed-length, we could easily compute
752  * this during deserialization, but we do it here anyway.
753  */
754  info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
755  info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
756  }
757  else if (info[dim].typlen == -1) /* varlena */
758  {
759  info[dim].nbytes = 0;
760  info[dim].nbytes_aligned = 0;
761  for (i = 0; i < info[dim].nvalues; i++)
762  {
763  Size len;
764 
765  /*
766  * For varlena values, we detoast the values and store the
767  * length and data separately. We don't bother with alignment
768  * here, which means that during deserialization we need to
769  * copy the fields and only access the copies.
770  */
772 
773  /* serialized length (uint32 length + data) */
774  len = VARSIZE_ANY_EXHDR(values[dim][i]);
775  info[dim].nbytes += sizeof(uint32); /* length */
776  info[dim].nbytes += len; /* value (no header) */
777 
778  /*
779  * During deserialization we'll build regular varlena values
780  * with full headers, and we need to align them properly.
781  */
782  info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
783  }
784  }
785  else if (info[dim].typlen == -2) /* cstring */
786  {
787  info[dim].nbytes = 0;
788  info[dim].nbytes_aligned = 0;
789  for (i = 0; i < info[dim].nvalues; i++)
790  {
791  Size len;
792 
793  /*
794  * cstring is handled similar to varlena - first we store the
795  * length as uint32 and then the data. We don't care about
796  * alignment, which means that during deserialization we need
797  * to copy the fields and only access the copies.
798  */
799 
800  /* c-strings include terminator, so +1 byte */
801  len = strlen(DatumGetCString(values[dim][i])) + 1;
802  info[dim].nbytes += sizeof(uint32); /* length */
803  info[dim].nbytes += len; /* value */
804 
805  /* space needed for properly aligned deserialized copies */
806  info[dim].nbytes_aligned += MAXALIGN(len);
807  }
808  }
809 
810  /* we know (count>0) so there must be some data */
811  Assert(info[dim].nbytes > 0);
812  }
813 
814  /*
815  * Now we can finally compute how much space we'll actually need for the
816  * whole serialized MCV list (varlena header, MCV header, dimension info
817  * for each attribute, deduplicated values and items).
818  */
819  total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
820  + sizeof(AttrNumber) /* ndimensions */
821  + (ndims * sizeof(Oid)); /* attribute types */
822 
823  /* dimension info */
824  total_length += ndims * sizeof(DimensionInfo);
825 
826  /* add space for the arrays of deduplicated values */
827  for (i = 0; i < ndims; i++)
828  total_length += info[i].nbytes;
829 
830  /*
831  * And finally account for the items (those are fixed-length, thanks to
832  * replacing values with uint16 indexes into the deduplicated arrays).
833  */
834  total_length += mcvlist->nitems * ITEM_SIZE(dim);
835 
836  /*
837  * Allocate space for the whole serialized MCV list (we'll skip bytes, so
838  * we set them to zero to make the result more compressible).
839  */
840  raw = (bytea *) palloc0(VARHDRSZ + total_length);
841  SET_VARSIZE(raw, VARHDRSZ + total_length);
842 
843  ptr = VARDATA(raw);
844  endptr = ptr + total_length;
845 
846  /* copy the MCV list header fields, one by one */
847  memcpy(ptr, &mcvlist->magic, sizeof(uint32));
848  ptr += sizeof(uint32);
849 
850  memcpy(ptr, &mcvlist->type, sizeof(uint32));
851  ptr += sizeof(uint32);
852 
853  memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
854  ptr += sizeof(uint32);
855 
856  memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
857  ptr += sizeof(AttrNumber);
858 
859  memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
860  ptr += (sizeof(Oid) * ndims);
861 
862  /* store information about the attributes (data amounts, ...) */
863  memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
864  ptr += sizeof(DimensionInfo) * ndims;
865 
866  /* Copy the deduplicated values for all attributes to the output. */
867  for (dim = 0; dim < ndims; dim++)
868  {
869  /* remember the starting point for Asserts later */
870  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
871 
872  for (i = 0; i < info[dim].nvalues; i++)
873  {
874  Datum value = values[dim][i];
875 
876  if (info[dim].typbyval) /* passed by value */
877  {
878  Datum tmp;
879 
880  /*
881  * For byval types, we need to copy just the significant bytes
882  * - we can't use memcpy directly, as that assumes
883  * little-endian behavior. store_att_byval does almost what
884  * we need, but it requires a properly aligned buffer - the
885  * output buffer does not guarantee that. So we simply use a
886  * local Datum variable (which guarantees proper alignment),
887  * and then copy the value from it.
888  */
889  store_att_byval(&tmp, value, info[dim].typlen);
890 
891  memcpy(ptr, &tmp, info[dim].typlen);
892  ptr += info[dim].typlen;
893  }
894  else if (info[dim].typlen > 0) /* passed by reference */
895  {
896  /* no special alignment needed, treated as char array */
897  memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
898  ptr += info[dim].typlen;
899  }
900  else if (info[dim].typlen == -1) /* varlena */
901  {
903 
904  /* copy the length */
905  memcpy(ptr, &len, sizeof(uint32));
906  ptr += sizeof(uint32);
907 
908  /* data from the varlena value (without the header) */
909  memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
910  ptr += len;
911  }
912  else if (info[dim].typlen == -2) /* cstring */
913  {
914  uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
915 
916  /* copy the length */
917  memcpy(ptr, &len, sizeof(uint32));
918  ptr += sizeof(uint32);
919 
920  /* value */
921  memcpy(ptr, DatumGetCString(value), len);
922  ptr += len;
923  }
924 
925  /* no underflows or overflows */
926  Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
927  }
928 
929  /* we should get exactly nbytes of data for this dimension */
930  Assert((ptr - start) == info[dim].nbytes);
931  }
932 
933  /* Serialize the items, with uint16 indexes instead of the values. */
934  for (i = 0; i < mcvlist->nitems; i++)
935  {
936  MCVItem *mcvitem = &mcvlist->items[i];
937 
938  /* don't write beyond the allocated space */
939  Assert(ptr <= (endptr - ITEM_SIZE(dim)));
940 
941  /* copy NULL and frequency flags into the serialized MCV */
942  memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
943  ptr += sizeof(bool) * ndims;
944 
945  memcpy(ptr, &mcvitem->frequency, sizeof(double));
946  ptr += sizeof(double);
947 
948  memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
949  ptr += sizeof(double);
950 
951  /* store the indexes last */
952  for (dim = 0; dim < ndims; dim++)
953  {
954  uint16 index = 0;
955  Datum *value;
956 
957  /* do the lookup only for non-NULL values */
958  if (!mcvitem->isnull[dim])
959  {
960  value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
961  info[dim].nvalues, sizeof(Datum),
962  compare_scalars_simple, &ssup[dim]);
963 
964  Assert(value != NULL); /* serialization or deduplication
965  * error */
966 
967  /* compute index within the deduplicated array */
968  index = (uint16) (value - values[dim]);
969 
970  /* check the index is within expected bounds */
971  Assert(index < info[dim].nvalues);
972  }
973 
974  /* copy the index into the serialized MCV */
975  memcpy(ptr, &index, sizeof(uint16));
976  ptr += sizeof(uint16);
977  }
978 
979  /* make sure we don't overflow the allocated value */
980  Assert(ptr <= endptr);
981  }
982 
983  /* at this point we expect to match the total_length exactly */
984  Assert(ptr == endptr);
985 
986  pfree(values);
987  pfree(counts);
988 
989  return raw;
990 }
991 
992 /*
993  * statext_mcv_deserialize
994  * Reads serialized MCV list into MCVList structure.
995  *
996  * All the memory needed by the MCV list is allocated as a single chunk, so
997  * it's possible to simply pfree() it at once.
998  */
999 MCVList *
1001 {
1002  int dim,
1003  i;
1004  Size expected_size;
1005  MCVList *mcvlist;
1006  char *raw;
1007  char *ptr;
1008  char *endptr PG_USED_FOR_ASSERTS_ONLY;
1009 
1010  int ndims,
1011  nitems;
1012  DimensionInfo *info = NULL;
1013 
1014  /* local allocation buffer (used only for deserialization) */
1015  Datum **map = NULL;
1016 
1017  /* MCV list */
1018  Size mcvlen;
1019 
1020  /* buffer used for the result */
1021  Size datalen;
1022  char *dataptr;
1023  char *valuesptr;
1024  char *isnullptr;
1025 
1026  if (data == NULL)
1027  return NULL;
1028 
1029  /*
1030  * We can't possibly deserialize a MCV list if there's not even a complete
1031  * header. We need an explicit formula here, because we serialize the
1032  * header fields one by one, so we need to ignore struct alignment.
1033  */
1035  elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1037 
1038  /* read the MCV list header */
1039  mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1040 
1041  /* pointer to the data part (skip the varlena header) */
1042  raw = (char *) data;
1043  ptr = VARDATA_ANY(raw);
1044  endptr = (char *) raw + VARSIZE_ANY(data);
1045 
1046  /* get the header and perform further sanity checks */
1047  memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1048  ptr += sizeof(uint32);
1049 
1050  memcpy(&mcvlist->type, ptr, sizeof(uint32));
1051  ptr += sizeof(uint32);
1052 
1053  memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1054  ptr += sizeof(uint32);
1055 
1056  memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1057  ptr += sizeof(AttrNumber);
1058 
1059  if (mcvlist->magic != STATS_MCV_MAGIC)
1060  elog(ERROR, "invalid MCV magic %u (expected %u)",
1061  mcvlist->magic, STATS_MCV_MAGIC);
1062 
1063  if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1064  elog(ERROR, "invalid MCV type %u (expected %u)",
1065  mcvlist->type, STATS_MCV_TYPE_BASIC);
1066 
1067  if (mcvlist->ndimensions == 0)
1068  elog(ERROR, "invalid zero-length dimension array in MCVList");
1069  else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1070  (mcvlist->ndimensions < 0))
1071  elog(ERROR, "invalid length (%d) dimension array in MCVList",
1072  mcvlist->ndimensions);
1073 
1074  if (mcvlist->nitems == 0)
1075  elog(ERROR, "invalid zero-length item array in MCVList");
1076  else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1077  elog(ERROR, "invalid length (%u) item array in MCVList",
1078  mcvlist->nitems);
1079 
1080  nitems = mcvlist->nitems;
1081  ndims = mcvlist->ndimensions;
1082 
1083  /*
1084  * Check amount of data including DimensionInfo for all dimensions and
1085  * also the serialized items (including uint16 indexes). Also, walk
1086  * through the dimension information and add it to the sum.
1087  */
1088  expected_size = SizeOfMCVList(ndims, nitems);
1089 
1090  /*
1091  * Check that we have at least the dimension and info records, along with
1092  * the items. We don't know the size of the serialized values yet. We need
1093  * to do this check first, before accessing the dimension info.
1094  */
1095  if (VARSIZE_ANY(data) < expected_size)
1096  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1097  VARSIZE_ANY(data), expected_size);
1098 
1099  /* Now copy the array of type Oids. */
1100  memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1101  ptr += (sizeof(Oid) * ndims);
1102 
1103  /* Now it's safe to access the dimension info. */
1104  info = palloc(ndims * sizeof(DimensionInfo));
1105 
1106  memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1107  ptr += (ndims * sizeof(DimensionInfo));
1108 
1109  /* account for the value arrays */
1110  for (dim = 0; dim < ndims; dim++)
1111  {
1112  /*
1113  * XXX I wonder if we can/should rely on asserts here. Maybe those
1114  * checks should be done every time?
1115  */
1116  Assert(info[dim].nvalues >= 0);
1117  Assert(info[dim].nbytes >= 0);
1118 
1119  expected_size += info[dim].nbytes;
1120  }
1121 
1122  /*
1123  * Now we know the total expected MCV size, including all the pieces
1124  * (header, dimension info. items and deduplicated data). So do the final
1125  * check on size.
1126  */
1127  if (VARSIZE_ANY(data) != expected_size)
1128  elog(ERROR, "invalid MCV size %zd (expected %zu)",
1129  VARSIZE_ANY(data), expected_size);
1130 
1131  /*
1132  * We need an array of Datum values for each dimension, so that we can
1133  * easily translate the uint16 indexes later. We also need a top-level
1134  * array of pointers to those per-dimension arrays.
1135  *
1136  * While allocating the arrays for dimensions, compute how much space we
1137  * need for a copy of the by-ref data, as we can't simply point to the
1138  * original values (it might go away).
1139  */
1140  datalen = 0; /* space for by-ref data */
1141  map = (Datum **) palloc(ndims * sizeof(Datum *));
1142 
1143  for (dim = 0; dim < ndims; dim++)
1144  {
1145  map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1146 
1147  /* space needed for a copy of data for by-ref types */
1148  datalen += info[dim].nbytes_aligned;
1149  }
1150 
1151  /*
1152  * Now resize the MCV list so that the allocation includes all the data.
1153  *
1154  * Allocate space for a copy of the data, as we can't simply reference the
1155  * serialized data - it's not aligned properly, and it may disappear while
1156  * we're still using the MCV list, e.g. due to catcache release.
1157  *
1158  * We do care about alignment here, because we will allocate all the
1159  * pieces at once, but then use pointers to different parts.
1160  */
1161  mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1162 
1163  /* arrays of values and isnull flags for all MCV items */
1164  mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1165  mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1166 
1167  /* we don't quite need to align this, but it makes some asserts easier */
1168  mcvlen += MAXALIGN(datalen);
1169 
1170  /* now resize the deserialized MCV list, and compute pointers to parts */
1171  mcvlist = repalloc(mcvlist, mcvlen);
1172 
1173  /* pointer to the beginning of values/isnull arrays */
1174  valuesptr = (char *) mcvlist
1175  + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1176 
1177  isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1178 
1179  dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1180 
1181  /*
1182  * Build mapping (index => value) for translating the serialized data into
1183  * the in-memory representation.
1184  */
1185  for (dim = 0; dim < ndims; dim++)
1186  {
1187  /* remember start position in the input array */
1188  char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1189 
1190  if (info[dim].typbyval)
1191  {
1192  /* for by-val types we simply copy data into the mapping */
1193  for (i = 0; i < info[dim].nvalues; i++)
1194  {
1195  Datum v = 0;
1196 
1197  memcpy(&v, ptr, info[dim].typlen);
1198  ptr += info[dim].typlen;
1199 
1200  map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1201 
1202  /* no under/overflow of input array */
1203  Assert(ptr <= (start + info[dim].nbytes));
1204  }
1205  }
1206  else
1207  {
1208  /* for by-ref types we need to also make a copy of the data */
1209 
1210  /* passed by reference, but fixed length (name, tid, ...) */
1211  if (info[dim].typlen > 0)
1212  {
1213  for (i = 0; i < info[dim].nvalues; i++)
1214  {
1215  memcpy(dataptr, ptr, info[dim].typlen);
1216  ptr += info[dim].typlen;
1217 
1218  /* just point into the array */
1219  map[dim][i] = PointerGetDatum(dataptr);
1220  dataptr += MAXALIGN(info[dim].typlen);
1221  }
1222  }
1223  else if (info[dim].typlen == -1)
1224  {
1225  /* varlena */
1226  for (i = 0; i < info[dim].nvalues; i++)
1227  {
1228  uint32 len;
1229 
1230  /* read the uint32 length */
1231  memcpy(&len, ptr, sizeof(uint32));
1232  ptr += sizeof(uint32);
1233 
1234  /* the length is data-only */
1235  SET_VARSIZE(dataptr, len + VARHDRSZ);
1236  memcpy(VARDATA(dataptr), ptr, len);
1237  ptr += len;
1238 
1239  /* just point into the array */
1240  map[dim][i] = PointerGetDatum(dataptr);
1241 
1242  /* skip to place of the next deserialized value */
1243  dataptr += MAXALIGN(len + VARHDRSZ);
1244  }
1245  }
1246  else if (info[dim].typlen == -2)
1247  {
1248  /* cstring */
1249  for (i = 0; i < info[dim].nvalues; i++)
1250  {
1251  uint32 len;
1252 
1253  memcpy(&len, ptr, sizeof(uint32));
1254  ptr += sizeof(uint32);
1255 
1256  memcpy(dataptr, ptr, len);
1257  ptr += len;
1258 
1259  /* just point into the array */
1260  map[dim][i] = PointerGetDatum(dataptr);
1261  dataptr += MAXALIGN(len);
1262  }
1263  }
1264 
1265  /* no under/overflow of input array */
1266  Assert(ptr <= (start + info[dim].nbytes));
1267 
1268  /* no overflow of the output mcv value */
1269  Assert(dataptr <= ((char *) mcvlist + mcvlen));
1270  }
1271 
1272  /* check we consumed input data for this dimension exactly */
1273  Assert(ptr == (start + info[dim].nbytes));
1274  }
1275 
1276  /* we should have also filled the MCV list exactly */
1277  Assert(dataptr == ((char *) mcvlist + mcvlen));
1278 
1279  /* deserialize the MCV items and translate the indexes to Datums */
1280  for (i = 0; i < nitems; i++)
1281  {
1282  MCVItem *item = &mcvlist->items[i];
1283 
1284  item->values = (Datum *) valuesptr;
1285  valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1286 
1287  item->isnull = (bool *) isnullptr;
1288  isnullptr += MAXALIGN(sizeof(bool) * ndims);
1289 
1290  memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1291  ptr += sizeof(bool) * ndims;
1292 
1293  memcpy(&item->frequency, ptr, sizeof(double));
1294  ptr += sizeof(double);
1295 
1296  memcpy(&item->base_frequency, ptr, sizeof(double));
1297  ptr += sizeof(double);
1298 
1299  /* finally translate the indexes (for non-NULL only) */
1300  for (dim = 0; dim < ndims; dim++)
1301  {
1302  uint16 index;
1303 
1304  memcpy(&index, ptr, sizeof(uint16));
1305  ptr += sizeof(uint16);
1306 
1307  if (item->isnull[dim])
1308  continue;
1309 
1310  item->values[dim] = map[dim][index];
1311  }
1312 
1313  /* check we're not overflowing the input */
1314  Assert(ptr <= endptr);
1315  }
1316 
1317  /* check that we processed all the data */
1318  Assert(ptr == endptr);
1319 
1320  /* release the buffers used for mapping */
1321  for (dim = 0; dim < ndims; dim++)
1322  pfree(map[dim]);
1323 
1324  pfree(map);
1325 
1326  return mcvlist;
1327 }
1328 
1329 /*
1330  * SRF with details about buckets of a histogram:
1331  *
1332  * - item ID (0...nitems)
1333  * - values (string array)
1334  * - nulls only (boolean array)
1335  * - frequency (double precision)
1336  * - base_frequency (double precision)
1337  *
1338  * The input is the OID of the statistics, and there are no rows returned if
1339  * the statistics contains no histogram.
1340  */
1341 Datum
1343 {
1344  FuncCallContext *funcctx;
1345 
1346  /* stuff done only on the first call of the function */
1347  if (SRF_IS_FIRSTCALL())
1348  {
1349  MemoryContext oldcontext;
1350  MCVList *mcvlist;
1351  TupleDesc tupdesc;
1352 
1353  /* create a function context for cross-call persistence */
1354  funcctx = SRF_FIRSTCALL_INIT();
1355 
1356  /* switch to memory context appropriate for multiple function calls */
1357  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1358 
1360 
1361  funcctx->user_fctx = mcvlist;
1362 
1363  /* total number of tuples to be returned */
1364  funcctx->max_calls = 0;
1365  if (funcctx->user_fctx != NULL)
1366  funcctx->max_calls = mcvlist->nitems;
1367 
1368  /* Build a tuple descriptor for our result type */
1369  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1370  ereport(ERROR,
1371  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1372  errmsg("function returning record called in context "
1373  "that cannot accept type record")));
1374  tupdesc = BlessTupleDesc(tupdesc);
1375 
1376  /*
1377  * generate attribute metadata needed later to produce tuples from raw
1378  * C strings
1379  */
1380  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1381 
1382  MemoryContextSwitchTo(oldcontext);
1383  }
1384 
1385  /* stuff done on every call of the function */
1386  funcctx = SRF_PERCALL_SETUP();
1387 
1388  if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1389  * left to send */
1390  {
1391  Datum values[5];
1392  bool nulls[5];
1393  HeapTuple tuple;
1394  Datum result;
1395  ArrayBuildState *astate_values = NULL;
1396  ArrayBuildState *astate_nulls = NULL;
1397 
1398  int i;
1399  MCVList *mcvlist;
1400  MCVItem *item;
1401 
1402  mcvlist = (MCVList *) funcctx->user_fctx;
1403 
1404  Assert(funcctx->call_cntr < mcvlist->nitems);
1405 
1406  item = &mcvlist->items[funcctx->call_cntr];
1407 
1408  for (i = 0; i < mcvlist->ndimensions; i++)
1409  {
1410 
1411  astate_nulls = accumArrayResult(astate_nulls,
1412  BoolGetDatum(item->isnull[i]),
1413  false,
1414  BOOLOID,
1416 
1417  if (!item->isnull[i])
1418  {
1419  bool isvarlena;
1420  Oid outfunc;
1421  FmgrInfo fmgrinfo;
1422  Datum val;
1423  text *txt;
1424 
1425  /* lookup output func for the type */
1426  getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1427  fmgr_info(outfunc, &fmgrinfo);
1428 
1429  val = FunctionCall1(&fmgrinfo, item->values[i]);
1431 
1432  astate_values = accumArrayResult(astate_values,
1433  PointerGetDatum(txt),
1434  false,
1435  TEXTOID,
1437  }
1438  else
1439  astate_values = accumArrayResult(astate_values,
1440  (Datum) 0,
1441  true,
1442  TEXTOID,
1444  }
1445 
1446  values[0] = Int32GetDatum(funcctx->call_cntr);
1449  values[3] = Float8GetDatum(item->frequency);
1450  values[4] = Float8GetDatum(item->base_frequency);
1451 
1452  /* no NULLs in the tuple */
1453  memset(nulls, 0, sizeof(nulls));
1454 
1455  /* build a tuple */
1456  tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1457 
1458  /* make the tuple into a datum */
1459  result = HeapTupleGetDatum(tuple);
1460 
1461  SRF_RETURN_NEXT(funcctx, result);
1462  }
1463  else /* do when there is no more left */
1464  {
1465  SRF_RETURN_DONE(funcctx);
1466  }
1467 }
1468 
1469 /*
1470  * pg_mcv_list_in - input routine for type pg_mcv_list.
1471  *
1472  * pg_mcv_list is real enough to be a table column, but it has no operations
1473  * of its own, and disallows input too
1474  */
1475 Datum
1477 {
1478  /*
1479  * pg_mcv_list stores the data in binary form and parsing text input is
1480  * not needed, so disallow this.
1481  */
1482  ereport(ERROR,
1483  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1484  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1485 
1486  PG_RETURN_VOID(); /* keep compiler quiet */
1487 }
1488 
1489 
1490 /*
1491  * pg_mcv_list_out - output routine for type pg_mcv_list.
1492  *
1493  * MCV lists are serialized into a bytea value, so we simply call byteaout()
1494  * to serialize the value into text. But it'd be nice to serialize that into
1495  * a meaningful representation (e.g. for inspection by people).
1496  *
1497  * XXX This should probably return something meaningful, similar to what
1498  * pg_dependencies_out does. Not sure how to deal with the deduplicated
1499  * values, though - do we want to expand that or not?
1500  */
1501 Datum
1503 {
1504  return byteaout(fcinfo);
1505 }
1506 
1507 /*
1508  * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1509  */
1510 Datum
1512 {
1513  ereport(ERROR,
1514  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1515  errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1516 
1517  PG_RETURN_VOID(); /* keep compiler quiet */
1518 }
1519 
1520 /*
1521  * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1522  *
1523  * MCV lists are serialized in a bytea value (although the type is named
1524  * differently), so let's just send that.
1525  */
1526 Datum
1528 {
1529  return byteasend(fcinfo);
1530 }
1531 
1532 /*
1533  * match the attribute/expression to a dimension of the statistic
1534  *
1535  * Match the attribute/expression to statistics dimension. Optionally
1536  * determine the collation.
1537  */
1538 static int
1539 mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
1540 {
1541  int idx = -1;
1542 
1543  if (IsA(expr, Var))
1544  {
1545  /* simple Var, so just lookup using varattno */
1546  Var *var = (Var *) expr;
1547 
1548  if (collid)
1549  *collid = var->varcollid;
1550 
1551  idx = bms_member_index(keys, var->varattno);
1552 
1553  /* make sure the index is valid */
1554  Assert((idx >= 0) && (idx <= bms_num_members(keys)));
1555  }
1556  else
1557  {
1558  ListCell *lc;
1559 
1560  /* expressions are stored after the simple columns */
1561  idx = bms_num_members(keys);
1562 
1563  if (collid)
1564  *collid = exprCollation(expr);
1565 
1566  /* expression - lookup in stats expressions */
1567  foreach(lc, exprs)
1568  {
1569  Node *stat_expr = (Node *) lfirst(lc);
1570 
1571  if (equal(expr, stat_expr))
1572  break;
1573 
1574  idx++;
1575  }
1576 
1577  /* make sure the index is valid */
1578  Assert((idx >= bms_num_members(keys)) &&
1579  (idx <= bms_num_members(keys) + list_length(exprs)));
1580  }
1581 
1582  Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1583 
1584  return idx;
1585 }
1586 
1587 /*
1588  * mcv_get_match_bitmap
1589  * Evaluate clauses using the MCV list, and update the match bitmap.
1590  *
1591  * A match bitmap keeps match/mismatch status for each MCV item, and we
1592  * update it based on additional clauses. We also use it to skip items
1593  * that can't possibly match (e.g. item marked as "mismatch" can't change
1594  * to "match" when evaluating AND clause list).
1595  *
1596  * The function also returns a flag indicating whether there was an
1597  * equality condition for all attributes, the minimum frequency in the MCV
1598  * list, and a total MCV frequency (sum of frequencies for all items).
1599  *
1600  * XXX Currently the match bitmap uses a bool for each MCV item, which is
1601  * somewhat wasteful as we could do with just a single bit, thus reducing
1602  * the size to ~1/8. It would also allow us to combine bitmaps simply using
1603  * & and |, which should be faster than min/max. The bitmaps are fairly
1604  * small, though (thanks to the cap on the MCV list size).
1605  */
1606 static bool *
1608  Bitmapset *keys, List *exprs,
1609  MCVList *mcvlist, bool is_or)
1610 {
1611  int i;
1612  ListCell *l;
1613  bool *matches;
1614 
1615  /* The bitmap may be partially built. */
1616  Assert(clauses != NIL);
1617  Assert(list_length(clauses) >= 1);
1618  Assert(mcvlist != NULL);
1619  Assert(mcvlist->nitems > 0);
1620  Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1621 
1622  matches = palloc(sizeof(bool) * mcvlist->nitems);
1623  memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
1624 
1625  /*
1626  * Loop through the list of clauses, and for each of them evaluate all the
1627  * MCV items not yet eliminated by the preceding clauses.
1628  */
1629  foreach(l, clauses)
1630  {
1631  Node *clause = (Node *) lfirst(l);
1632 
1633  /* if it's a RestrictInfo, then extract the clause */
1634  if (IsA(clause, RestrictInfo))
1635  clause = (Node *) ((RestrictInfo *) clause)->clause;
1636 
1637  /*
1638  * Handle the various types of clauses - OpClause, NullTest and
1639  * AND/OR/NOT
1640  */
1641  if (is_opclause(clause))
1642  {
1643  OpExpr *expr = (OpExpr *) clause;
1644  FmgrInfo opproc;
1645 
1646  /* valid only after examine_opclause_args returns true */
1647  Node *clause_expr;
1648  Const *cst;
1649  bool expronleft;
1650  int idx;
1651  Oid collid;
1652 
1653  fmgr_info(get_opcode(expr->opno), &opproc);
1654 
1655  /* extract the var/expr and const from the expression */
1656  if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1657  elog(ERROR, "incompatible clause");
1658 
1659  /* match the attribute/expression to a dimension of the statistic */
1660  idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1661 
1662  Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1663 
1664  /*
1665  * Walk through the MCV items and evaluate the current clause. We
1666  * can skip items that were already ruled out, and terminate if
1667  * there are no remaining MCV items that might possibly match.
1668  */
1669  for (i = 0; i < mcvlist->nitems; i++)
1670  {
1671  bool match = true;
1672  MCVItem *item = &mcvlist->items[i];
1673 
1674  Assert(idx >= 0);
1675 
1676  /*
1677  * When the MCV item or the Const value is NULL we can treat
1678  * this as a mismatch. We must not call the operator because
1679  * of strictness.
1680  */
1681  if (item->isnull[idx] || cst->constisnull)
1682  {
1683  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1684  continue;
1685  }
1686 
1687  /*
1688  * Skip MCV items that can't change result in the bitmap. Once
1689  * the value gets false for AND-lists, or true for OR-lists,
1690  * we don't need to look at more clauses.
1691  */
1692  if (RESULT_IS_FINAL(matches[i], is_or))
1693  continue;
1694 
1695  /*
1696  * First check whether the constant is below the lower
1697  * boundary (in that case we can skip the bucket, because
1698  * there's no overlap).
1699  *
1700  * We don't store collations used to build the statistics, but
1701  * we can use the collation for the attribute itself, as
1702  * stored in varcollid. We do reset the statistics after a
1703  * type change (including collation change), so this is OK.
1704  * For expressions, we use the collation extracted from the
1705  * expression itself.
1706  */
1707  if (expronleft)
1708  match = DatumGetBool(FunctionCall2Coll(&opproc,
1709  collid,
1710  item->values[idx],
1711  cst->constvalue));
1712  else
1713  match = DatumGetBool(FunctionCall2Coll(&opproc,
1714  collid,
1715  cst->constvalue,
1716  item->values[idx]));
1717 
1718  /* update the match bitmap with the result */
1719  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1720  }
1721  }
1722  else if (IsA(clause, ScalarArrayOpExpr))
1723  {
1724  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1725  FmgrInfo opproc;
1726 
1727  /* valid only after examine_opclause_args returns true */
1728  Node *clause_expr;
1729  Const *cst;
1730  bool expronleft;
1731  Oid collid;
1732  int idx;
1733 
1734  /* array evaluation */
1735  ArrayType *arrayval;
1736  int16 elmlen;
1737  bool elmbyval;
1738  char elmalign;
1739  int num_elems;
1740  Datum *elem_values;
1741  bool *elem_nulls;
1742 
1743  fmgr_info(get_opcode(expr->opno), &opproc);
1744 
1745  /* extract the var/expr and const from the expression */
1746  if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1747  elog(ERROR, "incompatible clause");
1748 
1749  /* ScalarArrayOpExpr has the Var always on the left */
1750  Assert(expronleft);
1751 
1752  /* XXX what if (cst->constisnull == NULL)? */
1753  if (!cst->constisnull)
1754  {
1755  arrayval = DatumGetArrayTypeP(cst->constvalue);
1757  &elmlen, &elmbyval, &elmalign);
1758  deconstruct_array(arrayval,
1759  ARR_ELEMTYPE(arrayval),
1760  elmlen, elmbyval, elmalign,
1761  &elem_values, &elem_nulls, &num_elems);
1762  }
1763 
1764  /* match the attribute/expression to a dimension of the statistic */
1765  idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1766 
1767  /*
1768  * Walk through the MCV items and evaluate the current clause. We
1769  * can skip items that were already ruled out, and terminate if
1770  * there are no remaining MCV items that might possibly match.
1771  */
1772  for (i = 0; i < mcvlist->nitems; i++)
1773  {
1774  int j;
1775  bool match = !expr->useOr;
1776  MCVItem *item = &mcvlist->items[i];
1777 
1778  /*
1779  * When the MCV item or the Const value is NULL we can treat
1780  * this as a mismatch. We must not call the operator because
1781  * of strictness.
1782  */
1783  if (item->isnull[idx] || cst->constisnull)
1784  {
1785  matches[i] = RESULT_MERGE(matches[i], is_or, false);
1786  continue;
1787  }
1788 
1789  /*
1790  * Skip MCV items that can't change result in the bitmap. Once
1791  * the value gets false for AND-lists, or true for OR-lists,
1792  * we don't need to look at more clauses.
1793  */
1794  if (RESULT_IS_FINAL(matches[i], is_or))
1795  continue;
1796 
1797  for (j = 0; j < num_elems; j++)
1798  {
1799  Datum elem_value = elem_values[j];
1800  bool elem_isnull = elem_nulls[j];
1801  bool elem_match;
1802 
1803  /* NULL values always evaluate as not matching. */
1804  if (elem_isnull)
1805  {
1806  match = RESULT_MERGE(match, expr->useOr, false);
1807  continue;
1808  }
1809 
1810  /*
1811  * Stop evaluating the array elements once we reach a
1812  * matching value that can't change - ALL() is the same as
1813  * AND-list, ANY() is the same as OR-list.
1814  */
1815  if (RESULT_IS_FINAL(match, expr->useOr))
1816  break;
1817 
1818  elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1819  collid,
1820  item->values[idx],
1821  elem_value));
1822 
1823  match = RESULT_MERGE(match, expr->useOr, elem_match);
1824  }
1825 
1826  /* update the match bitmap with the result */
1827  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1828  }
1829  }
1830  else if (IsA(clause, NullTest))
1831  {
1832  NullTest *expr = (NullTest *) clause;
1833  Node *clause_expr = (Node *) (expr->arg);
1834 
1835  /* match the attribute/expression to a dimension of the statistic */
1836  int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1837 
1838  /*
1839  * Walk through the MCV items and evaluate the current clause. We
1840  * can skip items that were already ruled out, and terminate if
1841  * there are no remaining MCV items that might possibly match.
1842  */
1843  for (i = 0; i < mcvlist->nitems; i++)
1844  {
1845  bool match = false; /* assume mismatch */
1846  MCVItem *item = &mcvlist->items[i];
1847 
1848  /* if the clause mismatches the MCV item, update the bitmap */
1849  switch (expr->nulltesttype)
1850  {
1851  case IS_NULL:
1852  match = (item->isnull[idx]) ? true : match;
1853  break;
1854 
1855  case IS_NOT_NULL:
1856  match = (!item->isnull[idx]) ? true : match;
1857  break;
1858  }
1859 
1860  /* now, update the match bitmap, depending on OR/AND type */
1861  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1862  }
1863  }
1864  else if (is_orclause(clause) || is_andclause(clause))
1865  {
1866  /* AND/OR clause, with all subclauses being compatible */
1867 
1868  int i;
1869  BoolExpr *bool_clause = ((BoolExpr *) clause);
1870  List *bool_clauses = bool_clause->args;
1871 
1872  /* match/mismatch bitmap for each MCV item */
1873  bool *bool_matches = NULL;
1874 
1875  Assert(bool_clauses != NIL);
1876  Assert(list_length(bool_clauses) >= 2);
1877 
1878  /* build the match bitmap for the OR-clauses */
1879  bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1880  mcvlist, is_orclause(clause));
1881 
1882  /*
1883  * Merge the bitmap produced by mcv_get_match_bitmap into the
1884  * current one. We need to consider if we're evaluating AND or OR
1885  * condition when merging the results.
1886  */
1887  for (i = 0; i < mcvlist->nitems; i++)
1888  matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1889 
1890  pfree(bool_matches);
1891  }
1892  else if (is_notclause(clause))
1893  {
1894  /* NOT clause, with all subclauses compatible */
1895 
1896  int i;
1897  BoolExpr *not_clause = ((BoolExpr *) clause);
1898  List *not_args = not_clause->args;
1899 
1900  /* match/mismatch bitmap for each MCV item */
1901  bool *not_matches = NULL;
1902 
1903  Assert(not_args != NIL);
1904  Assert(list_length(not_args) == 1);
1905 
1906  /* build the match bitmap for the NOT-clause */
1907  not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1908  mcvlist, false);
1909 
1910  /*
1911  * Merge the bitmap produced by mcv_get_match_bitmap into the
1912  * current one. We're handling a NOT clause, so invert the result
1913  * before merging it into the global bitmap.
1914  */
1915  for (i = 0; i < mcvlist->nitems; i++)
1916  matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1917 
1918  pfree(not_matches);
1919  }
1920  else if (IsA(clause, Var))
1921  {
1922  /* Var (has to be a boolean Var, possibly from below NOT) */
1923 
1924  Var *var = (Var *) (clause);
1925 
1926  /* match the attribute to a dimension of the statistic */
1927  int idx = bms_member_index(keys, var->varattno);
1928 
1929  Assert(var->vartype == BOOLOID);
1930 
1931  /*
1932  * Walk through the MCV items and evaluate the current clause. We
1933  * can skip items that were already ruled out, and terminate if
1934  * there are no remaining MCV items that might possibly match.
1935  */
1936  for (i = 0; i < mcvlist->nitems; i++)
1937  {
1938  MCVItem *item = &mcvlist->items[i];
1939  bool match = false;
1940 
1941  /* if the item is NULL, it's a mismatch */
1942  if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1943  match = true;
1944 
1945  /* update the result bitmap */
1946  matches[i] = RESULT_MERGE(matches[i], is_or, match);
1947  }
1948  }
1949  else
1950  elog(ERROR, "unknown clause type: %d", clause->type);
1951  }
1952 
1953  return matches;
1954 }
1955 
1956 
1957 /*
1958  * mcv_combine_selectivities
1959  * Combine per-column and multi-column MCV selectivity estimates.
1960  *
1961  * simple_sel is a "simple" selectivity estimate (produced without using any
1962  * extended statistics, essentially assuming independence of columns/clauses).
1963  *
1964  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1965  * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then
1966  * essentially interpreted as a correction to be added to simple_sel, as
1967  * described below.
1968  *
1969  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1970  * matching ones). This is used as an upper bound on the portion of the
1971  * selectivity estimates not covered by the MCV statistics.
1972  *
1973  * Note: While simple and base selectivities are defined in a quite similar
1974  * way, the values are computed differently and are not therefore equal. The
1975  * simple selectivity is computed as a product of per-clause estimates, while
1976  * the base selectivity is computed by adding up base frequencies of matching
1977  * items of the multi-column MCV list. So the values may differ for two main
1978  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1979  * the MCV items did not match the estimated clauses.
1980  *
1981  * As both (a) and (b) reduce the base selectivity value, it generally holds
1982  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1983  * values may be equal.
1984  *
1985  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
1986  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
1987  * correction for the part covered by the MCV list. Those two statements are
1988  * actually equivalent.
1989  */
1992  Selectivity mcv_sel,
1993  Selectivity mcv_basesel,
1994  Selectivity mcv_totalsel)
1995 {
1996  Selectivity other_sel;
1997  Selectivity sel;
1998 
1999  /* estimated selectivity of values not covered by MCV matches */
2000  other_sel = simple_sel - mcv_basesel;
2001  CLAMP_PROBABILITY(other_sel);
2002 
2003  /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2004  if (other_sel > 1.0 - mcv_totalsel)
2005  other_sel = 1.0 - mcv_totalsel;
2006 
2007  /* overall selectivity is the sum of the MCV and non-MCV parts */
2008  sel = mcv_sel + other_sel;
2009  CLAMP_PROBABILITY(sel);
2010 
2011  return sel;
2012 }
2013 
2014 
2015 /*
2016  * mcv_clauselist_selectivity
2017  * Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2018  * list of clauses.
2019  *
2020  * This determines which MCV items match every clause in the list and returns
2021  * the sum of the frequencies of those items.
2022  *
2023  * In addition, it returns the sum of the base frequencies of each of those
2024  * items (that is the sum of the selectivities that each item would have if
2025  * the columns were independent of one another), and the total selectivity of
2026  * all the MCV items (not just the matching ones). These are expected to be
2027  * used together with a "simple" selectivity estimate (one based only on
2028  * per-column statistics) to produce an overall selectivity estimate that
2029  * makes use of both per-column and multi-column statistics --- see
2030  * mcv_combine_selectivities().
2031  */
2034  List *clauses, int varRelid,
2035  JoinType jointype, SpecialJoinInfo *sjinfo,
2036  RelOptInfo *rel,
2037  Selectivity *basesel, Selectivity *totalsel)
2038 {
2039  int i;
2040  MCVList *mcv;
2041  Selectivity s = 0.0;
2042  RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2043 
2044  /* match/mismatch bitmap for each MCV item */
2045  bool *matches = NULL;
2046 
2047  /* load the MCV list stored in the statistics object */
2048  mcv = statext_mcv_load(stat->statOid, rte->inh);
2049 
2050  /* build a match bitmap for the clauses */
2051  matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2052  mcv, false);
2053 
2054  /* sum frequencies for all the matching MCV items */
2055  *basesel = 0.0;
2056  *totalsel = 0.0;
2057  for (i = 0; i < mcv->nitems; i++)
2058  {
2059  *totalsel += mcv->items[i].frequency;
2060 
2061  if (matches[i] != false)
2062  {
2063  *basesel += mcv->items[i].base_frequency;
2064  s += mcv->items[i].frequency;
2065  }
2066  }
2067 
2068  return s;
2069 }
2070 
2071 
2072 /*
2073  * mcv_clause_selectivity_or
2074  * Use MCV statistics to estimate the selectivity of a clause that
2075  * appears in an ORed list of clauses.
2076  *
2077  * As with mcv_clauselist_selectivity() this determines which MCV items match
2078  * the clause and returns both the sum of the frequencies and the sum of the
2079  * base frequencies of those items, as well as the sum of the frequencies of
2080  * all MCV items (not just the matching ones) so that this information can be
2081  * used by mcv_combine_selectivities() to produce a selectivity estimate that
2082  * makes use of both per-column and multi-column statistics.
2083  *
2084  * Additionally, we return information to help compute the overall selectivity
2085  * of the ORed list of clauses assumed to contain this clause. This function
2086  * is intended to be called for each clause in the ORed list of clauses,
2087  * allowing the overall selectivity to be computed using the following
2088  * algorithm:
2089  *
2090  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2091  * of the first n clauses in the list. Then the combined selectivity taking
2092  * into account the next clause C[n+1] can be written as
2093  *
2094  * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2095  *
2096  * The final term above represents the overlap between the clauses examined so
2097  * far and the (n+1)'th clause. To estimate its selectivity, we track the
2098  * match bitmap for the ORed list of clauses examined so far and examine its
2099  * intersection with the match bitmap for the (n+1)'th clause.
2100  *
2101  * We then also return the sums of the MCV item frequencies and base
2102  * frequencies for the match bitmap intersection corresponding to the overlap
2103  * term above, so that they can be combined with a simple selectivity estimate
2104  * for that term.
2105  *
2106  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2107  * for the clauses examined so far. The caller is expected to set it to NULL
2108  * the first time it calls this function.
2109  */
2112  MCVList *mcv, Node *clause, bool **or_matches,
2113  Selectivity *basesel, Selectivity *overlap_mcvsel,
2114  Selectivity *overlap_basesel, Selectivity *totalsel)
2115 {
2116  Selectivity s = 0.0;
2117  bool *new_matches;
2118  int i;
2119 
2120  /* build the OR-matches bitmap, if not built already */
2121  if (*or_matches == NULL)
2122  *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2123 
2124  /* build the match bitmap for the new clause */
2125  new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2126  stat->exprs, mcv, false);
2127 
2128  /*
2129  * Sum the frequencies for all the MCV items matching this clause and also
2130  * those matching the overlap between this clause and any of the preceding
2131  * clauses as described above.
2132  */
2133  *basesel = 0.0;
2134  *overlap_mcvsel = 0.0;
2135  *overlap_basesel = 0.0;
2136  *totalsel = 0.0;
2137  for (i = 0; i < mcv->nitems; i++)
2138  {
2139  *totalsel += mcv->items[i].frequency;
2140 
2141  if (new_matches[i])
2142  {
2143  s += mcv->items[i].frequency;
2144  *basesel += mcv->items[i].base_frequency;
2145 
2146  if ((*or_matches)[i])
2147  {
2148  *overlap_mcvsel += mcv->items[i].frequency;
2149  *overlap_basesel += mcv->items[i].base_frequency;
2150  }
2151  }
2152 
2153  /* update the OR-matches bitmap for the next clause */
2154  (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2155  }
2156 
2157  pfree(new_matches);
2158 
2159  return s;
2160 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define DatumGetArrayTypeP(X)
Definition: array.h:254
#define ARR_ELEMTYPE(a)
Definition: array.h:285
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5278
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3577
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5342
int16 AttrNumber
Definition: attnum.h:21
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:648
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:453
static Datum values[MAXATTR]
Definition: bootstrap.c:156
unsigned short uint16
Definition: c.h:440
unsigned int uint32
Definition: c.h:441
signed short int16
Definition: c.h:428
#define MAXALIGN(LEN)
Definition: c.h:757
#define offsetof(type, field)
Definition: c.h:727
#define VARHDRSZ
Definition: c.h:627
unsigned char bool
Definition: c.h:391
#define PG_UINT16_MAX
Definition: c.h:522
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:155
size_t Size
Definition: c.h:540
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3564
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2071
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2086
int compare_scalars_simple(const void *a, const void *b, void *arg)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
int compare_datums_simple(Datum a, Datum b, SortSupport ssup)
int multi_sort_compare(const void *a, const void *b, void *arg)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
bool examine_opclause_args(List *args, Node **exprp, Const **cstp, bool *expronleftp)
MultiSortSupportData * MultiSortSupport
struct DimensionInfo DimensionInfo
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1134
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:126
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1692
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:649
#define PG_GETARG_BYTEA_P(n)
Definition: fmgr.h:335
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define DatumGetByteaP(X)
Definition: fmgr.h:331
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:299
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:303
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:305
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:301
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:323
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
static struct @151 value
long val
Definition: informix.c:664
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2864
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2228
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1266
static bool * mcv_get_match_bitmap(PlannerInfo *root, List *clauses, Bitmapset *keys, List *exprs, MCVList *mcvlist, bool is_or)
Definition: mcv.c:1607
Datum pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
Definition: mcv.c:1342
Datum pg_mcv_list_in(PG_FUNCTION_ARGS)
Definition: mcv.c:1476
#define ITEM_SIZE(ndims)
Definition: mcv.c:57
Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Selectivity *basesel, Selectivity *totalsel)
Definition: mcv.c:2033
static MultiSortSupport build_mss(StatsBuildData *data)
Definition: mcv.c:351
Datum pg_mcv_list_out(PG_FUNCTION_ARGS)
Definition: mcv.c:1502
static int count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
Definition: mcv.c:383
#define MinSizeOfMCVList
Definition: mcv.c:63
Datum pg_mcv_list_send(PG_FUNCTION_ARGS)
Definition: mcv.c:1527
MCVList * statext_mcv_deserialize(bytea *data)
Definition: mcv.c:1000
#define SizeOfMCVList(ndims, nitems)
Definition: mcv.c:72
#define RESULT_MERGE(value, is_or, match)
Definition: mcv.c:92
static double get_mincount_for_mcv_list(int samplerows, double totalrows)
Definition: mcv.c:152
Selectivity mcv_combine_selectivities(Selectivity simple_sel, Selectivity mcv_sel, Selectivity mcv_basesel, Selectivity mcv_totalsel)
Definition: mcv.c:1991
Selectivity mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat, MCVList *mcv, Node *clause, bool **or_matches, Selectivity *basesel, Selectivity *overlap_mcvsel, Selectivity *overlap_basesel, Selectivity *totalsel)
Definition: mcv.c:2111
MCVList * statext_mcv_load(Oid mvoid, bool inh)
Definition: mcv.c:562
#define RESULT_IS_FINAL(value, is_or)
Definition: mcv.c:104
static int mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
Definition: mcv.c:1539
MCVList * statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
Definition: mcv.c:184
Datum pg_mcv_list_recv(PG_FUNCTION_ARGS)
Definition: mcv.c:1511
static int sort_item_compare(const void *a, const void *b, void *arg)
Definition: mcv.c:469
bytea * statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
Definition: mcv.c:625
static SortItem ** build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss, int *ncounts)
Definition: mcv.c:494
static SortItem * build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss, int *ndistinct)
Definition: mcv.c:428
static int compare_sort_item_count(const void *a, const void *b)
Definition: mcv.c:407
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc0(Size size)
Definition: mcxt.c:1099
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
void * palloc(Size size)
Definition: mcxt.c:1068
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:788
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:95
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:104
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:64
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:113
#define IsA(nodeptr, _type_)
Definition: nodes.h:624
double Selectivity
Definition: nodes.h:706
JoinType
Definition: nodes.h:744
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
void * arg
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
#define NIL
Definition: pg_list.h:65
#define list_make1(x1)
Definition: pg_list.h:206
void * bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
void pg_qsort(void *base, size_t nel, size_t elsize, int(*cmp)(const void *, const void *))
uintptr_t Datum
Definition: postgres.h:411
#define VARSIZE_ANY(PTR)
Definition: postgres.h:348
#define DatumGetBool(X)
Definition: postgres.h:437
#define DatumGetPointer(X)
Definition: postgres.h:593
#define VARDATA(PTR)
Definition: postgres.h:315
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define BoolGetDatum(X)
Definition: postgres.h:446
#define Int32GetDatum(X)
Definition: postgres.h:523
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
#define DatumGetCString(X)
Definition: postgres.h:610
#define PointerGetDatum(X)
Definition: postgres.h:600
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
@ IS_NULL
Definition: primnodes.h:1520
@ IS_NOT_NULL
Definition: primnodes.h:1520
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup)
Definition: sortsupport.c:135
struct SortSupportData SortSupportData
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
static int ApplySortComparator(Datum datum1, bool isNull1, Datum datum2, bool isNull2, SortSupport ssup)
Definition: sortsupport.h:200
#define STATS_MCV_TYPE_BASIC
Definition: statistics.h:67
#define STATS_MCV_MAGIC
Definition: statistics.h:66
#define STATS_MCVLIST_MAX_ITEMS
Definition: statistics.h:70
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
struct MCVItem MCVItem
TupleDesc tupdesc
Definition: funcapi.h:38
List * args
Definition: primnodes.h:635
Datum constvalue
Definition: primnodes.h:228
bool constisnull
Definition: primnodes.h:229
Definition: fmgr.h:57
void * user_fctx
Definition: funcapi.h:82
uint64 max_calls
Definition: funcapi.h:74
uint64 call_cntr
Definition: funcapi.h:65
AttInMetadata * attinmeta
Definition: funcapi.h:91
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
Definition: pg_list.h:51
bool * isnull
Definition: statistics.h:82
double frequency
Definition: statistics.h:80
double base_frequency
Definition: statistics.h:81
Datum * values
Definition: statistics.h:83
uint32 type
Definition: statistics.h:90
uint32 magic
Definition: statistics.h:89
uint32 nitems
Definition: statistics.h:91
MCVItem items[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:94
AttrNumber ndimensions
Definition: statistics.h:92
Oid types[STATS_MAX_DIMENSIONS]
Definition: statistics.h:93
SortSupportData ssup[FLEXIBLE_ARRAY_MEMBER]
Definition: nodes.h:574
NodeTag type
Definition: nodes.h:575
NullTestType nulltesttype
Definition: primnodes.h:1527
Expr * arg
Definition: primnodes.h:1526
Oid opno
Definition: primnodes.h:551
List * args
Definition: primnodes.h:557
RangeTblEntry ** simple_rte_array
Definition: pathnodes.h:194
Index relid
Definition: pathnodes.h:710
bool ssup_nulls_first
Definition: sortsupport.h:75
MemoryContext ssup_cxt
Definition: sortsupport.h:66
Form_pg_type attrtype
Definition: vacuum.h:131
Oid attrtypid
Definition: vacuum.h:129
Oid attrcollid
Definition: vacuum.h:132
Definition: primnodes.h:196
Oid vartype
Definition: primnodes.h:202
AttrNumber varattno
Definition: primnodes.h:200
Oid varcollid
Definition: primnodes.h:204
Definition: type.h:90
Definition: c.h:622
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1221
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1434
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:1184
@ STATEXTDATASTXOID
Definition: syscache.h:94
#define store_att_byval(T, newdatum, attlen)
Definition: tupmacs.h:226
#define fetch_att(T, attbyval, attlen)
Definition: tupmacs.h:75
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define TYPECACHE_LT_OPR
Definition: typcache.h:137
text * cstring_to_text(const char *s)
Definition: varlena.c:188
Datum byteaout(PG_FUNCTION_ARGS)
Definition: varlena.c:390
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:492