PostgreSQL Source Code  git master
dependencies.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dependencies.c
4  * POSTGRES functional dependencies
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/statistics/dependencies.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/htup_details.h"
17 #include "access/sysattr.h"
18 #include "catalog/pg_operator.h"
21 #include "lib/stringinfo.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/nodes.h"
24 #include "nodes/pathnodes.h"
25 #include "optimizer/clauses.h"
26 #include "optimizer/optimizer.h"
28 #include "statistics/statistics.h"
29 #include "utils/bytea.h"
30 #include "utils/fmgroids.h"
31 #include "utils/fmgrprotos.h"
32 #include "utils/lsyscache.h"
33 #include "utils/syscache.h"
34 #include "utils/typcache.h"
35 
36 /* size of the struct header fields (magic, type, ndeps) */
37 #define SizeOfHeader (3 * sizeof(uint32))
38 
39 /* size of a serialized dependency (degree, natts, atts) */
40 #define SizeOfItem(natts) \
41  (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
42 
43 /* minimal size of a dependency (with two attributes) */
44 #define MinSizeOfItem SizeOfItem(2)
45 
46 /* minimal size of dependencies, when all deps are minimal */
47 #define MinSizeOfItems(ndeps) \
48  (SizeOfHeader + (ndeps) * MinSizeOfItem)
49 
50 /*
51  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
52  * k-permutations of n elements, except that the order does not matter for the
53  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
54  */
56 {
57  int k; /* size of the dependency */
58  int n; /* number of possible attributes */
59  int current; /* next dependency to return (index) */
60  AttrNumber ndependencies; /* number of dependencies generated */
61  AttrNumber *dependencies; /* array of pre-generated dependencies */
63 
65 
67  int index, AttrNumber start, AttrNumber *current);
72 static double dependency_degree(int numrows, HeapTuple *rows, int k,
73  AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
74 static bool dependency_is_fully_matched(MVDependency *dependency,
75  Bitmapset *attnums);
76 static bool dependency_implies_attribute(MVDependency *dependency,
78 static bool dependency_is_compatible_clause(Node *clause, Index relid,
82  Bitmapset *attnums);
83 
84 static void
87 {
88  /*
89  * The generator handles the first (k-1) elements differently from the
90  * last element.
91  */
92  if (index < (state->k - 1))
93  {
94  AttrNumber i;
95 
96  /*
97  * The first (k-1) values have to be in ascending order, which we
98  * generate recursively.
99  */
100 
101  for (i = start; i < state->n; i++)
102  {
103  current[index] = i;
104  generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105  }
106  }
107  else
108  {
109  int i;
110 
111  /*
112  * the last element is the implied value, which does not respect the
113  * ascending order. We just need to check that the value is not in the
114  * first (k-1) elements.
115  */
116 
117  for (i = 0; i < state->n; i++)
118  {
119  int j;
120  bool match = false;
121 
122  current[index] = i;
123 
124  for (j = 0; j < index; j++)
125  {
126  if (current[j] == i)
127  {
128  match = true;
129  break;
130  }
131  }
132 
133  /*
134  * If the value is not found in the first part of the dependency,
135  * we're done.
136  */
137  if (!match)
138  {
139  state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140  state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141  memcpy(&state->dependencies[(state->k * state->ndependencies)],
142  current, state->k * sizeof(AttrNumber));
143  state->ndependencies++;
144  }
145  }
146  }
147 }
148 
149 /* generate all dependencies (k-permutations of n elements) */
150 static void
152 {
153  AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
154 
155  generate_dependencies_recurse(state, 0, 0, current);
156 
157  pfree(current);
158 }
159 
160 /*
161  * initialize the DependencyGenerator of variations, and prebuild the variations
162  *
163  * This pre-builds all the variations. We could also generate them in
164  * DependencyGenerator_next(), but this seems simpler.
165  */
166 static DependencyGenerator
168 {
170 
171  Assert((n >= k) && (k > 0));
172 
173  /* allocate the DependencyGenerator state */
175  state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
176 
177  state->ndependencies = 0;
178  state->current = 0;
179  state->k = k;
180  state->n = n;
181 
182  /* now actually pre-generate all the variations */
183  generate_dependencies(state);
184 
185  return state;
186 }
187 
188 /* free the DependencyGenerator state */
189 static void
191 {
192  pfree(state->dependencies);
193  pfree(state);
194 
195 }
196 
197 /* generate next combination */
198 static AttrNumber *
200 {
201  if (state->current == state->ndependencies)
202  return NULL;
203 
204  return &state->dependencies[state->k * state->current++];
205 }
206 
207 
208 /*
209  * validates functional dependency on the data
210  *
211  * An actual work horse of detecting functional dependencies. Given a variation
212  * of k attributes, it checks that the first (k-1) are sufficient to determine
213  * the last one.
214  */
215 static double
216 dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
217  VacAttrStats **stats, Bitmapset *attrs)
218 {
219  int i,
220  nitems;
221  MultiSortSupport mss;
222  SortItem *items;
223  AttrNumber *attnums;
224  AttrNumber *attnums_dep;
225  int numattrs;
226 
227  /* counters valid within a group */
228  int group_size = 0;
229  int n_violations = 0;
230 
231  /* total number of rows supporting (consistent with) the dependency */
232  int n_supporting_rows = 0;
233 
234  /* Make sure we have at least two input attributes. */
235  Assert(k >= 2);
236 
237  /* sort info for all attributes columns */
238  mss = multi_sort_init(k);
239 
240  /*
241  * Transform the attrs from bitmap to an array to make accessing the i-th
242  * member easier, and then construct a filtered version with only attnums
243  * referenced by the dependency we validate.
244  */
245  attnums = build_attnums_array(attrs, &numattrs);
246 
247  attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
248  for (i = 0; i < k; i++)
249  attnums_dep[i] = attnums[dependency[i]];
250 
251  /*
252  * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
253  *
254  * (a) sort the data lexicographically
255  *
256  * (b) split the data into groups by first (k-1) columns
257  *
258  * (c) for each group count different values in the last column
259  *
260  * We use the column data types' default sort operators and collations;
261  * perhaps at some point it'd be worth using column-specific collations?
262  */
263 
264  /* prepare the sort function for the dimensions */
265  for (i = 0; i < k; i++)
266  {
267  VacAttrStats *colstat = stats[dependency[i]];
269 
270  type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
271  if (type->lt_opr == InvalidOid) /* shouldn't happen */
272  elog(ERROR, "cache lookup failed for ordering operator for type %u",
273  colstat->attrtypid);
274 
275  /* prepare the sort function for this dimension */
276  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
277  }
278 
279  /*
280  * build an array of SortItem(s) sorted using the multi-sort support
281  *
282  * XXX This relies on all stats entries pointing to the same tuple
283  * descriptor. For now that assumption holds, but it might change in the
284  * future for example if we support statistics on multiple tables.
285  */
286  items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
287  mss, k, attnums_dep);
288 
289  /*
290  * Walk through the sorted array, split it into rows according to the
291  * first (k-1) columns. If there's a single value in the last column, we
292  * count the group as 'supporting' the functional dependency. Otherwise we
293  * count it as contradicting.
294  */
295 
296  /* start with the first row forming a group */
297  group_size = 1;
298 
299  /* loop 1 beyond the end of the array so that we count the final group */
300  for (i = 1; i <= nitems; i++)
301  {
302  /*
303  * Check if the group ended, which may be either because we processed
304  * all the items (i==nitems), or because the i-th item is not equal to
305  * the preceding one.
306  */
307  if (i == nitems ||
308  multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
309  {
310  /*
311  * If no violations were found in the group then track the rows of
312  * the group as supporting the functional dependency.
313  */
314  if (n_violations == 0)
315  n_supporting_rows += group_size;
316 
317  /* Reset counters for the new group */
318  n_violations = 0;
319  group_size = 1;
320  continue;
321  }
322  /* first columns match, but the last one does not (so contradicting) */
323  else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
324  n_violations++;
325 
326  group_size++;
327  }
328 
329  if (items)
330  pfree(items);
331 
332  pfree(mss);
333  pfree(attnums);
334  pfree(attnums_dep);
335 
336  /* Compute the 'degree of validity' as (supporting/total). */
337  return (n_supporting_rows * 1.0 / numrows);
338 }
339 
340 /*
341  * detects functional dependencies between groups of columns
342  *
343  * Generates all possible subsets of columns (variations) and computes
344  * the degree of validity for each one. For example when creating statistics
345  * on three columns (a,b,c) there are 9 possible dependencies
346  *
347  * two columns three columns
348  * ----------- -------------
349  * (a) -> b (a,b) -> c
350  * (a) -> c (a,c) -> b
351  * (b) -> a (b,c) -> a
352  * (b) -> c
353  * (c) -> a
354  * (c) -> b
355  */
357 statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
358  VacAttrStats **stats)
359 {
360  int i,
361  k;
362  int numattrs;
363  AttrNumber *attnums;
364 
365  /* result */
367 
368  /*
369  * Transform the bms into an array, to make accessing i-th member easier.
370  */
371  attnums = build_attnums_array(attrs, &numattrs);
372 
373  Assert(numattrs >= 2);
374 
375  /*
376  * We'll try build functional dependencies starting from the smallest ones
377  * covering just 2 columns, to the largest ones, covering all columns
378  * included in the statistics object. We start from the smallest ones
379  * because we want to be able to skip already implied ones.
380  */
381  for (k = 2; k <= numattrs; k++)
382  {
383  AttrNumber *dependency; /* array with k elements */
384 
385  /* prepare a DependencyGenerator of variation */
387 
388  /* generate all possible variations of k values (out of n) */
389  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
390  {
391  double degree;
392  MVDependency *d;
393 
394  /* compute how valid the dependency seems */
395  degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
396 
397  /*
398  * if the dependency seems entirely invalid, don't store it
399  */
400  if (degree == 0.0)
401  continue;
402 
403  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
404  + k * sizeof(AttrNumber));
405 
406  /* copy the dependency (and keep the indexes into stxkeys) */
407  d->degree = degree;
408  d->nattributes = k;
409  for (i = 0; i < k; i++)
410  d->attributes[i] = attnums[dependency[i]];
411 
412  /* initialize the list of dependencies */
413  if (dependencies == NULL)
414  {
415  dependencies
416  = (MVDependencies *) palloc0(sizeof(MVDependencies));
417 
418  dependencies->magic = STATS_DEPS_MAGIC;
419  dependencies->type = STATS_DEPS_TYPE_BASIC;
420  dependencies->ndeps = 0;
421  }
422 
423  dependencies->ndeps++;
424  dependencies = (MVDependencies *) repalloc(dependencies,
425  offsetof(MVDependencies, deps)
426  + dependencies->ndeps * sizeof(MVDependency *));
427 
428  dependencies->deps[dependencies->ndeps - 1] = d;
429  }
430 
431  /*
432  * we're done with variations of k elements, so free the
433  * DependencyGenerator
434  */
435  DependencyGenerator_free(DependencyGenerator);
436  }
437 
438  return dependencies;
439 }
440 
441 
442 /*
443  * Serialize list of dependencies into a bytea value.
444  */
445 bytea *
447 {
448  int i;
449  bytea *output;
450  char *tmp;
451  Size len;
452 
453  /* we need to store ndeps, with a number of attributes for each one */
454  len = VARHDRSZ + SizeOfHeader;
455 
456  /* and also include space for the actual attribute numbers and degrees */
457  for (i = 0; i < dependencies->ndeps; i++)
458  len += SizeOfItem(dependencies->deps[i]->nattributes);
459 
460  output = (bytea *) palloc0(len);
461  SET_VARSIZE(output, len);
462 
463  tmp = VARDATA(output);
464 
465  /* Store the base struct values (magic, type, ndeps) */
466  memcpy(tmp, &dependencies->magic, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->type, sizeof(uint32));
469  tmp += sizeof(uint32);
470  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471  tmp += sizeof(uint32);
472 
473  /* store number of attributes and attribute numbers for each dependency */
474  for (i = 0; i < dependencies->ndeps; i++)
475  {
476  MVDependency *d = dependencies->deps[i];
477 
478  memcpy(tmp, &d->degree, sizeof(double));
479  tmp += sizeof(double);
480 
481  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482  tmp += sizeof(AttrNumber);
483 
484  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485  tmp += sizeof(AttrNumber) * d->nattributes;
486 
487  /* protect against overflow */
488  Assert(tmp <= ((char *) output + len));
489  }
490 
491  /* make sure we've produced exactly the right amount of data */
492  Assert(tmp == ((char *) output + len));
493 
494  return output;
495 }
496 
497 /*
498  * Reads serialized dependencies into MVDependencies structure.
499  */
502 {
503  int i;
504  Size min_expected_size;
506  char *tmp;
507 
508  if (data == NULL)
509  return NULL;
510 
511  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
512  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
514 
515  /* read the MVDependencies header */
516  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
517 
518  /* initialize pointer to the data part (skip the varlena header) */
519  tmp = VARDATA_ANY(data);
520 
521  /* read the header fields and perform basic sanity checks */
522  memcpy(&dependencies->magic, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->type, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527  tmp += sizeof(uint32);
528 
529  if (dependencies->magic != STATS_DEPS_MAGIC)
530  elog(ERROR, "invalid dependency magic %d (expected %d)",
531  dependencies->magic, STATS_DEPS_MAGIC);
532 
533  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
534  elog(ERROR, "invalid dependency type %d (expected %d)",
535  dependencies->type, STATS_DEPS_TYPE_BASIC);
536 
537  if (dependencies->ndeps == 0)
538  elog(ERROR, "invalid zero-length item array in MVDependencies");
539 
540  /* what minimum bytea size do we expect for those parameters */
541  min_expected_size = SizeOfItem(dependencies->ndeps);
542 
543  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
544  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
545  VARSIZE_ANY_EXHDR(data), min_expected_size);
546 
547  /* allocate space for the MCV items */
548  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
549  + (dependencies->ndeps * sizeof(MVDependency *)));
550 
551  for (i = 0; i < dependencies->ndeps; i++)
552  {
553  double degree;
554  AttrNumber k;
555  MVDependency *d;
556 
557  /* degree of validity */
558  memcpy(&degree, tmp, sizeof(double));
559  tmp += sizeof(double);
560 
561  /* number of attributes */
562  memcpy(&k, tmp, sizeof(AttrNumber));
563  tmp += sizeof(AttrNumber);
564 
565  /* is the number of attributes valid? */
566  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
567 
568  /* now that we know the number of attributes, allocate the dependency */
569  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
570  + (k * sizeof(AttrNumber)));
571 
572  d->degree = degree;
573  d->nattributes = k;
574 
575  /* copy attribute numbers */
576  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577  tmp += sizeof(AttrNumber) * d->nattributes;
578 
579  dependencies->deps[i] = d;
580 
581  /* still within the bytea */
582  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
583  }
584 
585  /* we should have consumed the whole bytea exactly */
586  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
587 
588  return dependencies;
589 }
590 
591 /*
592  * dependency_is_fully_matched
593  * checks that a functional dependency is fully matched given clauses on
594  * attributes (assuming the clauses are suitable equality clauses)
595  */
596 static bool
598 {
599  int j;
600 
601  /*
602  * Check that the dependency actually is fully covered by clauses. We have
603  * to translate all attribute numbers, as those are referenced
604  */
605  for (j = 0; j < dependency->nattributes; j++)
606  {
607  int attnum = dependency->attributes[j];
608 
609  if (!bms_is_member(attnum, attnums))
610  return false;
611  }
612 
613  return true;
614 }
615 
616 /*
617  * dependency_implies_attribute
618  * check that the attnum matches is implied by the functional dependency
619  */
620 static bool
622 {
623  if (attnum == dependency->attributes[dependency->nattributes - 1])
624  return true;
625 
626  return false;
627 }
628 
629 /*
630  * statext_dependencies_load
631  * Load the functional dependencies for the indicated pg_statistic_ext tuple
632  */
635 {
636  MVDependencies *result;
637  bool isnull;
638  Datum deps;
639  HeapTuple htup;
640 
642  if (!HeapTupleIsValid(htup))
643  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
644 
645  deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
646  Anum_pg_statistic_ext_data_stxddependencies, &isnull);
647  if (isnull)
648  elog(ERROR,
649  "requested statistic kind \"%c\" is not yet built for statistics object %u",
650  STATS_EXT_DEPENDENCIES, mvoid);
651 
653 
654  ReleaseSysCache(htup);
655 
656  return result;
657 }
658 
659 /*
660  * pg_dependencies_in - input routine for type pg_dependencies.
661  *
662  * pg_dependencies is real enough to be a table column, but it has no operations
663  * of its own, and disallows input too
664  */
665 Datum
667 {
668  /*
669  * pg_node_list stores the data in binary form and parsing text input is
670  * not needed, so disallow this.
671  */
672  ereport(ERROR,
673  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
674  errmsg("cannot accept a value of type %s", "pg_dependencies")));
675 
676  PG_RETURN_VOID(); /* keep compiler quiet */
677 }
678 
679 /*
680  * pg_dependencies - output routine for type pg_dependencies.
681  */
682 Datum
684 {
685  bytea *data = PG_GETARG_BYTEA_PP(0);
687  int i,
688  j;
690 
691  initStringInfo(&str);
692  appendStringInfoChar(&str, '{');
693 
694  for (i = 0; i < dependencies->ndeps; i++)
695  {
696  MVDependency *dependency = dependencies->deps[i];
697 
698  if (i > 0)
699  appendStringInfoString(&str, ", ");
700 
701  appendStringInfoChar(&str, '"');
702  for (j = 0; j < dependency->nattributes; j++)
703  {
704  if (j == dependency->nattributes - 1)
705  appendStringInfoString(&str, " => ");
706  else if (j > 0)
707  appendStringInfoString(&str, ", ");
708 
709  appendStringInfo(&str, "%d", dependency->attributes[j]);
710  }
711  appendStringInfo(&str, "\": %f", dependency->degree);
712  }
713 
714  appendStringInfoChar(&str, '}');
715 
716  PG_RETURN_CSTRING(str.data);
717 }
718 
719 /*
720  * pg_dependencies_recv - binary input routine for type pg_dependencies.
721  */
722 Datum
724 {
725  ereport(ERROR,
726  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
727  errmsg("cannot accept a value of type %s", "pg_dependencies")));
728 
729  PG_RETURN_VOID(); /* keep compiler quiet */
730 }
731 
732 /*
733  * pg_dependencies_send - binary output routine for type pg_dependencies.
734  *
735  * Functional dependencies are serialized in a bytea value (although the type
736  * is named differently), so let's just send that.
737  */
738 Datum
740 {
741  return byteasend(fcinfo);
742 }
743 
744 /*
745  * dependency_is_compatible_clause
746  * Determines if the clause is compatible with functional dependencies
747  *
748  * Only clauses that have the form of equality to a pseudoconstant, or can be
749  * interpreted that way, are currently accepted. Furthermore the variable
750  * part of the clause must be a simple Var belonging to the specified
751  * relation, whose attribute number we return in *attnum on success.
752  */
753 static bool
755 {
756  RestrictInfo *rinfo = (RestrictInfo *) clause;
757  Var *var;
758 
759  if (!IsA(rinfo, RestrictInfo))
760  return false;
761 
762  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
763  if (rinfo->pseudoconstant)
764  return false;
765 
766  /* Clauses referencing multiple, or no, varnos are incompatible */
768  return false;
769 
770  if (is_opclause(rinfo->clause))
771  {
772  /* If it's an opclause, check for Var = Const or Const = Var. */
773  OpExpr *expr = (OpExpr *) rinfo->clause;
774 
775  /* Only expressions with two arguments are candidates. */
776  if (list_length(expr->args) != 2)
777  return false;
778 
779  /* Make sure non-selected argument is a pseudoconstant. */
781  var = linitial(expr->args);
782  else if (is_pseudo_constant_clause(linitial(expr->args)))
783  var = lsecond(expr->args);
784  else
785  return false;
786 
787  /*
788  * If it's not an "=" operator, just ignore the clause, as it's not
789  * compatible with functional dependencies.
790  *
791  * This uses the function for estimating selectivity, not the operator
792  * directly (a bit awkward, but well ...).
793  *
794  * XXX this is pretty dubious; probably it'd be better to check btree
795  * or hash opclass membership, so as not to be fooled by custom
796  * selectivity functions, and to be more consistent with decisions
797  * elsewhere in the planner.
798  */
799  if (get_oprrest(expr->opno) != F_EQSEL)
800  return false;
801 
802  /* OK to proceed with checking "var" */
803  }
804  else if (is_notclause(rinfo->clause))
805  {
806  /*
807  * "NOT x" can be interpreted as "x = false", so get the argument and
808  * proceed with seeing if it's a suitable Var.
809  */
810  var = (Var *) get_notclausearg(rinfo->clause);
811  }
812  else
813  {
814  /*
815  * A boolean expression "x" can be interpreted as "x = true", so
816  * proceed with seeing if it's a suitable Var.
817  */
818  var = (Var *) rinfo->clause;
819  }
820 
821  /*
822  * We may ignore any RelabelType node above the operand. (There won't be
823  * more than one, since eval_const_expressions has been applied already.)
824  */
825  if (IsA(var, RelabelType))
826  var = (Var *) ((RelabelType *) var)->arg;
827 
828  /* We only support plain Vars for now */
829  if (!IsA(var, Var))
830  return false;
831 
832  /* Ensure Var is from the correct relation */
833  if (var->varno != relid)
834  return false;
835 
836  /* We also better ensure the Var is from the current level */
837  if (var->varlevelsup != 0)
838  return false;
839 
840  /* Also ignore system attributes (we don't allow stats on those) */
842  return false;
843 
844  *attnum = var->varattno;
845  return true;
846 }
847 
848 /*
849  * find_strongest_dependency
850  * find the strongest dependency on the attributes
851  *
852  * When applying functional dependencies, we start with the strongest
853  * dependencies. That is, we select the dependency that:
854  *
855  * (a) has all attributes covered by equality clauses
856  *
857  * (b) has the most attributes
858  *
859  * (c) has the highest degree of validity
860  *
861  * This guarantees that we eliminate the most redundant conditions first
862  * (see the comment in dependencies_clauselist_selectivity).
863  */
864 static MVDependency *
866  Bitmapset *attnums)
867 {
868  int i;
869  MVDependency *strongest = NULL;
870 
871  /* number of attnums in clauses */
872  int nattnums = bms_num_members(attnums);
873 
874  /*
875  * Iterate over the MVDependency items and find the strongest one from the
876  * fully-matched dependencies. We do the cheap checks first, before
877  * matching it against the attnums.
878  */
879  for (i = 0; i < dependencies->ndeps; i++)
880  {
881  MVDependency *dependency = dependencies->deps[i];
882 
883  /*
884  * Skip dependencies referencing more attributes than available
885  * clauses, as those can't be fully matched.
886  */
887  if (dependency->nattributes > nattnums)
888  continue;
889 
890  if (strongest)
891  {
892  /* skip dependencies on fewer attributes than the strongest. */
893  if (dependency->nattributes < strongest->nattributes)
894  continue;
895 
896  /* also skip weaker dependencies when attribute count matches */
897  if (strongest->nattributes == dependency->nattributes &&
898  strongest->degree > dependency->degree)
899  continue;
900  }
901 
902  /*
903  * this dependency is stronger, but we must still check that it's
904  * fully matched to these attnums. We perform this check last as it's
905  * slightly more expensive than the previous checks.
906  */
907  if (dependency_is_fully_matched(dependency, attnums))
908  strongest = dependency; /* save new best match */
909  }
910 
911  return strongest;
912 }
913 
914 /*
915  * dependencies_clauselist_selectivity
916  * Return the estimated selectivity of (a subset of) the given clauses
917  * using functional dependency statistics, or 1.0 if no useful functional
918  * dependency statistic exists.
919  *
920  * 'estimatedclauses' is an input/output argument that gets a bit set
921  * corresponding to the (zero-based) list index of each clause that is included
922  * in the estimated selectivity.
923  *
924  * Given equality clauses on attributes (a,b) we find the strongest dependency
925  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
926  * dependency, we then combine the per-clause selectivities using the formula
927  *
928  * P(a,b) = P(a) * [f + (1-f)*P(b)]
929  *
930  * where 'f' is the degree of the dependency.
931  *
932  * With clauses on more than two attributes, the dependencies are applied
933  * recursively, starting with the widest/strongest dependencies. For example
934  * P(a,b,c) is first split like this:
935  *
936  * P(a,b,c) = P(a,b) * [f + (1-f)*P(c)]
937  *
938  * assuming (a,b=>c) is the strongest dependency.
939  */
942  List *clauses,
943  int varRelid,
944  JoinType jointype,
945  SpecialJoinInfo *sjinfo,
946  RelOptInfo *rel,
947  Bitmapset **estimatedclauses)
948 {
949  Selectivity s1 = 1.0;
950  ListCell *l;
951  Bitmapset *clauses_attnums = NULL;
954  Bitmapset **list_attnums;
955  int listidx;
956 
957  /* check if there's any stats that might be useful for us. */
958  if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
959  return 1.0;
960 
961  list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
962  list_length(clauses));
963 
964  /*
965  * Pre-process the clauses list to extract the attnums seen in each item.
966  * We need to determine if there's any clauses which will be useful for
967  * dependency selectivity estimations. Along the way we'll record all of
968  * the attnums for each clause in a list which we'll reference later so we
969  * don't need to repeat the same work again. We'll also keep track of all
970  * attnums seen.
971  *
972  * We also skip clauses that we already estimated using different types of
973  * statistics (we treat them as incompatible).
974  */
975  listidx = 0;
976  foreach(l, clauses)
977  {
978  Node *clause = (Node *) lfirst(l);
980 
981  if (!bms_is_member(listidx, *estimatedclauses) &&
982  dependency_is_compatible_clause(clause, rel->relid, &attnum))
983  {
984  list_attnums[listidx] = bms_make_singleton(attnum);
985  clauses_attnums = bms_add_member(clauses_attnums, attnum);
986  }
987  else
988  list_attnums[listidx] = NULL;
989 
990  listidx++;
991  }
992 
993  /*
994  * If there's not at least two distinct attnums then reject the whole list
995  * of clauses. We must return 1.0 so the calling function's selectivity is
996  * unaffected.
997  */
998  if (bms_num_members(clauses_attnums) < 2)
999  {
1000  pfree(list_attnums);
1001  return 1.0;
1002  }
1003 
1004  /* find the best suited statistics object for these attnums */
1005  stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES,
1006  list_attnums, list_length(clauses));
1007 
1008  /* if no matching stats could be found then we've nothing to do */
1009  if (!stat)
1010  {
1011  pfree(list_attnums);
1012  return 1.0;
1013  }
1014 
1015  /* load the dependency items stored in the statistics object */
1016  dependencies = statext_dependencies_load(stat->statOid);
1017 
1018  /*
1019  * Apply the dependencies recursively, starting with the widest/strongest
1020  * ones, and proceeding to the smaller/weaker ones. At the end of each
1021  * round we factor in the selectivity of clauses on the implied attribute,
1022  * and remove the clauses from the list.
1023  */
1024  while (true)
1025  {
1026  Selectivity s2 = 1.0;
1027  MVDependency *dependency;
1028 
1029  /* the widest/strongest dependency, fully matched by clauses */
1030  dependency = find_strongest_dependency(stat, dependencies,
1031  clauses_attnums);
1032 
1033  /* if no suitable dependency was found, we're done */
1034  if (!dependency)
1035  break;
1036 
1037  /*
1038  * We found an applicable dependency, so find all the clauses on the
1039  * implied attribute - with dependency (a,b => c) we look for clauses
1040  * on 'c'.
1041  */
1042  listidx = -1;
1043  foreach(l, clauses)
1044  {
1045  Node *clause;
1047 
1048  listidx++;
1049 
1050  /*
1051  * Skip incompatible clauses, and ones we've already estimated on.
1052  */
1053  if (!list_attnums[listidx])
1054  continue;
1055 
1056  /*
1057  * We expect the bitmaps ton contain a single attribute number.
1058  */
1059  attnum = bms_singleton_member(list_attnums[listidx]);
1060 
1061  /*
1062  * Technically we could find more than one clause for a given
1063  * attnum. Since these clauses must be equality clauses, we choose
1064  * to only take the selectivity estimate from the final clause in
1065  * the list for this attnum. If the attnum happens to be compared
1066  * to a different Const in another clause then no rows will match
1067  * anyway. If it happens to be compared to the same Const, then
1068  * ignoring the additional clause is just the thing to do.
1069  */
1070  if (dependency_implies_attribute(dependency, attnum))
1071  {
1072  clause = (Node *) lfirst(l);
1073 
1074  s2 = clause_selectivity(root, clause, varRelid, jointype,
1075  sjinfo);
1076 
1077  /* mark this one as done, so we don't touch it again. */
1078  *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1079 
1080  /*
1081  * Mark that we've got and used the dependency on this clause.
1082  * We'll want to ignore this when looking for the next
1083  * strongest dependency above.
1084  */
1085  clauses_attnums = bms_del_member(clauses_attnums, attnum);
1086  }
1087  }
1088 
1089  /*
1090  * Now factor in the selectivity for all the "implied" clauses into
1091  * the final one, using this formula:
1092  *
1093  * P(a,b) = P(a) * (f + (1-f) * P(b))
1094  *
1095  * where 'f' is the degree of validity of the dependency.
1096  */
1097  s1 *= (dependency->degree + (1 - dependency->degree) * s2);
1098  }
1099 
1100  pfree(dependencies);
1101  pfree(list_attnums);
1102 
1103  return s1;
1104 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:53
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:61
#define STATS_DEPS_MAGIC
Definition: statistics.h:42
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
Definition: dependencies.c:941
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
List * statlist
Definition: pathnodes.h:679
MVDependencies * statext_dependencies_load(Oid mvoid)
Definition: dependencies.c:634
Index varlevelsup
Definition: primnodes.h:177
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:43
struct DependencyGeneratorData DependencyGeneratorData
static void output(uint64 loop_count)
#define VARHDRSZ
Definition: c.h:562
Relids clause_relids
Definition: pathnodes.h:1958
bool pseudoconstant
Definition: pathnodes.h:1951
SortItem * build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
#define SizeOfItem(natts)
Definition: dependencies.c:40
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
Definition: nodes.h:525
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int errcode(int sqlerrcode)
Definition: elog.c:608
AttrNumber varattno
Definition: primnodes.h:172
#define DatumGetByteaPP(X)
Definition: fmgr.h:285
Datum pg_dependencies_out(PG_FUNCTION_ARGS)
Definition: dependencies.c:683
Datum pg_dependencies_recv(PG_FUNCTION_ARGS)
Definition: dependencies.c:723
double Selectivity
Definition: nodes.h:658
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:167
#define lsecond(l)
Definition: pg_list.h:200
AttrNumber nattributes
Definition: statistics.h:52
JoinType
Definition: nodes.h:692
Definition: type.h:89
void pfree(void *pointer)
Definition: mcxt.c:1056
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
Oid attrtypid
Definition: vacuum.h:86
#define linitial(l)
Definition: pg_list.h:195
#define ObjectIdGetDatum(X)
Definition: postgres.h:507
#define ERROR
Definition: elog.h:43
char * s1
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:167
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:597
AttrNumber * dependencies
Definition: dependencies.c:61
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
Bitmapset * bms_make_singleton(int x)
Definition: bitmapset.c:186
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
AttrNumber * build_attnums_array(Bitmapset *attrs, int *numattrs)
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:600
unsigned int uint32
Definition: c.h:359
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2089
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:64
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:85
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:151
static double dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs)
Definition: dependencies.c:216
#define ereport(elevel, rest)
Definition: elog.h:141
MultiSortSupport multi_sort_init(int ndims)
Index relid
Definition: pathnodes.h:669
Expr * clause
Definition: pathnodes.h:1943
static bool dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum)
Definition: dependencies.c:621
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:464
Index varno
Definition: primnodes.h:170
#define stat(a, b)
Definition: win32_port.h:255
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1116
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:112
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1359
MVDependencies * statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs, VacAttrStats **stats)
Definition: dependencies.c:357
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void * palloc0(Size size)
Definition: mcxt.c:980
char * s2
uintptr_t Datum
Definition: postgres.h:367
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1164
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:577
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1377
unsigned int Index
Definition: c.h:476
uint32 magic
Definition: statistics.h:58
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define InvalidOid
Definition: postgres_ext.h:36
static MVDependency * find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, Bitmapset *attnums)
Definition: dependencies.c:865
int16 attnum
Definition: pg_attribute.h:79
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:121
#define PG_RETURN_VOID()
Definition: fmgr.h:339
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
uint32 ndeps
Definition: statistics.h:60
#define Assert(condition)
Definition: c.h:739
#define lfirst(lc)
Definition: pg_list.h:190
double degree
Definition: statistics.h:51
Definition: regguts.h:298
Datum pg_dependencies_in(PG_FUNCTION_ARGS)
Definition: dependencies.c:666
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:352
size_t Size
Definition: c.h:467
static int list_length(const List *l)
Definition: pg_list.h:169
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:302
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
#define SizeOfHeader
Definition: dependencies.c:37
Oid attrcollid
Definition: vacuum.h:89
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:199
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:446
#define elog(elevel,...)
Definition: elog.h:228
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:129
Definition: c.h:556
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
Oid opno
Definition: primnodes.h:502
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:63
List * args
Definition: primnodes.h:508
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:773
Definition: pg_list.h:50
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:190
Datum pg_dependencies_send(PG_FUNCTION_ARGS)
Definition: dependencies.c:739
bool has_stats_of_kind(List *stats, char requiredkind)
int16 AttrNumber
Definition: attnum.h:21
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:501
#define offsetof(type, field)
Definition: c.h:662
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:754
StatisticExtInfo * choose_best_statistics(List *stats, char requiredkind, Bitmapset **clause_attnums, int nclauses)