PostgreSQL Source Code  git master
dependencies.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dependencies.c
4  * POSTGRES functional dependencies
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/statistics/dependencies.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/htup_details.h"
17 #include "access/sysattr.h"
18 #include "catalog/pg_operator.h"
21 #include "lib/stringinfo.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/nodes.h"
24 #include "nodes/pathnodes.h"
25 #include "optimizer/clauses.h"
26 #include "optimizer/optimizer.h"
27 #include "parser/parsetree.h"
29 #include "statistics/statistics.h"
30 #include "utils/bytea.h"
31 #include "utils/fmgroids.h"
32 #include "utils/fmgrprotos.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
38 
39 /* size of the struct header fields (magic, type, ndeps) */
40 #define SizeOfHeader (3 * sizeof(uint32))
41 
42 /* size of a serialized dependency (degree, natts, atts) */
43 #define SizeOfItem(natts) \
44  (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
45 
46 /* minimal size of a dependency (with two attributes) */
47 #define MinSizeOfItem SizeOfItem(2)
48 
49 /* minimal size of dependencies, when all deps are minimal */
50 #define MinSizeOfItems(ndeps) \
51  (SizeOfHeader + (ndeps) * MinSizeOfItem)
52 
53 /*
54  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
55  * k-permutations of n elements, except that the order does not matter for the
56  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
57  */
59 {
60  int k; /* size of the dependency */
61  int n; /* number of possible attributes */
62  int current; /* next dependency to return (index) */
63  AttrNumber ndependencies; /* number of dependencies generated */
64  AttrNumber *dependencies; /* array of pre-generated dependencies */
66 
68 
70  int index, AttrNumber start, AttrNumber *current);
75 static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
76 static bool dependency_is_fully_matched(MVDependency *dependency,
77  Bitmapset *attnums);
78 static bool dependency_is_compatible_clause(Node *clause, Index relid,
80 static bool dependency_is_compatible_expression(Node *clause, Index relid,
81  List *statlist, Node **expr);
83  int ndependencies, Bitmapset *attnums);
85  int varRelid, JoinType jointype,
86  SpecialJoinInfo *sjinfo,
87  MVDependency **dependencies,
88  int ndependencies,
89  AttrNumber *list_attnums,
90  Bitmapset **estimatedclauses);
91 
92 static void
94  AttrNumber start, AttrNumber *current)
95 {
96  /*
97  * The generator handles the first (k-1) elements differently from the
98  * last element.
99  */
100  if (index < (state->k - 1))
101  {
102  AttrNumber i;
103 
104  /*
105  * The first (k-1) values have to be in ascending order, which we
106  * generate recursively.
107  */
108 
109  for (i = start; i < state->n; i++)
110  {
111  current[index] = i;
112  generate_dependencies_recurse(state, (index + 1), (i + 1), current);
113  }
114  }
115  else
116  {
117  int i;
118 
119  /*
120  * the last element is the implied value, which does not respect the
121  * ascending order. We just need to check that the value is not in the
122  * first (k-1) elements.
123  */
124 
125  for (i = 0; i < state->n; i++)
126  {
127  int j;
128  bool match = false;
129 
130  current[index] = i;
131 
132  for (j = 0; j < index; j++)
133  {
134  if (current[j] == i)
135  {
136  match = true;
137  break;
138  }
139  }
140 
141  /*
142  * If the value is not found in the first part of the dependency,
143  * we're done.
144  */
145  if (!match)
146  {
147  state->dependencies = (AttrNumber *) repalloc(state->dependencies,
148  state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
149  memcpy(&state->dependencies[(state->k * state->ndependencies)],
150  current, state->k * sizeof(AttrNumber));
151  state->ndependencies++;
152  }
153  }
154  }
155 }
156 
157 /* generate all dependencies (k-permutations of n elements) */
158 static void
160 {
161  AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
162 
163  generate_dependencies_recurse(state, 0, 0, current);
164 
165  pfree(current);
166 }
167 
168 /*
169  * initialize the DependencyGenerator of variations, and prebuild the variations
170  *
171  * This pre-builds all the variations. We could also generate them in
172  * DependencyGenerator_next(), but this seems simpler.
173  */
174 static DependencyGenerator
176 {
178 
179  Assert((n >= k) && (k > 0));
180 
181  /* allocate the DependencyGenerator state */
183  state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
184 
185  state->ndependencies = 0;
186  state->current = 0;
187  state->k = k;
188  state->n = n;
189 
190  /* now actually pre-generate all the variations */
192 
193  return state;
194 }
195 
196 /* free the DependencyGenerator state */
197 static void
199 {
200  pfree(state->dependencies);
201  pfree(state);
202 
203 }
204 
205 /* generate next combination */
206 static AttrNumber *
208 {
209  if (state->current == state->ndependencies)
210  return NULL;
211 
212  return &state->dependencies[state->k * state->current++];
213 }
214 
215 
216 /*
217  * validates functional dependency on the data
218  *
219  * An actual work horse of detecting functional dependencies. Given a variation
220  * of k attributes, it checks that the first (k-1) are sufficient to determine
221  * the last one.
222  */
223 static double
225 {
226  int i,
227  nitems;
228  MultiSortSupport mss;
229  SortItem *items;
230  AttrNumber *attnums_dep;
231 
232  /* counters valid within a group */
233  int group_size = 0;
234  int n_violations = 0;
235 
236  /* total number of rows supporting (consistent with) the dependency */
237  int n_supporting_rows = 0;
238 
239  /* Make sure we have at least two input attributes. */
240  Assert(k >= 2);
241 
242  /* sort info for all attributes columns */
243  mss = multi_sort_init(k);
244 
245  /*
246  * Translate the array of indexes to regular attnums for the dependency
247  * (we will need this to identify the columns in StatsBuildData).
248  */
249  attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
250  for (i = 0; i < k; i++)
251  attnums_dep[i] = data->attnums[dependency[i]];
252 
253  /*
254  * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
255  *
256  * (a) sort the data lexicographically
257  *
258  * (b) split the data into groups by first (k-1) columns
259  *
260  * (c) for each group count different values in the last column
261  *
262  * We use the column data types' default sort operators and collations;
263  * perhaps at some point it'd be worth using column-specific collations?
264  */
265 
266  /* prepare the sort function for the dimensions */
267  for (i = 0; i < k; i++)
268  {
269  VacAttrStats *colstat = data->stats[dependency[i]];
271 
273  if (type->lt_opr == InvalidOid) /* shouldn't happen */
274  elog(ERROR, "cache lookup failed for ordering operator for type %u",
275  colstat->attrtypid);
276 
277  /* prepare the sort function for this dimension */
278  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
279  }
280 
281  /*
282  * build an array of SortItem(s) sorted using the multi-sort support
283  *
284  * XXX This relies on all stats entries pointing to the same tuple
285  * descriptor. For now that assumption holds, but it might change in the
286  * future for example if we support statistics on multiple tables.
287  */
288  items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
289 
290  /*
291  * Walk through the sorted array, split it into rows according to the
292  * first (k-1) columns. If there's a single value in the last column, we
293  * count the group as 'supporting' the functional dependency. Otherwise we
294  * count it as contradicting.
295  */
296 
297  /* start with the first row forming a group */
298  group_size = 1;
299 
300  /* loop 1 beyond the end of the array so that we count the final group */
301  for (i = 1; i <= nitems; i++)
302  {
303  /*
304  * Check if the group ended, which may be either because we processed
305  * all the items (i==nitems), or because the i-th item is not equal to
306  * the preceding one.
307  */
308  if (i == nitems ||
309  multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
310  {
311  /*
312  * If no violations were found in the group then track the rows of
313  * the group as supporting the functional dependency.
314  */
315  if (n_violations == 0)
316  n_supporting_rows += group_size;
317 
318  /* Reset counters for the new group */
319  n_violations = 0;
320  group_size = 1;
321  continue;
322  }
323  /* first columns match, but the last one does not (so contradicting) */
324  else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
325  n_violations++;
326 
327  group_size++;
328  }
329 
330  /* Compute the 'degree of validity' as (supporting/total). */
331  return (n_supporting_rows * 1.0 / data->numrows);
332 }
333 
334 /*
335  * detects functional dependencies between groups of columns
336  *
337  * Generates all possible subsets of columns (variations) and computes
338  * the degree of validity for each one. For example when creating statistics
339  * on three columns (a,b,c) there are 9 possible dependencies
340  *
341  * two columns three columns
342  * ----------- -------------
343  * (a) -> b (a,b) -> c
344  * (a) -> c (a,c) -> b
345  * (b) -> a (b,c) -> a
346  * (b) -> c
347  * (c) -> a
348  * (c) -> b
349  */
352 {
353  int i,
354  k;
355 
356  /* result */
357  MVDependencies *dependencies = NULL;
358  MemoryContext cxt;
359 
360  Assert(data->nattnums >= 2);
361 
362  /* tracks memory allocated by dependency_degree calls */
364  "dependency_degree cxt",
366 
367  /*
368  * We'll try build functional dependencies starting from the smallest ones
369  * covering just 2 columns, to the largest ones, covering all columns
370  * included in the statistics object. We start from the smallest ones
371  * because we want to be able to skip already implied ones.
372  */
373  for (k = 2; k <= data->nattnums; k++)
374  {
375  AttrNumber *dependency; /* array with k elements */
376 
377  /* prepare a DependencyGenerator of variation */
379 
380  /* generate all possible variations of k values (out of n) */
381  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
382  {
383  double degree;
384  MVDependency *d;
385  MemoryContext oldcxt;
386 
387  /* release memory used by dependency degree calculation */
388  oldcxt = MemoryContextSwitchTo(cxt);
389 
390  /* compute how valid the dependency seems */
391  degree = dependency_degree(data, k, dependency);
392 
393  MemoryContextSwitchTo(oldcxt);
394  MemoryContextReset(cxt);
395 
396  /*
397  * if the dependency seems entirely invalid, don't store it
398  */
399  if (degree == 0.0)
400  continue;
401 
402  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
403  + k * sizeof(AttrNumber));
404 
405  /* copy the dependency (and keep the indexes into stxkeys) */
406  d->degree = degree;
407  d->nattributes = k;
408  for (i = 0; i < k; i++)
409  d->attributes[i] = data->attnums[dependency[i]];
410 
411  /* initialize the list of dependencies */
412  if (dependencies == NULL)
413  {
414  dependencies
415  = (MVDependencies *) palloc0(sizeof(MVDependencies));
416 
417  dependencies->magic = STATS_DEPS_MAGIC;
418  dependencies->type = STATS_DEPS_TYPE_BASIC;
419  dependencies->ndeps = 0;
420  }
421 
422  dependencies->ndeps++;
423  dependencies = (MVDependencies *) repalloc(dependencies,
424  offsetof(MVDependencies, deps)
425  + dependencies->ndeps * sizeof(MVDependency *));
426 
427  dependencies->deps[dependencies->ndeps - 1] = d;
428  }
429 
430  /*
431  * we're done with variations of k elements, so free the
432  * DependencyGenerator
433  */
435  }
436 
437  MemoryContextDelete(cxt);
438 
439  return dependencies;
440 }
441 
442 
443 /*
444  * Serialize list of dependencies into a bytea value.
445  */
446 bytea *
448 {
449  int i;
450  bytea *output;
451  char *tmp;
452  Size len;
453 
454  /* we need to store ndeps, with a number of attributes for each one */
456 
457  /* and also include space for the actual attribute numbers and degrees */
458  for (i = 0; i < dependencies->ndeps; i++)
459  len += SizeOfItem(dependencies->deps[i]->nattributes);
460 
461  output = (bytea *) palloc0(len);
463 
464  tmp = VARDATA(output);
465 
466  /* Store the base struct values (magic, type, ndeps) */
467  memcpy(tmp, &dependencies->magic, sizeof(uint32));
468  tmp += sizeof(uint32);
469  memcpy(tmp, &dependencies->type, sizeof(uint32));
470  tmp += sizeof(uint32);
471  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
472  tmp += sizeof(uint32);
473 
474  /* store number of attributes and attribute numbers for each dependency */
475  for (i = 0; i < dependencies->ndeps; i++)
476  {
477  MVDependency *d = dependencies->deps[i];
478 
479  memcpy(tmp, &d->degree, sizeof(double));
480  tmp += sizeof(double);
481 
482  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
483  tmp += sizeof(AttrNumber);
484 
485  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
486  tmp += sizeof(AttrNumber) * d->nattributes;
487 
488  /* protect against overflow */
489  Assert(tmp <= ((char *) output + len));
490  }
491 
492  /* make sure we've produced exactly the right amount of data */
493  Assert(tmp == ((char *) output + len));
494 
495  return output;
496 }
497 
498 /*
499  * Reads serialized dependencies into MVDependencies structure.
500  */
503 {
504  int i;
505  Size min_expected_size;
506  MVDependencies *dependencies;
507  char *tmp;
508 
509  if (data == NULL)
510  return NULL;
511 
513  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
515 
516  /* read the MVDependencies header */
517  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
518 
519  /* initialize pointer to the data part (skip the varlena header) */
520  tmp = VARDATA_ANY(data);
521 
522  /* read the header fields and perform basic sanity checks */
523  memcpy(&dependencies->magic, tmp, sizeof(uint32));
524  tmp += sizeof(uint32);
525  memcpy(&dependencies->type, tmp, sizeof(uint32));
526  tmp += sizeof(uint32);
527  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
528  tmp += sizeof(uint32);
529 
530  if (dependencies->magic != STATS_DEPS_MAGIC)
531  elog(ERROR, "invalid dependency magic %d (expected %d)",
532  dependencies->magic, STATS_DEPS_MAGIC);
533 
534  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
535  elog(ERROR, "invalid dependency type %d (expected %d)",
536  dependencies->type, STATS_DEPS_TYPE_BASIC);
537 
538  if (dependencies->ndeps == 0)
539  elog(ERROR, "invalid zero-length item array in MVDependencies");
540 
541  /* what minimum bytea size do we expect for those parameters */
542  min_expected_size = SizeOfItem(dependencies->ndeps);
543 
544  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
545  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
546  VARSIZE_ANY_EXHDR(data), min_expected_size);
547 
548  /* allocate space for the MCV items */
549  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
550  + (dependencies->ndeps * sizeof(MVDependency *)));
551 
552  for (i = 0; i < dependencies->ndeps; i++)
553  {
554  double degree;
555  AttrNumber k;
556  MVDependency *d;
557 
558  /* degree of validity */
559  memcpy(&degree, tmp, sizeof(double));
560  tmp += sizeof(double);
561 
562  /* number of attributes */
563  memcpy(&k, tmp, sizeof(AttrNumber));
564  tmp += sizeof(AttrNumber);
565 
566  /* is the number of attributes valid? */
567  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
568 
569  /* now that we know the number of attributes, allocate the dependency */
570  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
571  + (k * sizeof(AttrNumber)));
572 
573  d->degree = degree;
574  d->nattributes = k;
575 
576  /* copy attribute numbers */
577  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
578  tmp += sizeof(AttrNumber) * d->nattributes;
579 
580  dependencies->deps[i] = d;
581 
582  /* still within the bytea */
583  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
584  }
585 
586  /* we should have consumed the whole bytea exactly */
587  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
588 
589  return dependencies;
590 }
591 
592 /*
593  * dependency_is_fully_matched
594  * checks that a functional dependency is fully matched given clauses on
595  * attributes (assuming the clauses are suitable equality clauses)
596  */
597 static bool
599 {
600  int j;
601 
602  /*
603  * Check that the dependency actually is fully covered by clauses. We have
604  * to translate all attribute numbers, as those are referenced
605  */
606  for (j = 0; j < dependency->nattributes; j++)
607  {
608  int attnum = dependency->attributes[j];
609 
610  if (!bms_is_member(attnum, attnums))
611  return false;
612  }
613 
614  return true;
615 }
616 
617 /*
618  * statext_dependencies_load
619  * Load the functional dependencies for the indicated pg_statistic_ext tuple
620  */
623 {
624  MVDependencies *result;
625  bool isnull;
626  Datum deps;
627  HeapTuple htup;
628 
630  ObjectIdGetDatum(mvoid),
631  BoolGetDatum(inh));
632  if (!HeapTupleIsValid(htup))
633  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
634 
635  deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
636  Anum_pg_statistic_ext_data_stxddependencies, &isnull);
637  if (isnull)
638  elog(ERROR,
639  "requested statistics kind \"%c\" is not yet built for statistics object %u",
640  STATS_EXT_DEPENDENCIES, mvoid);
641 
643 
644  ReleaseSysCache(htup);
645 
646  return result;
647 }
648 
649 /*
650  * pg_dependencies_in - input routine for type pg_dependencies.
651  *
652  * pg_dependencies is real enough to be a table column, but it has no operations
653  * of its own, and disallows input too
654  */
655 Datum
657 {
658  /*
659  * pg_node_list stores the data in binary form and parsing text input is
660  * not needed, so disallow this.
661  */
662  ereport(ERROR,
663  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
664  errmsg("cannot accept a value of type %s", "pg_dependencies")));
665 
666  PG_RETURN_VOID(); /* keep compiler quiet */
667 }
668 
669 /*
670  * pg_dependencies - output routine for type pg_dependencies.
671  */
672 Datum
674 {
677  int i,
678  j;
680 
682  appendStringInfoChar(&str, '{');
683 
684  for (i = 0; i < dependencies->ndeps; i++)
685  {
686  MVDependency *dependency = dependencies->deps[i];
687 
688  if (i > 0)
689  appendStringInfoString(&str, ", ");
690 
691  appendStringInfoChar(&str, '"');
692  for (j = 0; j < dependency->nattributes; j++)
693  {
694  if (j == dependency->nattributes - 1)
695  appendStringInfoString(&str, " => ");
696  else if (j > 0)
697  appendStringInfoString(&str, ", ");
698 
699  appendStringInfo(&str, "%d", dependency->attributes[j]);
700  }
701  appendStringInfo(&str, "\": %f", dependency->degree);
702  }
703 
704  appendStringInfoChar(&str, '}');
705 
706  PG_RETURN_CSTRING(str.data);
707 }
708 
709 /*
710  * pg_dependencies_recv - binary input routine for type pg_dependencies.
711  */
712 Datum
714 {
715  ereport(ERROR,
716  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
717  errmsg("cannot accept a value of type %s", "pg_dependencies")));
718 
719  PG_RETURN_VOID(); /* keep compiler quiet */
720 }
721 
722 /*
723  * pg_dependencies_send - binary output routine for type pg_dependencies.
724  *
725  * Functional dependencies are serialized in a bytea value (although the type
726  * is named differently), so let's just send that.
727  */
728 Datum
730 {
731  return byteasend(fcinfo);
732 }
733 
734 /*
735  * dependency_is_compatible_clause
736  * Determines if the clause is compatible with functional dependencies
737  *
738  * Only clauses that have the form of equality to a pseudoconstant, or can be
739  * interpreted that way, are currently accepted. Furthermore the variable
740  * part of the clause must be a simple Var belonging to the specified
741  * relation, whose attribute number we return in *attnum on success.
742  */
743 static bool
745 {
746  Var *var;
747  Node *clause_expr;
748 
749  if (IsA(clause, RestrictInfo))
750  {
751  RestrictInfo *rinfo = (RestrictInfo *) clause;
752 
753  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
754  if (rinfo->pseudoconstant)
755  return false;
756 
757  /* Clauses referencing multiple, or no, varnos are incompatible */
759  return false;
760 
761  clause = (Node *) rinfo->clause;
762  }
763 
764  if (is_opclause(clause))
765  {
766  /* If it's an opclause, check for Var = Const or Const = Var. */
767  OpExpr *expr = (OpExpr *) clause;
768 
769  /* Only expressions with two arguments are candidates. */
770  if (list_length(expr->args) != 2)
771  return false;
772 
773  /* Make sure non-selected argument is a pseudoconstant. */
775  clause_expr = linitial(expr->args);
776  else if (is_pseudo_constant_clause(linitial(expr->args)))
777  clause_expr = lsecond(expr->args);
778  else
779  return false;
780 
781  /*
782  * If it's not an "=" operator, just ignore the clause, as it's not
783  * compatible with functional dependencies.
784  *
785  * This uses the function for estimating selectivity, not the operator
786  * directly (a bit awkward, but well ...).
787  *
788  * XXX this is pretty dubious; probably it'd be better to check btree
789  * or hash opclass membership, so as not to be fooled by custom
790  * selectivity functions, and to be more consistent with decisions
791  * elsewhere in the planner.
792  */
793  if (get_oprrest(expr->opno) != F_EQSEL)
794  return false;
795 
796  /* OK to proceed with checking "var" */
797  }
798  else if (IsA(clause, ScalarArrayOpExpr))
799  {
800  /* If it's an scalar array operator, check for Var IN Const. */
801  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
802 
803  /*
804  * Reject ALL() variant, we only care about ANY/IN.
805  *
806  * XXX Maybe we should check if all the values are the same, and allow
807  * ALL in that case? Doesn't seem very practical, though.
808  */
809  if (!expr->useOr)
810  return false;
811 
812  /* Only expressions with two arguments are candidates. */
813  if (list_length(expr->args) != 2)
814  return false;
815 
816  /*
817  * We know it's always (Var IN Const), so we assume the var is the
818  * first argument, and pseudoconstant is the second one.
819  */
821  return false;
822 
823  clause_expr = linitial(expr->args);
824 
825  /*
826  * If it's not an "=" operator, just ignore the clause, as it's not
827  * compatible with functional dependencies. The operator is identified
828  * simply by looking at which function it uses to estimate
829  * selectivity. That's a bit strange, but it's what other similar
830  * places do.
831  */
832  if (get_oprrest(expr->opno) != F_EQSEL)
833  return false;
834 
835  /* OK to proceed with checking "var" */
836  }
837  else if (is_orclause(clause))
838  {
839  BoolExpr *bool_expr = (BoolExpr *) clause;
840  ListCell *lc;
841 
842  /* start with no attribute number */
844 
845  foreach(lc, bool_expr->args)
846  {
847  AttrNumber clause_attnum;
848 
849  /*
850  * Had we found incompatible clause in the arguments, treat the
851  * whole clause as incompatible.
852  */
854  relid, &clause_attnum))
855  return false;
856 
857  if (*attnum == InvalidAttrNumber)
858  *attnum = clause_attnum;
859 
860  /* ensure all the variables are the same (same attnum) */
861  if (*attnum != clause_attnum)
862  return false;
863  }
864 
865  /* the Var is already checked by the recursive call */
866  return true;
867  }
868  else if (is_notclause(clause))
869  {
870  /*
871  * "NOT x" can be interpreted as "x = false", so get the argument and
872  * proceed with seeing if it's a suitable Var.
873  */
874  clause_expr = (Node *) get_notclausearg(clause);
875  }
876  else
877  {
878  /*
879  * A boolean expression "x" can be interpreted as "x = true", so
880  * proceed with seeing if it's a suitable Var.
881  */
882  clause_expr = (Node *) clause;
883  }
884 
885  /*
886  * We may ignore any RelabelType node above the operand. (There won't be
887  * more than one, since eval_const_expressions has been applied already.)
888  */
889  if (IsA(clause_expr, RelabelType))
890  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
891 
892  /* We only support plain Vars for now */
893  if (!IsA(clause_expr, Var))
894  return false;
895 
896  /* OK, we know we have a Var */
897  var = (Var *) clause_expr;
898 
899  /* Ensure Var is from the correct relation */
900  if (var->varno != relid)
901  return false;
902 
903  /* We also better ensure the Var is from the current level */
904  if (var->varlevelsup != 0)
905  return false;
906 
907  /* Also ignore system attributes (we don't allow stats on those) */
909  return false;
910 
911  *attnum = var->varattno;
912  return true;
913 }
914 
915 /*
916  * find_strongest_dependency
917  * find the strongest dependency on the attributes
918  *
919  * When applying functional dependencies, we start with the strongest
920  * dependencies. That is, we select the dependency that:
921  *
922  * (a) has all attributes covered by equality clauses
923  *
924  * (b) has the most attributes
925  *
926  * (c) has the highest degree of validity
927  *
928  * This guarantees that we eliminate the most redundant conditions first
929  * (see the comment in dependencies_clauselist_selectivity).
930  */
931 static MVDependency *
932 find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
933  Bitmapset *attnums)
934 {
935  int i,
936  j;
937  MVDependency *strongest = NULL;
938 
939  /* number of attnums in clauses */
940  int nattnums = bms_num_members(attnums);
941 
942  /*
943  * Iterate over the MVDependency items and find the strongest one from the
944  * fully-matched dependencies. We do the cheap checks first, before
945  * matching it against the attnums.
946  */
947  for (i = 0; i < ndependencies; i++)
948  {
949  for (j = 0; j < dependencies[i]->ndeps; j++)
950  {
951  MVDependency *dependency = dependencies[i]->deps[j];
952 
953  /*
954  * Skip dependencies referencing more attributes than available
955  * clauses, as those can't be fully matched.
956  */
957  if (dependency->nattributes > nattnums)
958  continue;
959 
960  if (strongest)
961  {
962  /* skip dependencies on fewer attributes than the strongest. */
963  if (dependency->nattributes < strongest->nattributes)
964  continue;
965 
966  /* also skip weaker dependencies when attribute count matches */
967  if (strongest->nattributes == dependency->nattributes &&
968  strongest->degree > dependency->degree)
969  continue;
970  }
971 
972  /*
973  * this dependency is stronger, but we must still check that it's
974  * fully matched to these attnums. We perform this check last as
975  * it's slightly more expensive than the previous checks.
976  */
977  if (dependency_is_fully_matched(dependency, attnums))
978  strongest = dependency; /* save new best match */
979  }
980  }
981 
982  return strongest;
983 }
984 
985 /*
986  * clauselist_apply_dependencies
987  * Apply the specified functional dependencies to a list of clauses and
988  * return the estimated selectivity of the clauses that are compatible
989  * with any of the given dependencies.
990  *
991  * This will estimate all not-already-estimated clauses that are compatible
992  * with functional dependencies, and which have an attribute mentioned by any
993  * of the given dependencies (either as an implying or implied attribute).
994  *
995  * Given (lists of) clauses on attributes (a,b) and a functional dependency
996  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
997  * using the formula
998  *
999  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1000  *
1001  * where 'f' is the degree of dependency. This reflects the fact that we
1002  * expect a fraction f of all rows to be consistent with the dependency
1003  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1004  * treated as independent.
1005  *
1006  * In practice, we use a slightly modified version of this formula, which uses
1007  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1008  * should obviously not exceed either column's individual selectivity. I.e.,
1009  * we actually combine selectivities using the formula
1010  *
1011  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1012  *
1013  * This can make quite a difference if the specific values matching the
1014  * clauses are not consistent with the functional dependency.
1015  */
1016 static Selectivity
1018  int varRelid, JoinType jointype,
1019  SpecialJoinInfo *sjinfo,
1020  MVDependency **dependencies, int ndependencies,
1021  AttrNumber *list_attnums,
1022  Bitmapset **estimatedclauses)
1023 {
1024  Bitmapset *attnums;
1025  int i;
1026  int j;
1027  int nattrs;
1028  Selectivity *attr_sel;
1029  int attidx;
1030  int listidx;
1031  ListCell *l;
1032  Selectivity s1;
1033 
1034  /*
1035  * Extract the attnums of all implying and implied attributes from all the
1036  * given dependencies. Each of these attributes is expected to have at
1037  * least 1 not-already-estimated compatible clause that we will estimate
1038  * here.
1039  */
1040  attnums = NULL;
1041  for (i = 0; i < ndependencies; i++)
1042  {
1043  for (j = 0; j < dependencies[i]->nattributes; j++)
1044  {
1045  AttrNumber attnum = dependencies[i]->attributes[j];
1046 
1047  attnums = bms_add_member(attnums, attnum);
1048  }
1049  }
1050 
1051  /*
1052  * Compute per-column selectivity estimates for each of these attributes,
1053  * and mark all the corresponding clauses as estimated.
1054  */
1055  nattrs = bms_num_members(attnums);
1056  attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1057 
1058  attidx = 0;
1059  i = -1;
1060  while ((i = bms_next_member(attnums, i)) >= 0)
1061  {
1062  List *attr_clauses = NIL;
1063  Selectivity simple_sel;
1064 
1065  listidx = -1;
1066  foreach(l, clauses)
1067  {
1068  Node *clause = (Node *) lfirst(l);
1069 
1070  listidx++;
1071  if (list_attnums[listidx] == i)
1072  {
1073  attr_clauses = lappend(attr_clauses, clause);
1074  *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1075  }
1076  }
1077 
1078  simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1079  jointype, sjinfo, false);
1080  attr_sel[attidx++] = simple_sel;
1081  }
1082 
1083  /*
1084  * Now combine these selectivities using the dependency information. For
1085  * chains of dependencies such as a -> b -> c, the b -> c dependency will
1086  * come before the a -> b dependency in the array, so we traverse the
1087  * array backwards to ensure such chains are computed in the right order.
1088  *
1089  * As explained above, pairs of selectivities are combined using the
1090  * formula
1091  *
1092  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1093  *
1094  * to ensure that the combined selectivity is never greater than either
1095  * individual selectivity.
1096  *
1097  * Where multiple dependencies apply (e.g., a -> b -> c), we use
1098  * conditional probabilities to compute the overall result as follows:
1099  *
1100  * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1101  *
1102  * so we replace the selectivities of all implied attributes with
1103  * conditional probabilities, that are conditional on all their implying
1104  * attributes. The selectivities of all other non-implied attributes are
1105  * left as they are.
1106  */
1107  for (i = ndependencies - 1; i >= 0; i--)
1108  {
1109  MVDependency *dependency = dependencies[i];
1111  Selectivity s2;
1112  double f;
1113 
1114  /* Selectivity of all the implying attributes */
1115  s1 = 1.0;
1116  for (j = 0; j < dependency->nattributes - 1; j++)
1117  {
1118  attnum = dependency->attributes[j];
1119  attidx = bms_member_index(attnums, attnum);
1120  s1 *= attr_sel[attidx];
1121  }
1122 
1123  /* Original selectivity of the implied attribute */
1124  attnum = dependency->attributes[j];
1125  attidx = bms_member_index(attnums, attnum);
1126  s2 = attr_sel[attidx];
1127 
1128  /*
1129  * Replace s2 with the conditional probability s2 given s1, computed
1130  * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1131  *
1132  * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1133  *
1134  * where P(a) = s1, the selectivity of the implying attributes, and
1135  * P(b) = s2, the selectivity of the implied attribute.
1136  */
1137  f = dependency->degree;
1138 
1139  if (s1 <= s2)
1140  attr_sel[attidx] = f + (1 - f) * s2;
1141  else
1142  attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1143  }
1144 
1145  /*
1146  * The overall selectivity of all the clauses on all these attributes is
1147  * then the product of all the original (non-implied) probabilities and
1148  * the new conditional (implied) probabilities.
1149  */
1150  s1 = 1.0;
1151  for (i = 0; i < nattrs; i++)
1152  s1 *= attr_sel[i];
1153 
1155 
1156  pfree(attr_sel);
1157  bms_free(attnums);
1158 
1159  return s1;
1160 }
1161 
1162 /*
1163  * dependency_is_compatible_expression
1164  * Determines if the expression is compatible with functional dependencies
1165  *
1166  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1167  * expression is a simple Var. OTOH we check that there's at least one
1168  * statistics object matching the expression.
1169  */
1170 static bool
1171 dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1172 {
1173  List *vars;
1174  ListCell *lc,
1175  *lc2;
1176  Node *clause_expr;
1177 
1178  if (IsA(clause, RestrictInfo))
1179  {
1180  RestrictInfo *rinfo = (RestrictInfo *) clause;
1181 
1182  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1183  if (rinfo->pseudoconstant)
1184  return false;
1185 
1186  /* Clauses referencing multiple, or no, varnos are incompatible */
1188  return false;
1189 
1190  clause = (Node *) rinfo->clause;
1191  }
1192 
1193  if (is_opclause(clause))
1194  {
1195  /* If it's an opclause, check for Var = Const or Const = Var. */
1196  OpExpr *expr = (OpExpr *) clause;
1197 
1198  /* Only expressions with two arguments are candidates. */
1199  if (list_length(expr->args) != 2)
1200  return false;
1201 
1202  /* Make sure non-selected argument is a pseudoconstant. */
1204  clause_expr = linitial(expr->args);
1205  else if (is_pseudo_constant_clause(linitial(expr->args)))
1206  clause_expr = lsecond(expr->args);
1207  else
1208  return false;
1209 
1210  /*
1211  * If it's not an "=" operator, just ignore the clause, as it's not
1212  * compatible with functional dependencies.
1213  *
1214  * This uses the function for estimating selectivity, not the operator
1215  * directly (a bit awkward, but well ...).
1216  *
1217  * XXX this is pretty dubious; probably it'd be better to check btree
1218  * or hash opclass membership, so as not to be fooled by custom
1219  * selectivity functions, and to be more consistent with decisions
1220  * elsewhere in the planner.
1221  */
1222  if (get_oprrest(expr->opno) != F_EQSEL)
1223  return false;
1224 
1225  /* OK to proceed with checking "var" */
1226  }
1227  else if (IsA(clause, ScalarArrayOpExpr))
1228  {
1229  /* If it's an scalar array operator, check for Var IN Const. */
1230  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1231 
1232  /*
1233  * Reject ALL() variant, we only care about ANY/IN.
1234  *
1235  * FIXME Maybe we should check if all the values are the same, and
1236  * allow ALL in that case? Doesn't seem very practical, though.
1237  */
1238  if (!expr->useOr)
1239  return false;
1240 
1241  /* Only expressions with two arguments are candidates. */
1242  if (list_length(expr->args) != 2)
1243  return false;
1244 
1245  /*
1246  * We know it's always (Var IN Const), so we assume the var is the
1247  * first argument, and pseudoconstant is the second one.
1248  */
1249  if (!is_pseudo_constant_clause(lsecond(expr->args)))
1250  return false;
1251 
1252  clause_expr = linitial(expr->args);
1253 
1254  /*
1255  * If it's not an "=" operator, just ignore the clause, as it's not
1256  * compatible with functional dependencies. The operator is identified
1257  * simply by looking at which function it uses to estimate
1258  * selectivity. That's a bit strange, but it's what other similar
1259  * places do.
1260  */
1261  if (get_oprrest(expr->opno) != F_EQSEL)
1262  return false;
1263 
1264  /* OK to proceed with checking "var" */
1265  }
1266  else if (is_orclause(clause))
1267  {
1268  BoolExpr *bool_expr = (BoolExpr *) clause;
1269  ListCell *lc;
1270 
1271  /* start with no expression (we'll use the first match) */
1272  *expr = NULL;
1273 
1274  foreach(lc, bool_expr->args)
1275  {
1276  Node *or_expr = NULL;
1277 
1278  /*
1279  * Had we found incompatible expression in the arguments, treat
1280  * the whole expression as incompatible.
1281  */
1282  if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1283  statlist, &or_expr))
1284  return false;
1285 
1286  if (*expr == NULL)
1287  *expr = or_expr;
1288 
1289  /* ensure all the expressions are the same */
1290  if (!equal(or_expr, *expr))
1291  return false;
1292  }
1293 
1294  /* the expression is already checked by the recursive call */
1295  return true;
1296  }
1297  else if (is_notclause(clause))
1298  {
1299  /*
1300  * "NOT x" can be interpreted as "x = false", so get the argument and
1301  * proceed with seeing if it's a suitable Var.
1302  */
1303  clause_expr = (Node *) get_notclausearg(clause);
1304  }
1305  else
1306  {
1307  /*
1308  * A boolean expression "x" can be interpreted as "x = true", so
1309  * proceed with seeing if it's a suitable Var.
1310  */
1311  clause_expr = (Node *) clause;
1312  }
1313 
1314  /*
1315  * We may ignore any RelabelType node above the operand. (There won't be
1316  * more than one, since eval_const_expressions has been applied already.)
1317  */
1318  if (IsA(clause_expr, RelabelType))
1319  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1320 
1321  vars = pull_var_clause(clause_expr, 0);
1322 
1323  foreach(lc, vars)
1324  {
1325  Var *var = (Var *) lfirst(lc);
1326 
1327  /* Ensure Var is from the correct relation */
1328  if (var->varno != relid)
1329  return false;
1330 
1331  /* We also better ensure the Var is from the current level */
1332  if (var->varlevelsup != 0)
1333  return false;
1334 
1335  /* Also ignore system attributes (we don't allow stats on those) */
1337  return false;
1338  }
1339 
1340  /*
1341  * Check if we actually have a matching statistics for the expression.
1342  *
1343  * XXX Maybe this is an overkill. We'll eliminate the expressions later.
1344  */
1345  foreach(lc, statlist)
1346  {
1347  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1348 
1349  /* ignore stats without dependencies */
1350  if (info->kind != STATS_EXT_DEPENDENCIES)
1351  continue;
1352 
1353  foreach(lc2, info->exprs)
1354  {
1355  Node *stat_expr = (Node *) lfirst(lc2);
1356 
1357  if (equal(clause_expr, stat_expr))
1358  {
1359  *expr = stat_expr;
1360  return true;
1361  }
1362  }
1363  }
1364 
1365  return false;
1366 }
1367 
1368 /*
1369  * dependencies_clauselist_selectivity
1370  * Return the estimated selectivity of (a subset of) the given clauses
1371  * using functional dependency statistics, or 1.0 if no useful functional
1372  * dependency statistic exists.
1373  *
1374  * 'estimatedclauses' is an input/output argument that gets a bit set
1375  * corresponding to the (zero-based) list index of each clause that is included
1376  * in the estimated selectivity.
1377  *
1378  * Given equality clauses on attributes (a,b) we find the strongest dependency
1379  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1380  * dependency, we then combine the per-clause selectivities using the formula
1381  *
1382  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1383  *
1384  * where 'f' is the degree of the dependency. (Actually we use a slightly
1385  * modified version of this formula -- see clauselist_apply_dependencies()).
1386  *
1387  * With clauses on more than two attributes, the dependencies are applied
1388  * recursively, starting with the widest/strongest dependencies. For example
1389  * P(a,b,c) is first split like this:
1390  *
1391  * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1392  *
1393  * assuming (a,b=>c) is the strongest dependency.
1394  */
1397  List *clauses,
1398  int varRelid,
1399  JoinType jointype,
1400  SpecialJoinInfo *sjinfo,
1401  RelOptInfo *rel,
1402  Bitmapset **estimatedclauses)
1403 {
1404  Selectivity s1 = 1.0;
1405  ListCell *l;
1406  Bitmapset *clauses_attnums = NULL;
1407  AttrNumber *list_attnums;
1408  int listidx;
1409  MVDependencies **func_dependencies;
1410  int nfunc_dependencies;
1411  int total_ndeps;
1412  MVDependency **dependencies;
1413  int ndependencies;
1414  int i;
1415  AttrNumber attnum_offset;
1416  RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1417 
1418  /* unique expressions */
1419  Node **unique_exprs;
1420  int unique_exprs_cnt;
1421 
1422  /* check if there's any stats that might be useful for us. */
1423  if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1424  return 1.0;
1425 
1426  list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1427  list_length(clauses));
1428 
1429  /*
1430  * We allocate space as if every clause was a unique expression, although
1431  * that's probably overkill. Some will be simple column references that
1432  * we'll translate to attnums, and there might be duplicates. But it's
1433  * easier and cheaper to just do one allocation than repalloc later.
1434  */
1435  unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1436  unique_exprs_cnt = 0;
1437 
1438  /*
1439  * Pre-process the clauses list to extract the attnums seen in each item.
1440  * We need to determine if there's any clauses which will be useful for
1441  * dependency selectivity estimations. Along the way we'll record all of
1442  * the attnums for each clause in a list which we'll reference later so we
1443  * don't need to repeat the same work again. We'll also keep track of all
1444  * attnums seen.
1445  *
1446  * We also skip clauses that we already estimated using different types of
1447  * statistics (we treat them as incompatible).
1448  *
1449  * To handle expressions, we assign them negative attnums, as if it was a
1450  * system attribute (this is fine, as we only allow extended stats on user
1451  * attributes). And then we offset everything by the number of
1452  * expressions, so that we can store the values in a bitmapset.
1453  */
1454  listidx = 0;
1455  foreach(l, clauses)
1456  {
1457  Node *clause = (Node *) lfirst(l);
1459  Node *expr = NULL;
1460 
1461  /* ignore clause by default */
1462  list_attnums[listidx] = InvalidAttrNumber;
1463 
1464  if (!bms_is_member(listidx, *estimatedclauses))
1465  {
1466  /*
1467  * If it's a simple column reference, just extract the attnum. If
1468  * it's an expression, assign a negative attnum as if it was a
1469  * system attribute.
1470  */
1471  if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1472  {
1473  list_attnums[listidx] = attnum;
1474  }
1475  else if (dependency_is_compatible_expression(clause, rel->relid,
1476  rel->statlist,
1477  &expr))
1478  {
1479  /* special attnum assigned to this expression */
1481 
1482  Assert(expr != NULL);
1483 
1484  /* If the expression is duplicate, use the same attnum. */
1485  for (i = 0; i < unique_exprs_cnt; i++)
1486  {
1487  if (equal(unique_exprs[i], expr))
1488  {
1489  /* negative attribute number to expression */
1490  attnum = -(i + 1);
1491  break;
1492  }
1493  }
1494 
1495  /* not found in the list, so add it */
1496  if (attnum == InvalidAttrNumber)
1497  {
1498  unique_exprs[unique_exprs_cnt++] = expr;
1499 
1500  /* after incrementing the value, to get -1, -2, ... */
1501  attnum = (-unique_exprs_cnt);
1502  }
1503 
1504  /* remember which attnum was assigned to this clause */
1505  list_attnums[listidx] = attnum;
1506  }
1507  }
1508 
1509  listidx++;
1510  }
1511 
1512  Assert(listidx == list_length(clauses));
1513 
1514  /*
1515  * How much we need to offset the attnums? If there are no expressions,
1516  * then no offset is needed. Otherwise we need to offset enough for the
1517  * lowest value (-unique_exprs_cnt) to become 1.
1518  */
1519  if (unique_exprs_cnt > 0)
1520  attnum_offset = (unique_exprs_cnt + 1);
1521  else
1522  attnum_offset = 0;
1523 
1524  /*
1525  * Now that we know how many expressions there are, we can offset the
1526  * values just enough to build the bitmapset.
1527  */
1528  for (i = 0; i < list_length(clauses); i++)
1529  {
1531 
1532  /* ignore incompatible or already estimated clauses */
1533  if (list_attnums[i] == InvalidAttrNumber)
1534  continue;
1535 
1536  /* make sure the attnum is in the expected range */
1537  Assert(list_attnums[i] >= (-unique_exprs_cnt));
1538  Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1539 
1540  /* make sure the attnum is positive (valid AttrNumber) */
1541  attnum = list_attnums[i] + attnum_offset;
1542 
1543  /*
1544  * Either it's a regular attribute, or it's an expression, in which
1545  * case we must not have seen it before (expressions are unique).
1546  *
1547  * XXX Check whether it's a regular attribute has to be done using the
1548  * original attnum, while the second check has to use the value with
1549  * an offset.
1550  */
1551  Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1552  !bms_is_member(attnum, clauses_attnums));
1553 
1554  /*
1555  * Remember the offset attnum, both for attributes and expressions.
1556  * We'll pass list_attnums to clauselist_apply_dependencies, which
1557  * uses it to identify clauses in a bitmap. We could also pass the
1558  * offset, but this is more convenient.
1559  */
1560  list_attnums[i] = attnum;
1561 
1562  clauses_attnums = bms_add_member(clauses_attnums, attnum);
1563  }
1564 
1565  /*
1566  * If there's not at least two distinct attnums and expressions, then
1567  * reject the whole list of clauses. We must return 1.0 so the calling
1568  * function's selectivity is unaffected.
1569  */
1570  if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1571  {
1572  bms_free(clauses_attnums);
1573  pfree(list_attnums);
1574  return 1.0;
1575  }
1576 
1577  /*
1578  * Load all functional dependencies matching at least two parameters. We
1579  * can simply consider all dependencies at once, without having to search
1580  * for the best statistics object.
1581  *
1582  * To not waste cycles and memory, we deserialize dependencies only for
1583  * statistics that match at least two attributes. The array is allocated
1584  * with the assumption that all objects match - we could grow the array to
1585  * make it just the right size, but it's likely wasteful anyway thanks to
1586  * moving the freed chunks to freelists etc.
1587  */
1588  func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1589  list_length(rel->statlist));
1590  nfunc_dependencies = 0;
1591  total_ndeps = 0;
1592 
1593  foreach(l, rel->statlist)
1594  {
1596  int nmatched;
1597  int nexprs;
1598  int k;
1599  MVDependencies *deps;
1600 
1601  /* skip statistics that are not of the correct type */
1602  if (stat->kind != STATS_EXT_DEPENDENCIES)
1603  continue;
1604 
1605  /* skip statistics with mismatching stxdinherit value */
1606  if (stat->inherit != rte->inh)
1607  continue;
1608 
1609  /*
1610  * Count matching attributes - we have to undo the attnum offsets. The
1611  * input attribute numbers are not offset (expressions are not
1612  * included in stat->keys, so it's not necessary). But we need to
1613  * offset it before checking against clauses_attnums.
1614  */
1615  nmatched = 0;
1616  k = -1;
1617  while ((k = bms_next_member(stat->keys, k)) >= 0)
1618  {
1620 
1621  /* skip expressions */
1623  continue;
1624 
1625  /* apply the same offset as above */
1626  attnum += attnum_offset;
1627 
1628  if (bms_is_member(attnum, clauses_attnums))
1629  nmatched++;
1630  }
1631 
1632  /* count matching expressions */
1633  nexprs = 0;
1634  for (i = 0; i < unique_exprs_cnt; i++)
1635  {
1636  ListCell *lc;
1637 
1638  foreach(lc, stat->exprs)
1639  {
1640  Node *stat_expr = (Node *) lfirst(lc);
1641 
1642  /* try to match it */
1643  if (equal(stat_expr, unique_exprs[i]))
1644  nexprs++;
1645  }
1646  }
1647 
1648  /*
1649  * Skip objects matching fewer than two attributes/expressions from
1650  * clauses.
1651  */
1652  if (nmatched + nexprs < 2)
1653  continue;
1654 
1655  deps = statext_dependencies_load(stat->statOid, rte->inh);
1656 
1657  /*
1658  * The expressions may be represented by different attnums in the
1659  * stats, we need to remap them to be consistent with the clauses.
1660  * That will make the later steps (e.g. picking the strongest item and
1661  * so on) much simpler and cheaper, because it won't need to care
1662  * about the offset at all.
1663  *
1664  * When we're at it, we can ignore dependencies that are not fully
1665  * matched by clauses (i.e. referencing attributes or expressions that
1666  * are not in the clauses).
1667  *
1668  * We have to do this for all statistics, as long as there are any
1669  * expressions - we need to shift the attnums in all dependencies.
1670  *
1671  * XXX Maybe we should do this always, because it also eliminates some
1672  * of the dependencies early. It might be cheaper than having to walk
1673  * the longer list in find_strongest_dependency later, especially as
1674  * we need to do that repeatedly?
1675  *
1676  * XXX We have to do this even when there are no expressions in
1677  * clauses, otherwise find_strongest_dependency may fail for stats
1678  * with expressions (due to lookup of negative value in bitmap). So we
1679  * need to at least filter out those dependencies. Maybe we could do
1680  * it in a cheaper way (if there are no expr clauses, we can just
1681  * discard all negative attnums without any lookups).
1682  */
1683  if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1684  {
1685  int ndeps = 0;
1686 
1687  for (i = 0; i < deps->ndeps; i++)
1688  {
1689  bool skip = false;
1690  MVDependency *dep = deps->deps[i];
1691  int j;
1692 
1693  for (j = 0; j < dep->nattributes; j++)
1694  {
1695  int idx;
1696  Node *expr;
1697  int k;
1698  AttrNumber unique_attnum = InvalidAttrNumber;
1700 
1701  /* undo the per-statistics offset */
1702  attnum = dep->attributes[j];
1703 
1704  /*
1705  * For regular attributes we can simply check if it
1706  * matches any clause. If there's no matching clause, we
1707  * can just ignore it. We need to offset the attnum
1708  * though.
1709  */
1711  {
1712  dep->attributes[j] = attnum + attnum_offset;
1713 
1714  if (!bms_is_member(dep->attributes[j], clauses_attnums))
1715  {
1716  skip = true;
1717  break;
1718  }
1719 
1720  continue;
1721  }
1722 
1723  /*
1724  * the attnum should be a valid system attnum (-1, -2,
1725  * ...)
1726  */
1728 
1729  /*
1730  * For expressions, we need to do two translations. First
1731  * we have to translate the negative attnum to index in
1732  * the list of expressions (in the statistics object).
1733  * Then we need to see if there's a matching clause. The
1734  * index of the unique expression determines the attnum
1735  * (and we offset it).
1736  */
1737  idx = -(1 + attnum);
1738 
1739  /* Is the expression index is valid? */
1740  Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1741 
1742  expr = (Node *) list_nth(stat->exprs, idx);
1743 
1744  /* try to find the expression in the unique list */
1745  for (k = 0; k < unique_exprs_cnt; k++)
1746  {
1747  /*
1748  * found a matching unique expression, use the attnum
1749  * (derived from index of the unique expression)
1750  */
1751  if (equal(unique_exprs[k], expr))
1752  {
1753  unique_attnum = -(k + 1) + attnum_offset;
1754  break;
1755  }
1756  }
1757 
1758  /*
1759  * Found no matching expression, so we can simply skip
1760  * this dependency, because there's no chance it will be
1761  * fully covered.
1762  */
1763  if (unique_attnum == InvalidAttrNumber)
1764  {
1765  skip = true;
1766  break;
1767  }
1768 
1769  /* otherwise remap it to the new attnum */
1770  dep->attributes[j] = unique_attnum;
1771  }
1772 
1773  /* if found a matching dependency, keep it */
1774  if (!skip)
1775  {
1776  /* maybe we've skipped something earlier, so move it */
1777  if (ndeps != i)
1778  deps->deps[ndeps] = deps->deps[i];
1779 
1780  ndeps++;
1781  }
1782  }
1783 
1784  deps->ndeps = ndeps;
1785  }
1786 
1787  /*
1788  * It's possible we've removed all dependencies, in which case we
1789  * don't bother adding it to the list.
1790  */
1791  if (deps->ndeps > 0)
1792  {
1793  func_dependencies[nfunc_dependencies] = deps;
1794  total_ndeps += deps->ndeps;
1795  nfunc_dependencies++;
1796  }
1797  }
1798 
1799  /* if no matching stats could be found then we've nothing to do */
1800  if (nfunc_dependencies == 0)
1801  {
1802  pfree(func_dependencies);
1803  bms_free(clauses_attnums);
1804  pfree(list_attnums);
1805  pfree(unique_exprs);
1806  return 1.0;
1807  }
1808 
1809  /*
1810  * Work out which dependencies we can apply, starting with the
1811  * widest/strongest ones, and proceeding to smaller/weaker ones.
1812  */
1813  dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1814  total_ndeps);
1815  ndependencies = 0;
1816 
1817  while (true)
1818  {
1819  MVDependency *dependency;
1821 
1822  /* the widest/strongest dependency, fully matched by clauses */
1823  dependency = find_strongest_dependency(func_dependencies,
1824  nfunc_dependencies,
1825  clauses_attnums);
1826  if (!dependency)
1827  break;
1828 
1829  dependencies[ndependencies++] = dependency;
1830 
1831  /* Ignore dependencies using this implied attribute in later loops */
1832  attnum = dependency->attributes[dependency->nattributes - 1];
1833  clauses_attnums = bms_del_member(clauses_attnums, attnum);
1834  }
1835 
1836  /*
1837  * If we found applicable dependencies, use them to estimate all
1838  * compatible clauses on attributes that they refer to.
1839  */
1840  if (ndependencies != 0)
1841  s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1842  sjinfo, dependencies, ndependencies,
1843  list_attnums, estimatedclauses);
1844 
1845  /* free deserialized functional dependencies (and then the array) */
1846  for (i = 0; i < nfunc_dependencies; i++)
1847  pfree(func_dependencies[i]);
1848 
1849  pfree(dependencies);
1850  pfree(func_dependencies);
1851  bms_free(clauses_attnums);
1852  pfree(list_attnums);
1853  pfree(unique_exprs);
1854 
1855  return s1;
1856 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
#define InvalidAttrNumber
Definition: attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1045
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:648
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:738
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:775
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:674
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:453
@ BMS_SINGLETON
Definition: bitmapset.h:69
@ BMS_MULTIPLE
Definition: bitmapset.h:70
unsigned int uint32
Definition: c.h:441
#define offsetof(type, field)
Definition: c.h:727
#define VARHDRSZ
Definition: c.h:627
unsigned int Index
Definition: c.h:549
size_t Size
Definition: c.h:540
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:1931
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:119
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:502
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:159
Datum pg_dependencies_in(PG_FUNCTION_ARGS)
Definition: dependencies.c:656
#define SizeOfHeader
Definition: dependencies.c:40
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:207
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:447
Datum pg_dependencies_out(PG_FUNCTION_ARGS)
Definition: dependencies.c:673
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:744
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:598
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:198
struct DependencyGeneratorData DependencyGeneratorData
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
Definition: dependencies.c:622
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
Datum pg_dependencies_send(PG_FUNCTION_ARGS)
Definition: dependencies.c:729
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:175
#define SizeOfItem(natts)
Definition: dependencies.c:43
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
MVDependencies * statext_dependencies_build(StatsBuildData *data)
Definition: dependencies.c:351
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:224
Datum pg_dependencies_recv(PG_FUNCTION_ARGS)
Definition: dependencies.c:713
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:93
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:67
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
Definition: dependencies.c:932
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3170
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define MaxHeapAttributeNumber
Definition: htup_details.h:47
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:336
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1528
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:143
void pfree(void *pointer)
Definition: mcxt.c:1169
void * palloc0(Size size)
Definition: mcxt.c:1093
MemoryContext CurrentMemoryContext
Definition: mcxt.c:42
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:218
void * palloc(Size size)
Definition: mcxt.c:1062
#define AllocSetContextCreate
Definition: memutils.h:173
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:195
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:122
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:104
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:64
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:113
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
double Selectivity
Definition: nodes.h:672
JoinType
Definition: nodes.h:708
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:388
int16 attnum
Definition: pg_attribute.h:83
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:116
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
#define NIL
Definition: pg_list.h:65
#define linitial(l)
Definition: pg_list.h:174
#define lsecond(l)
Definition: pg_list.h:179
static void * list_nth(const List *list, int n)
Definition: pg_list.h:278
static void output(uint64 loop_count)
uintptr_t Datum
Definition: postgres.h:411
#define VARSIZE_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:315
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define BoolGetDatum(X)
Definition: postgres.h:446
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
char * s1
char * s2
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
List * args
Definition: primnodes.h:626
AttrNumber * dependencies
Definition: dependencies.c:64
Definition: pg_list.h:51
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54
Definition: nodes.h:540
Oid opno
Definition: primnodes.h:542
List * args
Definition: primnodes.h:548
Index relid
Definition: pathnodes.h:709
List * statlist
Definition: pathnodes.h:719
bool pseudoconstant
Definition: pathnodes.h:2067
Relids clause_relids
Definition: pathnodes.h:2077
Expr * clause
Definition: pathnodes.h:2059
Oid attrtypid
Definition: vacuum.h:129
Oid attrcollid
Definition: vacuum.h:132
Definition: primnodes.h:187
AttrNumber varattno
Definition: primnodes.h:191
int varno
Definition: primnodes.h:189
Index varlevelsup
Definition: primnodes.h:196
Definition: type.h:90
Definition: regguts.h:318
Definition: c.h:622
Definition: regcomp.c:238
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1198
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1411
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:1161
@ STATEXTDATASTXOID
Definition: syscache.h:92
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define TYPECACHE_LT_OPR
Definition: typcache.h:137
List * pull_var_clause(Node *node, int flags)
Definition: var.c:604
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:493