PostgreSQL Source Code  git master
dependencies.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dependencies.c
4  * POSTGRES functional dependencies
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/statistics/dependencies.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/htup_details.h"
17 #include "access/sysattr.h"
18 #include "catalog/pg_operator.h"
21 #include "lib/stringinfo.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/nodes.h"
24 #include "nodes/pathnodes.h"
25 #include "optimizer/clauses.h"
26 #include "optimizer/optimizer.h"
28 #include "statistics/statistics.h"
29 #include "utils/bytea.h"
30 #include "utils/fmgroids.h"
31 #include "utils/fmgrprotos.h"
32 #include "utils/lsyscache.h"
33 #include "utils/selfuncs.h"
34 #include "utils/syscache.h"
35 #include "utils/typcache.h"
36 
37 /* size of the struct header fields (magic, type, ndeps) */
38 #define SizeOfHeader (3 * sizeof(uint32))
39 
40 /* size of a serialized dependency (degree, natts, atts) */
41 #define SizeOfItem(natts) \
42  (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
43 
44 /* minimal size of a dependency (with two attributes) */
45 #define MinSizeOfItem SizeOfItem(2)
46 
47 /* minimal size of dependencies, when all deps are minimal */
48 #define MinSizeOfItems(ndeps) \
49  (SizeOfHeader + (ndeps) * MinSizeOfItem)
50 
51 /*
52  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
53  * k-permutations of n elements, except that the order does not matter for the
54  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
55  */
57 {
58  int k; /* size of the dependency */
59  int n; /* number of possible attributes */
60  int current; /* next dependency to return (index) */
61  AttrNumber ndependencies; /* number of dependencies generated */
62  AttrNumber *dependencies; /* array of pre-generated dependencies */
64 
66 
68  int index, AttrNumber start, AttrNumber *current);
73 static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
74 static bool dependency_is_fully_matched(MVDependency *dependency,
75  Bitmapset *attnums);
76 static bool dependency_is_compatible_clause(Node *clause, Index relid,
78 static bool dependency_is_compatible_expression(Node *clause, Index relid,
79  List *statlist, Node **expr);
81  int ndependencies, Bitmapset *attnums);
83  int varRelid, JoinType jointype,
84  SpecialJoinInfo *sjinfo,
86  int ndependencies,
87  AttrNumber *list_attnums,
88  Bitmapset **estimatedclauses);
89 
90 static void
93 {
94  /*
95  * The generator handles the first (k-1) elements differently from the
96  * last element.
97  */
98  if (index < (state->k - 1))
99  {
100  AttrNumber i;
101 
102  /*
103  * The first (k-1) values have to be in ascending order, which we
104  * generate recursively.
105  */
106 
107  for (i = start; i < state->n; i++)
108  {
109  current[index] = i;
110  generate_dependencies_recurse(state, (index + 1), (i + 1), current);
111  }
112  }
113  else
114  {
115  int i;
116 
117  /*
118  * the last element is the implied value, which does not respect the
119  * ascending order. We just need to check that the value is not in the
120  * first (k-1) elements.
121  */
122 
123  for (i = 0; i < state->n; i++)
124  {
125  int j;
126  bool match = false;
127 
128  current[index] = i;
129 
130  for (j = 0; j < index; j++)
131  {
132  if (current[j] == i)
133  {
134  match = true;
135  break;
136  }
137  }
138 
139  /*
140  * If the value is not found in the first part of the dependency,
141  * we're done.
142  */
143  if (!match)
144  {
145  state->dependencies = (AttrNumber *) repalloc(state->dependencies,
146  state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
147  memcpy(&state->dependencies[(state->k * state->ndependencies)],
148  current, state->k * sizeof(AttrNumber));
149  state->ndependencies++;
150  }
151  }
152  }
153 }
154 
155 /* generate all dependencies (k-permutations of n elements) */
156 static void
158 {
159  AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
160 
161  generate_dependencies_recurse(state, 0, 0, current);
162 
163  pfree(current);
164 }
165 
166 /*
167  * initialize the DependencyGenerator of variations, and prebuild the variations
168  *
169  * This pre-builds all the variations. We could also generate them in
170  * DependencyGenerator_next(), but this seems simpler.
171  */
172 static DependencyGenerator
174 {
176 
177  Assert((n >= k) && (k > 0));
178 
179  /* allocate the DependencyGenerator state */
181  state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
182 
183  state->ndependencies = 0;
184  state->current = 0;
185  state->k = k;
186  state->n = n;
187 
188  /* now actually pre-generate all the variations */
189  generate_dependencies(state);
190 
191  return state;
192 }
193 
194 /* free the DependencyGenerator state */
195 static void
197 {
198  pfree(state->dependencies);
199  pfree(state);
200 
201 }
202 
203 /* generate next combination */
204 static AttrNumber *
206 {
207  if (state->current == state->ndependencies)
208  return NULL;
209 
210  return &state->dependencies[state->k * state->current++];
211 }
212 
213 
214 /*
215  * validates functional dependency on the data
216  *
217  * An actual work horse of detecting functional dependencies. Given a variation
218  * of k attributes, it checks that the first (k-1) are sufficient to determine
219  * the last one.
220  */
221 static double
223 {
224  int i,
225  nitems;
226  MultiSortSupport mss;
227  SortItem *items;
228  AttrNumber *attnums_dep;
229 
230  /* counters valid within a group */
231  int group_size = 0;
232  int n_violations = 0;
233 
234  /* total number of rows supporting (consistent with) the dependency */
235  int n_supporting_rows = 0;
236 
237  /* Make sure we have at least two input attributes. */
238  Assert(k >= 2);
239 
240  /* sort info for all attributes columns */
241  mss = multi_sort_init(k);
242 
243  /*
244  * Translate the array of indexes to regular attnums for the dependency (we
245  * will need this to identify the columns in StatsBuildData).
246  */
247  attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
248  for (i = 0; i < k; i++)
249  attnums_dep[i] = data->attnums[dependency[i]];
250 
251  /*
252  * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
253  *
254  * (a) sort the data lexicographically
255  *
256  * (b) split the data into groups by first (k-1) columns
257  *
258  * (c) for each group count different values in the last column
259  *
260  * We use the column data types' default sort operators and collations;
261  * perhaps at some point it'd be worth using column-specific collations?
262  */
263 
264  /* prepare the sort function for the dimensions */
265  for (i = 0; i < k; i++)
266  {
267  VacAttrStats *colstat = data->stats[dependency[i]];
269 
270  type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
271  if (type->lt_opr == InvalidOid) /* shouldn't happen */
272  elog(ERROR, "cache lookup failed for ordering operator for type %u",
273  colstat->attrtypid);
274 
275  /* prepare the sort function for this dimension */
276  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
277  }
278 
279  /*
280  * build an array of SortItem(s) sorted using the multi-sort support
281  *
282  * XXX This relies on all stats entries pointing to the same tuple
283  * descriptor. For now that assumption holds, but it might change in the
284  * future for example if we support statistics on multiple tables.
285  */
286  items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
287 
288  /*
289  * Walk through the sorted array, split it into rows according to the
290  * first (k-1) columns. If there's a single value in the last column, we
291  * count the group as 'supporting' the functional dependency. Otherwise we
292  * count it as contradicting.
293  */
294 
295  /* start with the first row forming a group */
296  group_size = 1;
297 
298  /* loop 1 beyond the end of the array so that we count the final group */
299  for (i = 1; i <= nitems; i++)
300  {
301  /*
302  * Check if the group ended, which may be either because we processed
303  * all the items (i==nitems), or because the i-th item is not equal to
304  * the preceding one.
305  */
306  if (i == nitems ||
307  multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
308  {
309  /*
310  * If no violations were found in the group then track the rows of
311  * the group as supporting the functional dependency.
312  */
313  if (n_violations == 0)
314  n_supporting_rows += group_size;
315 
316  /* Reset counters for the new group */
317  n_violations = 0;
318  group_size = 1;
319  continue;
320  }
321  /* first columns match, but the last one does not (so contradicting) */
322  else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
323  n_violations++;
324 
325  group_size++;
326  }
327 
328  if (items)
329  pfree(items);
330 
331  pfree(mss);
332  pfree(attnums_dep);
333 
334  /* Compute the 'degree of validity' as (supporting/total). */
335  return (n_supporting_rows * 1.0 / data->numrows);
336 }
337 
338 /*
339  * detects functional dependencies between groups of columns
340  *
341  * Generates all possible subsets of columns (variations) and computes
342  * the degree of validity for each one. For example when creating statistics
343  * on three columns (a,b,c) there are 9 possible dependencies
344  *
345  * two columns three columns
346  * ----------- -------------
347  * (a) -> b (a,b) -> c
348  * (a) -> c (a,c) -> b
349  * (b) -> a (b,c) -> a
350  * (b) -> c
351  * (c) -> a
352  * (c) -> b
353  */
356 {
357  int i,
358  k;
359 
360  /* result */
362 
363  Assert(data->nattnums >= 2);
364 
365  /*
366  * We'll try build functional dependencies starting from the smallest ones
367  * covering just 2 columns, to the largest ones, covering all columns
368  * included in the statistics object. We start from the smallest ones
369  * because we want to be able to skip already implied ones.
370  */
371  for (k = 2; k <= data->nattnums; k++)
372  {
373  AttrNumber *dependency; /* array with k elements */
374 
375  /* prepare a DependencyGenerator of variation */
377 
378  /* generate all possible variations of k values (out of n) */
379  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
380  {
381  double degree;
382  MVDependency *d;
383 
384  /* compute how valid the dependency seems */
385  degree = dependency_degree(data, k, dependency);
386 
387  /*
388  * if the dependency seems entirely invalid, don't store it
389  */
390  if (degree == 0.0)
391  continue;
392 
393  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394  + k * sizeof(AttrNumber));
395 
396  /* copy the dependency (and keep the indexes into stxkeys) */
397  d->degree = degree;
398  d->nattributes = k;
399  for (i = 0; i < k; i++)
400  d->attributes[i] = data->attnums[dependency[i]];
401 
402  /* initialize the list of dependencies */
403  if (dependencies == NULL)
404  {
405  dependencies
406  = (MVDependencies *) palloc0(sizeof(MVDependencies));
407 
408  dependencies->magic = STATS_DEPS_MAGIC;
409  dependencies->type = STATS_DEPS_TYPE_BASIC;
410  dependencies->ndeps = 0;
411  }
412 
413  dependencies->ndeps++;
414  dependencies = (MVDependencies *) repalloc(dependencies,
415  offsetof(MVDependencies, deps)
416  + dependencies->ndeps * sizeof(MVDependency *));
417 
418  dependencies->deps[dependencies->ndeps - 1] = d;
419  }
420 
421  /*
422  * we're done with variations of k elements, so free the
423  * DependencyGenerator
424  */
425  DependencyGenerator_free(DependencyGenerator);
426  }
427 
428  return dependencies;
429 }
430 
431 
432 /*
433  * Serialize list of dependencies into a bytea value.
434  */
435 bytea *
437 {
438  int i;
439  bytea *output;
440  char *tmp;
441  Size len;
442 
443  /* we need to store ndeps, with a number of attributes for each one */
444  len = VARHDRSZ + SizeOfHeader;
445 
446  /* and also include space for the actual attribute numbers and degrees */
447  for (i = 0; i < dependencies->ndeps; i++)
448  len += SizeOfItem(dependencies->deps[i]->nattributes);
449 
450  output = (bytea *) palloc0(len);
451  SET_VARSIZE(output, len);
452 
453  tmp = VARDATA(output);
454 
455  /* Store the base struct values (magic, type, ndeps) */
456  memcpy(tmp, &dependencies->magic, sizeof(uint32));
457  tmp += sizeof(uint32);
458  memcpy(tmp, &dependencies->type, sizeof(uint32));
459  tmp += sizeof(uint32);
460  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
461  tmp += sizeof(uint32);
462 
463  /* store number of attributes and attribute numbers for each dependency */
464  for (i = 0; i < dependencies->ndeps; i++)
465  {
466  MVDependency *d = dependencies->deps[i];
467 
468  memcpy(tmp, &d->degree, sizeof(double));
469  tmp += sizeof(double);
470 
471  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
472  tmp += sizeof(AttrNumber);
473 
474  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
475  tmp += sizeof(AttrNumber) * d->nattributes;
476 
477  /* protect against overflow */
478  Assert(tmp <= ((char *) output + len));
479  }
480 
481  /* make sure we've produced exactly the right amount of data */
482  Assert(tmp == ((char *) output + len));
483 
484  return output;
485 }
486 
487 /*
488  * Reads serialized dependencies into MVDependencies structure.
489  */
492 {
493  int i;
494  Size min_expected_size;
496  char *tmp;
497 
498  if (data == NULL)
499  return NULL;
500 
501  if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
502  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
504 
505  /* read the MVDependencies header */
506  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
507 
508  /* initialize pointer to the data part (skip the varlena header) */
509  tmp = VARDATA_ANY(data);
510 
511  /* read the header fields and perform basic sanity checks */
512  memcpy(&dependencies->magic, tmp, sizeof(uint32));
513  tmp += sizeof(uint32);
514  memcpy(&dependencies->type, tmp, sizeof(uint32));
515  tmp += sizeof(uint32);
516  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
517  tmp += sizeof(uint32);
518 
519  if (dependencies->magic != STATS_DEPS_MAGIC)
520  elog(ERROR, "invalid dependency magic %d (expected %d)",
521  dependencies->magic, STATS_DEPS_MAGIC);
522 
523  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
524  elog(ERROR, "invalid dependency type %d (expected %d)",
525  dependencies->type, STATS_DEPS_TYPE_BASIC);
526 
527  if (dependencies->ndeps == 0)
528  elog(ERROR, "invalid zero-length item array in MVDependencies");
529 
530  /* what minimum bytea size do we expect for those parameters */
531  min_expected_size = SizeOfItem(dependencies->ndeps);
532 
533  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
534  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
535  VARSIZE_ANY_EXHDR(data), min_expected_size);
536 
537  /* allocate space for the MCV items */
538  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
539  + (dependencies->ndeps * sizeof(MVDependency *)));
540 
541  for (i = 0; i < dependencies->ndeps; i++)
542  {
543  double degree;
544  AttrNumber k;
545  MVDependency *d;
546 
547  /* degree of validity */
548  memcpy(&degree, tmp, sizeof(double));
549  tmp += sizeof(double);
550 
551  /* number of attributes */
552  memcpy(&k, tmp, sizeof(AttrNumber));
553  tmp += sizeof(AttrNumber);
554 
555  /* is the number of attributes valid? */
556  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
557 
558  /* now that we know the number of attributes, allocate the dependency */
559  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
560  + (k * sizeof(AttrNumber)));
561 
562  d->degree = degree;
563  d->nattributes = k;
564 
565  /* copy attribute numbers */
566  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
567  tmp += sizeof(AttrNumber) * d->nattributes;
568 
569  dependencies->deps[i] = d;
570 
571  /* still within the bytea */
572  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
573  }
574 
575  /* we should have consumed the whole bytea exactly */
576  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
577 
578  return dependencies;
579 }
580 
581 /*
582  * dependency_is_fully_matched
583  * checks that a functional dependency is fully matched given clauses on
584  * attributes (assuming the clauses are suitable equality clauses)
585  */
586 static bool
588 {
589  int j;
590 
591  /*
592  * Check that the dependency actually is fully covered by clauses. We have
593  * to translate all attribute numbers, as those are referenced
594  */
595  for (j = 0; j < dependency->nattributes; j++)
596  {
597  int attnum = dependency->attributes[j];
598 
599  if (!bms_is_member(attnum, attnums))
600  return false;
601  }
602 
603  return true;
604 }
605 
606 /*
607  * statext_dependencies_load
608  * Load the functional dependencies for the indicated pg_statistic_ext tuple
609  */
612 {
613  MVDependencies *result;
614  bool isnull;
615  Datum deps;
616  HeapTuple htup;
617 
619  if (!HeapTupleIsValid(htup))
620  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
621 
622  deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
623  Anum_pg_statistic_ext_data_stxddependencies, &isnull);
624  if (isnull)
625  elog(ERROR,
626  "requested statistics kind \"%c\" is not yet built for statistics object %u",
627  STATS_EXT_DEPENDENCIES, mvoid);
628 
630 
631  ReleaseSysCache(htup);
632 
633  return result;
634 }
635 
636 /*
637  * pg_dependencies_in - input routine for type pg_dependencies.
638  *
639  * pg_dependencies is real enough to be a table column, but it has no operations
640  * of its own, and disallows input too
641  */
642 Datum
644 {
645  /*
646  * pg_node_list stores the data in binary form and parsing text input is
647  * not needed, so disallow this.
648  */
649  ereport(ERROR,
650  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
651  errmsg("cannot accept a value of type %s", "pg_dependencies")));
652 
653  PG_RETURN_VOID(); /* keep compiler quiet */
654 }
655 
656 /*
657  * pg_dependencies - output routine for type pg_dependencies.
658  */
659 Datum
661 {
662  bytea *data = PG_GETARG_BYTEA_PP(0);
664  int i,
665  j;
667 
668  initStringInfo(&str);
669  appendStringInfoChar(&str, '{');
670 
671  for (i = 0; i < dependencies->ndeps; i++)
672  {
673  MVDependency *dependency = dependencies->deps[i];
674 
675  if (i > 0)
676  appendStringInfoString(&str, ", ");
677 
678  appendStringInfoChar(&str, '"');
679  for (j = 0; j < dependency->nattributes; j++)
680  {
681  if (j == dependency->nattributes - 1)
682  appendStringInfoString(&str, " => ");
683  else if (j > 0)
684  appendStringInfoString(&str, ", ");
685 
686  appendStringInfo(&str, "%d", dependency->attributes[j]);
687  }
688  appendStringInfo(&str, "\": %f", dependency->degree);
689  }
690 
691  appendStringInfoChar(&str, '}');
692 
693  PG_RETURN_CSTRING(str.data);
694 }
695 
696 /*
697  * pg_dependencies_recv - binary input routine for type pg_dependencies.
698  */
699 Datum
701 {
702  ereport(ERROR,
703  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
704  errmsg("cannot accept a value of type %s", "pg_dependencies")));
705 
706  PG_RETURN_VOID(); /* keep compiler quiet */
707 }
708 
709 /*
710  * pg_dependencies_send - binary output routine for type pg_dependencies.
711  *
712  * Functional dependencies are serialized in a bytea value (although the type
713  * is named differently), so let's just send that.
714  */
715 Datum
717 {
718  return byteasend(fcinfo);
719 }
720 
721 /*
722  * dependency_is_compatible_clause
723  * Determines if the clause is compatible with functional dependencies
724  *
725  * Only clauses that have the form of equality to a pseudoconstant, or can be
726  * interpreted that way, are currently accepted. Furthermore the variable
727  * part of the clause must be a simple Var belonging to the specified
728  * relation, whose attribute number we return in *attnum on success.
729  */
730 static bool
732 {
733  Var *var;
734  Node *clause_expr;
735 
736  if (IsA(clause, RestrictInfo))
737  {
738  RestrictInfo *rinfo = (RestrictInfo *) clause;
739 
740  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
741  if (rinfo->pseudoconstant)
742  return false;
743 
744  /* Clauses referencing multiple, or no, varnos are incompatible */
746  return false;
747 
748  clause = (Node *) rinfo->clause;
749  }
750 
751  if (is_opclause(clause))
752  {
753  /* If it's an opclause, check for Var = Const or Const = Var. */
754  OpExpr *expr = (OpExpr *) clause;
755 
756  /* Only expressions with two arguments are candidates. */
757  if (list_length(expr->args) != 2)
758  return false;
759 
760  /* Make sure non-selected argument is a pseudoconstant. */
762  clause_expr = linitial(expr->args);
763  else if (is_pseudo_constant_clause(linitial(expr->args)))
764  clause_expr = lsecond(expr->args);
765  else
766  return false;
767 
768  /*
769  * If it's not an "=" operator, just ignore the clause, as it's not
770  * compatible with functional dependencies.
771  *
772  * This uses the function for estimating selectivity, not the operator
773  * directly (a bit awkward, but well ...).
774  *
775  * XXX this is pretty dubious; probably it'd be better to check btree
776  * or hash opclass membership, so as not to be fooled by custom
777  * selectivity functions, and to be more consistent with decisions
778  * elsewhere in the planner.
779  */
780  if (get_oprrest(expr->opno) != F_EQSEL)
781  return false;
782 
783  /* OK to proceed with checking "var" */
784  }
785  else if (IsA(clause, ScalarArrayOpExpr))
786  {
787  /* If it's an scalar array operator, check for Var IN Const. */
788  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
789 
790  /*
791  * Reject ALL() variant, we only care about ANY/IN.
792  *
793  * XXX Maybe we should check if all the values are the same, and allow
794  * ALL in that case? Doesn't seem very practical, though.
795  */
796  if (!expr->useOr)
797  return false;
798 
799  /* Only expressions with two arguments are candidates. */
800  if (list_length(expr->args) != 2)
801  return false;
802 
803  /*
804  * We know it's always (Var IN Const), so we assume the var is the
805  * first argument, and pseudoconstant is the second one.
806  */
808  return false;
809 
810  clause_expr = linitial(expr->args);
811 
812  /*
813  * If it's not an "=" operator, just ignore the clause, as it's not
814  * compatible with functional dependencies. The operator is identified
815  * simply by looking at which function it uses to estimate
816  * selectivity. That's a bit strange, but it's what other similar
817  * places do.
818  */
819  if (get_oprrest(expr->opno) != F_EQSEL)
820  return false;
821 
822  /* OK to proceed with checking "var" */
823  }
824  else if (is_orclause(clause))
825  {
826  BoolExpr *bool_expr = (BoolExpr *) clause;
827  ListCell *lc;
828 
829  /* start with no attribute number */
830  *attnum = InvalidAttrNumber;
831 
832  foreach(lc, bool_expr->args)
833  {
834  AttrNumber clause_attnum;
835 
836  /*
837  * Had we found incompatible clause in the arguments, treat the
838  * whole clause as incompatible.
839  */
841  relid, &clause_attnum))
842  return false;
843 
844  if (*attnum == InvalidAttrNumber)
845  *attnum = clause_attnum;
846 
847  /* ensure all the variables are the same (same attnum) */
848  if (*attnum != clause_attnum)
849  return false;
850  }
851 
852  /* the Var is already checked by the recursive call */
853  return true;
854  }
855  else if (is_notclause(clause))
856  {
857  /*
858  * "NOT x" can be interpreted as "x = false", so get the argument and
859  * proceed with seeing if it's a suitable Var.
860  */
861  clause_expr = (Node *) get_notclausearg(clause);
862  }
863  else
864  {
865  /*
866  * A boolean expression "x" can be interpreted as "x = true", so
867  * proceed with seeing if it's a suitable Var.
868  */
869  clause_expr = (Node *) clause;
870  }
871 
872  /*
873  * We may ignore any RelabelType node above the operand. (There won't be
874  * more than one, since eval_const_expressions has been applied already.)
875  */
876  if (IsA(clause_expr, RelabelType))
877  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
878 
879  /* We only support plain Vars for now */
880  if (!IsA(clause_expr, Var))
881  return false;
882 
883  /* OK, we know we have a Var */
884  var = (Var *) clause_expr;
885 
886  /* Ensure Var is from the correct relation */
887  if (var->varno != relid)
888  return false;
889 
890  /* We also better ensure the Var is from the current level */
891  if (var->varlevelsup != 0)
892  return false;
893 
894  /* Also ignore system attributes (we don't allow stats on those) */
896  return false;
897 
898  *attnum = var->varattno;
899  return true;
900 }
901 
902 /*
903  * find_strongest_dependency
904  * find the strongest dependency on the attributes
905  *
906  * When applying functional dependencies, we start with the strongest
907  * dependencies. That is, we select the dependency that:
908  *
909  * (a) has all attributes covered by equality clauses
910  *
911  * (b) has the most attributes
912  *
913  * (c) has the highest degree of validity
914  *
915  * This guarantees that we eliminate the most redundant conditions first
916  * (see the comment in dependencies_clauselist_selectivity).
917  */
918 static MVDependency *
920  Bitmapset *attnums)
921 {
922  int i,
923  j;
924  MVDependency *strongest = NULL;
925 
926  /* number of attnums in clauses */
927  int nattnums = bms_num_members(attnums);
928 
929  /*
930  * Iterate over the MVDependency items and find the strongest one from the
931  * fully-matched dependencies. We do the cheap checks first, before
932  * matching it against the attnums.
933  */
934  for (i = 0; i < ndependencies; i++)
935  {
936  for (j = 0; j < dependencies[i]->ndeps; j++)
937  {
938  MVDependency *dependency = dependencies[i]->deps[j];
939 
940  /*
941  * Skip dependencies referencing more attributes than available
942  * clauses, as those can't be fully matched.
943  */
944  if (dependency->nattributes > nattnums)
945  continue;
946 
947  if (strongest)
948  {
949  /* skip dependencies on fewer attributes than the strongest. */
950  if (dependency->nattributes < strongest->nattributes)
951  continue;
952 
953  /* also skip weaker dependencies when attribute count matches */
954  if (strongest->nattributes == dependency->nattributes &&
955  strongest->degree > dependency->degree)
956  continue;
957  }
958 
959  /*
960  * this dependency is stronger, but we must still check that it's
961  * fully matched to these attnums. We perform this check last as
962  * it's slightly more expensive than the previous checks.
963  */
964  if (dependency_is_fully_matched(dependency, attnums))
965  strongest = dependency; /* save new best match */
966  }
967  }
968 
969  return strongest;
970 }
971 
972 /*
973  * clauselist_apply_dependencies
974  * Apply the specified functional dependencies to a list of clauses and
975  * return the estimated selectivity of the clauses that are compatible
976  * with any of the given dependencies.
977  *
978  * This will estimate all not-already-estimated clauses that are compatible
979  * with functional dependencies, and which have an attribute mentioned by any
980  * of the given dependencies (either as an implying or implied attribute).
981  *
982  * Given (lists of) clauses on attributes (a,b) and a functional dependency
983  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
984  * using the formula
985  *
986  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
987  *
988  * where 'f' is the degree of dependency. This reflects the fact that we
989  * expect a fraction f of all rows to be consistent with the dependency
990  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
991  * treated as independent.
992  *
993  * In practice, we use a slightly modified version of this formula, which uses
994  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
995  * should obviously not exceed either column's individual selectivity. I.e.,
996  * we actually combine selectivities using the formula
997  *
998  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
999  *
1000  * This can make quite a difference if the specific values matching the
1001  * clauses are not consistent with the functional dependency.
1002  */
1003 static Selectivity
1005  int varRelid, JoinType jointype,
1006  SpecialJoinInfo *sjinfo,
1008  AttrNumber *list_attnums,
1009  Bitmapset **estimatedclauses)
1010 {
1011  Bitmapset *attnums;
1012  int i;
1013  int j;
1014  int nattrs;
1015  Selectivity *attr_sel;
1016  int attidx;
1017  int listidx;
1018  ListCell *l;
1019  Selectivity s1;
1020 
1021  /*
1022  * Extract the attnums of all implying and implied attributes from all the
1023  * given dependencies. Each of these attributes is expected to have at
1024  * least 1 not-already-estimated compatible clause that we will estimate
1025  * here.
1026  */
1027  attnums = NULL;
1028  for (i = 0; i < ndependencies; i++)
1029  {
1030  for (j = 0; j < dependencies[i]->nattributes; j++)
1031  {
1032  AttrNumber attnum = dependencies[i]->attributes[j];
1033 
1034  attnums = bms_add_member(attnums, attnum);
1035  }
1036  }
1037 
1038  /*
1039  * Compute per-column selectivity estimates for each of these attributes,
1040  * and mark all the corresponding clauses as estimated.
1041  */
1042  nattrs = bms_num_members(attnums);
1043  attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1044 
1045  attidx = 0;
1046  i = -1;
1047  while ((i = bms_next_member(attnums, i)) >= 0)
1048  {
1049  List *attr_clauses = NIL;
1050  Selectivity simple_sel;
1051 
1052  listidx = -1;
1053  foreach(l, clauses)
1054  {
1055  Node *clause = (Node *) lfirst(l);
1056 
1057  listidx++;
1058  if (list_attnums[listidx] == i)
1059  {
1060  attr_clauses = lappend(attr_clauses, clause);
1061  *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1062  }
1063  }
1064 
1065  simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1066  jointype, sjinfo, false);
1067  attr_sel[attidx++] = simple_sel;
1068  }
1069 
1070  /*
1071  * Now combine these selectivities using the dependency information. For
1072  * chains of dependencies such as a -> b -> c, the b -> c dependency will
1073  * come before the a -> b dependency in the array, so we traverse the
1074  * array backwards to ensure such chains are computed in the right order.
1075  *
1076  * As explained above, pairs of selectivities are combined using the
1077  * formula
1078  *
1079  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1080  *
1081  * to ensure that the combined selectivity is never greater than either
1082  * individual selectivity.
1083  *
1084  * Where multiple dependencies apply (e.g., a -> b -> c), we use
1085  * conditional probabilities to compute the overall result as follows:
1086  *
1087  * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1088  *
1089  * so we replace the selectivities of all implied attributes with
1090  * conditional probabilities, that are conditional on all their implying
1091  * attributes. The selectivities of all other non-implied attributes are
1092  * left as they are.
1093  */
1094  for (i = ndependencies - 1; i >= 0; i--)
1095  {
1096  MVDependency *dependency = dependencies[i];
1098  Selectivity s2;
1099  double f;
1100 
1101  /* Selectivity of all the implying attributes */
1102  s1 = 1.0;
1103  for (j = 0; j < dependency->nattributes - 1; j++)
1104  {
1105  attnum = dependency->attributes[j];
1106  attidx = bms_member_index(attnums, attnum);
1107  s1 *= attr_sel[attidx];
1108  }
1109 
1110  /* Original selectivity of the implied attribute */
1111  attnum = dependency->attributes[j];
1112  attidx = bms_member_index(attnums, attnum);
1113  s2 = attr_sel[attidx];
1114 
1115  /*
1116  * Replace s2 with the conditional probability s2 given s1, computed
1117  * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1118  *
1119  * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1120  *
1121  * where P(a) = s1, the selectivity of the implying attributes, and
1122  * P(b) = s2, the selectivity of the implied attribute.
1123  */
1124  f = dependency->degree;
1125 
1126  if (s1 <= s2)
1127  attr_sel[attidx] = f + (1 - f) * s2;
1128  else
1129  attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1130  }
1131 
1132  /*
1133  * The overall selectivity of all the clauses on all these attributes is
1134  * then the product of all the original (non-implied) probabilities and
1135  * the new conditional (implied) probabilities.
1136  */
1137  s1 = 1.0;
1138  for (i = 0; i < nattrs; i++)
1139  s1 *= attr_sel[i];
1140 
1141  CLAMP_PROBABILITY(s1);
1142 
1143  pfree(attr_sel);
1144  bms_free(attnums);
1145 
1146  return s1;
1147 }
1148 
1149 /*
1150  * dependency_is_compatible_expression
1151  * Determines if the expression is compatible with functional dependencies
1152  *
1153  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1154  * expression is a simple Var. OTOH we check that there's at least one
1155  * statistics object matching the expression.
1156  */
1157 static bool
1158 dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1159 {
1160  List *vars;
1161  ListCell *lc,
1162  *lc2;
1163  Node *clause_expr;
1164 
1165  if (IsA(clause, RestrictInfo))
1166  {
1167  RestrictInfo *rinfo = (RestrictInfo *) clause;
1168 
1169  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1170  if (rinfo->pseudoconstant)
1171  return false;
1172 
1173  /* Clauses referencing multiple, or no, varnos are incompatible */
1175  return false;
1176 
1177  clause = (Node *) rinfo->clause;
1178  }
1179 
1180  if (is_opclause(clause))
1181  {
1182  /* If it's an opclause, check for Var = Const or Const = Var. */
1183  OpExpr *expr = (OpExpr *) clause;
1184 
1185  /* Only expressions with two arguments are candidates. */
1186  if (list_length(expr->args) != 2)
1187  return false;
1188 
1189  /* Make sure non-selected argument is a pseudoconstant. */
1191  clause_expr = linitial(expr->args);
1192  else if (is_pseudo_constant_clause(linitial(expr->args)))
1193  clause_expr = lsecond(expr->args);
1194  else
1195  return false;
1196 
1197  /*
1198  * If it's not an "=" operator, just ignore the clause, as it's not
1199  * compatible with functional dependencies.
1200  *
1201  * This uses the function for estimating selectivity, not the operator
1202  * directly (a bit awkward, but well ...).
1203  *
1204  * XXX this is pretty dubious; probably it'd be better to check btree
1205  * or hash opclass membership, so as not to be fooled by custom
1206  * selectivity functions, and to be more consistent with decisions
1207  * elsewhere in the planner.
1208  */
1209  if (get_oprrest(expr->opno) != F_EQSEL)
1210  return false;
1211 
1212  /* OK to proceed with checking "var" */
1213  }
1214  else if (IsA(clause, ScalarArrayOpExpr))
1215  {
1216  /* If it's an scalar array operator, check for Var IN Const. */
1217  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1218 
1219  /*
1220  * Reject ALL() variant, we only care about ANY/IN.
1221  *
1222  * FIXME Maybe we should check if all the values are the same, and
1223  * allow ALL in that case? Doesn't seem very practical, though.
1224  */
1225  if (!expr->useOr)
1226  return false;
1227 
1228  /* Only expressions with two arguments are candidates. */
1229  if (list_length(expr->args) != 2)
1230  return false;
1231 
1232  /*
1233  * We know it's always (Var IN Const), so we assume the var is the
1234  * first argument, and pseudoconstant is the second one.
1235  */
1236  if (!is_pseudo_constant_clause(lsecond(expr->args)))
1237  return false;
1238 
1239  clause_expr = linitial(expr->args);
1240 
1241  /*
1242  * If it's not an "=" operator, just ignore the clause, as it's not
1243  * compatible with functional dependencies. The operator is identified
1244  * simply by looking at which function it uses to estimate
1245  * selectivity. That's a bit strange, but it's what other similar
1246  * places do.
1247  */
1248  if (get_oprrest(expr->opno) != F_EQSEL)
1249  return false;
1250 
1251  /* OK to proceed with checking "var" */
1252  }
1253  else if (is_orclause(clause))
1254  {
1255  BoolExpr *bool_expr = (BoolExpr *) clause;
1256  ListCell *lc;
1257 
1258  /* start with no expression (we'll use the first match) */
1259  *expr = NULL;
1260 
1261  foreach(lc, bool_expr->args)
1262  {
1263  Node *or_expr = NULL;
1264 
1265  /*
1266  * Had we found incompatible expression in the arguments, treat
1267  * the whole expression as incompatible.
1268  */
1269  if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1270  statlist, &or_expr))
1271  return false;
1272 
1273  if (*expr == NULL)
1274  *expr = or_expr;
1275 
1276  /* ensure all the expressions are the same */
1277  if (!equal(or_expr, *expr))
1278  return false;
1279  }
1280 
1281  /* the expression is already checked by the recursive call */
1282  return true;
1283  }
1284  else if (is_notclause(clause))
1285  {
1286  /*
1287  * "NOT x" can be interpreted as "x = false", so get the argument and
1288  * proceed with seeing if it's a suitable Var.
1289  */
1290  clause_expr = (Node *) get_notclausearg(clause);
1291  }
1292  else
1293  {
1294  /*
1295  * A boolean expression "x" can be interpreted as "x = true", so
1296  * proceed with seeing if it's a suitable Var.
1297  */
1298  clause_expr = (Node *) clause;
1299  }
1300 
1301  /*
1302  * We may ignore any RelabelType node above the operand. (There won't be
1303  * more than one, since eval_const_expressions has been applied already.)
1304  */
1305  if (IsA(clause_expr, RelabelType))
1306  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1307 
1308  vars = pull_var_clause(clause_expr, 0);
1309 
1310  foreach(lc, vars)
1311  {
1312  Var *var = (Var *) lfirst(lc);
1313 
1314  /* Ensure Var is from the correct relation */
1315  if (var->varno != relid)
1316  return false;
1317 
1318  /* We also better ensure the Var is from the current level */
1319  if (var->varlevelsup != 0)
1320  return false;
1321 
1322  /* Also ignore system attributes (we don't allow stats on those) */
1324  return false;
1325  }
1326 
1327  /*
1328  * Check if we actually have a matching statistics for the expression.
1329  *
1330  * XXX Maybe this is an overkill. We'll eliminate the expressions later.
1331  */
1332  foreach(lc, statlist)
1333  {
1334  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1335 
1336  /* ignore stats without dependencies */
1337  if (info->kind != STATS_EXT_DEPENDENCIES)
1338  continue;
1339 
1340  foreach(lc2, info->exprs)
1341  {
1342  Node *stat_expr = (Node *) lfirst(lc2);
1343 
1344  if (equal(clause_expr, stat_expr))
1345  {
1346  *expr = stat_expr;
1347  return true;
1348  }
1349  }
1350  }
1351 
1352  return false;
1353 }
1354 
1355 /*
1356  * dependencies_clauselist_selectivity
1357  * Return the estimated selectivity of (a subset of) the given clauses
1358  * using functional dependency statistics, or 1.0 if no useful functional
1359  * dependency statistic exists.
1360  *
1361  * 'estimatedclauses' is an input/output argument that gets a bit set
1362  * corresponding to the (zero-based) list index of each clause that is included
1363  * in the estimated selectivity.
1364  *
1365  * Given equality clauses on attributes (a,b) we find the strongest dependency
1366  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1367  * dependency, we then combine the per-clause selectivities using the formula
1368  *
1369  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1370  *
1371  * where 'f' is the degree of the dependency. (Actually we use a slightly
1372  * modified version of this formula -- see clauselist_apply_dependencies()).
1373  *
1374  * With clauses on more than two attributes, the dependencies are applied
1375  * recursively, starting with the widest/strongest dependencies. For example
1376  * P(a,b,c) is first split like this:
1377  *
1378  * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1379  *
1380  * assuming (a,b=>c) is the strongest dependency.
1381  */
1384  List *clauses,
1385  int varRelid,
1386  JoinType jointype,
1387  SpecialJoinInfo *sjinfo,
1388  RelOptInfo *rel,
1389  Bitmapset **estimatedclauses)
1390 {
1391  Selectivity s1 = 1.0;
1392  ListCell *l;
1393  Bitmapset *clauses_attnums = NULL;
1394  AttrNumber *list_attnums;
1395  int listidx;
1396  MVDependencies **func_dependencies;
1397  int nfunc_dependencies;
1398  int total_ndeps;
1400  int ndependencies;
1401  int i;
1402  AttrNumber attnum_offset;
1403 
1404  /* unique expressions */
1405  Node **unique_exprs;
1406  int unique_exprs_cnt;
1407 
1408  /* check if there's any stats that might be useful for us. */
1409  if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1410  return 1.0;
1411 
1412  list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1413  list_length(clauses));
1414 
1415  /*
1416  * We allocate space as if every clause was a unique expression, although
1417  * that's probably overkill. Some will be simple column references that
1418  * we'll translate to attnums, and there might be duplicates. But it's
1419  * easier and cheaper to just do one allocation than repalloc later.
1420  */
1421  unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1422  unique_exprs_cnt = 0;
1423 
1424  /*
1425  * Pre-process the clauses list to extract the attnums seen in each item.
1426  * We need to determine if there's any clauses which will be useful for
1427  * dependency selectivity estimations. Along the way we'll record all of
1428  * the attnums for each clause in a list which we'll reference later so we
1429  * don't need to repeat the same work again. We'll also keep track of all
1430  * attnums seen.
1431  *
1432  * We also skip clauses that we already estimated using different types of
1433  * statistics (we treat them as incompatible).
1434  *
1435  * To handle expressions, we assign them negative attnums, as if it was a
1436  * system attribute (this is fine, as we only allow extended stats on user
1437  * attributes). And then we offset everything by the number of
1438  * expressions, so that we can store the values in a bitmapset.
1439  */
1440  listidx = 0;
1441  foreach(l, clauses)
1442  {
1443  Node *clause = (Node *) lfirst(l);
1445  Node *expr = NULL;
1446 
1447  /* ignore clause by default */
1448  list_attnums[listidx] = InvalidAttrNumber;
1449 
1450  if (!bms_is_member(listidx, *estimatedclauses))
1451  {
1452  /*
1453  * If it's a simple column reference, just extract the attnum. If
1454  * it's an expression, assign a negative attnum as if it was a
1455  * system attribute.
1456  */
1457  if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1458  {
1459  list_attnums[listidx] = attnum;
1460  }
1461  else if (dependency_is_compatible_expression(clause, rel->relid,
1462  rel->statlist,
1463  &expr))
1464  {
1465  /* special attnum assigned to this expression */
1466  attnum = InvalidAttrNumber;
1467 
1468  Assert(expr != NULL);
1469 
1470  /* If the expression is duplicate, use the same attnum. */
1471  for (i = 0; i < unique_exprs_cnt; i++)
1472  {
1473  if (equal(unique_exprs[i], expr))
1474  {
1475  /* negative attribute number to expression */
1476  attnum = -(i + 1);
1477  break;
1478  }
1479  }
1480 
1481  /* not found in the list, so add it */
1482  if (attnum == InvalidAttrNumber)
1483  {
1484  unique_exprs[unique_exprs_cnt++] = expr;
1485 
1486  /* after incrementing the value, to get -1, -2, ... */
1487  attnum = (-unique_exprs_cnt);
1488  }
1489 
1490  /* remember which attnum was assigned to this clause */
1491  list_attnums[listidx] = attnum;
1492  }
1493  }
1494 
1495  listidx++;
1496  }
1497 
1498  Assert(listidx == list_length(clauses));
1499 
1500  /*
1501  * How much we need to offset the attnums? If there are no expressions,
1502  * then no offset is needed. Otherwise we need to offset enough for the
1503  * lowest value (-unique_exprs_cnt) to become 1.
1504  */
1505  if (unique_exprs_cnt > 0)
1506  attnum_offset = (unique_exprs_cnt + 1);
1507  else
1508  attnum_offset = 0;
1509 
1510  /*
1511  * Now that we know how many expressions there are, we can offset the
1512  * values just enough to build the bitmapset.
1513  */
1514  for (i = 0; i < list_length(clauses); i++)
1515  {
1517 
1518  /* ignore incompatible or already estimated clauses */
1519  if (list_attnums[i] == InvalidAttrNumber)
1520  continue;
1521 
1522  /* make sure the attnum is in the expected range */
1523  Assert(list_attnums[i] >= (-unique_exprs_cnt));
1524  Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1525 
1526  /* make sure the attnum is positive (valid AttrNumber) */
1527  attnum = list_attnums[i] + attnum_offset;
1528 
1529  /*
1530  * Either it's a regular attribute, or it's an expression, in which
1531  * case we must not have seen it before (expressions are unique).
1532  *
1533  * XXX Check whether it's a regular attribute has to be done using the
1534  * original attnum, while the second check has to use the value with
1535  * an offset.
1536  */
1537  Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1538  !bms_is_member(attnum, clauses_attnums));
1539 
1540  /*
1541  * Remember the offset attnum, both for attributes and expressions.
1542  * We'll pass list_attnums to clauselist_apply_dependencies, which
1543  * uses it to identify clauses in a bitmap. We could also pass the
1544  * offset, but this is more convenient.
1545  */
1546  list_attnums[i] = attnum;
1547 
1548  clauses_attnums = bms_add_member(clauses_attnums, attnum);
1549  }
1550 
1551  /*
1552  * If there's not at least two distinct attnums and expressions, then
1553  * reject the whole list of clauses. We must return 1.0 so the calling
1554  * function's selectivity is unaffected.
1555  */
1556  if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1557  {
1558  bms_free(clauses_attnums);
1559  pfree(list_attnums);
1560  return 1.0;
1561  }
1562 
1563  /*
1564  * Load all functional dependencies matching at least two parameters. We
1565  * can simply consider all dependencies at once, without having to search
1566  * for the best statistics object.
1567  *
1568  * To not waste cycles and memory, we deserialize dependencies only for
1569  * statistics that match at least two attributes. The array is allocated
1570  * with the assumption that all objects match - we could grow the array to
1571  * make it just the right size, but it's likely wasteful anyway thanks to
1572  * moving the freed chunks to freelists etc.
1573  */
1574  func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1575  list_length(rel->statlist));
1576  nfunc_dependencies = 0;
1577  total_ndeps = 0;
1578 
1579  foreach(l, rel->statlist)
1580  {
1582  int nmatched;
1583  int nexprs;
1584  int k;
1585  MVDependencies *deps;
1586 
1587  /* skip statistics that are not of the correct type */
1588  if (stat->kind != STATS_EXT_DEPENDENCIES)
1589  continue;
1590 
1591  /*
1592  * Count matching attributes - we have to undo the attnum offsets. The
1593  * input attribute numbers are not offset (expressions are not
1594  * included in stat->keys, so it's not necessary). But we need to
1595  * offset it before checking against clauses_attnums.
1596  */
1597  nmatched = 0;
1598  k = -1;
1599  while ((k = bms_next_member(stat->keys, k)) >= 0)
1600  {
1602 
1603  /* skip expressions */
1604  if (!AttrNumberIsForUserDefinedAttr(attnum))
1605  continue;
1606 
1607  /* apply the same offset as above */
1608  attnum += attnum_offset;
1609 
1610  if (bms_is_member(attnum, clauses_attnums))
1611  nmatched++;
1612  }
1613 
1614  /* count matching expressions */
1615  nexprs = 0;
1616  for (i = 0; i < unique_exprs_cnt; i++)
1617  {
1618  ListCell *lc;
1619 
1620  foreach(lc, stat->exprs)
1621  {
1622  Node *stat_expr = (Node *) lfirst(lc);
1623 
1624  /* try to match it */
1625  if (equal(stat_expr, unique_exprs[i]))
1626  nexprs++;
1627  }
1628  }
1629 
1630  /*
1631  * Skip objects matching fewer than two attributes/expressions from
1632  * clauses.
1633  */
1634  if (nmatched + nexprs < 2)
1635  continue;
1636 
1637  deps = statext_dependencies_load(stat->statOid);
1638 
1639  /*
1640  * The expressions may be represented by different attnums in the
1641  * stats, we need to remap them to be consistent with the clauses.
1642  * That will make the later steps (e.g. picking the strongest item and
1643  * so on) much simpler and cheaper, because it won't need to care
1644  * about the offset at all.
1645  *
1646  * When we're at it, we can ignore dependencies that are not fully
1647  * matched by clauses (i.e. referencing attributes or expressions that
1648  * are not in the clauses).
1649  *
1650  * We have to do this for all statistics, as long as there are any
1651  * expressions - we need to shift the attnums in all dependencies.
1652  *
1653  * XXX Maybe we should do this always, because it also eliminates some
1654  * of the dependencies early. It might be cheaper than having to walk
1655  * the longer list in find_strongest_dependency later, especially as
1656  * we need to do that repeatedly?
1657  *
1658  * XXX We have to do this even when there are no expressions in
1659  * clauses, otherwise find_strongest_dependency may fail for stats
1660  * with expressions (due to lookup of negative value in bitmap). So we
1661  * need to at least filter out those dependencies. Maybe we could do
1662  * it in a cheaper way (if there are no expr clauses, we can just
1663  * discard all negative attnums without any lookups).
1664  */
1665  if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1666  {
1667  int ndeps = 0;
1668 
1669  for (i = 0; i < deps->ndeps; i++)
1670  {
1671  bool skip = false;
1672  MVDependency *dep = deps->deps[i];
1673  int j;
1674 
1675  for (j = 0; j < dep->nattributes; j++)
1676  {
1677  int idx;
1678  Node *expr;
1679  int k;
1680  AttrNumber unique_attnum = InvalidAttrNumber;
1682 
1683  /* undo the per-statistics offset */
1684  attnum = dep->attributes[j];
1685 
1686  /*
1687  * For regular attributes we can simply check if it
1688  * matches any clause. If there's no matching clause, we
1689  * can just ignore it. We need to offset the attnum
1690  * though.
1691  */
1692  if (AttrNumberIsForUserDefinedAttr(attnum))
1693  {
1694  dep->attributes[j] = attnum + attnum_offset;
1695 
1696  if (!bms_is_member(dep->attributes[j], clauses_attnums))
1697  {
1698  skip = true;
1699  break;
1700  }
1701 
1702  continue;
1703  }
1704 
1705  /*
1706  * the attnum should be a valid system attnum (-1, -2,
1707  * ...)
1708  */
1709  Assert(AttributeNumberIsValid(attnum));
1710 
1711  /*
1712  * For expressions, we need to do two translations. First
1713  * we have to translate the negative attnum to index in
1714  * the list of expressions (in the statistics object).
1715  * Then we need to see if there's a matching clause. The
1716  * index of the unique expression determines the attnum
1717  * (and we offset it).
1718  */
1719  idx = -(1 + attnum);
1720 
1721  /* Is the expression index is valid? */
1722  Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1723 
1724  expr = (Node *) list_nth(stat->exprs, idx);
1725 
1726  /* try to find the expression in the unique list */
1727  for (k = 0; k < unique_exprs_cnt; k++)
1728  {
1729  /*
1730  * found a matching unique expression, use the attnum
1731  * (derived from index of the unique expression)
1732  */
1733  if (equal(unique_exprs[k], expr))
1734  {
1735  unique_attnum = -(k + 1) + attnum_offset;
1736  break;
1737  }
1738  }
1739 
1740  /*
1741  * Found no matching expression, so we can simply skip
1742  * this dependency, because there's no chance it will be
1743  * fully covered.
1744  */
1745  if (unique_attnum == InvalidAttrNumber)
1746  {
1747  skip = true;
1748  break;
1749  }
1750 
1751  /* otherwise remap it to the new attnum */
1752  dep->attributes[j] = unique_attnum;
1753  }
1754 
1755  /* if found a matching dependency, keep it */
1756  if (!skip)
1757  {
1758  /* maybe we've skipped something earlier, so move it */
1759  if (ndeps != i)
1760  deps->deps[ndeps] = deps->deps[i];
1761 
1762  ndeps++;
1763  }
1764  }
1765 
1766  deps->ndeps = ndeps;
1767  }
1768 
1769  /*
1770  * It's possible we've removed all dependencies, in which case we
1771  * don't bother adding it to the list.
1772  */
1773  if (deps->ndeps > 0)
1774  {
1775  func_dependencies[nfunc_dependencies] = deps;
1776  total_ndeps += deps->ndeps;
1777  nfunc_dependencies++;
1778  }
1779  }
1780 
1781  /* if no matching stats could be found then we've nothing to do */
1782  if (nfunc_dependencies == 0)
1783  {
1784  pfree(func_dependencies);
1785  bms_free(clauses_attnums);
1786  pfree(list_attnums);
1787  pfree(unique_exprs);
1788  return 1.0;
1789  }
1790 
1791  /*
1792  * Work out which dependencies we can apply, starting with the
1793  * widest/strongest ones, and proceeding to smaller/weaker ones.
1794  */
1795  dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1796  total_ndeps);
1797  ndependencies = 0;
1798 
1799  while (true)
1800  {
1801  MVDependency *dependency;
1803 
1804  /* the widest/strongest dependency, fully matched by clauses */
1805  dependency = find_strongest_dependency(func_dependencies,
1806  nfunc_dependencies,
1807  clauses_attnums);
1808  if (!dependency)
1809  break;
1810 
1811  dependencies[ndependencies++] = dependency;
1812 
1813  /* Ignore dependencies using this implied attribute in later loops */
1814  attnum = dependency->attributes[dependency->nattributes - 1];
1815  clauses_attnums = bms_del_member(clauses_attnums, attnum);
1816  }
1817 
1818  /*
1819  * If we found applicable dependencies, use them to estimate all
1820  * compatible clauses on attributes that they refer to.
1821  */
1822  if (ndependencies != 0)
1823  s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1824  sjinfo, dependencies, ndependencies,
1825  list_attnums, estimatedclauses);
1826 
1827  /* free deserialized functional dependencies (and then the array) */
1828  for (i = 0; i < nfunc_dependencies; i++)
1829  pfree(func_dependencies[i]);
1830 
1831  pfree(dependencies);
1832  pfree(func_dependencies);
1833  bms_free(clauses_attnums);
1834  pfree(list_attnums);
1835  pfree(unique_exprs);
1836 
1837  return s1;
1838 }
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define NIL
Definition: pg_list.h:65
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
#define IsA(nodeptr, _type_)
Definition: nodes.h:590
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:222
List * statlist
Definition: pathnodes.h:714
MVDependencies * statext_dependencies_load(Oid mvoid)
Definition: dependencies.c:611
Index varlevelsup
Definition: primnodes.h:196
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define VARDATA(PTR)
Definition: postgres.h:315
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
struct DependencyGeneratorData DependencyGeneratorData
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:3105
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:106
static void output(uint64 loop_count)
#define VARHDRSZ
Definition: c.h:627
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:112
Relids clause_relids
Definition: pathnodes.h:2063
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1043
bool pseudoconstant
Definition: pathnodes.h:2053
#define SizeOfItem(natts)
Definition: dependencies.c:41
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
Definition: nodes.h:539
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int errcode(int sqlerrcode)
Definition: elog.c:698
AttrNumber varattno
Definition: primnodes.h:191
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
Datum pg_dependencies_out(PG_FUNCTION_ARGS)
Definition: dependencies.c:660
List * pull_var_clause(Node *node, int flags)
Definition: var.c:562
Datum pg_dependencies_recv(PG_FUNCTION_ARGS)
Definition: dependencies.c:700
double Selectivity
Definition: nodes.h:672
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:186
#define lsecond(l)
Definition: pg_list.h:179
AttrNumber nattributes
Definition: statistics.h:53
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
JoinType
Definition: nodes.h:706
Definition: type.h:89
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
void pfree(void *pointer)
Definition: mcxt.c:1169
MVDependencies * statext_dependencies_build(StatsBuildData *data)
Definition: dependencies.c:355
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
Oid attrtypid
Definition: vacuum.h:124
#define linitial(l)
Definition: pg_list.h:174
#define ObjectIdGetDatum(X)
Definition: postgres.h:551
#define ERROR
Definition: elog.h:46
char * s1
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:173
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:587
static void * list_nth(const List *list, int n)
Definition: pg_list.h:278
AttrNumber * dependencies
Definition: dependencies.c:62
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:646
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:453
unsigned int uint32
Definition: c.h:441
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:1926
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:65
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:91
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:157
MultiSortSupport multi_sort_init(int ndims)
Index relid
Definition: pathnodes.h:704
List * lappend(List *list, void *datum)
Definition: list.c:336
Expr * clause
Definition: pathnodes.h:2045
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:500
Index varno
Definition: primnodes.h:189
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Definition: syscache.c:1127
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
Definition: dependencies.c:919
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:115
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1528
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:672
void * palloc0(Size size)
Definition: mcxt.c:1093
char * s2
uintptr_t Datum
Definition: postgres.h:411
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1175
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1388
VacAttrStats ** stats
unsigned int Index
Definition: c.h:549
uint32 magic
Definition: statistics.h:59
#define VARSIZE_ANY(PTR)
Definition: postgres.h:348
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:338
#define InvalidOid
Definition: postgres_ext.h:36
int16 attnum
Definition: pg_attribute.h:83
#define ereport(elevel,...)
Definition: elog.h:157
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:124
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define MaxHeapAttributeNumber
Definition: htup_details.h:47
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
uint32 ndeps
Definition: statistics.h:61
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
double degree
Definition: statistics.h:52
Definition: regguts.h:317
Datum pg_dependencies_in(PG_FUNCTION_ARGS)
Definition: dependencies.c:643
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
size_t Size
Definition: c.h:540
static int list_length(const List *l)
Definition: pg_list.h:149
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:736
#define SizeOfHeader
Definition: dependencies.c:38
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:119
Oid attrcollid
Definition: vacuum.h:127
List * args
Definition: primnodes.h:616
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1182
#define InvalidAttrNumber
Definition: attnum.h:23
Bitmapset * keys
Definition: pathnodes.h:931
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:205
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:436
#define elog(elevel,...)
Definition: elog.h:232
int i
#define TYPECACHE_LT_OPR
Definition: typcache.h:137
Definition: c.h:621
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Oid opno
Definition: primnodes.h:542
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:66
List * args
Definition: primnodes.h:548
Definition: regcomp.c:237
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:773
Definition: pg_list.h:50
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:196
Datum pg_dependencies_send(PG_FUNCTION_ARGS)
Definition: dependencies.c:716
bool has_stats_of_kind(List *stats, char requiredkind)
int16 AttrNumber
Definition: attnum.h:21
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:491
#define offsetof(type, field)
Definition: c.h:727
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:731