PostgreSQL Source Code  git master
dependencies.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dependencies.c
4  * POSTGRES functional dependencies
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/statistics/dependencies.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/htup_details.h"
17 #include "access/sysattr.h"
18 #include "catalog/pg_operator.h"
21 #include "lib/stringinfo.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/nodes.h"
24 #include "nodes/pathnodes.h"
25 #include "optimizer/clauses.h"
26 #include "optimizer/optimizer.h"
27 #include "parser/parsetree.h"
29 #include "statistics/statistics.h"
30 #include "utils/bytea.h"
31 #include "utils/fmgroids.h"
32 #include "utils/fmgrprotos.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
38 
39 /* size of the struct header fields (magic, type, ndeps) */
40 #define SizeOfHeader (3 * sizeof(uint32))
41 
42 /* size of a serialized dependency (degree, natts, atts) */
43 #define SizeOfItem(natts) \
44  (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
45 
46 /* minimal size of a dependency (with two attributes) */
47 #define MinSizeOfItem SizeOfItem(2)
48 
49 /* minimal size of dependencies, when all deps are minimal */
50 #define MinSizeOfItems(ndeps) \
51  (SizeOfHeader + (ndeps) * MinSizeOfItem)
52 
53 /*
54  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
55  * k-permutations of n elements, except that the order does not matter for the
56  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
57  */
59 {
60  int k; /* size of the dependency */
61  int n; /* number of possible attributes */
62  int current; /* next dependency to return (index) */
63  AttrNumber ndependencies; /* number of dependencies generated */
64  AttrNumber *dependencies; /* array of pre-generated dependencies */
66 
68 
70  int index, AttrNumber start, AttrNumber *current);
75 static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
76 static bool dependency_is_fully_matched(MVDependency *dependency,
77  Bitmapset *attnums);
78 static bool dependency_is_compatible_clause(Node *clause, Index relid,
80 static bool dependency_is_compatible_expression(Node *clause, Index relid,
81  List *statlist, Node **expr);
83  int ndependencies, Bitmapset *attnums);
85  int varRelid, JoinType jointype,
86  SpecialJoinInfo *sjinfo,
87  MVDependency **dependencies,
88  int ndependencies,
89  AttrNumber *list_attnums,
90  Bitmapset **estimatedclauses);
91 
92 static void
94  AttrNumber start, AttrNumber *current)
95 {
96  /*
97  * The generator handles the first (k-1) elements differently from the
98  * last element.
99  */
100  if (index < (state->k - 1))
101  {
102  AttrNumber i;
103 
104  /*
105  * The first (k-1) values have to be in ascending order, which we
106  * generate recursively.
107  */
108 
109  for (i = start; i < state->n; i++)
110  {
111  current[index] = i;
112  generate_dependencies_recurse(state, (index + 1), (i + 1), current);
113  }
114  }
115  else
116  {
117  int i;
118 
119  /*
120  * the last element is the implied value, which does not respect the
121  * ascending order. We just need to check that the value is not in the
122  * first (k-1) elements.
123  */
124 
125  for (i = 0; i < state->n; i++)
126  {
127  int j;
128  bool match = false;
129 
130  current[index] = i;
131 
132  for (j = 0; j < index; j++)
133  {
134  if (current[j] == i)
135  {
136  match = true;
137  break;
138  }
139  }
140 
141  /*
142  * If the value is not found in the first part of the dependency,
143  * we're done.
144  */
145  if (!match)
146  {
147  state->dependencies = (AttrNumber *) repalloc(state->dependencies,
148  state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
149  memcpy(&state->dependencies[(state->k * state->ndependencies)],
150  current, state->k * sizeof(AttrNumber));
151  state->ndependencies++;
152  }
153  }
154  }
155 }
156 
157 /* generate all dependencies (k-permutations of n elements) */
158 static void
160 {
161  AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
162 
163  generate_dependencies_recurse(state, 0, 0, current);
164 
165  pfree(current);
166 }
167 
168 /*
169  * initialize the DependencyGenerator of variations, and prebuild the variations
170  *
171  * This pre-builds all the variations. We could also generate them in
172  * DependencyGenerator_next(), but this seems simpler.
173  */
174 static DependencyGenerator
176 {
178 
179  Assert((n >= k) && (k > 0));
180 
181  /* allocate the DependencyGenerator state */
183  state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
184 
185  state->ndependencies = 0;
186  state->current = 0;
187  state->k = k;
188  state->n = n;
189 
190  /* now actually pre-generate all the variations */
192 
193  return state;
194 }
195 
196 /* free the DependencyGenerator state */
197 static void
199 {
200  pfree(state->dependencies);
201  pfree(state);
202 }
203 
204 /* generate next combination */
205 static AttrNumber *
207 {
208  if (state->current == state->ndependencies)
209  return NULL;
210 
211  return &state->dependencies[state->k * state->current++];
212 }
213 
214 
215 /*
216  * validates functional dependency on the data
217  *
218  * An actual work horse of detecting functional dependencies. Given a variation
219  * of k attributes, it checks that the first (k-1) are sufficient to determine
220  * the last one.
221  */
222 static double
224 {
225  int i,
226  nitems;
227  MultiSortSupport mss;
228  SortItem *items;
229  AttrNumber *attnums_dep;
230 
231  /* counters valid within a group */
232  int group_size = 0;
233  int n_violations = 0;
234 
235  /* total number of rows supporting (consistent with) the dependency */
236  int n_supporting_rows = 0;
237 
238  /* Make sure we have at least two input attributes. */
239  Assert(k >= 2);
240 
241  /* sort info for all attributes columns */
242  mss = multi_sort_init(k);
243 
244  /*
245  * Translate the array of indexes to regular attnums for the dependency
246  * (we will need this to identify the columns in StatsBuildData).
247  */
248  attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
249  for (i = 0; i < k; i++)
250  attnums_dep[i] = data->attnums[dependency[i]];
251 
252  /*
253  * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
254  *
255  * (a) sort the data lexicographically
256  *
257  * (b) split the data into groups by first (k-1) columns
258  *
259  * (c) for each group count different values in the last column
260  *
261  * We use the column data types' default sort operators and collations;
262  * perhaps at some point it'd be worth using column-specific collations?
263  */
264 
265  /* prepare the sort function for the dimensions */
266  for (i = 0; i < k; i++)
267  {
268  VacAttrStats *colstat = data->stats[dependency[i]];
270 
272  if (type->lt_opr == InvalidOid) /* shouldn't happen */
273  elog(ERROR, "cache lookup failed for ordering operator for type %u",
274  colstat->attrtypid);
275 
276  /* prepare the sort function for this dimension */
277  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
278  }
279 
280  /*
281  * build an array of SortItem(s) sorted using the multi-sort support
282  *
283  * XXX This relies on all stats entries pointing to the same tuple
284  * descriptor. For now that assumption holds, but it might change in the
285  * future for example if we support statistics on multiple tables.
286  */
287  items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
288 
289  /*
290  * Walk through the sorted array, split it into rows according to the
291  * first (k-1) columns. If there's a single value in the last column, we
292  * count the group as 'supporting' the functional dependency. Otherwise we
293  * count it as contradicting.
294  */
295 
296  /* start with the first row forming a group */
297  group_size = 1;
298 
299  /* loop 1 beyond the end of the array so that we count the final group */
300  for (i = 1; i <= nitems; i++)
301  {
302  /*
303  * Check if the group ended, which may be either because we processed
304  * all the items (i==nitems), or because the i-th item is not equal to
305  * the preceding one.
306  */
307  if (i == nitems ||
308  multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
309  {
310  /*
311  * If no violations were found in the group then track the rows of
312  * the group as supporting the functional dependency.
313  */
314  if (n_violations == 0)
315  n_supporting_rows += group_size;
316 
317  /* Reset counters for the new group */
318  n_violations = 0;
319  group_size = 1;
320  continue;
321  }
322  /* first columns match, but the last one does not (so contradicting) */
323  else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
324  n_violations++;
325 
326  group_size++;
327  }
328 
329  /* Compute the 'degree of validity' as (supporting/total). */
330  return (n_supporting_rows * 1.0 / data->numrows);
331 }
332 
333 /*
334  * detects functional dependencies between groups of columns
335  *
336  * Generates all possible subsets of columns (variations) and computes
337  * the degree of validity for each one. For example when creating statistics
338  * on three columns (a,b,c) there are 9 possible dependencies
339  *
340  * two columns three columns
341  * ----------- -------------
342  * (a) -> b (a,b) -> c
343  * (a) -> c (a,c) -> b
344  * (b) -> a (b,c) -> a
345  * (b) -> c
346  * (c) -> a
347  * (c) -> b
348  */
351 {
352  int i,
353  k;
354 
355  /* result */
356  MVDependencies *dependencies = NULL;
357  MemoryContext cxt;
358 
359  Assert(data->nattnums >= 2);
360 
361  /* tracks memory allocated by dependency_degree calls */
363  "dependency_degree cxt",
365 
366  /*
367  * We'll try build functional dependencies starting from the smallest ones
368  * covering just 2 columns, to the largest ones, covering all columns
369  * included in the statistics object. We start from the smallest ones
370  * because we want to be able to skip already implied ones.
371  */
372  for (k = 2; k <= data->nattnums; k++)
373  {
374  AttrNumber *dependency; /* array with k elements */
375 
376  /* prepare a DependencyGenerator of variation */
378 
379  /* generate all possible variations of k values (out of n) */
380  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
381  {
382  double degree;
383  MVDependency *d;
384  MemoryContext oldcxt;
385 
386  /* release memory used by dependency degree calculation */
387  oldcxt = MemoryContextSwitchTo(cxt);
388 
389  /* compute how valid the dependency seems */
390  degree = dependency_degree(data, k, dependency);
391 
392  MemoryContextSwitchTo(oldcxt);
393  MemoryContextReset(cxt);
394 
395  /*
396  * if the dependency seems entirely invalid, don't store it
397  */
398  if (degree == 0.0)
399  continue;
400 
401  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
402  + k * sizeof(AttrNumber));
403 
404  /* copy the dependency (and keep the indexes into stxkeys) */
405  d->degree = degree;
406  d->nattributes = k;
407  for (i = 0; i < k; i++)
408  d->attributes[i] = data->attnums[dependency[i]];
409 
410  /* initialize the list of dependencies */
411  if (dependencies == NULL)
412  {
413  dependencies
414  = (MVDependencies *) palloc0(sizeof(MVDependencies));
415 
416  dependencies->magic = STATS_DEPS_MAGIC;
417  dependencies->type = STATS_DEPS_TYPE_BASIC;
418  dependencies->ndeps = 0;
419  }
420 
421  dependencies->ndeps++;
422  dependencies = (MVDependencies *) repalloc(dependencies,
423  offsetof(MVDependencies, deps)
424  + dependencies->ndeps * sizeof(MVDependency *));
425 
426  dependencies->deps[dependencies->ndeps - 1] = d;
427  }
428 
429  /*
430  * we're done with variations of k elements, so free the
431  * DependencyGenerator
432  */
434  }
435 
436  MemoryContextDelete(cxt);
437 
438  return dependencies;
439 }
440 
441 
442 /*
443  * Serialize list of dependencies into a bytea value.
444  */
445 bytea *
447 {
448  int i;
449  bytea *output;
450  char *tmp;
451  Size len;
452 
453  /* we need to store ndeps, with a number of attributes for each one */
455 
456  /* and also include space for the actual attribute numbers and degrees */
457  for (i = 0; i < dependencies->ndeps; i++)
458  len += SizeOfItem(dependencies->deps[i]->nattributes);
459 
460  output = (bytea *) palloc0(len);
462 
463  tmp = VARDATA(output);
464 
465  /* Store the base struct values (magic, type, ndeps) */
466  memcpy(tmp, &dependencies->magic, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->type, sizeof(uint32));
469  tmp += sizeof(uint32);
470  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471  tmp += sizeof(uint32);
472 
473  /* store number of attributes and attribute numbers for each dependency */
474  for (i = 0; i < dependencies->ndeps; i++)
475  {
476  MVDependency *d = dependencies->deps[i];
477 
478  memcpy(tmp, &d->degree, sizeof(double));
479  tmp += sizeof(double);
480 
481  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482  tmp += sizeof(AttrNumber);
483 
484  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485  tmp += sizeof(AttrNumber) * d->nattributes;
486 
487  /* protect against overflow */
488  Assert(tmp <= ((char *) output + len));
489  }
490 
491  /* make sure we've produced exactly the right amount of data */
492  Assert(tmp == ((char *) output + len));
493 
494  return output;
495 }
496 
497 /*
498  * Reads serialized dependencies into MVDependencies structure.
499  */
502 {
503  int i;
504  Size min_expected_size;
505  MVDependencies *dependencies;
506  char *tmp;
507 
508  if (data == NULL)
509  return NULL;
510 
512  elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
514 
515  /* read the MVDependencies header */
516  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
517 
518  /* initialize pointer to the data part (skip the varlena header) */
519  tmp = VARDATA_ANY(data);
520 
521  /* read the header fields and perform basic sanity checks */
522  memcpy(&dependencies->magic, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->type, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527  tmp += sizeof(uint32);
528 
529  if (dependencies->magic != STATS_DEPS_MAGIC)
530  elog(ERROR, "invalid dependency magic %d (expected %d)",
531  dependencies->magic, STATS_DEPS_MAGIC);
532 
533  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
534  elog(ERROR, "invalid dependency type %d (expected %d)",
535  dependencies->type, STATS_DEPS_TYPE_BASIC);
536 
537  if (dependencies->ndeps == 0)
538  elog(ERROR, "invalid zero-length item array in MVDependencies");
539 
540  /* what minimum bytea size do we expect for those parameters */
541  min_expected_size = SizeOfItem(dependencies->ndeps);
542 
543  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
544  elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
545  VARSIZE_ANY_EXHDR(data), min_expected_size);
546 
547  /* allocate space for the MCV items */
548  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
549  + (dependencies->ndeps * sizeof(MVDependency *)));
550 
551  for (i = 0; i < dependencies->ndeps; i++)
552  {
553  double degree;
554  AttrNumber k;
555  MVDependency *d;
556 
557  /* degree of validity */
558  memcpy(&degree, tmp, sizeof(double));
559  tmp += sizeof(double);
560 
561  /* number of attributes */
562  memcpy(&k, tmp, sizeof(AttrNumber));
563  tmp += sizeof(AttrNumber);
564 
565  /* is the number of attributes valid? */
566  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
567 
568  /* now that we know the number of attributes, allocate the dependency */
569  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
570  + (k * sizeof(AttrNumber)));
571 
572  d->degree = degree;
573  d->nattributes = k;
574 
575  /* copy attribute numbers */
576  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577  tmp += sizeof(AttrNumber) * d->nattributes;
578 
579  dependencies->deps[i] = d;
580 
581  /* still within the bytea */
582  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
583  }
584 
585  /* we should have consumed the whole bytea exactly */
586  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
587 
588  return dependencies;
589 }
590 
591 /*
592  * dependency_is_fully_matched
593  * checks that a functional dependency is fully matched given clauses on
594  * attributes (assuming the clauses are suitable equality clauses)
595  */
596 static bool
598 {
599  int j;
600 
601  /*
602  * Check that the dependency actually is fully covered by clauses. We have
603  * to translate all attribute numbers, as those are referenced
604  */
605  for (j = 0; j < dependency->nattributes; j++)
606  {
607  int attnum = dependency->attributes[j];
608 
609  if (!bms_is_member(attnum, attnums))
610  return false;
611  }
612 
613  return true;
614 }
615 
616 /*
617  * statext_dependencies_load
618  * Load the functional dependencies for the indicated pg_statistic_ext tuple
619  */
622 {
623  MVDependencies *result;
624  bool isnull;
625  Datum deps;
626  HeapTuple htup;
627 
629  ObjectIdGetDatum(mvoid),
630  BoolGetDatum(inh));
631  if (!HeapTupleIsValid(htup))
632  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
633 
634  deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
635  Anum_pg_statistic_ext_data_stxddependencies, &isnull);
636  if (isnull)
637  elog(ERROR,
638  "requested statistics kind \"%c\" is not yet built for statistics object %u",
639  STATS_EXT_DEPENDENCIES, mvoid);
640 
642 
643  ReleaseSysCache(htup);
644 
645  return result;
646 }
647 
648 /*
649  * pg_dependencies_in - input routine for type pg_dependencies.
650  *
651  * pg_dependencies is real enough to be a table column, but it has no operations
652  * of its own, and disallows input too
653  */
654 Datum
656 {
657  /*
658  * pg_node_list stores the data in binary form and parsing text input is
659  * not needed, so disallow this.
660  */
661  ereport(ERROR,
662  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
663  errmsg("cannot accept a value of type %s", "pg_dependencies")));
664 
665  PG_RETURN_VOID(); /* keep compiler quiet */
666 }
667 
668 /*
669  * pg_dependencies - output routine for type pg_dependencies.
670  */
671 Datum
673 {
676  int i,
677  j;
679 
681  appendStringInfoChar(&str, '{');
682 
683  for (i = 0; i < dependencies->ndeps; i++)
684  {
685  MVDependency *dependency = dependencies->deps[i];
686 
687  if (i > 0)
688  appendStringInfoString(&str, ", ");
689 
690  appendStringInfoChar(&str, '"');
691  for (j = 0; j < dependency->nattributes; j++)
692  {
693  if (j == dependency->nattributes - 1)
694  appendStringInfoString(&str, " => ");
695  else if (j > 0)
696  appendStringInfoString(&str, ", ");
697 
698  appendStringInfo(&str, "%d", dependency->attributes[j]);
699  }
700  appendStringInfo(&str, "\": %f", dependency->degree);
701  }
702 
703  appendStringInfoChar(&str, '}');
704 
705  PG_RETURN_CSTRING(str.data);
706 }
707 
708 /*
709  * pg_dependencies_recv - binary input routine for type pg_dependencies.
710  */
711 Datum
713 {
714  ereport(ERROR,
715  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
716  errmsg("cannot accept a value of type %s", "pg_dependencies")));
717 
718  PG_RETURN_VOID(); /* keep compiler quiet */
719 }
720 
721 /*
722  * pg_dependencies_send - binary output routine for type pg_dependencies.
723  *
724  * Functional dependencies are serialized in a bytea value (although the type
725  * is named differently), so let's just send that.
726  */
727 Datum
729 {
730  return byteasend(fcinfo);
731 }
732 
733 /*
734  * dependency_is_compatible_clause
735  * Determines if the clause is compatible with functional dependencies
736  *
737  * Only clauses that have the form of equality to a pseudoconstant, or can be
738  * interpreted that way, are currently accepted. Furthermore the variable
739  * part of the clause must be a simple Var belonging to the specified
740  * relation, whose attribute number we return in *attnum on success.
741  */
742 static bool
744 {
745  Var *var;
746  Node *clause_expr;
747 
748  if (IsA(clause, RestrictInfo))
749  {
750  RestrictInfo *rinfo = (RestrictInfo *) clause;
751 
752  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
753  if (rinfo->pseudoconstant)
754  return false;
755 
756  /* Clauses referencing multiple, or no, varnos are incompatible */
757  if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
758  return false;
759 
760  clause = (Node *) rinfo->clause;
761  }
762 
763  if (is_opclause(clause))
764  {
765  /* If it's an opclause, check for Var = Const or Const = Var. */
766  OpExpr *expr = (OpExpr *) clause;
767 
768  /* Only expressions with two arguments are candidates. */
769  if (list_length(expr->args) != 2)
770  return false;
771 
772  /* Make sure non-selected argument is a pseudoconstant. */
774  clause_expr = linitial(expr->args);
775  else if (is_pseudo_constant_clause(linitial(expr->args)))
776  clause_expr = lsecond(expr->args);
777  else
778  return false;
779 
780  /*
781  * If it's not an "=" operator, just ignore the clause, as it's not
782  * compatible with functional dependencies.
783  *
784  * This uses the function for estimating selectivity, not the operator
785  * directly (a bit awkward, but well ...).
786  *
787  * XXX this is pretty dubious; probably it'd be better to check btree
788  * or hash opclass membership, so as not to be fooled by custom
789  * selectivity functions, and to be more consistent with decisions
790  * elsewhere in the planner.
791  */
792  if (get_oprrest(expr->opno) != F_EQSEL)
793  return false;
794 
795  /* OK to proceed with checking "var" */
796  }
797  else if (IsA(clause, ScalarArrayOpExpr))
798  {
799  /* If it's an scalar array operator, check for Var IN Const. */
800  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
801 
802  /*
803  * Reject ALL() variant, we only care about ANY/IN.
804  *
805  * XXX Maybe we should check if all the values are the same, and allow
806  * ALL in that case? Doesn't seem very practical, though.
807  */
808  if (!expr->useOr)
809  return false;
810 
811  /* Only expressions with two arguments are candidates. */
812  if (list_length(expr->args) != 2)
813  return false;
814 
815  /*
816  * We know it's always (Var IN Const), so we assume the var is the
817  * first argument, and pseudoconstant is the second one.
818  */
820  return false;
821 
822  clause_expr = linitial(expr->args);
823 
824  /*
825  * If it's not an "=" operator, just ignore the clause, as it's not
826  * compatible with functional dependencies. The operator is identified
827  * simply by looking at which function it uses to estimate
828  * selectivity. That's a bit strange, but it's what other similar
829  * places do.
830  */
831  if (get_oprrest(expr->opno) != F_EQSEL)
832  return false;
833 
834  /* OK to proceed with checking "var" */
835  }
836  else if (is_orclause(clause))
837  {
838  BoolExpr *bool_expr = (BoolExpr *) clause;
839  ListCell *lc;
840 
841  /* start with no attribute number */
843 
844  foreach(lc, bool_expr->args)
845  {
846  AttrNumber clause_attnum;
847 
848  /*
849  * Had we found incompatible clause in the arguments, treat the
850  * whole clause as incompatible.
851  */
853  relid, &clause_attnum))
854  return false;
855 
856  if (*attnum == InvalidAttrNumber)
857  *attnum = clause_attnum;
858 
859  /* ensure all the variables are the same (same attnum) */
860  if (*attnum != clause_attnum)
861  return false;
862  }
863 
864  /* the Var is already checked by the recursive call */
865  return true;
866  }
867  else if (is_notclause(clause))
868  {
869  /*
870  * "NOT x" can be interpreted as "x = false", so get the argument and
871  * proceed with seeing if it's a suitable Var.
872  */
873  clause_expr = (Node *) get_notclausearg(clause);
874  }
875  else
876  {
877  /*
878  * A boolean expression "x" can be interpreted as "x = true", so
879  * proceed with seeing if it's a suitable Var.
880  */
881  clause_expr = (Node *) clause;
882  }
883 
884  /*
885  * We may ignore any RelabelType node above the operand. (There won't be
886  * more than one, since eval_const_expressions has been applied already.)
887  */
888  if (IsA(clause_expr, RelabelType))
889  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
890 
891  /* We only support plain Vars for now */
892  if (!IsA(clause_expr, Var))
893  return false;
894 
895  /* OK, we know we have a Var */
896  var = (Var *) clause_expr;
897 
898  /* Ensure Var is from the correct relation */
899  if (var->varno != relid)
900  return false;
901 
902  /* We also better ensure the Var is from the current level */
903  if (var->varlevelsup != 0)
904  return false;
905 
906  /* Also ignore system attributes (we don't allow stats on those) */
908  return false;
909 
910  *attnum = var->varattno;
911  return true;
912 }
913 
914 /*
915  * find_strongest_dependency
916  * find the strongest dependency on the attributes
917  *
918  * When applying functional dependencies, we start with the strongest
919  * dependencies. That is, we select the dependency that:
920  *
921  * (a) has all attributes covered by equality clauses
922  *
923  * (b) has the most attributes
924  *
925  * (c) has the highest degree of validity
926  *
927  * This guarantees that we eliminate the most redundant conditions first
928  * (see the comment in dependencies_clauselist_selectivity).
929  */
930 static MVDependency *
931 find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
932  Bitmapset *attnums)
933 {
934  int i,
935  j;
936  MVDependency *strongest = NULL;
937 
938  /* number of attnums in clauses */
939  int nattnums = bms_num_members(attnums);
940 
941  /*
942  * Iterate over the MVDependency items and find the strongest one from the
943  * fully-matched dependencies. We do the cheap checks first, before
944  * matching it against the attnums.
945  */
946  for (i = 0; i < ndependencies; i++)
947  {
948  for (j = 0; j < dependencies[i]->ndeps; j++)
949  {
950  MVDependency *dependency = dependencies[i]->deps[j];
951 
952  /*
953  * Skip dependencies referencing more attributes than available
954  * clauses, as those can't be fully matched.
955  */
956  if (dependency->nattributes > nattnums)
957  continue;
958 
959  if (strongest)
960  {
961  /* skip dependencies on fewer attributes than the strongest. */
962  if (dependency->nattributes < strongest->nattributes)
963  continue;
964 
965  /* also skip weaker dependencies when attribute count matches */
966  if (strongest->nattributes == dependency->nattributes &&
967  strongest->degree > dependency->degree)
968  continue;
969  }
970 
971  /*
972  * this dependency is stronger, but we must still check that it's
973  * fully matched to these attnums. We perform this check last as
974  * it's slightly more expensive than the previous checks.
975  */
976  if (dependency_is_fully_matched(dependency, attnums))
977  strongest = dependency; /* save new best match */
978  }
979  }
980 
981  return strongest;
982 }
983 
984 /*
985  * clauselist_apply_dependencies
986  * Apply the specified functional dependencies to a list of clauses and
987  * return the estimated selectivity of the clauses that are compatible
988  * with any of the given dependencies.
989  *
990  * This will estimate all not-already-estimated clauses that are compatible
991  * with functional dependencies, and which have an attribute mentioned by any
992  * of the given dependencies (either as an implying or implied attribute).
993  *
994  * Given (lists of) clauses on attributes (a,b) and a functional dependency
995  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
996  * using the formula
997  *
998  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
999  *
1000  * where 'f' is the degree of dependency. This reflects the fact that we
1001  * expect a fraction f of all rows to be consistent with the dependency
1002  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1003  * treated as independent.
1004  *
1005  * In practice, we use a slightly modified version of this formula, which uses
1006  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1007  * should obviously not exceed either column's individual selectivity. I.e.,
1008  * we actually combine selectivities using the formula
1009  *
1010  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1011  *
1012  * This can make quite a difference if the specific values matching the
1013  * clauses are not consistent with the functional dependency.
1014  */
1015 static Selectivity
1017  int varRelid, JoinType jointype,
1018  SpecialJoinInfo *sjinfo,
1019  MVDependency **dependencies, int ndependencies,
1020  AttrNumber *list_attnums,
1021  Bitmapset **estimatedclauses)
1022 {
1023  Bitmapset *attnums;
1024  int i;
1025  int j;
1026  int nattrs;
1027  Selectivity *attr_sel;
1028  int attidx;
1029  int listidx;
1030  ListCell *l;
1031  Selectivity s1;
1032 
1033  /*
1034  * Extract the attnums of all implying and implied attributes from all the
1035  * given dependencies. Each of these attributes is expected to have at
1036  * least 1 not-already-estimated compatible clause that we will estimate
1037  * here.
1038  */
1039  attnums = NULL;
1040  for (i = 0; i < ndependencies; i++)
1041  {
1042  for (j = 0; j < dependencies[i]->nattributes; j++)
1043  {
1044  AttrNumber attnum = dependencies[i]->attributes[j];
1045 
1046  attnums = bms_add_member(attnums, attnum);
1047  }
1048  }
1049 
1050  /*
1051  * Compute per-column selectivity estimates for each of these attributes,
1052  * and mark all the corresponding clauses as estimated.
1053  */
1054  nattrs = bms_num_members(attnums);
1055  attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1056 
1057  attidx = 0;
1058  i = -1;
1059  while ((i = bms_next_member(attnums, i)) >= 0)
1060  {
1061  List *attr_clauses = NIL;
1062  Selectivity simple_sel;
1063 
1064  listidx = -1;
1065  foreach(l, clauses)
1066  {
1067  Node *clause = (Node *) lfirst(l);
1068 
1069  listidx++;
1070  if (list_attnums[listidx] == i)
1071  {
1072  attr_clauses = lappend(attr_clauses, clause);
1073  *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1074  }
1075  }
1076 
1077  simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1078  jointype, sjinfo, false);
1079  attr_sel[attidx++] = simple_sel;
1080  }
1081 
1082  /*
1083  * Now combine these selectivities using the dependency information. For
1084  * chains of dependencies such as a -> b -> c, the b -> c dependency will
1085  * come before the a -> b dependency in the array, so we traverse the
1086  * array backwards to ensure such chains are computed in the right order.
1087  *
1088  * As explained above, pairs of selectivities are combined using the
1089  * formula
1090  *
1091  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1092  *
1093  * to ensure that the combined selectivity is never greater than either
1094  * individual selectivity.
1095  *
1096  * Where multiple dependencies apply (e.g., a -> b -> c), we use
1097  * conditional probabilities to compute the overall result as follows:
1098  *
1099  * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1100  *
1101  * so we replace the selectivities of all implied attributes with
1102  * conditional probabilities, that are conditional on all their implying
1103  * attributes. The selectivities of all other non-implied attributes are
1104  * left as they are.
1105  */
1106  for (i = ndependencies - 1; i >= 0; i--)
1107  {
1108  MVDependency *dependency = dependencies[i];
1110  Selectivity s2;
1111  double f;
1112 
1113  /* Selectivity of all the implying attributes */
1114  s1 = 1.0;
1115  for (j = 0; j < dependency->nattributes - 1; j++)
1116  {
1117  attnum = dependency->attributes[j];
1118  attidx = bms_member_index(attnums, attnum);
1119  s1 *= attr_sel[attidx];
1120  }
1121 
1122  /* Original selectivity of the implied attribute */
1123  attnum = dependency->attributes[j];
1124  attidx = bms_member_index(attnums, attnum);
1125  s2 = attr_sel[attidx];
1126 
1127  /*
1128  * Replace s2 with the conditional probability s2 given s1, computed
1129  * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1130  *
1131  * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1132  *
1133  * where P(a) = s1, the selectivity of the implying attributes, and
1134  * P(b) = s2, the selectivity of the implied attribute.
1135  */
1136  f = dependency->degree;
1137 
1138  if (s1 <= s2)
1139  attr_sel[attidx] = f + (1 - f) * s2;
1140  else
1141  attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1142  }
1143 
1144  /*
1145  * The overall selectivity of all the clauses on all these attributes is
1146  * then the product of all the original (non-implied) probabilities and
1147  * the new conditional (implied) probabilities.
1148  */
1149  s1 = 1.0;
1150  for (i = 0; i < nattrs; i++)
1151  s1 *= attr_sel[i];
1152 
1154 
1155  pfree(attr_sel);
1156  bms_free(attnums);
1157 
1158  return s1;
1159 }
1160 
1161 /*
1162  * dependency_is_compatible_expression
1163  * Determines if the expression is compatible with functional dependencies
1164  *
1165  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1166  * expression is a simple Var. OTOH we check that there's at least one
1167  * statistics object matching the expression.
1168  */
1169 static bool
1170 dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1171 {
1172  List *vars;
1173  ListCell *lc,
1174  *lc2;
1175  Node *clause_expr;
1176 
1177  if (IsA(clause, RestrictInfo))
1178  {
1179  RestrictInfo *rinfo = (RestrictInfo *) clause;
1180 
1181  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1182  if (rinfo->pseudoconstant)
1183  return false;
1184 
1185  /* Clauses referencing multiple, or no, varnos are incompatible */
1186  if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1187  return false;
1188 
1189  clause = (Node *) rinfo->clause;
1190  }
1191 
1192  if (is_opclause(clause))
1193  {
1194  /* If it's an opclause, check for Var = Const or Const = Var. */
1195  OpExpr *expr = (OpExpr *) clause;
1196 
1197  /* Only expressions with two arguments are candidates. */
1198  if (list_length(expr->args) != 2)
1199  return false;
1200 
1201  /* Make sure non-selected argument is a pseudoconstant. */
1203  clause_expr = linitial(expr->args);
1204  else if (is_pseudo_constant_clause(linitial(expr->args)))
1205  clause_expr = lsecond(expr->args);
1206  else
1207  return false;
1208 
1209  /*
1210  * If it's not an "=" operator, just ignore the clause, as it's not
1211  * compatible with functional dependencies.
1212  *
1213  * This uses the function for estimating selectivity, not the operator
1214  * directly (a bit awkward, but well ...).
1215  *
1216  * XXX this is pretty dubious; probably it'd be better to check btree
1217  * or hash opclass membership, so as not to be fooled by custom
1218  * selectivity functions, and to be more consistent with decisions
1219  * elsewhere in the planner.
1220  */
1221  if (get_oprrest(expr->opno) != F_EQSEL)
1222  return false;
1223 
1224  /* OK to proceed with checking "var" */
1225  }
1226  else if (IsA(clause, ScalarArrayOpExpr))
1227  {
1228  /* If it's an scalar array operator, check for Var IN Const. */
1229  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1230 
1231  /*
1232  * Reject ALL() variant, we only care about ANY/IN.
1233  *
1234  * FIXME Maybe we should check if all the values are the same, and
1235  * allow ALL in that case? Doesn't seem very practical, though.
1236  */
1237  if (!expr->useOr)
1238  return false;
1239 
1240  /* Only expressions with two arguments are candidates. */
1241  if (list_length(expr->args) != 2)
1242  return false;
1243 
1244  /*
1245  * We know it's always (Var IN Const), so we assume the var is the
1246  * first argument, and pseudoconstant is the second one.
1247  */
1248  if (!is_pseudo_constant_clause(lsecond(expr->args)))
1249  return false;
1250 
1251  clause_expr = linitial(expr->args);
1252 
1253  /*
1254  * If it's not an "=" operator, just ignore the clause, as it's not
1255  * compatible with functional dependencies. The operator is identified
1256  * simply by looking at which function it uses to estimate
1257  * selectivity. That's a bit strange, but it's what other similar
1258  * places do.
1259  */
1260  if (get_oprrest(expr->opno) != F_EQSEL)
1261  return false;
1262 
1263  /* OK to proceed with checking "var" */
1264  }
1265  else if (is_orclause(clause))
1266  {
1267  BoolExpr *bool_expr = (BoolExpr *) clause;
1268 
1269  /* start with no expression (we'll use the first match) */
1270  *expr = NULL;
1271 
1272  foreach(lc, bool_expr->args)
1273  {
1274  Node *or_expr = NULL;
1275 
1276  /*
1277  * Had we found incompatible expression in the arguments, treat
1278  * the whole expression as incompatible.
1279  */
1280  if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1281  statlist, &or_expr))
1282  return false;
1283 
1284  if (*expr == NULL)
1285  *expr = or_expr;
1286 
1287  /* ensure all the expressions are the same */
1288  if (!equal(or_expr, *expr))
1289  return false;
1290  }
1291 
1292  /* the expression is already checked by the recursive call */
1293  return true;
1294  }
1295  else if (is_notclause(clause))
1296  {
1297  /*
1298  * "NOT x" can be interpreted as "x = false", so get the argument and
1299  * proceed with seeing if it's a suitable Var.
1300  */
1301  clause_expr = (Node *) get_notclausearg(clause);
1302  }
1303  else
1304  {
1305  /*
1306  * A boolean expression "x" can be interpreted as "x = true", so
1307  * proceed with seeing if it's a suitable Var.
1308  */
1309  clause_expr = (Node *) clause;
1310  }
1311 
1312  /*
1313  * We may ignore any RelabelType node above the operand. (There won't be
1314  * more than one, since eval_const_expressions has been applied already.)
1315  */
1316  if (IsA(clause_expr, RelabelType))
1317  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1318 
1319  vars = pull_var_clause(clause_expr, 0);
1320 
1321  foreach(lc, vars)
1322  {
1323  Var *var = (Var *) lfirst(lc);
1324 
1325  /* Ensure Var is from the correct relation */
1326  if (var->varno != relid)
1327  return false;
1328 
1329  /* We also better ensure the Var is from the current level */
1330  if (var->varlevelsup != 0)
1331  return false;
1332 
1333  /* Also ignore system attributes (we don't allow stats on those) */
1335  return false;
1336  }
1337 
1338  /*
1339  * Check if we actually have a matching statistics for the expression.
1340  *
1341  * XXX Maybe this is an overkill. We'll eliminate the expressions later.
1342  */
1343  foreach(lc, statlist)
1344  {
1345  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1346 
1347  /* ignore stats without dependencies */
1348  if (info->kind != STATS_EXT_DEPENDENCIES)
1349  continue;
1350 
1351  foreach(lc2, info->exprs)
1352  {
1353  Node *stat_expr = (Node *) lfirst(lc2);
1354 
1355  if (equal(clause_expr, stat_expr))
1356  {
1357  *expr = stat_expr;
1358  return true;
1359  }
1360  }
1361  }
1362 
1363  return false;
1364 }
1365 
1366 /*
1367  * dependencies_clauselist_selectivity
1368  * Return the estimated selectivity of (a subset of) the given clauses
1369  * using functional dependency statistics, or 1.0 if no useful functional
1370  * dependency statistic exists.
1371  *
1372  * 'estimatedclauses' is an input/output argument that gets a bit set
1373  * corresponding to the (zero-based) list index of each clause that is included
1374  * in the estimated selectivity.
1375  *
1376  * Given equality clauses on attributes (a,b) we find the strongest dependency
1377  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1378  * dependency, we then combine the per-clause selectivities using the formula
1379  *
1380  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1381  *
1382  * where 'f' is the degree of the dependency. (Actually we use a slightly
1383  * modified version of this formula -- see clauselist_apply_dependencies()).
1384  *
1385  * With clauses on more than two attributes, the dependencies are applied
1386  * recursively, starting with the widest/strongest dependencies. For example
1387  * P(a,b,c) is first split like this:
1388  *
1389  * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1390  *
1391  * assuming (a,b=>c) is the strongest dependency.
1392  */
1395  List *clauses,
1396  int varRelid,
1397  JoinType jointype,
1398  SpecialJoinInfo *sjinfo,
1399  RelOptInfo *rel,
1400  Bitmapset **estimatedclauses)
1401 {
1402  Selectivity s1 = 1.0;
1403  ListCell *l;
1404  Bitmapset *clauses_attnums = NULL;
1405  AttrNumber *list_attnums;
1406  int listidx;
1407  MVDependencies **func_dependencies;
1408  int nfunc_dependencies;
1409  int total_ndeps;
1410  MVDependency **dependencies;
1411  int ndependencies;
1412  int i;
1413  AttrNumber attnum_offset;
1414  RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1415 
1416  /* unique expressions */
1417  Node **unique_exprs;
1418  int unique_exprs_cnt;
1419 
1420  /* check if there's any stats that might be useful for us. */
1421  if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1422  return 1.0;
1423 
1424  list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1425  list_length(clauses));
1426 
1427  /*
1428  * We allocate space as if every clause was a unique expression, although
1429  * that's probably overkill. Some will be simple column references that
1430  * we'll translate to attnums, and there might be duplicates. But it's
1431  * easier and cheaper to just do one allocation than repalloc later.
1432  */
1433  unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1434  unique_exprs_cnt = 0;
1435 
1436  /*
1437  * Pre-process the clauses list to extract the attnums seen in each item.
1438  * We need to determine if there's any clauses which will be useful for
1439  * dependency selectivity estimations. Along the way we'll record all of
1440  * the attnums for each clause in a list which we'll reference later so we
1441  * don't need to repeat the same work again. We'll also keep track of all
1442  * attnums seen.
1443  *
1444  * We also skip clauses that we already estimated using different types of
1445  * statistics (we treat them as incompatible).
1446  *
1447  * To handle expressions, we assign them negative attnums, as if it was a
1448  * system attribute (this is fine, as we only allow extended stats on user
1449  * attributes). And then we offset everything by the number of
1450  * expressions, so that we can store the values in a bitmapset.
1451  */
1452  listidx = 0;
1453  foreach(l, clauses)
1454  {
1455  Node *clause = (Node *) lfirst(l);
1457  Node *expr = NULL;
1458 
1459  /* ignore clause by default */
1460  list_attnums[listidx] = InvalidAttrNumber;
1461 
1462  if (!bms_is_member(listidx, *estimatedclauses))
1463  {
1464  /*
1465  * If it's a simple column reference, just extract the attnum. If
1466  * it's an expression, assign a negative attnum as if it was a
1467  * system attribute.
1468  */
1469  if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1470  {
1471  list_attnums[listidx] = attnum;
1472  }
1473  else if (dependency_is_compatible_expression(clause, rel->relid,
1474  rel->statlist,
1475  &expr))
1476  {
1477  /* special attnum assigned to this expression */
1479 
1480  Assert(expr != NULL);
1481 
1482  /* If the expression is duplicate, use the same attnum. */
1483  for (i = 0; i < unique_exprs_cnt; i++)
1484  {
1485  if (equal(unique_exprs[i], expr))
1486  {
1487  /* negative attribute number to expression */
1488  attnum = -(i + 1);
1489  break;
1490  }
1491  }
1492 
1493  /* not found in the list, so add it */
1494  if (attnum == InvalidAttrNumber)
1495  {
1496  unique_exprs[unique_exprs_cnt++] = expr;
1497 
1498  /* after incrementing the value, to get -1, -2, ... */
1499  attnum = (-unique_exprs_cnt);
1500  }
1501 
1502  /* remember which attnum was assigned to this clause */
1503  list_attnums[listidx] = attnum;
1504  }
1505  }
1506 
1507  listidx++;
1508  }
1509 
1510  Assert(listidx == list_length(clauses));
1511 
1512  /*
1513  * How much we need to offset the attnums? If there are no expressions,
1514  * then no offset is needed. Otherwise we need to offset enough for the
1515  * lowest value (-unique_exprs_cnt) to become 1.
1516  */
1517  if (unique_exprs_cnt > 0)
1518  attnum_offset = (unique_exprs_cnt + 1);
1519  else
1520  attnum_offset = 0;
1521 
1522  /*
1523  * Now that we know how many expressions there are, we can offset the
1524  * values just enough to build the bitmapset.
1525  */
1526  for (i = 0; i < list_length(clauses); i++)
1527  {
1529 
1530  /* ignore incompatible or already estimated clauses */
1531  if (list_attnums[i] == InvalidAttrNumber)
1532  continue;
1533 
1534  /* make sure the attnum is in the expected range */
1535  Assert(list_attnums[i] >= (-unique_exprs_cnt));
1536  Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1537 
1538  /* make sure the attnum is positive (valid AttrNumber) */
1539  attnum = list_attnums[i] + attnum_offset;
1540 
1541  /*
1542  * Either it's a regular attribute, or it's an expression, in which
1543  * case we must not have seen it before (expressions are unique).
1544  *
1545  * XXX Check whether it's a regular attribute has to be done using the
1546  * original attnum, while the second check has to use the value with
1547  * an offset.
1548  */
1549  Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1550  !bms_is_member(attnum, clauses_attnums));
1551 
1552  /*
1553  * Remember the offset attnum, both for attributes and expressions.
1554  * We'll pass list_attnums to clauselist_apply_dependencies, which
1555  * uses it to identify clauses in a bitmap. We could also pass the
1556  * offset, but this is more convenient.
1557  */
1558  list_attnums[i] = attnum;
1559 
1560  clauses_attnums = bms_add_member(clauses_attnums, attnum);
1561  }
1562 
1563  /*
1564  * If there's not at least two distinct attnums and expressions, then
1565  * reject the whole list of clauses. We must return 1.0 so the calling
1566  * function's selectivity is unaffected.
1567  */
1568  if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1569  {
1570  bms_free(clauses_attnums);
1571  pfree(list_attnums);
1572  return 1.0;
1573  }
1574 
1575  /*
1576  * Load all functional dependencies matching at least two parameters. We
1577  * can simply consider all dependencies at once, without having to search
1578  * for the best statistics object.
1579  *
1580  * To not waste cycles and memory, we deserialize dependencies only for
1581  * statistics that match at least two attributes. The array is allocated
1582  * with the assumption that all objects match - we could grow the array to
1583  * make it just the right size, but it's likely wasteful anyway thanks to
1584  * moving the freed chunks to freelists etc.
1585  */
1586  func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1587  list_length(rel->statlist));
1588  nfunc_dependencies = 0;
1589  total_ndeps = 0;
1590 
1591  foreach(l, rel->statlist)
1592  {
1594  int nmatched;
1595  int nexprs;
1596  int k;
1597  MVDependencies *deps;
1598 
1599  /* skip statistics that are not of the correct type */
1600  if (stat->kind != STATS_EXT_DEPENDENCIES)
1601  continue;
1602 
1603  /* skip statistics with mismatching stxdinherit value */
1604  if (stat->inherit != rte->inh)
1605  continue;
1606 
1607  /*
1608  * Count matching attributes - we have to undo the attnum offsets. The
1609  * input attribute numbers are not offset (expressions are not
1610  * included in stat->keys, so it's not necessary). But we need to
1611  * offset it before checking against clauses_attnums.
1612  */
1613  nmatched = 0;
1614  k = -1;
1615  while ((k = bms_next_member(stat->keys, k)) >= 0)
1616  {
1618 
1619  /* skip expressions */
1621  continue;
1622 
1623  /* apply the same offset as above */
1624  attnum += attnum_offset;
1625 
1626  if (bms_is_member(attnum, clauses_attnums))
1627  nmatched++;
1628  }
1629 
1630  /* count matching expressions */
1631  nexprs = 0;
1632  for (i = 0; i < unique_exprs_cnt; i++)
1633  {
1634  ListCell *lc;
1635 
1636  foreach(lc, stat->exprs)
1637  {
1638  Node *stat_expr = (Node *) lfirst(lc);
1639 
1640  /* try to match it */
1641  if (equal(stat_expr, unique_exprs[i]))
1642  nexprs++;
1643  }
1644  }
1645 
1646  /*
1647  * Skip objects matching fewer than two attributes/expressions from
1648  * clauses.
1649  */
1650  if (nmatched + nexprs < 2)
1651  continue;
1652 
1653  deps = statext_dependencies_load(stat->statOid, rte->inh);
1654 
1655  /*
1656  * The expressions may be represented by different attnums in the
1657  * stats, we need to remap them to be consistent with the clauses.
1658  * That will make the later steps (e.g. picking the strongest item and
1659  * so on) much simpler and cheaper, because it won't need to care
1660  * about the offset at all.
1661  *
1662  * When we're at it, we can ignore dependencies that are not fully
1663  * matched by clauses (i.e. referencing attributes or expressions that
1664  * are not in the clauses).
1665  *
1666  * We have to do this for all statistics, as long as there are any
1667  * expressions - we need to shift the attnums in all dependencies.
1668  *
1669  * XXX Maybe we should do this always, because it also eliminates some
1670  * of the dependencies early. It might be cheaper than having to walk
1671  * the longer list in find_strongest_dependency later, especially as
1672  * we need to do that repeatedly?
1673  *
1674  * XXX We have to do this even when there are no expressions in
1675  * clauses, otherwise find_strongest_dependency may fail for stats
1676  * with expressions (due to lookup of negative value in bitmap). So we
1677  * need to at least filter out those dependencies. Maybe we could do
1678  * it in a cheaper way (if there are no expr clauses, we can just
1679  * discard all negative attnums without any lookups).
1680  */
1681  if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1682  {
1683  int ndeps = 0;
1684 
1685  for (i = 0; i < deps->ndeps; i++)
1686  {
1687  bool skip = false;
1688  MVDependency *dep = deps->deps[i];
1689  int j;
1690 
1691  for (j = 0; j < dep->nattributes; j++)
1692  {
1693  int idx;
1694  Node *expr;
1695  AttrNumber unique_attnum = InvalidAttrNumber;
1697 
1698  /* undo the per-statistics offset */
1699  attnum = dep->attributes[j];
1700 
1701  /*
1702  * For regular attributes we can simply check if it
1703  * matches any clause. If there's no matching clause, we
1704  * can just ignore it. We need to offset the attnum
1705  * though.
1706  */
1708  {
1709  dep->attributes[j] = attnum + attnum_offset;
1710 
1711  if (!bms_is_member(dep->attributes[j], clauses_attnums))
1712  {
1713  skip = true;
1714  break;
1715  }
1716 
1717  continue;
1718  }
1719 
1720  /*
1721  * the attnum should be a valid system attnum (-1, -2,
1722  * ...)
1723  */
1725 
1726  /*
1727  * For expressions, we need to do two translations. First
1728  * we have to translate the negative attnum to index in
1729  * the list of expressions (in the statistics object).
1730  * Then we need to see if there's a matching clause. The
1731  * index of the unique expression determines the attnum
1732  * (and we offset it).
1733  */
1734  idx = -(1 + attnum);
1735 
1736  /* Is the expression index is valid? */
1737  Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1738 
1739  expr = (Node *) list_nth(stat->exprs, idx);
1740 
1741  /* try to find the expression in the unique list */
1742  for (int m = 0; m < unique_exprs_cnt; m++)
1743  {
1744  /*
1745  * found a matching unique expression, use the attnum
1746  * (derived from index of the unique expression)
1747  */
1748  if (equal(unique_exprs[m], expr))
1749  {
1750  unique_attnum = -(m + 1) + attnum_offset;
1751  break;
1752  }
1753  }
1754 
1755  /*
1756  * Found no matching expression, so we can simply skip
1757  * this dependency, because there's no chance it will be
1758  * fully covered.
1759  */
1760  if (unique_attnum == InvalidAttrNumber)
1761  {
1762  skip = true;
1763  break;
1764  }
1765 
1766  /* otherwise remap it to the new attnum */
1767  dep->attributes[j] = unique_attnum;
1768  }
1769 
1770  /* if found a matching dependency, keep it */
1771  if (!skip)
1772  {
1773  /* maybe we've skipped something earlier, so move it */
1774  if (ndeps != i)
1775  deps->deps[ndeps] = deps->deps[i];
1776 
1777  ndeps++;
1778  }
1779  }
1780 
1781  deps->ndeps = ndeps;
1782  }
1783 
1784  /*
1785  * It's possible we've removed all dependencies, in which case we
1786  * don't bother adding it to the list.
1787  */
1788  if (deps->ndeps > 0)
1789  {
1790  func_dependencies[nfunc_dependencies] = deps;
1791  total_ndeps += deps->ndeps;
1792  nfunc_dependencies++;
1793  }
1794  }
1795 
1796  /* if no matching stats could be found then we've nothing to do */
1797  if (nfunc_dependencies == 0)
1798  {
1799  pfree(func_dependencies);
1800  bms_free(clauses_attnums);
1801  pfree(list_attnums);
1802  pfree(unique_exprs);
1803  return 1.0;
1804  }
1805 
1806  /*
1807  * Work out which dependencies we can apply, starting with the
1808  * widest/strongest ones, and proceeding to smaller/weaker ones.
1809  */
1810  dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1811  total_ndeps);
1812  ndependencies = 0;
1813 
1814  while (true)
1815  {
1816  MVDependency *dependency;
1818 
1819  /* the widest/strongest dependency, fully matched by clauses */
1820  dependency = find_strongest_dependency(func_dependencies,
1821  nfunc_dependencies,
1822  clauses_attnums);
1823  if (!dependency)
1824  break;
1825 
1826  dependencies[ndependencies++] = dependency;
1827 
1828  /* Ignore dependencies using this implied attribute in later loops */
1829  attnum = dependency->attributes[dependency->nattributes - 1];
1830  clauses_attnums = bms_del_member(clauses_attnums, attnum);
1831  }
1832 
1833  /*
1834  * If we found applicable dependencies, use them to estimate all
1835  * compatible clauses on attributes that they refer to.
1836  */
1837  if (ndependencies != 0)
1838  s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1839  sjinfo, dependencies, ndependencies,
1840  list_attnums, estimatedclauses);
1841 
1842  /* free deserialized functional dependencies (and then the array) */
1843  for (i = 0; i < nfunc_dependencies; i++)
1844  pfree(func_dependencies[i]);
1845 
1846  pfree(dependencies);
1847  pfree(func_dependencies);
1848  bms_free(clauses_attnums);
1849  pfree(list_attnums);
1850  pfree(unique_exprs);
1851 
1852  return s1;
1853 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
#define InvalidAttrNumber
Definition: attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1047
void bms_free(Bitmapset *a)
Definition: bitmapset.c:209
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:649
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:428
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:739
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:776
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:675
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:454
@ BMS_SINGLETON
Definition: bitmapset.h:74
@ BMS_MULTIPLE
Definition: bitmapset.h:75
unsigned int uint32
Definition: c.h:442
#define VARHDRSZ
Definition: c.h:628
unsigned int Index
Definition: c.h:550
size_t Size
Definition: c.h:541
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:1968
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:119
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:501
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:159
Datum pg_dependencies_in(PG_FUNCTION_ARGS)
Definition: dependencies.c:655
#define SizeOfHeader
Definition: dependencies.c:40
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:206
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:446
Datum pg_dependencies_out(PG_FUNCTION_ARGS)
Definition: dependencies.c:672
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:743
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:597
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:198
struct DependencyGeneratorData DependencyGeneratorData
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
Definition: dependencies.c:621
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
Datum pg_dependencies_send(PG_FUNCTION_ARGS)
Definition: dependencies.c:728
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:175
#define SizeOfItem(natts)
Definition: dependencies.c:43
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
MVDependencies * statext_dependencies_build(StatsBuildData *data)
Definition: dependencies.c:350
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:223
Datum pg_dependencies_recv(PG_FUNCTION_ARGS)
Definition: dependencies.c:712
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:93
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:67
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
Definition: dependencies.c:931
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:225
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define MaxHeapAttributeNumber
Definition: htup_details.h:47
int j
Definition: isn.c:74
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1539
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:303
void pfree(void *pointer)
Definition: mcxt.c:1306
void * palloc0(Size size)
Definition: mcxt.c:1230
MemoryContext CurrentMemoryContext
Definition: mcxt.c:124
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1321
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:376
void * palloc(Size size)
Definition: mcxt.c:1199
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:132
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:114
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:74
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:123
#define IsA(nodeptr, _type_)
Definition: nodes.h:168
double Selectivity
Definition: nodes.h:250
JoinType
Definition: nodes.h:288
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:135
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:520
int16 attnum
Definition: pg_attribute.h:83
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:116
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:170
static int list_length(const List *l)
Definition: pg_list.h:150
#define NIL
Definition: pg_list.h:66
#define linitial(l)
Definition: pg_list.h:176
#define lsecond(l)
Definition: pg_list.h:181
static void * list_nth(const List *list, int n)
Definition: pg_list.h:297
static void output(uint64 loop_count)
uintptr_t Datum
Definition: postgres.h:412
#define VARSIZE_ANY(PTR)
Definition: postgres.h:349
#define VARDATA(PTR)
Definition: postgres.h:316
static Datum BoolGetDatum(bool X)
Definition: postgres.h:450
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:600
#define VARDATA_ANY(PTR)
Definition: postgres.h:362
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:343
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:355
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
char * s1
char * s2
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
List * args
Definition: primnodes.h:767
AttrNumber * dependencies
Definition: dependencies.c:64
Definition: pg_list.h:52
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54
Definition: nodes.h:118
Oid opno
Definition: primnodes.h:648
List * args
Definition: primnodes.h:666
Index relid
Definition: pathnodes.h:868
List * statlist
Definition: pathnodes.h:888
Expr * clause
Definition: pathnodes.h:2432
Oid attrtypid
Definition: vacuum.h:129
Oid attrcollid
Definition: vacuum.h:132
Definition: primnodes.h:205
AttrNumber varattno
Definition: primnodes.h:217
int varno
Definition: primnodes.h:212
Index varlevelsup
Definition: primnodes.h:230
Definition: type.h:95
Definition: regguts.h:318
Definition: c.h:623
Definition: regcomp.c:282
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:1221
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:1434
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:1184
@ STATEXTDATASTXOID
Definition: syscache.h:94
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define TYPECACHE_LT_OPR
Definition: typcache.h:137
List * pull_var_clause(Node *node, int flags)
Definition: var.c:597
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:493