PostgreSQL Source Code  git master
dependencies.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * dependencies.c
4  * POSTGRES functional dependencies
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/statistics/dependencies.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/htup_details.h"
19 #include "lib/stringinfo.h"
20 #include "nodes/nodeFuncs.h"
21 #include "nodes/nodes.h"
22 #include "nodes/pathnodes.h"
23 #include "optimizer/clauses.h"
24 #include "optimizer/optimizer.h"
25 #include "parser/parsetree.h"
27 #include "statistics/statistics.h"
28 #include "utils/fmgroids.h"
29 #include "utils/fmgrprotos.h"
30 #include "utils/lsyscache.h"
31 #include "utils/memutils.h"
32 #include "utils/selfuncs.h"
33 #include "utils/syscache.h"
34 #include "utils/typcache.h"
35 #include "varatt.h"
36 
37 /* size of the struct header fields (magic, type, ndeps) */
38 #define SizeOfHeader (3 * sizeof(uint32))
39 
40 /* size of a serialized dependency (degree, natts, atts) */
41 #define SizeOfItem(natts) \
42  (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
43 
44 /* minimal size of a dependency (with two attributes) */
45 #define MinSizeOfItem SizeOfItem(2)
46 
47 /* minimal size of dependencies, when all deps are minimal */
48 #define MinSizeOfItems(ndeps) \
49  (SizeOfHeader + (ndeps) * MinSizeOfItem)
50 
51 /*
52  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
53  * k-permutations of n elements, except that the order does not matter for the
54  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
55  */
57 {
58  int k; /* size of the dependency */
59  int n; /* number of possible attributes */
60  int current; /* next dependency to return (index) */
61  AttrNumber ndependencies; /* number of dependencies generated */
62  AttrNumber *dependencies; /* array of pre-generated dependencies */
64 
66 
68  int index, AttrNumber start, AttrNumber *current);
73 static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
74 static bool dependency_is_fully_matched(MVDependency *dependency,
75  Bitmapset *attnums);
76 static bool dependency_is_compatible_clause(Node *clause, Index relid,
78 static bool dependency_is_compatible_expression(Node *clause, Index relid,
79  List *statlist, Node **expr);
81  int ndependencies, Bitmapset *attnums);
83  int varRelid, JoinType jointype,
84  SpecialJoinInfo *sjinfo,
85  MVDependency **dependencies,
86  int ndependencies,
87  AttrNumber *list_attnums,
88  Bitmapset **estimatedclauses);
89 
90 static void
92  AttrNumber start, AttrNumber *current)
93 {
94  /*
95  * The generator handles the first (k-1) elements differently from the
96  * last element.
97  */
98  if (index < (state->k - 1))
99  {
100  AttrNumber i;
101 
102  /*
103  * The first (k-1) values have to be in ascending order, which we
104  * generate recursively.
105  */
106 
107  for (i = start; i < state->n; i++)
108  {
109  current[index] = i;
110  generate_dependencies_recurse(state, (index + 1), (i + 1), current);
111  }
112  }
113  else
114  {
115  int i;
116 
117  /*
118  * the last element is the implied value, which does not respect the
119  * ascending order. We just need to check that the value is not in the
120  * first (k-1) elements.
121  */
122 
123  for (i = 0; i < state->n; i++)
124  {
125  int j;
126  bool match = false;
127 
128  current[index] = i;
129 
130  for (j = 0; j < index; j++)
131  {
132  if (current[j] == i)
133  {
134  match = true;
135  break;
136  }
137  }
138 
139  /*
140  * If the value is not found in the first part of the dependency,
141  * we're done.
142  */
143  if (!match)
144  {
145  state->dependencies = (AttrNumber *) repalloc(state->dependencies,
146  state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
147  memcpy(&state->dependencies[(state->k * state->ndependencies)],
148  current, state->k * sizeof(AttrNumber));
149  state->ndependencies++;
150  }
151  }
152  }
153 }
154 
155 /* generate all dependencies (k-permutations of n elements) */
156 static void
158 {
159  AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
160 
161  generate_dependencies_recurse(state, 0, 0, current);
162 
163  pfree(current);
164 }
165 
166 /*
167  * initialize the DependencyGenerator of variations, and prebuild the variations
168  *
169  * This pre-builds all the variations. We could also generate them in
170  * DependencyGenerator_next(), but this seems simpler.
171  */
172 static DependencyGenerator
174 {
176 
177  Assert((n >= k) && (k > 0));
178 
179  /* allocate the DependencyGenerator state */
181  state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
182 
183  state->ndependencies = 0;
184  state->current = 0;
185  state->k = k;
186  state->n = n;
187 
188  /* now actually pre-generate all the variations */
190 
191  return state;
192 }
193 
194 /* free the DependencyGenerator state */
195 static void
197 {
198  pfree(state->dependencies);
199  pfree(state);
200 }
201 
202 /* generate next combination */
203 static AttrNumber *
205 {
206  if (state->current == state->ndependencies)
207  return NULL;
208 
209  return &state->dependencies[state->k * state->current++];
210 }
211 
212 
213 /*
214  * validates functional dependency on the data
215  *
216  * An actual work horse of detecting functional dependencies. Given a variation
217  * of k attributes, it checks that the first (k-1) are sufficient to determine
218  * the last one.
219  */
220 static double
222 {
223  int i,
224  nitems;
225  MultiSortSupport mss;
226  SortItem *items;
227  AttrNumber *attnums_dep;
228 
229  /* counters valid within a group */
230  int group_size = 0;
231  int n_violations = 0;
232 
233  /* total number of rows supporting (consistent with) the dependency */
234  int n_supporting_rows = 0;
235 
236  /* Make sure we have at least two input attributes. */
237  Assert(k >= 2);
238 
239  /* sort info for all attributes columns */
240  mss = multi_sort_init(k);
241 
242  /*
243  * Translate the array of indexes to regular attnums for the dependency
244  * (we will need this to identify the columns in StatsBuildData).
245  */
246  attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
247  for (i = 0; i < k; i++)
248  attnums_dep[i] = data->attnums[dependency[i]];
249 
250  /*
251  * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
252  *
253  * (a) sort the data lexicographically
254  *
255  * (b) split the data into groups by first (k-1) columns
256  *
257  * (c) for each group count different values in the last column
258  *
259  * We use the column data types' default sort operators and collations;
260  * perhaps at some point it'd be worth using column-specific collations?
261  */
262 
263  /* prepare the sort function for the dimensions */
264  for (i = 0; i < k; i++)
265  {
266  VacAttrStats *colstat = data->stats[dependency[i]];
268 
270  if (type->lt_opr == InvalidOid) /* shouldn't happen */
271  elog(ERROR, "cache lookup failed for ordering operator for type %u",
272  colstat->attrtypid);
273 
274  /* prepare the sort function for this dimension */
275  multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
276  }
277 
278  /*
279  * build an array of SortItem(s) sorted using the multi-sort support
280  *
281  * XXX This relies on all stats entries pointing to the same tuple
282  * descriptor. For now that assumption holds, but it might change in the
283  * future for example if we support statistics on multiple tables.
284  */
285  items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
286 
287  /*
288  * Walk through the sorted array, split it into rows according to the
289  * first (k-1) columns. If there's a single value in the last column, we
290  * count the group as 'supporting' the functional dependency. Otherwise we
291  * count it as contradicting.
292  */
293 
294  /* start with the first row forming a group */
295  group_size = 1;
296 
297  /* loop 1 beyond the end of the array so that we count the final group */
298  for (i = 1; i <= nitems; i++)
299  {
300  /*
301  * Check if the group ended, which may be either because we processed
302  * all the items (i==nitems), or because the i-th item is not equal to
303  * the preceding one.
304  */
305  if (i == nitems ||
306  multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
307  {
308  /*
309  * If no violations were found in the group then track the rows of
310  * the group as supporting the functional dependency.
311  */
312  if (n_violations == 0)
313  n_supporting_rows += group_size;
314 
315  /* Reset counters for the new group */
316  n_violations = 0;
317  group_size = 1;
318  continue;
319  }
320  /* first columns match, but the last one does not (so contradicting) */
321  else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
322  n_violations++;
323 
324  group_size++;
325  }
326 
327  /* Compute the 'degree of validity' as (supporting/total). */
328  return (n_supporting_rows * 1.0 / data->numrows);
329 }
330 
331 /*
332  * detects functional dependencies between groups of columns
333  *
334  * Generates all possible subsets of columns (variations) and computes
335  * the degree of validity for each one. For example when creating statistics
336  * on three columns (a,b,c) there are 9 possible dependencies
337  *
338  * two columns three columns
339  * ----------- -------------
340  * (a) -> b (a,b) -> c
341  * (a) -> c (a,c) -> b
342  * (b) -> a (b,c) -> a
343  * (b) -> c
344  * (c) -> a
345  * (c) -> b
346  */
349 {
350  int i,
351  k;
352 
353  /* result */
354  MVDependencies *dependencies = NULL;
355  MemoryContext cxt;
356 
357  Assert(data->nattnums >= 2);
358 
359  /* tracks memory allocated by dependency_degree calls */
361  "dependency_degree cxt",
363 
364  /*
365  * We'll try build functional dependencies starting from the smallest ones
366  * covering just 2 columns, to the largest ones, covering all columns
367  * included in the statistics object. We start from the smallest ones
368  * because we want to be able to skip already implied ones.
369  */
370  for (k = 2; k <= data->nattnums; k++)
371  {
372  AttrNumber *dependency; /* array with k elements */
373 
374  /* prepare a DependencyGenerator of variation */
376 
377  /* generate all possible variations of k values (out of n) */
378  while ((dependency = DependencyGenerator_next(DependencyGenerator)))
379  {
380  double degree;
381  MVDependency *d;
382  MemoryContext oldcxt;
383 
384  /* release memory used by dependency degree calculation */
385  oldcxt = MemoryContextSwitchTo(cxt);
386 
387  /* compute how valid the dependency seems */
388  degree = dependency_degree(data, k, dependency);
389 
390  MemoryContextSwitchTo(oldcxt);
391  MemoryContextReset(cxt);
392 
393  /*
394  * if the dependency seems entirely invalid, don't store it
395  */
396  if (degree == 0.0)
397  continue;
398 
399  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
400  + k * sizeof(AttrNumber));
401 
402  /* copy the dependency (and keep the indexes into stxkeys) */
403  d->degree = degree;
404  d->nattributes = k;
405  for (i = 0; i < k; i++)
406  d->attributes[i] = data->attnums[dependency[i]];
407 
408  /* initialize the list of dependencies */
409  if (dependencies == NULL)
410  {
411  dependencies
412  = (MVDependencies *) palloc0(sizeof(MVDependencies));
413 
414  dependencies->magic = STATS_DEPS_MAGIC;
415  dependencies->type = STATS_DEPS_TYPE_BASIC;
416  dependencies->ndeps = 0;
417  }
418 
419  dependencies->ndeps++;
420  dependencies = (MVDependencies *) repalloc(dependencies,
421  offsetof(MVDependencies, deps)
422  + dependencies->ndeps * sizeof(MVDependency *));
423 
424  dependencies->deps[dependencies->ndeps - 1] = d;
425  }
426 
427  /*
428  * we're done with variations of k elements, so free the
429  * DependencyGenerator
430  */
432  }
433 
434  MemoryContextDelete(cxt);
435 
436  return dependencies;
437 }
438 
439 
440 /*
441  * Serialize list of dependencies into a bytea value.
442  */
443 bytea *
445 {
446  int i;
447  bytea *output;
448  char *tmp;
449  Size len;
450 
451  /* we need to store ndeps, with a number of attributes for each one */
453 
454  /* and also include space for the actual attribute numbers and degrees */
455  for (i = 0; i < dependencies->ndeps; i++)
456  len += SizeOfItem(dependencies->deps[i]->nattributes);
457 
458  output = (bytea *) palloc0(len);
460 
461  tmp = VARDATA(output);
462 
463  /* Store the base struct values (magic, type, ndeps) */
464  memcpy(tmp, &dependencies->magic, sizeof(uint32));
465  tmp += sizeof(uint32);
466  memcpy(tmp, &dependencies->type, sizeof(uint32));
467  tmp += sizeof(uint32);
468  memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
469  tmp += sizeof(uint32);
470 
471  /* store number of attributes and attribute numbers for each dependency */
472  for (i = 0; i < dependencies->ndeps; i++)
473  {
474  MVDependency *d = dependencies->deps[i];
475 
476  memcpy(tmp, &d->degree, sizeof(double));
477  tmp += sizeof(double);
478 
479  memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
480  tmp += sizeof(AttrNumber);
481 
482  memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
483  tmp += sizeof(AttrNumber) * d->nattributes;
484 
485  /* protect against overflow */
486  Assert(tmp <= ((char *) output + len));
487  }
488 
489  /* make sure we've produced exactly the right amount of data */
490  Assert(tmp == ((char *) output + len));
491 
492  return output;
493 }
494 
495 /*
496  * Reads serialized dependencies into MVDependencies structure.
497  */
500 {
501  int i;
502  Size min_expected_size;
503  MVDependencies *dependencies;
504  char *tmp;
505 
506  if (data == NULL)
507  return NULL;
508 
510  elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
512 
513  /* read the MVDependencies header */
514  dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
515 
516  /* initialize pointer to the data part (skip the varlena header) */
517  tmp = VARDATA_ANY(data);
518 
519  /* read the header fields and perform basic sanity checks */
520  memcpy(&dependencies->magic, tmp, sizeof(uint32));
521  tmp += sizeof(uint32);
522  memcpy(&dependencies->type, tmp, sizeof(uint32));
523  tmp += sizeof(uint32);
524  memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
525  tmp += sizeof(uint32);
526 
527  if (dependencies->magic != STATS_DEPS_MAGIC)
528  elog(ERROR, "invalid dependency magic %d (expected %d)",
529  dependencies->magic, STATS_DEPS_MAGIC);
530 
531  if (dependencies->type != STATS_DEPS_TYPE_BASIC)
532  elog(ERROR, "invalid dependency type %d (expected %d)",
533  dependencies->type, STATS_DEPS_TYPE_BASIC);
534 
535  if (dependencies->ndeps == 0)
536  elog(ERROR, "invalid zero-length item array in MVDependencies");
537 
538  /* what minimum bytea size do we expect for those parameters */
539  min_expected_size = SizeOfItem(dependencies->ndeps);
540 
541  if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
542  elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
543  VARSIZE_ANY_EXHDR(data), min_expected_size);
544 
545  /* allocate space for the MCV items */
546  dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
547  + (dependencies->ndeps * sizeof(MVDependency *)));
548 
549  for (i = 0; i < dependencies->ndeps; i++)
550  {
551  double degree;
552  AttrNumber k;
553  MVDependency *d;
554 
555  /* degree of validity */
556  memcpy(&degree, tmp, sizeof(double));
557  tmp += sizeof(double);
558 
559  /* number of attributes */
560  memcpy(&k, tmp, sizeof(AttrNumber));
561  tmp += sizeof(AttrNumber);
562 
563  /* is the number of attributes valid? */
564  Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
565 
566  /* now that we know the number of attributes, allocate the dependency */
567  d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
568  + (k * sizeof(AttrNumber)));
569 
570  d->degree = degree;
571  d->nattributes = k;
572 
573  /* copy attribute numbers */
574  memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
575  tmp += sizeof(AttrNumber) * d->nattributes;
576 
577  dependencies->deps[i] = d;
578 
579  /* still within the bytea */
580  Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
581  }
582 
583  /* we should have consumed the whole bytea exactly */
584  Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
585 
586  return dependencies;
587 }
588 
589 /*
590  * dependency_is_fully_matched
591  * checks that a functional dependency is fully matched given clauses on
592  * attributes (assuming the clauses are suitable equality clauses)
593  */
594 static bool
596 {
597  int j;
598 
599  /*
600  * Check that the dependency actually is fully covered by clauses. We have
601  * to translate all attribute numbers, as those are referenced
602  */
603  for (j = 0; j < dependency->nattributes; j++)
604  {
605  int attnum = dependency->attributes[j];
606 
607  if (!bms_is_member(attnum, attnums))
608  return false;
609  }
610 
611  return true;
612 }
613 
614 /*
615  * statext_dependencies_load
616  * Load the functional dependencies for the indicated pg_statistic_ext tuple
617  */
620 {
621  MVDependencies *result;
622  bool isnull;
623  Datum deps;
624  HeapTuple htup;
625 
626  htup = SearchSysCache2(STATEXTDATASTXOID,
627  ObjectIdGetDatum(mvoid),
628  BoolGetDatum(inh));
629  if (!HeapTupleIsValid(htup))
630  elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
631 
632  deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
633  Anum_pg_statistic_ext_data_stxddependencies, &isnull);
634  if (isnull)
635  elog(ERROR,
636  "requested statistics kind \"%c\" is not yet built for statistics object %u",
637  STATS_EXT_DEPENDENCIES, mvoid);
638 
640 
641  ReleaseSysCache(htup);
642 
643  return result;
644 }
645 
646 /*
647  * pg_dependencies_in - input routine for type pg_dependencies.
648  *
649  * pg_dependencies is real enough to be a table column, but it has no operations
650  * of its own, and disallows input too
651  */
652 Datum
654 {
655  /*
656  * pg_node_list stores the data in binary form and parsing text input is
657  * not needed, so disallow this.
658  */
659  ereport(ERROR,
660  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
661  errmsg("cannot accept a value of type %s", "pg_dependencies")));
662 
663  PG_RETURN_VOID(); /* keep compiler quiet */
664 }
665 
666 /*
667  * pg_dependencies - output routine for type pg_dependencies.
668  */
669 Datum
671 {
674  int i,
675  j;
677 
679  appendStringInfoChar(&str, '{');
680 
681  for (i = 0; i < dependencies->ndeps; i++)
682  {
683  MVDependency *dependency = dependencies->deps[i];
684 
685  if (i > 0)
686  appendStringInfoString(&str, ", ");
687 
688  appendStringInfoChar(&str, '"');
689  for (j = 0; j < dependency->nattributes; j++)
690  {
691  if (j == dependency->nattributes - 1)
692  appendStringInfoString(&str, " => ");
693  else if (j > 0)
694  appendStringInfoString(&str, ", ");
695 
696  appendStringInfo(&str, "%d", dependency->attributes[j]);
697  }
698  appendStringInfo(&str, "\": %f", dependency->degree);
699  }
700 
701  appendStringInfoChar(&str, '}');
702 
703  PG_RETURN_CSTRING(str.data);
704 }
705 
706 /*
707  * pg_dependencies_recv - binary input routine for type pg_dependencies.
708  */
709 Datum
711 {
712  ereport(ERROR,
713  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
714  errmsg("cannot accept a value of type %s", "pg_dependencies")));
715 
716  PG_RETURN_VOID(); /* keep compiler quiet */
717 }
718 
719 /*
720  * pg_dependencies_send - binary output routine for type pg_dependencies.
721  *
722  * Functional dependencies are serialized in a bytea value (although the type
723  * is named differently), so let's just send that.
724  */
725 Datum
727 {
728  return byteasend(fcinfo);
729 }
730 
731 /*
732  * dependency_is_compatible_clause
733  * Determines if the clause is compatible with functional dependencies
734  *
735  * Only clauses that have the form of equality to a pseudoconstant, or can be
736  * interpreted that way, are currently accepted. Furthermore the variable
737  * part of the clause must be a simple Var belonging to the specified
738  * relation, whose attribute number we return in *attnum on success.
739  */
740 static bool
742 {
743  Var *var;
744  Node *clause_expr;
745 
746  if (IsA(clause, RestrictInfo))
747  {
748  RestrictInfo *rinfo = (RestrictInfo *) clause;
749 
750  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
751  if (rinfo->pseudoconstant)
752  return false;
753 
754  /* Clauses referencing multiple, or no, varnos are incompatible */
755  if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
756  return false;
757 
758  clause = (Node *) rinfo->clause;
759  }
760 
761  if (is_opclause(clause))
762  {
763  /* If it's an opclause, check for Var = Const or Const = Var. */
764  OpExpr *expr = (OpExpr *) clause;
765 
766  /* Only expressions with two arguments are candidates. */
767  if (list_length(expr->args) != 2)
768  return false;
769 
770  /* Make sure non-selected argument is a pseudoconstant. */
772  clause_expr = linitial(expr->args);
773  else if (is_pseudo_constant_clause(linitial(expr->args)))
774  clause_expr = lsecond(expr->args);
775  else
776  return false;
777 
778  /*
779  * If it's not an "=" operator, just ignore the clause, as it's not
780  * compatible with functional dependencies.
781  *
782  * This uses the function for estimating selectivity, not the operator
783  * directly (a bit awkward, but well ...).
784  *
785  * XXX this is pretty dubious; probably it'd be better to check btree
786  * or hash opclass membership, so as not to be fooled by custom
787  * selectivity functions, and to be more consistent with decisions
788  * elsewhere in the planner.
789  */
790  if (get_oprrest(expr->opno) != F_EQSEL)
791  return false;
792 
793  /* OK to proceed with checking "var" */
794  }
795  else if (IsA(clause, ScalarArrayOpExpr))
796  {
797  /* If it's a scalar array operator, check for Var IN Const. */
798  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
799 
800  /*
801  * Reject ALL() variant, we only care about ANY/IN.
802  *
803  * XXX Maybe we should check if all the values are the same, and allow
804  * ALL in that case? Doesn't seem very practical, though.
805  */
806  if (!expr->useOr)
807  return false;
808 
809  /* Only expressions with two arguments are candidates. */
810  if (list_length(expr->args) != 2)
811  return false;
812 
813  /*
814  * We know it's always (Var IN Const), so we assume the var is the
815  * first argument, and pseudoconstant is the second one.
816  */
818  return false;
819 
820  clause_expr = linitial(expr->args);
821 
822  /*
823  * If it's not an "=" operator, just ignore the clause, as it's not
824  * compatible with functional dependencies. The operator is identified
825  * simply by looking at which function it uses to estimate
826  * selectivity. That's a bit strange, but it's what other similar
827  * places do.
828  */
829  if (get_oprrest(expr->opno) != F_EQSEL)
830  return false;
831 
832  /* OK to proceed with checking "var" */
833  }
834  else if (is_orclause(clause))
835  {
836  BoolExpr *bool_expr = (BoolExpr *) clause;
837  ListCell *lc;
838 
839  /* start with no attribute number */
841 
842  foreach(lc, bool_expr->args)
843  {
844  AttrNumber clause_attnum;
845 
846  /*
847  * Had we found incompatible clause in the arguments, treat the
848  * whole clause as incompatible.
849  */
851  relid, &clause_attnum))
852  return false;
853 
854  if (*attnum == InvalidAttrNumber)
855  *attnum = clause_attnum;
856 
857  /* ensure all the variables are the same (same attnum) */
858  if (*attnum != clause_attnum)
859  return false;
860  }
861 
862  /* the Var is already checked by the recursive call */
863  return true;
864  }
865  else if (is_notclause(clause))
866  {
867  /*
868  * "NOT x" can be interpreted as "x = false", so get the argument and
869  * proceed with seeing if it's a suitable Var.
870  */
871  clause_expr = (Node *) get_notclausearg(clause);
872  }
873  else
874  {
875  /*
876  * A boolean expression "x" can be interpreted as "x = true", so
877  * proceed with seeing if it's a suitable Var.
878  */
879  clause_expr = (Node *) clause;
880  }
881 
882  /*
883  * We may ignore any RelabelType node above the operand. (There won't be
884  * more than one, since eval_const_expressions has been applied already.)
885  */
886  if (IsA(clause_expr, RelabelType))
887  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
888 
889  /* We only support plain Vars for now */
890  if (!IsA(clause_expr, Var))
891  return false;
892 
893  /* OK, we know we have a Var */
894  var = (Var *) clause_expr;
895 
896  /* Ensure Var is from the correct relation */
897  if (var->varno != relid)
898  return false;
899 
900  /* We also better ensure the Var is from the current level */
901  if (var->varlevelsup != 0)
902  return false;
903 
904  /* Also ignore system attributes (we don't allow stats on those) */
906  return false;
907 
908  *attnum = var->varattno;
909  return true;
910 }
911 
912 /*
913  * find_strongest_dependency
914  * find the strongest dependency on the attributes
915  *
916  * When applying functional dependencies, we start with the strongest
917  * dependencies. That is, we select the dependency that:
918  *
919  * (a) has all attributes covered by equality clauses
920  *
921  * (b) has the most attributes
922  *
923  * (c) has the highest degree of validity
924  *
925  * This guarantees that we eliminate the most redundant conditions first
926  * (see the comment in dependencies_clauselist_selectivity).
927  */
928 static MVDependency *
929 find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
930  Bitmapset *attnums)
931 {
932  int i,
933  j;
934  MVDependency *strongest = NULL;
935 
936  /* number of attnums in clauses */
937  int nattnums = bms_num_members(attnums);
938 
939  /*
940  * Iterate over the MVDependency items and find the strongest one from the
941  * fully-matched dependencies. We do the cheap checks first, before
942  * matching it against the attnums.
943  */
944  for (i = 0; i < ndependencies; i++)
945  {
946  for (j = 0; j < dependencies[i]->ndeps; j++)
947  {
948  MVDependency *dependency = dependencies[i]->deps[j];
949 
950  /*
951  * Skip dependencies referencing more attributes than available
952  * clauses, as those can't be fully matched.
953  */
954  if (dependency->nattributes > nattnums)
955  continue;
956 
957  if (strongest)
958  {
959  /* skip dependencies on fewer attributes than the strongest. */
960  if (dependency->nattributes < strongest->nattributes)
961  continue;
962 
963  /* also skip weaker dependencies when attribute count matches */
964  if (strongest->nattributes == dependency->nattributes &&
965  strongest->degree > dependency->degree)
966  continue;
967  }
968 
969  /*
970  * this dependency is stronger, but we must still check that it's
971  * fully matched to these attnums. We perform this check last as
972  * it's slightly more expensive than the previous checks.
973  */
974  if (dependency_is_fully_matched(dependency, attnums))
975  strongest = dependency; /* save new best match */
976  }
977  }
978 
979  return strongest;
980 }
981 
982 /*
983  * clauselist_apply_dependencies
984  * Apply the specified functional dependencies to a list of clauses and
985  * return the estimated selectivity of the clauses that are compatible
986  * with any of the given dependencies.
987  *
988  * This will estimate all not-already-estimated clauses that are compatible
989  * with functional dependencies, and which have an attribute mentioned by any
990  * of the given dependencies (either as an implying or implied attribute).
991  *
992  * Given (lists of) clauses on attributes (a,b) and a functional dependency
993  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
994  * using the formula
995  *
996  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
997  *
998  * where 'f' is the degree of dependency. This reflects the fact that we
999  * expect a fraction f of all rows to be consistent with the dependency
1000  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1001  * treated as independent.
1002  *
1003  * In practice, we use a slightly modified version of this formula, which uses
1004  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1005  * should obviously not exceed either column's individual selectivity. I.e.,
1006  * we actually combine selectivities using the formula
1007  *
1008  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1009  *
1010  * This can make quite a difference if the specific values matching the
1011  * clauses are not consistent with the functional dependency.
1012  */
1013 static Selectivity
1015  int varRelid, JoinType jointype,
1016  SpecialJoinInfo *sjinfo,
1017  MVDependency **dependencies, int ndependencies,
1018  AttrNumber *list_attnums,
1019  Bitmapset **estimatedclauses)
1020 {
1021  Bitmapset *attnums;
1022  int i;
1023  int j;
1024  int nattrs;
1025  Selectivity *attr_sel;
1026  int attidx;
1027  int listidx;
1028  ListCell *l;
1029  Selectivity s1;
1030 
1031  /*
1032  * Extract the attnums of all implying and implied attributes from all the
1033  * given dependencies. Each of these attributes is expected to have at
1034  * least 1 not-already-estimated compatible clause that we will estimate
1035  * here.
1036  */
1037  attnums = NULL;
1038  for (i = 0; i < ndependencies; i++)
1039  {
1040  for (j = 0; j < dependencies[i]->nattributes; j++)
1041  {
1042  AttrNumber attnum = dependencies[i]->attributes[j];
1043 
1044  attnums = bms_add_member(attnums, attnum);
1045  }
1046  }
1047 
1048  /*
1049  * Compute per-column selectivity estimates for each of these attributes,
1050  * and mark all the corresponding clauses as estimated.
1051  */
1052  nattrs = bms_num_members(attnums);
1053  attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1054 
1055  attidx = 0;
1056  i = -1;
1057  while ((i = bms_next_member(attnums, i)) >= 0)
1058  {
1059  List *attr_clauses = NIL;
1060  Selectivity simple_sel;
1061 
1062  listidx = -1;
1063  foreach(l, clauses)
1064  {
1065  Node *clause = (Node *) lfirst(l);
1066 
1067  listidx++;
1068  if (list_attnums[listidx] == i)
1069  {
1070  attr_clauses = lappend(attr_clauses, clause);
1071  *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1072  }
1073  }
1074 
1075  simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1076  jointype, sjinfo, false);
1077  attr_sel[attidx++] = simple_sel;
1078  }
1079 
1080  /*
1081  * Now combine these selectivities using the dependency information. For
1082  * chains of dependencies such as a -> b -> c, the b -> c dependency will
1083  * come before the a -> b dependency in the array, so we traverse the
1084  * array backwards to ensure such chains are computed in the right order.
1085  *
1086  * As explained above, pairs of selectivities are combined using the
1087  * formula
1088  *
1089  * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1090  *
1091  * to ensure that the combined selectivity is never greater than either
1092  * individual selectivity.
1093  *
1094  * Where multiple dependencies apply (e.g., a -> b -> c), we use
1095  * conditional probabilities to compute the overall result as follows:
1096  *
1097  * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1098  *
1099  * so we replace the selectivities of all implied attributes with
1100  * conditional probabilities, that are conditional on all their implying
1101  * attributes. The selectivities of all other non-implied attributes are
1102  * left as they are.
1103  */
1104  for (i = ndependencies - 1; i >= 0; i--)
1105  {
1106  MVDependency *dependency = dependencies[i];
1108  Selectivity s2;
1109  double f;
1110 
1111  /* Selectivity of all the implying attributes */
1112  s1 = 1.0;
1113  for (j = 0; j < dependency->nattributes - 1; j++)
1114  {
1115  attnum = dependency->attributes[j];
1116  attidx = bms_member_index(attnums, attnum);
1117  s1 *= attr_sel[attidx];
1118  }
1119 
1120  /* Original selectivity of the implied attribute */
1121  attnum = dependency->attributes[j];
1122  attidx = bms_member_index(attnums, attnum);
1123  s2 = attr_sel[attidx];
1124 
1125  /*
1126  * Replace s2 with the conditional probability s2 given s1, computed
1127  * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1128  *
1129  * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1130  *
1131  * where P(a) = s1, the selectivity of the implying attributes, and
1132  * P(b) = s2, the selectivity of the implied attribute.
1133  */
1134  f = dependency->degree;
1135 
1136  if (s1 <= s2)
1137  attr_sel[attidx] = f + (1 - f) * s2;
1138  else
1139  attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1140  }
1141 
1142  /*
1143  * The overall selectivity of all the clauses on all these attributes is
1144  * then the product of all the original (non-implied) probabilities and
1145  * the new conditional (implied) probabilities.
1146  */
1147  s1 = 1.0;
1148  for (i = 0; i < nattrs; i++)
1149  s1 *= attr_sel[i];
1150 
1152 
1153  pfree(attr_sel);
1154  bms_free(attnums);
1155 
1156  return s1;
1157 }
1158 
1159 /*
1160  * dependency_is_compatible_expression
1161  * Determines if the expression is compatible with functional dependencies
1162  *
1163  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1164  * expression is a simple Var. On success, return the matching statistics
1165  * expression into *expr.
1166  */
1167 static bool
1168 dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1169 {
1170  ListCell *lc,
1171  *lc2;
1172  Node *clause_expr;
1173 
1174  if (IsA(clause, RestrictInfo))
1175  {
1176  RestrictInfo *rinfo = (RestrictInfo *) clause;
1177 
1178  /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1179  if (rinfo->pseudoconstant)
1180  return false;
1181 
1182  /* Clauses referencing multiple, or no, varnos are incompatible */
1183  if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1184  return false;
1185 
1186  clause = (Node *) rinfo->clause;
1187  }
1188 
1189  if (is_opclause(clause))
1190  {
1191  /* If it's an opclause, check for Var = Const or Const = Var. */
1192  OpExpr *expr = (OpExpr *) clause;
1193 
1194  /* Only expressions with two arguments are candidates. */
1195  if (list_length(expr->args) != 2)
1196  return false;
1197 
1198  /* Make sure non-selected argument is a pseudoconstant. */
1200  clause_expr = linitial(expr->args);
1201  else if (is_pseudo_constant_clause(linitial(expr->args)))
1202  clause_expr = lsecond(expr->args);
1203  else
1204  return false;
1205 
1206  /*
1207  * If it's not an "=" operator, just ignore the clause, as it's not
1208  * compatible with functional dependencies.
1209  *
1210  * This uses the function for estimating selectivity, not the operator
1211  * directly (a bit awkward, but well ...).
1212  *
1213  * XXX this is pretty dubious; probably it'd be better to check btree
1214  * or hash opclass membership, so as not to be fooled by custom
1215  * selectivity functions, and to be more consistent with decisions
1216  * elsewhere in the planner.
1217  */
1218  if (get_oprrest(expr->opno) != F_EQSEL)
1219  return false;
1220 
1221  /* OK to proceed with checking "var" */
1222  }
1223  else if (IsA(clause, ScalarArrayOpExpr))
1224  {
1225  /* If it's a scalar array operator, check for Var IN Const. */
1226  ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1227 
1228  /*
1229  * Reject ALL() variant, we only care about ANY/IN.
1230  *
1231  * FIXME Maybe we should check if all the values are the same, and
1232  * allow ALL in that case? Doesn't seem very practical, though.
1233  */
1234  if (!expr->useOr)
1235  return false;
1236 
1237  /* Only expressions with two arguments are candidates. */
1238  if (list_length(expr->args) != 2)
1239  return false;
1240 
1241  /*
1242  * We know it's always (Var IN Const), so we assume the var is the
1243  * first argument, and pseudoconstant is the second one.
1244  */
1245  if (!is_pseudo_constant_clause(lsecond(expr->args)))
1246  return false;
1247 
1248  clause_expr = linitial(expr->args);
1249 
1250  /*
1251  * If it's not an "=" operator, just ignore the clause, as it's not
1252  * compatible with functional dependencies. The operator is identified
1253  * simply by looking at which function it uses to estimate
1254  * selectivity. That's a bit strange, but it's what other similar
1255  * places do.
1256  */
1257  if (get_oprrest(expr->opno) != F_EQSEL)
1258  return false;
1259 
1260  /* OK to proceed with checking "var" */
1261  }
1262  else if (is_orclause(clause))
1263  {
1264  BoolExpr *bool_expr = (BoolExpr *) clause;
1265 
1266  /* start with no expression (we'll use the first match) */
1267  *expr = NULL;
1268 
1269  foreach(lc, bool_expr->args)
1270  {
1271  Node *or_expr = NULL;
1272 
1273  /*
1274  * Had we found incompatible expression in the arguments, treat
1275  * the whole expression as incompatible.
1276  */
1277  if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1278  statlist, &or_expr))
1279  return false;
1280 
1281  if (*expr == NULL)
1282  *expr = or_expr;
1283 
1284  /* ensure all the expressions are the same */
1285  if (!equal(or_expr, *expr))
1286  return false;
1287  }
1288 
1289  /* the expression is already checked by the recursive call */
1290  return true;
1291  }
1292  else if (is_notclause(clause))
1293  {
1294  /*
1295  * "NOT x" can be interpreted as "x = false", so get the argument and
1296  * proceed with seeing if it's a suitable Var.
1297  */
1298  clause_expr = (Node *) get_notclausearg(clause);
1299  }
1300  else
1301  {
1302  /*
1303  * A boolean expression "x" can be interpreted as "x = true", so
1304  * proceed with seeing if it's a suitable Var.
1305  */
1306  clause_expr = (Node *) clause;
1307  }
1308 
1309  /*
1310  * We may ignore any RelabelType node above the operand. (There won't be
1311  * more than one, since eval_const_expressions has been applied already.)
1312  */
1313  if (IsA(clause_expr, RelabelType))
1314  clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1315 
1316  /*
1317  * Search for a matching statistics expression.
1318  */
1319  foreach(lc, statlist)
1320  {
1321  StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1322 
1323  /* ignore stats without dependencies */
1324  if (info->kind != STATS_EXT_DEPENDENCIES)
1325  continue;
1326 
1327  foreach(lc2, info->exprs)
1328  {
1329  Node *stat_expr = (Node *) lfirst(lc2);
1330 
1331  if (equal(clause_expr, stat_expr))
1332  {
1333  *expr = stat_expr;
1334  return true;
1335  }
1336  }
1337  }
1338 
1339  return false;
1340 }
1341 
1342 /*
1343  * dependencies_clauselist_selectivity
1344  * Return the estimated selectivity of (a subset of) the given clauses
1345  * using functional dependency statistics, or 1.0 if no useful functional
1346  * dependency statistic exists.
1347  *
1348  * 'estimatedclauses' is an input/output argument that gets a bit set
1349  * corresponding to the (zero-based) list index of each clause that is included
1350  * in the estimated selectivity.
1351  *
1352  * Given equality clauses on attributes (a,b) we find the strongest dependency
1353  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1354  * dependency, we then combine the per-clause selectivities using the formula
1355  *
1356  * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1357  *
1358  * where 'f' is the degree of the dependency. (Actually we use a slightly
1359  * modified version of this formula -- see clauselist_apply_dependencies()).
1360  *
1361  * With clauses on more than two attributes, the dependencies are applied
1362  * recursively, starting with the widest/strongest dependencies. For example
1363  * P(a,b,c) is first split like this:
1364  *
1365  * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1366  *
1367  * assuming (a,b=>c) is the strongest dependency.
1368  */
1371  List *clauses,
1372  int varRelid,
1373  JoinType jointype,
1374  SpecialJoinInfo *sjinfo,
1375  RelOptInfo *rel,
1376  Bitmapset **estimatedclauses)
1377 {
1378  Selectivity s1 = 1.0;
1379  ListCell *l;
1380  Bitmapset *clauses_attnums = NULL;
1381  AttrNumber *list_attnums;
1382  int listidx;
1383  MVDependencies **func_dependencies;
1384  int nfunc_dependencies;
1385  int total_ndeps;
1386  MVDependency **dependencies;
1387  int ndependencies;
1388  int i;
1389  AttrNumber attnum_offset;
1390  RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1391 
1392  /* unique expressions */
1393  Node **unique_exprs;
1394  int unique_exprs_cnt;
1395 
1396  /* check if there's any stats that might be useful for us. */
1397  if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1398  return 1.0;
1399 
1400  list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
1401  list_length(clauses));
1402 
1403  /*
1404  * We allocate space as if every clause was a unique expression, although
1405  * that's probably overkill. Some will be simple column references that
1406  * we'll translate to attnums, and there might be duplicates. But it's
1407  * easier and cheaper to just do one allocation than repalloc later.
1408  */
1409  unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1410  unique_exprs_cnt = 0;
1411 
1412  /*
1413  * Pre-process the clauses list to extract the attnums seen in each item.
1414  * We need to determine if there's any clauses which will be useful for
1415  * dependency selectivity estimations. Along the way we'll record all of
1416  * the attnums for each clause in a list which we'll reference later so we
1417  * don't need to repeat the same work again. We'll also keep track of all
1418  * attnums seen.
1419  *
1420  * We also skip clauses that we already estimated using different types of
1421  * statistics (we treat them as incompatible).
1422  *
1423  * To handle expressions, we assign them negative attnums, as if it was a
1424  * system attribute (this is fine, as we only allow extended stats on user
1425  * attributes). And then we offset everything by the number of
1426  * expressions, so that we can store the values in a bitmapset.
1427  */
1428  listidx = 0;
1429  foreach(l, clauses)
1430  {
1431  Node *clause = (Node *) lfirst(l);
1433  Node *expr = NULL;
1434 
1435  /* ignore clause by default */
1436  list_attnums[listidx] = InvalidAttrNumber;
1437 
1438  if (!bms_is_member(listidx, *estimatedclauses))
1439  {
1440  /*
1441  * If it's a simple column reference, just extract the attnum. If
1442  * it's an expression, assign a negative attnum as if it was a
1443  * system attribute.
1444  */
1445  if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1446  {
1447  list_attnums[listidx] = attnum;
1448  }
1449  else if (dependency_is_compatible_expression(clause, rel->relid,
1450  rel->statlist,
1451  &expr))
1452  {
1453  /* special attnum assigned to this expression */
1455 
1456  Assert(expr != NULL);
1457 
1458  /* If the expression is duplicate, use the same attnum. */
1459  for (i = 0; i < unique_exprs_cnt; i++)
1460  {
1461  if (equal(unique_exprs[i], expr))
1462  {
1463  /* negative attribute number to expression */
1464  attnum = -(i + 1);
1465  break;
1466  }
1467  }
1468 
1469  /* not found in the list, so add it */
1470  if (attnum == InvalidAttrNumber)
1471  {
1472  unique_exprs[unique_exprs_cnt++] = expr;
1473 
1474  /* after incrementing the value, to get -1, -2, ... */
1475  attnum = (-unique_exprs_cnt);
1476  }
1477 
1478  /* remember which attnum was assigned to this clause */
1479  list_attnums[listidx] = attnum;
1480  }
1481  }
1482 
1483  listidx++;
1484  }
1485 
1486  Assert(listidx == list_length(clauses));
1487 
1488  /*
1489  * How much we need to offset the attnums? If there are no expressions,
1490  * then no offset is needed. Otherwise we need to offset enough for the
1491  * lowest value (-unique_exprs_cnt) to become 1.
1492  */
1493  if (unique_exprs_cnt > 0)
1494  attnum_offset = (unique_exprs_cnt + 1);
1495  else
1496  attnum_offset = 0;
1497 
1498  /*
1499  * Now that we know how many expressions there are, we can offset the
1500  * values just enough to build the bitmapset.
1501  */
1502  for (i = 0; i < list_length(clauses); i++)
1503  {
1505 
1506  /* ignore incompatible or already estimated clauses */
1507  if (list_attnums[i] == InvalidAttrNumber)
1508  continue;
1509 
1510  /* make sure the attnum is in the expected range */
1511  Assert(list_attnums[i] >= (-unique_exprs_cnt));
1512  Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1513 
1514  /* make sure the attnum is positive (valid AttrNumber) */
1515  attnum = list_attnums[i] + attnum_offset;
1516 
1517  /*
1518  * Either it's a regular attribute, or it's an expression, in which
1519  * case we must not have seen it before (expressions are unique).
1520  *
1521  * XXX Check whether it's a regular attribute has to be done using the
1522  * original attnum, while the second check has to use the value with
1523  * an offset.
1524  */
1525  Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1526  !bms_is_member(attnum, clauses_attnums));
1527 
1528  /*
1529  * Remember the offset attnum, both for attributes and expressions.
1530  * We'll pass list_attnums to clauselist_apply_dependencies, which
1531  * uses it to identify clauses in a bitmap. We could also pass the
1532  * offset, but this is more convenient.
1533  */
1534  list_attnums[i] = attnum;
1535 
1536  clauses_attnums = bms_add_member(clauses_attnums, attnum);
1537  }
1538 
1539  /*
1540  * If there's not at least two distinct attnums and expressions, then
1541  * reject the whole list of clauses. We must return 1.0 so the calling
1542  * function's selectivity is unaffected.
1543  */
1544  if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1545  {
1546  bms_free(clauses_attnums);
1547  pfree(list_attnums);
1548  return 1.0;
1549  }
1550 
1551  /*
1552  * Load all functional dependencies matching at least two parameters. We
1553  * can simply consider all dependencies at once, without having to search
1554  * for the best statistics object.
1555  *
1556  * To not waste cycles and memory, we deserialize dependencies only for
1557  * statistics that match at least two attributes. The array is allocated
1558  * with the assumption that all objects match - we could grow the array to
1559  * make it just the right size, but it's likely wasteful anyway thanks to
1560  * moving the freed chunks to freelists etc.
1561  */
1562  func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1563  list_length(rel->statlist));
1564  nfunc_dependencies = 0;
1565  total_ndeps = 0;
1566 
1567  foreach(l, rel->statlist)
1568  {
1570  int nmatched;
1571  int nexprs;
1572  int k;
1573  MVDependencies *deps;
1574 
1575  /* skip statistics that are not of the correct type */
1576  if (stat->kind != STATS_EXT_DEPENDENCIES)
1577  continue;
1578 
1579  /* skip statistics with mismatching stxdinherit value */
1580  if (stat->inherit != rte->inh)
1581  continue;
1582 
1583  /*
1584  * Count matching attributes - we have to undo the attnum offsets. The
1585  * input attribute numbers are not offset (expressions are not
1586  * included in stat->keys, so it's not necessary). But we need to
1587  * offset it before checking against clauses_attnums.
1588  */
1589  nmatched = 0;
1590  k = -1;
1591  while ((k = bms_next_member(stat->keys, k)) >= 0)
1592  {
1594 
1595  /* skip expressions */
1597  continue;
1598 
1599  /* apply the same offset as above */
1600  attnum += attnum_offset;
1601 
1602  if (bms_is_member(attnum, clauses_attnums))
1603  nmatched++;
1604  }
1605 
1606  /* count matching expressions */
1607  nexprs = 0;
1608  for (i = 0; i < unique_exprs_cnt; i++)
1609  {
1610  ListCell *lc;
1611 
1612  foreach(lc, stat->exprs)
1613  {
1614  Node *stat_expr = (Node *) lfirst(lc);
1615 
1616  /* try to match it */
1617  if (equal(stat_expr, unique_exprs[i]))
1618  nexprs++;
1619  }
1620  }
1621 
1622  /*
1623  * Skip objects matching fewer than two attributes/expressions from
1624  * clauses.
1625  */
1626  if (nmatched + nexprs < 2)
1627  continue;
1628 
1629  deps = statext_dependencies_load(stat->statOid, rte->inh);
1630 
1631  /*
1632  * The expressions may be represented by different attnums in the
1633  * stats, we need to remap them to be consistent with the clauses.
1634  * That will make the later steps (e.g. picking the strongest item and
1635  * so on) much simpler and cheaper, because it won't need to care
1636  * about the offset at all.
1637  *
1638  * When we're at it, we can ignore dependencies that are not fully
1639  * matched by clauses (i.e. referencing attributes or expressions that
1640  * are not in the clauses).
1641  *
1642  * We have to do this for all statistics, as long as there are any
1643  * expressions - we need to shift the attnums in all dependencies.
1644  *
1645  * XXX Maybe we should do this always, because it also eliminates some
1646  * of the dependencies early. It might be cheaper than having to walk
1647  * the longer list in find_strongest_dependency later, especially as
1648  * we need to do that repeatedly?
1649  *
1650  * XXX We have to do this even when there are no expressions in
1651  * clauses, otherwise find_strongest_dependency may fail for stats
1652  * with expressions (due to lookup of negative value in bitmap). So we
1653  * need to at least filter out those dependencies. Maybe we could do
1654  * it in a cheaper way (if there are no expr clauses, we can just
1655  * discard all negative attnums without any lookups).
1656  */
1657  if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1658  {
1659  int ndeps = 0;
1660 
1661  for (i = 0; i < deps->ndeps; i++)
1662  {
1663  bool skip = false;
1664  MVDependency *dep = deps->deps[i];
1665  int j;
1666 
1667  for (j = 0; j < dep->nattributes; j++)
1668  {
1669  int idx;
1670  Node *expr;
1671  AttrNumber unique_attnum = InvalidAttrNumber;
1673 
1674  /* undo the per-statistics offset */
1675  attnum = dep->attributes[j];
1676 
1677  /*
1678  * For regular attributes we can simply check if it
1679  * matches any clause. If there's no matching clause, we
1680  * can just ignore it. We need to offset the attnum
1681  * though.
1682  */
1684  {
1685  dep->attributes[j] = attnum + attnum_offset;
1686 
1687  if (!bms_is_member(dep->attributes[j], clauses_attnums))
1688  {
1689  skip = true;
1690  break;
1691  }
1692 
1693  continue;
1694  }
1695 
1696  /*
1697  * the attnum should be a valid system attnum (-1, -2,
1698  * ...)
1699  */
1701 
1702  /*
1703  * For expressions, we need to do two translations. First
1704  * we have to translate the negative attnum to index in
1705  * the list of expressions (in the statistics object).
1706  * Then we need to see if there's a matching clause. The
1707  * index of the unique expression determines the attnum
1708  * (and we offset it).
1709  */
1710  idx = -(1 + attnum);
1711 
1712  /* Is the expression index is valid? */
1713  Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1714 
1715  expr = (Node *) list_nth(stat->exprs, idx);
1716 
1717  /* try to find the expression in the unique list */
1718  for (int m = 0; m < unique_exprs_cnt; m++)
1719  {
1720  /*
1721  * found a matching unique expression, use the attnum
1722  * (derived from index of the unique expression)
1723  */
1724  if (equal(unique_exprs[m], expr))
1725  {
1726  unique_attnum = -(m + 1) + attnum_offset;
1727  break;
1728  }
1729  }
1730 
1731  /*
1732  * Found no matching expression, so we can simply skip
1733  * this dependency, because there's no chance it will be
1734  * fully covered.
1735  */
1736  if (unique_attnum == InvalidAttrNumber)
1737  {
1738  skip = true;
1739  break;
1740  }
1741 
1742  /* otherwise remap it to the new attnum */
1743  dep->attributes[j] = unique_attnum;
1744  }
1745 
1746  /* if found a matching dependency, keep it */
1747  if (!skip)
1748  {
1749  /* maybe we've skipped something earlier, so move it */
1750  if (ndeps != i)
1751  deps->deps[ndeps] = deps->deps[i];
1752 
1753  ndeps++;
1754  }
1755  }
1756 
1757  deps->ndeps = ndeps;
1758  }
1759 
1760  /*
1761  * It's possible we've removed all dependencies, in which case we
1762  * don't bother adding it to the list.
1763  */
1764  if (deps->ndeps > 0)
1765  {
1766  func_dependencies[nfunc_dependencies] = deps;
1767  total_ndeps += deps->ndeps;
1768  nfunc_dependencies++;
1769  }
1770  }
1771 
1772  /* if no matching stats could be found then we've nothing to do */
1773  if (nfunc_dependencies == 0)
1774  {
1775  pfree(func_dependencies);
1776  bms_free(clauses_attnums);
1777  pfree(list_attnums);
1778  pfree(unique_exprs);
1779  return 1.0;
1780  }
1781 
1782  /*
1783  * Work out which dependencies we can apply, starting with the
1784  * widest/strongest ones, and proceeding to smaller/weaker ones.
1785  */
1786  dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1787  total_ndeps);
1788  ndependencies = 0;
1789 
1790  while (true)
1791  {
1792  MVDependency *dependency;
1794 
1795  /* the widest/strongest dependency, fully matched by clauses */
1796  dependency = find_strongest_dependency(func_dependencies,
1797  nfunc_dependencies,
1798  clauses_attnums);
1799  if (!dependency)
1800  break;
1801 
1802  dependencies[ndependencies++] = dependency;
1803 
1804  /* Ignore dependencies using this implied attribute in later loops */
1805  attnum = dependency->attributes[dependency->nattributes - 1];
1806  clauses_attnums = bms_del_member(clauses_attnums, attnum);
1807  }
1808 
1809  /*
1810  * If we found applicable dependencies, use them to estimate all
1811  * compatible clauses on attributes that they refer to.
1812  */
1813  if (ndependencies != 0)
1814  s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1815  sjinfo, dependencies, ndependencies,
1816  list_attnums, estimatedclauses);
1817 
1818  /* free deserialized functional dependencies (and then the array) */
1819  for (i = 0; i < nfunc_dependencies; i++)
1820  pfree(func_dependencies[i]);
1821 
1822  pfree(dependencies);
1823  pfree(func_dependencies);
1824  bms_free(clauses_attnums);
1825  pfree(list_attnums);
1826  pfree(unique_exprs);
1827 
1828  return s1;
1829 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
#define InvalidAttrNumber
Definition: attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1306
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:751
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:815
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:868
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:781
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:539
@ BMS_SINGLETON
Definition: bitmapset.h:72
@ BMS_MULTIPLE
Definition: bitmapset.h:73
unsigned int uint32
Definition: c.h:506
#define VARHDRSZ
Definition: c.h:692
#define Assert(condition)
Definition: c.h:858
unsigned int Index
Definition: c.h:614
size_t Size
Definition: c.h:605
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2088
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:117
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:499
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:157
Datum pg_dependencies_in(PG_FUNCTION_ARGS)
Definition: dependencies.c:653
#define SizeOfHeader
Definition: dependencies.c:38
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:204
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:444
Datum pg_dependencies_out(PG_FUNCTION_ARGS)
Definition: dependencies.c:670
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:741
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:595
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:196
struct DependencyGeneratorData DependencyGeneratorData
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
Definition: dependencies.c:619
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
Datum pg_dependencies_send(PG_FUNCTION_ARGS)
Definition: dependencies.c:726
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:173
#define SizeOfItem(natts)
Definition: dependencies.c:41
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
MVDependencies * statext_dependencies_build(StatsBuildData *data)
Definition: dependencies.c:348
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:221
Datum pg_dependencies_recv(PG_FUNCTION_ARGS)
Definition: dependencies.c:710
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:91
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:65
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
Definition: dependencies.c:929
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_BYTEA_PP(n)
Definition: fmgr.h:308
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
for(;;)
const char * str
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define MaxHeapAttributeNumber
Definition: htup_details.h:48
#define nitems(x)
Definition: indent.h:31
FILE * output
int j
Definition: isn.c:74
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1557
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void * palloc(Size size)
Definition: mcxt.c:1317
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:134
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
JoinType
Definition: nodes.h:288
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:570
int16 attnum
Definition: pg_attribute.h:74
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
uintptr_t Datum
Definition: postgres.h:64
static Datum BoolGetDatum(bool X)
Definition: postgres.h:102
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
char * s1
char * s2
MemoryContextSwitchTo(old_ctx)
tree ctl root
Definition: radixtree.h:1886
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
List * args
Definition: primnodes.h:940
AttrNumber * dependencies
Definition: dependencies.c:62
Definition: pg_list.h:54
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54
Definition: nodes.h:129
Oid opno
Definition: primnodes.h:818
List * args
Definition: primnodes.h:836
Index relid
Definition: pathnodes.h:918
List * statlist
Definition: pathnodes.h:946
Expr * clause
Definition: pathnodes.h:2571
Oid attrtypid
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:129
Definition: primnodes.h:248
AttrNumber varattno
Definition: primnodes.h:260
int varno
Definition: primnodes.h:255
Index varlevelsup
Definition: primnodes.h:280
Definition: type.h:95
Definition: regguts.h:323
Definition: c.h:687
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:266
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:479
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:229
static ItemArray items
Definition: test_tidstore.c:49
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:356
#define TYPECACHE_LT_OPR
Definition: typcache.h:138
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:490
const char * type