PostgreSQL Source Code git master
Loading...
Searching...
No Matches
dependencies.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dependencies.c
4 * POSTGRES functional dependencies
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/statistics/dependencies.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/htup_details.h"
19#include "nodes/nodeFuncs.h"
20#include "optimizer/clauses.h"
21#include "optimizer/optimizer.h"
22#include "parser/parsetree.h"
24#include "utils/fmgroids.h"
25#include "utils/lsyscache.h"
26#include "utils/memutils.h"
27#include "utils/selfuncs.h"
28#include "utils/syscache.h"
29#include "utils/typcache.h"
30
31/* size of the struct header fields (magic, type, ndeps) */
32#define SizeOfHeader (3 * sizeof(uint32))
33
34/* size of a serialized dependency (degree, natts, atts) */
35#define SizeOfItem(natts) \
36 (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
37
38/* minimal size of a dependency (with two attributes) */
39#define MinSizeOfItem SizeOfItem(2)
40
41/* minimal size of dependencies, when all deps are minimal */
42#define MinSizeOfItems(ndeps) \
43 (SizeOfHeader + (ndeps) * MinSizeOfItem)
44
45/*
46 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
47 * k-permutations of n elements, except that the order does not matter for the
48 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
49 */
51{
52 int k; /* size of the dependency */
53 int n; /* number of possible attributes */
54 int current; /* next dependency to return (index) */
55 AttrNumber ndependencies; /* number of dependencies generated */
56 AttrNumber *dependencies; /* array of pre-generated dependencies */
58
60
62 int index, AttrNumber start, AttrNumber *current);
67static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
68static bool dependency_is_fully_matched(MVDependency *dependency,
69 Bitmapset *attnums);
70static bool dependency_is_compatible_clause(Node *clause, Index relid,
72static bool dependency_is_compatible_expression(Node *clause, Index relid,
73 List *statlist, Node **expr);
75 int ndependencies, Bitmapset *attnums);
77 int varRelid, JoinType jointype,
78 SpecialJoinInfo *sjinfo,
79 MVDependency **dependencies,
80 int ndependencies,
83
84static void
86 AttrNumber start, AttrNumber *current)
87{
88 /*
89 * The generator handles the first (k-1) elements differently from the
90 * last element.
91 */
92 if (index < (state->k - 1))
93 {
95
96 /*
97 * The first (k-1) values have to be in ascending order, which we
98 * generate recursively.
99 */
100
101 for (i = start; i < state->n; i++)
102 {
103 current[index] = i;
104 generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 }
106 }
107 else
108 {
109 int i;
110
111 /*
112 * the last element is the implied value, which does not respect the
113 * ascending order. We just need to check that the value is not in the
114 * first (k-1) elements.
115 */
116
117 for (i = 0; i < state->n; i++)
118 {
119 int j;
120 bool match = false;
121
122 current[index] = i;
123
124 for (j = 0; j < index; j++)
125 {
126 if (current[j] == i)
127 {
128 match = true;
129 break;
130 }
131 }
132
133 /*
134 * If the value is not found in the first part of the dependency,
135 * we're done.
136 */
137 if (!match)
138 {
139 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140 state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141 memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 current, state->k * sizeof(AttrNumber));
143 state->ndependencies++;
144 }
145 }
146 }
147}
148
149/* generate all dependencies (k-permutations of n elements) */
150static void
152{
154
155 generate_dependencies_recurse(state, 0, 0, current);
156
157 pfree(current);
158}
159
160/*
161 * initialize the DependencyGenerator of variations, and prebuild the variations
162 *
163 * This pre-builds all the variations. We could also generate them in
164 * DependencyGenerator_next(), but this seems simpler.
165 */
168{
170
171 Assert((n >= k) && (k > 0));
172
173 /* allocate the DependencyGenerator state */
175 state->dependencies = palloc_array(AttrNumber, k);
176
177 state->ndependencies = 0;
178 state->current = 0;
179 state->k = k;
180 state->n = n;
181
182 /* now actually pre-generate all the variations */
184
185 return state;
186}
187
188/* free the DependencyGenerator state */
189static void
195
196/* generate next combination */
197static AttrNumber *
199{
200 if (state->current == state->ndependencies)
201 return NULL;
202
203 return &state->dependencies[state->k * state->current++];
204}
205
206
207/*
208 * validates functional dependency on the data
209 *
210 * An actual work horse of detecting functional dependencies. Given a variation
211 * of k attributes, it checks that the first (k-1) are sufficient to determine
212 * the last one.
213 */
214static double
216{
217 int i,
218 nitems;
222
223 /* counters valid within a group */
224 int group_size = 0;
225 int n_violations = 0;
226
227 /* total number of rows supporting (consistent with) the dependency */
228 int n_supporting_rows = 0;
229
230 /* Make sure we have at least two input attributes. */
231 Assert(k >= 2);
232
233 /* sort info for all attributes columns */
234 mss = multi_sort_init(k);
235
236 /*
237 * Translate the array of indexes to regular attnums for the dependency
238 * (we will need this to identify the columns in StatsBuildData).
239 */
241 for (i = 0; i < k; i++)
242 attnums_dep[i] = data->attnums[dependency[i]];
243
244 /*
245 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 *
247 * (a) sort the data lexicographically
248 *
249 * (b) split the data into groups by first (k-1) columns
250 *
251 * (c) for each group count different values in the last column
252 *
253 * We use the column data types' default sort operators and collations;
254 * perhaps at some point it'd be worth using column-specific collations?
255 */
256
257 /* prepare the sort function for the dimensions */
258 for (i = 0; i < k; i++)
259 {
260 VacAttrStats *colstat = data->stats[dependency[i]];
262
264 if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 colstat->attrtypid);
267
268 /* prepare the sort function for this dimension */
269 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 }
271
272 /*
273 * build an array of SortItem(s) sorted using the multi-sort support
274 *
275 * XXX This relies on all stats entries pointing to the same tuple
276 * descriptor. For now that assumption holds, but it might change in the
277 * future for example if we support statistics on multiple tables.
278 */
280
281 /*
282 * Walk through the sorted array, split it into rows according to the
283 * first (k-1) columns. If there's a single value in the last column, we
284 * count the group as 'supporting' the functional dependency. Otherwise we
285 * count it as contradicting.
286 */
287
288 /* start with the first row forming a group */
289 group_size = 1;
290
291 /* loop 1 beyond the end of the array so that we count the final group */
292 for (i = 1; i <= nitems; i++)
293 {
294 /*
295 * Check if the group ended, which may be either because we processed
296 * all the items (i==nitems), or because the i-th item is not equal to
297 * the preceding one.
298 */
299 if (i == nitems ||
300 multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 {
302 /*
303 * If no violations were found in the group then track the rows of
304 * the group as supporting the functional dependency.
305 */
306 if (n_violations == 0)
308
309 /* Reset counters for the new group */
310 n_violations = 0;
311 group_size = 1;
312 continue;
313 }
314 /* first columns match, but the last one does not (so contradicting) */
315 else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 n_violations++;
317
318 group_size++;
319 }
320
321 /* Compute the 'degree of validity' as (supporting/total). */
322 return (n_supporting_rows * 1.0 / data->numrows);
323}
324
325/*
326 * detects functional dependencies between groups of columns
327 *
328 * Generates all possible subsets of columns (variations) and computes
329 * the degree of validity for each one. For example when creating statistics
330 * on three columns (a,b,c) there are 9 possible dependencies
331 *
332 * two columns three columns
333 * ----------- -------------
334 * (a) -> b (a,b) -> c
335 * (a) -> c (a,c) -> b
336 * (b) -> a (b,c) -> a
337 * (b) -> c
338 * (c) -> a
339 * (c) -> b
340 */
343{
344 int i,
345 k;
346
347 /* result */
348 MVDependencies *dependencies = NULL;
349 MemoryContext cxt;
350
351 Assert(data->nattnums >= 2);
352
353 /* tracks memory allocated by dependency_degree calls */
355 "dependency_degree cxt",
357
358 /*
359 * We'll try build functional dependencies starting from the smallest ones
360 * covering just 2 columns, to the largest ones, covering all columns
361 * included in the statistics object. We start from the smallest ones
362 * because we want to be able to skip already implied ones.
363 */
364 for (k = 2; k <= data->nattnums; k++)
365 {
366 AttrNumber *dependency; /* array with k elements */
367
368 /* prepare a DependencyGenerator of variation */
370
371 /* generate all possible variations of k values (out of n) */
372 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 {
374 double degree;
375 MVDependency *d;
377
378 /* release memory used by dependency degree calculation */
380
381 /* compute how valid the dependency seems */
382 degree = dependency_degree(data, k, dependency);
383
386
387 /*
388 * if the dependency seems entirely invalid, don't store it
389 */
390 if (degree == 0.0)
391 continue;
392
393 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 + k * sizeof(AttrNumber));
395
396 /* copy the dependency (and keep the indexes into stxkeys) */
397 d->degree = degree;
398 d->nattributes = k;
399 for (i = 0; i < k; i++)
400 d->attributes[i] = data->attnums[dependency[i]];
401
402 /* initialize the list of dependencies */
403 if (dependencies == NULL)
404 {
405 dependencies = palloc0_object(MVDependencies);
406
407 dependencies->magic = STATS_DEPS_MAGIC;
408 dependencies->type = STATS_DEPS_TYPE_BASIC;
409 dependencies->ndeps = 0;
410 }
411
412 dependencies->ndeps++;
413 dependencies = (MVDependencies *) repalloc(dependencies,
415 + dependencies->ndeps * sizeof(MVDependency *));
416
417 dependencies->deps[dependencies->ndeps - 1] = d;
418 }
419
420 /*
421 * we're done with variations of k elements, so free the
422 * DependencyGenerator
423 */
425 }
426
428
429 return dependencies;
430}
431
432
433/*
434 * Serialize list of dependencies into a bytea value.
435 */
436bytea *
438{
439 int i;
440 bytea *output;
441 char *tmp;
442 Size len;
443
444 /* we need to store ndeps, with a number of attributes for each one */
446
447 /* and also include space for the actual attribute numbers and degrees */
448 for (i = 0; i < dependencies->ndeps; i++)
449 len += SizeOfItem(dependencies->deps[i]->nattributes);
450
451 output = (bytea *) palloc0(len);
453
454 tmp = VARDATA(output);
455
456 /* Store the base struct values (magic, type, ndeps) */
457 memcpy(tmp, &dependencies->magic, sizeof(uint32));
458 tmp += sizeof(uint32);
459 memcpy(tmp, &dependencies->type, sizeof(uint32));
460 tmp += sizeof(uint32);
461 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
462 tmp += sizeof(uint32);
463
464 /* store number of attributes and attribute numbers for each dependency */
465 for (i = 0; i < dependencies->ndeps; i++)
466 {
467 MVDependency *d = dependencies->deps[i];
468
469 memcpy(tmp, &d->degree, sizeof(double));
470 tmp += sizeof(double);
471
472 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
473 tmp += sizeof(AttrNumber);
474
475 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
476 tmp += sizeof(AttrNumber) * d->nattributes;
477
478 /* protect against overflow */
479 Assert(tmp <= ((char *) output + len));
480 }
481
482 /* make sure we've produced exactly the right amount of data */
483 Assert(tmp == ((char *) output + len));
484
485 return output;
486}
487
488/*
489 * Reads serialized dependencies into MVDependencies structure.
490 */
493{
494 int i;
496 MVDependencies *dependencies;
497 char *tmp;
498
499 if (data == NULL)
500 return NULL;
501
503 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
505
506 /* read the MVDependencies header */
507 dependencies = palloc0_object(MVDependencies);
508
509 /* initialize pointer to the data part (skip the varlena header) */
510 tmp = VARDATA_ANY(data);
511
512 /* read the header fields and perform basic sanity checks */
513 memcpy(&dependencies->magic, tmp, sizeof(uint32));
514 tmp += sizeof(uint32);
515 memcpy(&dependencies->type, tmp, sizeof(uint32));
516 tmp += sizeof(uint32);
517 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
518 tmp += sizeof(uint32);
519
520 if (dependencies->magic != STATS_DEPS_MAGIC)
521 elog(ERROR, "invalid dependency magic %d (expected %d)",
522 dependencies->magic, STATS_DEPS_MAGIC);
523
524 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
525 elog(ERROR, "invalid dependency type %d (expected %d)",
526 dependencies->type, STATS_DEPS_TYPE_BASIC);
527
528 if (dependencies->ndeps == 0)
529 elog(ERROR, "invalid zero-length item array in MVDependencies");
530
531 /* what minimum bytea size do we expect for those parameters */
532 min_expected_size = SizeOfItem(dependencies->ndeps);
533
535 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
537
538 /* allocate space for the MCV items */
539 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
540 + (dependencies->ndeps * sizeof(MVDependency *)));
541
542 for (i = 0; i < dependencies->ndeps; i++)
543 {
544 double degree;
545 AttrNumber k;
546 MVDependency *d;
547
548 /* degree of validity */
549 memcpy(&degree, tmp, sizeof(double));
550 tmp += sizeof(double);
551
552 /* number of attributes */
553 memcpy(&k, tmp, sizeof(AttrNumber));
554 tmp += sizeof(AttrNumber);
555
556 /* is the number of attributes valid? */
557 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
558
559 /* now that we know the number of attributes, allocate the dependency */
560 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
561 + (k * sizeof(AttrNumber)));
562
563 d->degree = degree;
564 d->nattributes = k;
565
566 /* copy attribute numbers */
567 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
568 tmp += sizeof(AttrNumber) * d->nattributes;
569
570 dependencies->deps[i] = d;
571
572 /* still within the bytea */
573 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
574 }
575
576 /* we should have consumed the whole bytea exactly */
577 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
578
579 return dependencies;
580}
581
582/*
583 * Free allocations of a MVDependencies.
584 */
585void
587{
588 for (int i = 0; i < dependencies->ndeps; i++)
589 pfree(dependencies->deps[i]);
590 pfree(dependencies);
591}
592
593/*
594 * Validate a set of MVDependencies against the extended statistics object
595 * definition.
596 *
597 * Every MVDependencies must be checked to ensure that the attnums in the
598 * attributes list correspond to attnums/expressions defined by the
599 * extended statistics object.
600 *
601 * Positive attnums are attributes which must be found in the stxkeys, while
602 * negative attnums correspond to an expression number, no attribute number
603 * can be below (0 - numexprs).
604 */
605bool
607 const int2vector *stxkeys,
608 int numexprs, int elevel)
609{
611
612 /* Scan through each dependency entry */
613 for (int i = 0; i < dependencies->ndeps; i++)
614 {
615 const MVDependency *dep = dependencies->deps[i];
616
617 /*
618 * Cross-check each attribute in a dependency entry with the extended
619 * stats object definition.
620 */
621 for (int j = 0; j < dep->nattributes; j++)
622 {
624 bool ok = false;
625
626 if (attnum > 0)
627 {
628 /* attribute number in stxkeys */
629 for (int k = 0; k < stxkeys->dim1; k++)
630 {
631 if (attnum == stxkeys->values[k])
632 {
633 ok = true;
634 break;
635 }
636 }
637 }
638 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
639 {
640 /* attribute number for an expression */
641 ok = true;
642 }
643
644 if (!ok)
645 {
646 ereport(elevel,
648 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
649 "pg_dependencies", attnum)));
650 return false;
651 }
652 }
653 }
654
655 return true;
656}
657
658/*
659 * dependency_is_fully_matched
660 * checks that a functional dependency is fully matched given clauses on
661 * attributes (assuming the clauses are suitable equality clauses)
662 */
663static bool
665{
666 int j;
667
668 /*
669 * Check that the dependency actually is fully covered by clauses. We have
670 * to translate all attribute numbers, as those are referenced
671 */
672 for (j = 0; j < dependency->nattributes; j++)
673 {
674 int attnum = dependency->attributes[j];
675
676 if (!bms_is_member(attnum, attnums))
677 return false;
678 }
679
680 return true;
681}
682
683/*
684 * statext_dependencies_load
685 * Load the functional dependencies for the indicated pg_statistic_ext tuple
686 */
689{
690 MVDependencies *result;
691 bool isnull;
692 Datum deps;
693 HeapTuple htup;
694
697 BoolGetDatum(inh));
698 if (!HeapTupleIsValid(htup))
699 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
700
703 if (isnull)
704 elog(ERROR,
705 "requested statistics kind \"%c\" is not yet built for statistics object %u",
707
709
710 ReleaseSysCache(htup);
711
712 return result;
713}
714
715/*
716 * dependency_is_compatible_clause
717 * Determines if the clause is compatible with functional dependencies
718 *
719 * Only clauses that have the form of equality to a pseudoconstant, or can be
720 * interpreted that way, are currently accepted. Furthermore the variable
721 * part of the clause must be a simple Var belonging to the specified
722 * relation, whose attribute number we return in *attnum on success.
723 */
724static bool
726{
727 Var *var;
729
730 if (IsA(clause, RestrictInfo))
731 {
732 RestrictInfo *rinfo = (RestrictInfo *) clause;
733
734 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
735 if (rinfo->pseudoconstant)
736 return false;
737
738 /* Clauses referencing multiple, or no, varnos are incompatible */
739 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
740 return false;
741
742 clause = (Node *) rinfo->clause;
743 }
744
745 if (is_opclause(clause))
746 {
747 /* If it's an opclause, check for Var = Const or Const = Var. */
748 OpExpr *expr = (OpExpr *) clause;
749
750 /* Only expressions with two arguments are candidates. */
751 if (list_length(expr->args) != 2)
752 return false;
753
754 /* Make sure non-selected argument is a pseudoconstant. */
756 clause_expr = linitial(expr->args);
757 else if (is_pseudo_constant_clause(linitial(expr->args)))
758 clause_expr = lsecond(expr->args);
759 else
760 return false;
761
762 /*
763 * If it's not an "=" operator, just ignore the clause, as it's not
764 * compatible with functional dependencies.
765 *
766 * This uses the function for estimating selectivity, not the operator
767 * directly (a bit awkward, but well ...).
768 *
769 * XXX this is pretty dubious; probably it'd be better to check btree
770 * or hash opclass membership, so as not to be fooled by custom
771 * selectivity functions, and to be more consistent with decisions
772 * elsewhere in the planner.
773 */
774 if (get_oprrest(expr->opno) != F_EQSEL)
775 return false;
776
777 /* OK to proceed with checking "var" */
778 }
779 else if (IsA(clause, ScalarArrayOpExpr))
780 {
781 /* If it's a scalar array operator, check for Var IN Const. */
782 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
783
784 /*
785 * Reject ALL() variant, we only care about ANY/IN.
786 *
787 * XXX Maybe we should check if all the values are the same, and allow
788 * ALL in that case? Doesn't seem very practical, though.
789 */
790 if (!expr->useOr)
791 return false;
792
793 /* Only expressions with two arguments are candidates. */
794 if (list_length(expr->args) != 2)
795 return false;
796
797 /*
798 * We know it's always (Var IN Const), so we assume the var is the
799 * first argument, and pseudoconstant is the second one.
800 */
802 return false;
803
804 clause_expr = linitial(expr->args);
805
806 /*
807 * If it's not an "=" operator, just ignore the clause, as it's not
808 * compatible with functional dependencies. The operator is identified
809 * simply by looking at which function it uses to estimate
810 * selectivity. That's a bit strange, but it's what other similar
811 * places do.
812 */
813 if (get_oprrest(expr->opno) != F_EQSEL)
814 return false;
815
816 /* OK to proceed with checking "var" */
817 }
818 else if (is_orclause(clause))
819 {
820 BoolExpr *bool_expr = (BoolExpr *) clause;
821 ListCell *lc;
822
823 /* start with no attribute number */
825
826 foreach(lc, bool_expr->args)
827 {
829
830 /*
831 * Had we found incompatible clause in the arguments, treat the
832 * whole clause as incompatible.
833 */
835 relid, &clause_attnum))
836 return false;
837
840
841 /* ensure all the variables are the same (same attnum) */
842 if (*attnum != clause_attnum)
843 return false;
844 }
845
846 /* the Var is already checked by the recursive call */
847 return true;
848 }
849 else if (is_notclause(clause))
850 {
851 /*
852 * "NOT x" can be interpreted as "x = false", so get the argument and
853 * proceed with seeing if it's a suitable Var.
854 */
855 clause_expr = (Node *) get_notclausearg(clause);
856 }
857 else
858 {
859 /*
860 * A boolean expression "x" can be interpreted as "x = true", so
861 * proceed with seeing if it's a suitable Var.
862 */
863 clause_expr = clause;
864 }
865
866 /*
867 * We may ignore any RelabelType node above the operand. (There won't be
868 * more than one, since eval_const_expressions has been applied already.)
869 */
871 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
872
873 /* We only support plain Vars for now */
874 if (!IsA(clause_expr, Var))
875 return false;
876
877 /* OK, we know we have a Var */
878 var = (Var *) clause_expr;
879
880 /* Ensure Var is from the correct relation */
881 if (var->varno != relid)
882 return false;
883
884 /* We also better ensure the Var is from the current level */
885 if (var->varlevelsup != 0)
886 return false;
887
888 /* Also ignore system attributes (we don't allow stats on those) */
890 return false;
891
892 *attnum = var->varattno;
893 return true;
894}
895
896/*
897 * find_strongest_dependency
898 * find the strongest dependency on the attributes
899 *
900 * When applying functional dependencies, we start with the strongest
901 * dependencies. That is, we select the dependency that:
902 *
903 * (a) has all attributes covered by equality clauses
904 *
905 * (b) has the most attributes
906 *
907 * (c) has the highest degree of validity
908 *
909 * This guarantees that we eliminate the most redundant conditions first
910 * (see the comment in dependencies_clauselist_selectivity).
911 */
912static MVDependency *
913find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
914 Bitmapset *attnums)
915{
916 int i,
917 j;
919
920 /* number of attnums in clauses */
921 int nattnums = bms_num_members(attnums);
922
923 /*
924 * Iterate over the MVDependency items and find the strongest one from the
925 * fully-matched dependencies. We do the cheap checks first, before
926 * matching it against the attnums.
927 */
928 for (i = 0; i < ndependencies; i++)
929 {
930 for (j = 0; j < dependencies[i]->ndeps; j++)
931 {
932 MVDependency *dependency = dependencies[i]->deps[j];
933
934 /*
935 * Skip dependencies referencing more attributes than available
936 * clauses, as those can't be fully matched.
937 */
938 if (dependency->nattributes > nattnums)
939 continue;
940
941 if (strongest)
942 {
943 /* skip dependencies on fewer attributes than the strongest. */
944 if (dependency->nattributes < strongest->nattributes)
945 continue;
946
947 /* also skip weaker dependencies when attribute count matches */
948 if (strongest->nattributes == dependency->nattributes &&
949 strongest->degree > dependency->degree)
950 continue;
951 }
952
953 /*
954 * this dependency is stronger, but we must still check that it's
955 * fully matched to these attnums. We perform this check last as
956 * it's slightly more expensive than the previous checks.
957 */
958 if (dependency_is_fully_matched(dependency, attnums))
959 strongest = dependency; /* save new best match */
960 }
961 }
962
963 return strongest;
964}
965
966/*
967 * clauselist_apply_dependencies
968 * Apply the specified functional dependencies to a list of clauses and
969 * return the estimated selectivity of the clauses that are compatible
970 * with any of the given dependencies.
971 *
972 * This will estimate all not-already-estimated clauses that are compatible
973 * with functional dependencies, and which have an attribute mentioned by any
974 * of the given dependencies (either as an implying or implied attribute).
975 *
976 * Given (lists of) clauses on attributes (a,b) and a functional dependency
977 * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
978 * using the formula
979 *
980 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
981 *
982 * where 'f' is the degree of dependency. This reflects the fact that we
983 * expect a fraction f of all rows to be consistent with the dependency
984 * (a=>b), and so have a selectivity of P(a), while the remaining rows are
985 * treated as independent.
986 *
987 * In practice, we use a slightly modified version of this formula, which uses
988 * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
989 * should obviously not exceed either column's individual selectivity. I.e.,
990 * we actually combine selectivities using the formula
991 *
992 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
993 *
994 * This can make quite a difference if the specific values matching the
995 * clauses are not consistent with the functional dependency.
996 */
997static Selectivity
999 int varRelid, JoinType jointype,
1000 SpecialJoinInfo *sjinfo,
1001 MVDependency **dependencies, int ndependencies,
1004{
1005 Bitmapset *attnums;
1006 int i;
1007 int j;
1008 int nattrs;
1010 int attidx;
1011 int listidx;
1012 ListCell *l;
1014
1015 /*
1016 * Extract the attnums of all implying and implied attributes from all the
1017 * given dependencies. Each of these attributes is expected to have at
1018 * least 1 not-already-estimated compatible clause that we will estimate
1019 * here.
1020 */
1021 attnums = NULL;
1022 for (i = 0; i < ndependencies; i++)
1023 {
1024 for (j = 0; j < dependencies[i]->nattributes; j++)
1025 {
1026 AttrNumber attnum = dependencies[i]->attributes[j];
1027
1028 attnums = bms_add_member(attnums, attnum);
1029 }
1030 }
1031
1032 /*
1033 * Compute per-column selectivity estimates for each of these attributes,
1034 * and mark all the corresponding clauses as estimated.
1035 */
1036 nattrs = bms_num_members(attnums);
1038
1039 attidx = 0;
1040 i = -1;
1041 while ((i = bms_next_member(attnums, i)) >= 0)
1042 {
1045
1046 listidx = -1;
1047 foreach(l, clauses)
1048 {
1049 Node *clause = (Node *) lfirst(l);
1050
1051 listidx++;
1052 if (list_attnums[listidx] == i)
1053 {
1056 }
1057 }
1058
1060 jointype, sjinfo, false);
1061 attr_sel[attidx++] = simple_sel;
1062 }
1063
1064 /*
1065 * Now combine these selectivities using the dependency information. For
1066 * chains of dependencies such as a -> b -> c, the b -> c dependency will
1067 * come before the a -> b dependency in the array, so we traverse the
1068 * array backwards to ensure such chains are computed in the right order.
1069 *
1070 * As explained above, pairs of selectivities are combined using the
1071 * formula
1072 *
1073 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1074 *
1075 * to ensure that the combined selectivity is never greater than either
1076 * individual selectivity.
1077 *
1078 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1079 * conditional probabilities to compute the overall result as follows:
1080 *
1081 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1082 *
1083 * so we replace the selectivities of all implied attributes with
1084 * conditional probabilities, that are conditional on all their implying
1085 * attributes. The selectivities of all other non-implied attributes are
1086 * left as they are.
1087 */
1088 for (i = ndependencies - 1; i >= 0; i--)
1089 {
1090 MVDependency *dependency = dependencies[i];
1093 double f;
1094
1095 /* Selectivity of all the implying attributes */
1096 s1 = 1.0;
1097 for (j = 0; j < dependency->nattributes - 1; j++)
1098 {
1099 attnum = dependency->attributes[j];
1100 attidx = bms_member_index(attnums, attnum);
1101 s1 *= attr_sel[attidx];
1102 }
1103
1104 /* Original selectivity of the implied attribute */
1105 attnum = dependency->attributes[j];
1106 attidx = bms_member_index(attnums, attnum);
1107 s2 = attr_sel[attidx];
1108
1109 /*
1110 * Replace s2 with the conditional probability s2 given s1, computed
1111 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1112 *
1113 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1114 *
1115 * where P(a) = s1, the selectivity of the implying attributes, and
1116 * P(b) = s2, the selectivity of the implied attribute.
1117 */
1118 f = dependency->degree;
1119
1120 if (s1 <= s2)
1121 attr_sel[attidx] = f + (1 - f) * s2;
1122 else
1123 attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1124 }
1125
1126 /*
1127 * The overall selectivity of all the clauses on all these attributes is
1128 * then the product of all the original (non-implied) probabilities and
1129 * the new conditional (implied) probabilities.
1130 */
1131 s1 = 1.0;
1132 for (i = 0; i < nattrs; i++)
1133 s1 *= attr_sel[i];
1134
1136
1137 pfree(attr_sel);
1138 bms_free(attnums);
1139
1140 return s1;
1141}
1142
1143/*
1144 * dependency_is_compatible_expression
1145 * Determines if the expression is compatible with functional dependencies
1146 *
1147 * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1148 * expression is a simple Var. On success, return the matching statistics
1149 * expression into *expr.
1150 */
1151static bool
1152dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1153{
1154 ListCell *lc,
1155 *lc2;
1157
1158 if (IsA(clause, RestrictInfo))
1159 {
1160 RestrictInfo *rinfo = (RestrictInfo *) clause;
1161
1162 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1163 if (rinfo->pseudoconstant)
1164 return false;
1165
1166 /* Clauses referencing multiple, or no, varnos are incompatible */
1167 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1168 return false;
1169
1170 clause = (Node *) rinfo->clause;
1171 }
1172
1173 if (is_opclause(clause))
1174 {
1175 /* If it's an opclause, check for Var = Const or Const = Var. */
1176 OpExpr *expr = (OpExpr *) clause;
1177
1178 /* Only expressions with two arguments are candidates. */
1179 if (list_length(expr->args) != 2)
1180 return false;
1181
1182 /* Make sure non-selected argument is a pseudoconstant. */
1184 clause_expr = linitial(expr->args);
1185 else if (is_pseudo_constant_clause(linitial(expr->args)))
1186 clause_expr = lsecond(expr->args);
1187 else
1188 return false;
1189
1190 /*
1191 * If it's not an "=" operator, just ignore the clause, as it's not
1192 * compatible with functional dependencies.
1193 *
1194 * This uses the function for estimating selectivity, not the operator
1195 * directly (a bit awkward, but well ...).
1196 *
1197 * XXX this is pretty dubious; probably it'd be better to check btree
1198 * or hash opclass membership, so as not to be fooled by custom
1199 * selectivity functions, and to be more consistent with decisions
1200 * elsewhere in the planner.
1201 */
1202 if (get_oprrest(expr->opno) != F_EQSEL)
1203 return false;
1204
1205 /* OK to proceed with checking "var" */
1206 }
1207 else if (IsA(clause, ScalarArrayOpExpr))
1208 {
1209 /* If it's a scalar array operator, check for Var IN Const. */
1210 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1211
1212 /*
1213 * Reject ALL() variant, we only care about ANY/IN.
1214 *
1215 * FIXME Maybe we should check if all the values are the same, and
1216 * allow ALL in that case? Doesn't seem very practical, though.
1217 */
1218 if (!expr->useOr)
1219 return false;
1220
1221 /* Only expressions with two arguments are candidates. */
1222 if (list_length(expr->args) != 2)
1223 return false;
1224
1225 /*
1226 * We know it's always (Var IN Const), so we assume the var is the
1227 * first argument, and pseudoconstant is the second one.
1228 */
1230 return false;
1231
1232 clause_expr = linitial(expr->args);
1233
1234 /*
1235 * If it's not an "=" operator, just ignore the clause, as it's not
1236 * compatible with functional dependencies. The operator is identified
1237 * simply by looking at which function it uses to estimate
1238 * selectivity. That's a bit strange, but it's what other similar
1239 * places do.
1240 */
1241 if (get_oprrest(expr->opno) != F_EQSEL)
1242 return false;
1243
1244 /* OK to proceed with checking "var" */
1245 }
1246 else if (is_orclause(clause))
1247 {
1248 BoolExpr *bool_expr = (BoolExpr *) clause;
1249
1250 /* start with no expression (we'll use the first match) */
1251 *expr = NULL;
1252
1253 foreach(lc, bool_expr->args)
1254 {
1255 Node *or_expr = NULL;
1256
1257 /*
1258 * Had we found incompatible expression in the arguments, treat
1259 * the whole expression as incompatible.
1260 */
1262 statlist, &or_expr))
1263 return false;
1264
1265 if (*expr == NULL)
1266 *expr = or_expr;
1267
1268 /* ensure all the expressions are the same */
1269 if (!equal(or_expr, *expr))
1270 return false;
1271 }
1272
1273 /* the expression is already checked by the recursive call */
1274 return true;
1275 }
1276 else if (is_notclause(clause))
1277 {
1278 /*
1279 * "NOT x" can be interpreted as "x = false", so get the argument and
1280 * proceed with seeing if it's a suitable Var.
1281 */
1282 clause_expr = (Node *) get_notclausearg(clause);
1283 }
1284 else
1285 {
1286 /*
1287 * A boolean expression "x" can be interpreted as "x = true", so
1288 * proceed with seeing if it's a suitable Var.
1289 */
1290 clause_expr = clause;
1291 }
1292
1293 /*
1294 * We may ignore any RelabelType node above the operand. (There won't be
1295 * more than one, since eval_const_expressions has been applied already.)
1296 */
1298 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1299
1300 /*
1301 * Search for a matching statistics expression.
1302 */
1303 foreach(lc, statlist)
1304 {
1306
1307 /* ignore stats without dependencies */
1308 if (info->kind != STATS_EXT_DEPENDENCIES)
1309 continue;
1310
1311 foreach(lc2, info->exprs)
1312 {
1313 Node *stat_expr = (Node *) lfirst(lc2);
1314
1316 {
1317 *expr = stat_expr;
1318 return true;
1319 }
1320 }
1321 }
1322
1323 return false;
1324}
1325
1326/*
1327 * dependencies_clauselist_selectivity
1328 * Return the estimated selectivity of (a subset of) the given clauses
1329 * using functional dependency statistics, or 1.0 if no useful functional
1330 * dependency statistic exists.
1331 *
1332 * 'estimatedclauses' is an input/output argument that gets a bit set
1333 * corresponding to the (zero-based) list index of each clause that is included
1334 * in the estimated selectivity.
1335 *
1336 * Given equality clauses on attributes (a,b) we find the strongest dependency
1337 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1338 * dependency, we then combine the per-clause selectivities using the formula
1339 *
1340 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1341 *
1342 * where 'f' is the degree of the dependency. (Actually we use a slightly
1343 * modified version of this formula -- see clauselist_apply_dependencies()).
1344 *
1345 * With clauses on more than two attributes, the dependencies are applied
1346 * recursively, starting with the widest/strongest dependencies. For example
1347 * P(a,b,c) is first split like this:
1348 *
1349 * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1350 *
1351 * assuming (a,b=>c) is the strongest dependency.
1352 */
1355 List *clauses,
1356 int varRelid,
1357 JoinType jointype,
1358 SpecialJoinInfo *sjinfo,
1359 RelOptInfo *rel,
1361{
1362 Selectivity s1 = 1.0;
1363 ListCell *l;
1366 int listidx;
1369 int total_ndeps;
1370 MVDependency **dependencies;
1371 int ndependencies;
1372 int i;
1375
1376 /* unique expressions */
1378 int unique_exprs_cnt;
1379
1380 /* check if there's any stats that might be useful for us. */
1382 return 1.0;
1383
1385
1386 /*
1387 * We allocate space as if every clause was a unique expression, although
1388 * that's probably overkill. Some will be simple column references that
1389 * we'll translate to attnums, and there might be duplicates. But it's
1390 * easier and cheaper to just do one allocation than repalloc later.
1391 */
1393 unique_exprs_cnt = 0;
1394
1395 /*
1396 * Pre-process the clauses list to extract the attnums seen in each item.
1397 * We need to determine if there's any clauses which will be useful for
1398 * dependency selectivity estimations. Along the way we'll record all of
1399 * the attnums for each clause in a list which we'll reference later so we
1400 * don't need to repeat the same work again. We'll also keep track of all
1401 * attnums seen.
1402 *
1403 * We also skip clauses that we already estimated using different types of
1404 * statistics (we treat them as incompatible).
1405 *
1406 * To handle expressions, we assign them negative attnums, as if it was a
1407 * system attribute (this is fine, as we only allow extended stats on user
1408 * attributes). And then we offset everything by the number of
1409 * expressions, so that we can store the values in a bitmapset.
1410 */
1411 listidx = 0;
1412 foreach(l, clauses)
1413 {
1414 Node *clause = (Node *) lfirst(l);
1416 Node *expr = NULL;
1417
1418 /* ignore clause by default */
1420
1422 {
1423 /*
1424 * If it's a simple column reference, just extract the attnum. If
1425 * it's an expression, assign a negative attnum as if it was a
1426 * system attribute.
1427 */
1428 if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1429 {
1431 }
1432 else if (dependency_is_compatible_expression(clause, rel->relid,
1433 rel->statlist,
1434 &expr))
1435 {
1436 /* special attnum assigned to this expression */
1438
1439 Assert(expr != NULL);
1440
1441 /* If the expression is duplicate, use the same attnum. */
1442 for (i = 0; i < unique_exprs_cnt; i++)
1443 {
1444 if (equal(unique_exprs[i], expr))
1445 {
1446 /* negative attribute number to expression */
1447 attnum = -(i + 1);
1448 break;
1449 }
1450 }
1451
1452 /* not found in the list, so add it */
1454 {
1456
1457 /* after incrementing the value, to get -1, -2, ... */
1459 }
1460
1461 /* remember which attnum was assigned to this clause */
1463 }
1464 }
1465
1466 listidx++;
1467 }
1468
1469 Assert(listidx == list_length(clauses));
1470
1471 /*
1472 * How much we need to offset the attnums? If there are no expressions,
1473 * then no offset is needed. Otherwise we need to offset enough for the
1474 * lowest value (-unique_exprs_cnt) to become 1.
1475 */
1476 if (unique_exprs_cnt > 0)
1478 else
1479 attnum_offset = 0;
1480
1481 /*
1482 * Now that we know how many expressions there are, we can offset the
1483 * values just enough to build the bitmapset.
1484 */
1485 for (i = 0; i < list_length(clauses); i++)
1486 {
1488
1489 /* ignore incompatible or already estimated clauses */
1491 continue;
1492
1493 /* make sure the attnum is in the expected range */
1496
1497 /* make sure the attnum is positive (valid AttrNumber) */
1499
1500 /*
1501 * Either it's a regular attribute, or it's an expression, in which
1502 * case we must not have seen it before (expressions are unique).
1503 *
1504 * XXX Check whether it's a regular attribute has to be done using the
1505 * original attnum, while the second check has to use the value with
1506 * an offset.
1507 */
1510
1511 /*
1512 * Remember the offset attnum, both for attributes and expressions.
1513 * We'll pass list_attnums to clauselist_apply_dependencies, which
1514 * uses it to identify clauses in a bitmap. We could also pass the
1515 * offset, but this is more convenient.
1516 */
1518
1520 }
1521
1522 /*
1523 * If there's not at least two distinct attnums and expressions, then
1524 * reject the whole list of clauses. We must return 1.0 so the calling
1525 * function's selectivity is unaffected.
1526 */
1528 {
1531 return 1.0;
1532 }
1533
1534 /*
1535 * Load all functional dependencies matching at least two parameters. We
1536 * can simply consider all dependencies at once, without having to search
1537 * for the best statistics object.
1538 *
1539 * To not waste cycles and memory, we deserialize dependencies only for
1540 * statistics that match at least two attributes. The array is allocated
1541 * with the assumption that all objects match - we could grow the array to
1542 * make it just the right size, but it's likely wasteful anyway thanks to
1543 * moving the freed chunks to freelists etc.
1544 */
1547 total_ndeps = 0;
1548
1549 foreach(l, rel->statlist)
1550 {
1552 int nmatched;
1553 int nexprs;
1554 int k;
1555 MVDependencies *deps;
1556
1557 /* skip statistics that are not of the correct type */
1558 if (stat->kind != STATS_EXT_DEPENDENCIES)
1559 continue;
1560
1561 /* skip statistics with mismatching stxdinherit value */
1562 if (stat->inherit != rte->inh)
1563 continue;
1564
1565 /*
1566 * Count matching attributes - we have to undo the attnum offsets. The
1567 * input attribute numbers are not offset (expressions are not
1568 * included in stat->keys, so it's not necessary). But we need to
1569 * offset it before checking against clauses_attnums.
1570 */
1571 nmatched = 0;
1572 k = -1;
1573 while ((k = bms_next_member(stat->keys, k)) >= 0)
1574 {
1576
1577 /* skip expressions */
1579 continue;
1580
1581 /* apply the same offset as above */
1583
1585 nmatched++;
1586 }
1587
1588 /* count matching expressions */
1589 nexprs = 0;
1590 for (i = 0; i < unique_exprs_cnt; i++)
1591 {
1592 ListCell *lc;
1593
1594 foreach(lc, stat->exprs)
1595 {
1596 Node *stat_expr = (Node *) lfirst(lc);
1597
1598 /* try to match it */
1600 nexprs++;
1601 }
1602 }
1603
1604 /*
1605 * Skip objects matching fewer than two attributes/expressions from
1606 * clauses.
1607 */
1608 if (nmatched + nexprs < 2)
1609 continue;
1610
1611 deps = statext_dependencies_load(stat->statOid, rte->inh);
1612
1613 /*
1614 * The expressions may be represented by different attnums in the
1615 * stats, we need to remap them to be consistent with the clauses.
1616 * That will make the later steps (e.g. picking the strongest item and
1617 * so on) much simpler and cheaper, because it won't need to care
1618 * about the offset at all.
1619 *
1620 * When we're at it, we can ignore dependencies that are not fully
1621 * matched by clauses (i.e. referencing attributes or expressions that
1622 * are not in the clauses).
1623 *
1624 * We have to do this for all statistics, as long as there are any
1625 * expressions - we need to shift the attnums in all dependencies.
1626 *
1627 * XXX Maybe we should do this always, because it also eliminates some
1628 * of the dependencies early. It might be cheaper than having to walk
1629 * the longer list in find_strongest_dependency later, especially as
1630 * we need to do that repeatedly?
1631 *
1632 * XXX We have to do this even when there are no expressions in
1633 * clauses, otherwise find_strongest_dependency may fail for stats
1634 * with expressions (due to lookup of negative value in bitmap). So we
1635 * need to at least filter out those dependencies. Maybe we could do
1636 * it in a cheaper way (if there are no expr clauses, we can just
1637 * discard all negative attnums without any lookups).
1638 */
1639 if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1640 {
1641 int ndeps = 0;
1642
1643 for (i = 0; i < deps->ndeps; i++)
1644 {
1645 bool skip = false;
1646 MVDependency *dep = deps->deps[i];
1647 int j;
1648
1649 for (j = 0; j < dep->nattributes; j++)
1650 {
1651 int idx;
1652 Node *expr;
1655
1656 /* undo the per-statistics offset */
1657 attnum = dep->attributes[j];
1658
1659 /*
1660 * For regular attributes we can simply check if it
1661 * matches any clause. If there's no matching clause, we
1662 * can just ignore it. We need to offset the attnum
1663 * though.
1664 */
1666 {
1667 dep->attributes[j] = attnum + attnum_offset;
1668
1669 if (!bms_is_member(dep->attributes[j], clauses_attnums))
1670 {
1671 skip = true;
1672 break;
1673 }
1674
1675 continue;
1676 }
1677
1678 /*
1679 * the attnum should be a valid system attnum (-1, -2,
1680 * ...)
1681 */
1683
1684 /*
1685 * For expressions, we need to do two translations. First
1686 * we have to translate the negative attnum to index in
1687 * the list of expressions (in the statistics object).
1688 * Then we need to see if there's a matching clause. The
1689 * index of the unique expression determines the attnum
1690 * (and we offset it).
1691 */
1692 idx = -(1 + attnum);
1693
1694 /* Is the expression index is valid? */
1695 Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1696
1697 expr = (Node *) list_nth(stat->exprs, idx);
1698
1699 /* try to find the expression in the unique list */
1700 for (int m = 0; m < unique_exprs_cnt; m++)
1701 {
1702 /*
1703 * found a matching unique expression, use the attnum
1704 * (derived from index of the unique expression)
1705 */
1706 if (equal(unique_exprs[m], expr))
1707 {
1708 unique_attnum = -(m + 1) + attnum_offset;
1709 break;
1710 }
1711 }
1712
1713 /*
1714 * Found no matching expression, so we can simply skip
1715 * this dependency, because there's no chance it will be
1716 * fully covered.
1717 */
1719 {
1720 skip = true;
1721 break;
1722 }
1723
1724 /* otherwise remap it to the new attnum */
1725 dep->attributes[j] = unique_attnum;
1726 }
1727
1728 /* if found a matching dependency, keep it */
1729 if (!skip)
1730 {
1731 /* maybe we've skipped something earlier, so move it */
1732 if (ndeps != i)
1733 deps->deps[ndeps] = deps->deps[i];
1734
1735 ndeps++;
1736 }
1737 }
1738
1739 deps->ndeps = ndeps;
1740 }
1741
1742 /*
1743 * It's possible we've removed all dependencies, in which case we
1744 * don't bother adding it to the list.
1745 */
1746 if (deps->ndeps > 0)
1747 {
1749 total_ndeps += deps->ndeps;
1751 }
1752 }
1753
1754 /* if no matching stats could be found then we've nothing to do */
1755 if (nfunc_dependencies == 0)
1756 {
1761 return 1.0;
1762 }
1763
1764 /*
1765 * Work out which dependencies we can apply, starting with the
1766 * widest/strongest ones, and proceeding to smaller/weaker ones.
1767 */
1768 dependencies = palloc_array(MVDependency *, total_ndeps);
1769 ndependencies = 0;
1770
1771 while (true)
1772 {
1773 MVDependency *dependency;
1775
1776 /* the widest/strongest dependency, fully matched by clauses */
1780 if (!dependency)
1781 break;
1782
1783 dependencies[ndependencies++] = dependency;
1784
1785 /* Ignore dependencies using this implied attribute in later loops */
1786 attnum = dependency->attributes[dependency->nattributes - 1];
1788 }
1789
1790 /*
1791 * If we found applicable dependencies, use them to estimate all
1792 * compatible clauses on attributes that they refer to.
1793 */
1794 if (ndependencies != 0)
1795 s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1796 sjinfo, dependencies, ndependencies,
1798
1799 /* free deserialized functional dependencies (and then the array) */
1800 for (i = 0; i < nfunc_dependencies; i++)
1802
1803 pfree(dependencies);
1808
1809 return s1;
1810}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
int16 AttrNumber
Definition attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition attnum.h:41
#define InvalidAttrNumber
Definition attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition bitmapset.c:1305
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition bitmapset.c:867
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition bitmapset.c:750
bool bms_is_member(int x, const Bitmapset *a)
Definition bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition bitmapset.c:814
BMS_Membership bms_membership(const Bitmapset *a)
Definition bitmapset.c:780
int bms_member_index(Bitmapset *a, int x)
Definition bitmapset.c:539
@ BMS_SINGLETON
Definition bitmapset.h:72
@ BMS_MULTIPLE
Definition bitmapset.h:73
#define VARHDRSZ
Definition c.h:711
#define Assert(condition)
Definition c.h:873
uint32_t uint32
Definition c.h:546
unsigned int Index
Definition c.h:628
size_t Size
Definition c.h:619
bool is_pseudo_constant_clause(Node *clause)
Definition clauses.c:2100
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition clausesel.c:117
static void generate_dependencies(DependencyGenerator state)
#define SizeOfHeader
MVDependencies * statext_dependencies_deserialize(bytea *data)
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
MVDependencies * statext_dependencies_build(StatsBuildData *data)
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
bool statext_dependencies_validate(const MVDependencies *dependencies, const int2vector *stxkeys, int numexprs, int elevel)
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
void statext_dependencies_free(MVDependencies *dependencies)
static void DependencyGenerator_free(DependencyGenerator state)
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
static DependencyGenerator DependencyGenerator_init(int n, int k)
#define SizeOfItem(natts)
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
DependencyGeneratorData * DependencyGenerator
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
bool equal(const void *a, const void *b)
Definition equalfuncs.c:223
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
#define DatumGetByteaPP(X)
Definition fmgr.h:292
return str start
#define HeapTupleIsValid(tuple)
Definition htup.h:78
#define MaxHeapAttributeNumber
#define nitems(x)
Definition indent.h:31
FILE * output
int j
Definition isn.c:78
int i
Definition isn.c:77
List * lappend(List *list, void *datum)
Definition list.c:339
RegProcedure get_oprrest(Oid opno)
Definition lsyscache.c:1707
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
static bool is_orclause(const void *clause)
Definition nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition nodeFuncs.h:125
static Expr * get_notclausearg(const void *notclause)
Definition nodeFuncs.h:134
#define IsA(nodeptr, _type_)
Definition nodes.h:164
double Selectivity
Definition nodes.h:260
JoinType
Definition nodes.h:298
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define planner_rt_fetch(rti, root)
Definition pathnodes.h:610
int16 attnum
static const struct exclude_list_item skip[]
const void size_t len
const void * data
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
static void * list_nth(const List *list, int n)
Definition pg_list.h:299
#define linitial(l)
Definition pg_list.h:178
#define lsecond(l)
Definition pg_list.h:183
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:262
uint64_t Datum
Definition postgres.h:70
#define InvalidOid
unsigned int Oid
static int fb(int x)
char * s1
char * s2
tree ctl root
Definition radixtree.h:1857
#define CLAMP_PROBABILITY(p)
Definition selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition statistics.h:19
#define STATS_DEPS_MAGIC
Definition statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition statistics.h:44
AttrNumber * dependencies
Definition pg_list.h:54
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:62
AttrNumber nattributes
Definition statistics.h:53
double degree
Definition statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:54
Definition nodes.h:135
Oid opno
Definition primnodes.h:850
List * args
Definition primnodes.h:868
Index relid
Definition pathnodes.h:973
List * statlist
Definition pathnodes.h:997
Expr * clause
Definition pathnodes.h:2792
AttrNumber varattno
Definition primnodes.h:274
int varno
Definition primnodes.h:269
Index varlevelsup
Definition primnodes.h:294
Definition type.h:96
Definition c.h:706
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition syscache.c:230
static ItemArray items
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:386
#define TYPECACHE_LT_OPR
Definition typcache.h:139
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
const char * type