PostgreSQL Source Code git master
dependencies.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dependencies.c
4 * POSTGRES functional dependencies
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/statistics/dependencies.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/htup_details.h"
19#include "nodes/nodeFuncs.h"
20#include "optimizer/clauses.h"
21#include "optimizer/optimizer.h"
22#include "parser/parsetree.h"
24#include "utils/fmgroids.h"
25#include "utils/lsyscache.h"
26#include "utils/memutils.h"
27#include "utils/selfuncs.h"
28#include "utils/syscache.h"
29#include "utils/typcache.h"
30
31/* size of the struct header fields (magic, type, ndeps) */
32#define SizeOfHeader (3 * sizeof(uint32))
33
34/* size of a serialized dependency (degree, natts, atts) */
35#define SizeOfItem(natts) \
36 (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
37
38/* minimal size of a dependency (with two attributes) */
39#define MinSizeOfItem SizeOfItem(2)
40
41/* minimal size of dependencies, when all deps are minimal */
42#define MinSizeOfItems(ndeps) \
43 (SizeOfHeader + (ndeps) * MinSizeOfItem)
44
45/*
46 * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
47 * k-permutations of n elements, except that the order does not matter for the
48 * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
49 */
51{
52 int k; /* size of the dependency */
53 int n; /* number of possible attributes */
54 int current; /* next dependency to return (index) */
55 AttrNumber ndependencies; /* number of dependencies generated */
56 AttrNumber *dependencies; /* array of pre-generated dependencies */
58
60
62 int index, AttrNumber start, AttrNumber *current);
67static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
68static bool dependency_is_fully_matched(MVDependency *dependency,
69 Bitmapset *attnums);
70static bool dependency_is_compatible_clause(Node *clause, Index relid,
72static bool dependency_is_compatible_expression(Node *clause, Index relid,
73 List *statlist, Node **expr);
75 int ndependencies, Bitmapset *attnums);
77 int varRelid, JoinType jointype,
78 SpecialJoinInfo *sjinfo,
79 MVDependency **dependencies,
80 int ndependencies,
81 AttrNumber *list_attnums,
82 Bitmapset **estimatedclauses);
83
84static void
86 AttrNumber start, AttrNumber *current)
87{
88 /*
89 * The generator handles the first (k-1) elements differently from the
90 * last element.
91 */
92 if (index < (state->k - 1))
93 {
95
96 /*
97 * The first (k-1) values have to be in ascending order, which we
98 * generate recursively.
99 */
100
101 for (i = start; i < state->n; i++)
102 {
103 current[index] = i;
104 generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 }
106 }
107 else
108 {
109 int i;
110
111 /*
112 * the last element is the implied value, which does not respect the
113 * ascending order. We just need to check that the value is not in the
114 * first (k-1) elements.
115 */
116
117 for (i = 0; i < state->n; i++)
118 {
119 int j;
120 bool match = false;
121
122 current[index] = i;
123
124 for (j = 0; j < index; j++)
125 {
126 if (current[j] == i)
127 {
128 match = true;
129 break;
130 }
131 }
132
133 /*
134 * If the value is not found in the first part of the dependency,
135 * we're done.
136 */
137 if (!match)
138 {
139 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140 state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141 memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 current, state->k * sizeof(AttrNumber));
143 state->ndependencies++;
144 }
145 }
146 }
147}
148
149/* generate all dependencies (k-permutations of n elements) */
150static void
152{
154
155 generate_dependencies_recurse(state, 0, 0, current);
156
157 pfree(current);
158}
159
160/*
161 * initialize the DependencyGenerator of variations, and prebuild the variations
162 *
163 * This pre-builds all the variations. We could also generate them in
164 * DependencyGenerator_next(), but this seems simpler.
165 */
168{
170
171 Assert((n >= k) && (k > 0));
172
173 /* allocate the DependencyGenerator state */
175 state->dependencies = palloc_array(AttrNumber, k);
176
177 state->ndependencies = 0;
178 state->current = 0;
179 state->k = k;
180 state->n = n;
181
182 /* now actually pre-generate all the variations */
184
185 return state;
186}
187
188/* free the DependencyGenerator state */
189static void
191{
192 pfree(state->dependencies);
193 pfree(state);
194}
195
196/* generate next combination */
197static AttrNumber *
199{
200 if (state->current == state->ndependencies)
201 return NULL;
202
203 return &state->dependencies[state->k * state->current++];
204}
205
206
207/*
208 * validates functional dependency on the data
209 *
210 * An actual work horse of detecting functional dependencies. Given a variation
211 * of k attributes, it checks that the first (k-1) are sufficient to determine
212 * the last one.
213 */
214static double
216{
217 int i,
218 nitems;
221 AttrNumber *attnums_dep;
222
223 /* counters valid within a group */
224 int group_size = 0;
225 int n_violations = 0;
226
227 /* total number of rows supporting (consistent with) the dependency */
228 int n_supporting_rows = 0;
229
230 /* Make sure we have at least two input attributes. */
231 Assert(k >= 2);
232
233 /* sort info for all attributes columns */
234 mss = multi_sort_init(k);
235
236 /*
237 * Translate the array of indexes to regular attnums for the dependency
238 * (we will need this to identify the columns in StatsBuildData).
239 */
240 attnums_dep = palloc_array(AttrNumber, k);
241 for (i = 0; i < k; i++)
242 attnums_dep[i] = data->attnums[dependency[i]];
243
244 /*
245 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 *
247 * (a) sort the data lexicographically
248 *
249 * (b) split the data into groups by first (k-1) columns
250 *
251 * (c) for each group count different values in the last column
252 *
253 * We use the column data types' default sort operators and collations;
254 * perhaps at some point it'd be worth using column-specific collations?
255 */
256
257 /* prepare the sort function for the dimensions */
258 for (i = 0; i < k; i++)
259 {
260 VacAttrStats *colstat = data->stats[dependency[i]];
262
264 if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 colstat->attrtypid);
267
268 /* prepare the sort function for this dimension */
269 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 }
271
272 /*
273 * build an array of SortItem(s) sorted using the multi-sort support
274 *
275 * XXX This relies on all stats entries pointing to the same tuple
276 * descriptor. For now that assumption holds, but it might change in the
277 * future for example if we support statistics on multiple tables.
278 */
279 items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
280
281 /*
282 * Walk through the sorted array, split it into rows according to the
283 * first (k-1) columns. If there's a single value in the last column, we
284 * count the group as 'supporting' the functional dependency. Otherwise we
285 * count it as contradicting.
286 */
287
288 /* start with the first row forming a group */
289 group_size = 1;
290
291 /* loop 1 beyond the end of the array so that we count the final group */
292 for (i = 1; i <= nitems; i++)
293 {
294 /*
295 * Check if the group ended, which may be either because we processed
296 * all the items (i==nitems), or because the i-th item is not equal to
297 * the preceding one.
298 */
299 if (i == nitems ||
300 multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 {
302 /*
303 * If no violations were found in the group then track the rows of
304 * the group as supporting the functional dependency.
305 */
306 if (n_violations == 0)
307 n_supporting_rows += group_size;
308
309 /* Reset counters for the new group */
310 n_violations = 0;
311 group_size = 1;
312 continue;
313 }
314 /* first columns match, but the last one does not (so contradicting) */
315 else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 n_violations++;
317
318 group_size++;
319 }
320
321 /* Compute the 'degree of validity' as (supporting/total). */
322 return (n_supporting_rows * 1.0 / data->numrows);
323}
324
325/*
326 * detects functional dependencies between groups of columns
327 *
328 * Generates all possible subsets of columns (variations) and computes
329 * the degree of validity for each one. For example when creating statistics
330 * on three columns (a,b,c) there are 9 possible dependencies
331 *
332 * two columns three columns
333 * ----------- -------------
334 * (a) -> b (a,b) -> c
335 * (a) -> c (a,c) -> b
336 * (b) -> a (b,c) -> a
337 * (b) -> c
338 * (c) -> a
339 * (c) -> b
340 */
343{
344 int i,
345 k;
346
347 /* result */
348 MVDependencies *dependencies = NULL;
349 MemoryContext cxt;
350
351 Assert(data->nattnums >= 2);
352
353 /* tracks memory allocated by dependency_degree calls */
355 "dependency_degree cxt",
357
358 /*
359 * We'll try build functional dependencies starting from the smallest ones
360 * covering just 2 columns, to the largest ones, covering all columns
361 * included in the statistics object. We start from the smallest ones
362 * because we want to be able to skip already implied ones.
363 */
364 for (k = 2; k <= data->nattnums; k++)
365 {
366 AttrNumber *dependency; /* array with k elements */
367
368 /* prepare a DependencyGenerator of variation */
370
371 /* generate all possible variations of k values (out of n) */
372 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 {
374 double degree;
375 MVDependency *d;
376 MemoryContext oldcxt;
377
378 /* release memory used by dependency degree calculation */
379 oldcxt = MemoryContextSwitchTo(cxt);
380
381 /* compute how valid the dependency seems */
382 degree = dependency_degree(data, k, dependency);
383
384 MemoryContextSwitchTo(oldcxt);
386
387 /*
388 * if the dependency seems entirely invalid, don't store it
389 */
390 if (degree == 0.0)
391 continue;
392
393 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 + k * sizeof(AttrNumber));
395
396 /* copy the dependency (and keep the indexes into stxkeys) */
397 d->degree = degree;
398 d->nattributes = k;
399 for (i = 0; i < k; i++)
400 d->attributes[i] = data->attnums[dependency[i]];
401
402 /* initialize the list of dependencies */
403 if (dependencies == NULL)
404 {
405 dependencies = palloc0_object(MVDependencies);
406
407 dependencies->magic = STATS_DEPS_MAGIC;
408 dependencies->type = STATS_DEPS_TYPE_BASIC;
409 dependencies->ndeps = 0;
410 }
411
412 dependencies->ndeps++;
413 dependencies = (MVDependencies *) repalloc(dependencies,
414 offsetof(MVDependencies, deps)
415 + dependencies->ndeps * sizeof(MVDependency *));
416
417 dependencies->deps[dependencies->ndeps - 1] = d;
418 }
419
420 /*
421 * we're done with variations of k elements, so free the
422 * DependencyGenerator
423 */
425 }
426
428
429 return dependencies;
430}
431
432
433/*
434 * Serialize list of dependencies into a bytea value.
435 */
436bytea *
438{
439 int i;
440 bytea *output;
441 char *tmp;
442 Size len;
443
444 /* we need to store ndeps, with a number of attributes for each one */
446
447 /* and also include space for the actual attribute numbers and degrees */
448 for (i = 0; i < dependencies->ndeps; i++)
449 len += SizeOfItem(dependencies->deps[i]->nattributes);
450
451 output = (bytea *) palloc0(len);
453
454 tmp = VARDATA(output);
455
456 /* Store the base struct values (magic, type, ndeps) */
457 memcpy(tmp, &dependencies->magic, sizeof(uint32));
458 tmp += sizeof(uint32);
459 memcpy(tmp, &dependencies->type, sizeof(uint32));
460 tmp += sizeof(uint32);
461 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
462 tmp += sizeof(uint32);
463
464 /* store number of attributes and attribute numbers for each dependency */
465 for (i = 0; i < dependencies->ndeps; i++)
466 {
467 MVDependency *d = dependencies->deps[i];
468
469 memcpy(tmp, &d->degree, sizeof(double));
470 tmp += sizeof(double);
471
472 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
473 tmp += sizeof(AttrNumber);
474
475 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
476 tmp += sizeof(AttrNumber) * d->nattributes;
477
478 /* protect against overflow */
479 Assert(tmp <= ((char *) output + len));
480 }
481
482 /* make sure we've produced exactly the right amount of data */
483 Assert(tmp == ((char *) output + len));
484
485 return output;
486}
487
488/*
489 * Reads serialized dependencies into MVDependencies structure.
490 */
493{
494 int i;
495 Size min_expected_size;
496 MVDependencies *dependencies;
497 char *tmp;
498
499 if (data == NULL)
500 return NULL;
501
503 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
505
506 /* read the MVDependencies header */
507 dependencies = palloc0_object(MVDependencies);
508
509 /* initialize pointer to the data part (skip the varlena header) */
510 tmp = VARDATA_ANY(data);
511
512 /* read the header fields and perform basic sanity checks */
513 memcpy(&dependencies->magic, tmp, sizeof(uint32));
514 tmp += sizeof(uint32);
515 memcpy(&dependencies->type, tmp, sizeof(uint32));
516 tmp += sizeof(uint32);
517 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
518 tmp += sizeof(uint32);
519
520 if (dependencies->magic != STATS_DEPS_MAGIC)
521 elog(ERROR, "invalid dependency magic %d (expected %d)",
522 dependencies->magic, STATS_DEPS_MAGIC);
523
524 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
525 elog(ERROR, "invalid dependency type %d (expected %d)",
526 dependencies->type, STATS_DEPS_TYPE_BASIC);
527
528 if (dependencies->ndeps == 0)
529 elog(ERROR, "invalid zero-length item array in MVDependencies");
530
531 /* what minimum bytea size do we expect for those parameters */
532 min_expected_size = SizeOfItem(dependencies->ndeps);
533
534 if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
535 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
536 VARSIZE_ANY_EXHDR(data), min_expected_size);
537
538 /* allocate space for the MCV items */
539 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
540 + (dependencies->ndeps * sizeof(MVDependency *)));
541
542 for (i = 0; i < dependencies->ndeps; i++)
543 {
544 double degree;
545 AttrNumber k;
546 MVDependency *d;
547
548 /* degree of validity */
549 memcpy(&degree, tmp, sizeof(double));
550 tmp += sizeof(double);
551
552 /* number of attributes */
553 memcpy(&k, tmp, sizeof(AttrNumber));
554 tmp += sizeof(AttrNumber);
555
556 /* is the number of attributes valid? */
557 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
558
559 /* now that we know the number of attributes, allocate the dependency */
560 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
561 + (k * sizeof(AttrNumber)));
562
563 d->degree = degree;
564 d->nattributes = k;
565
566 /* copy attribute numbers */
567 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
568 tmp += sizeof(AttrNumber) * d->nattributes;
569
570 dependencies->deps[i] = d;
571
572 /* still within the bytea */
573 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
574 }
575
576 /* we should have consumed the whole bytea exactly */
577 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
578
579 return dependencies;
580}
581
582/*
583 * dependency_is_fully_matched
584 * checks that a functional dependency is fully matched given clauses on
585 * attributes (assuming the clauses are suitable equality clauses)
586 */
587static bool
589{
590 int j;
591
592 /*
593 * Check that the dependency actually is fully covered by clauses. We have
594 * to translate all attribute numbers, as those are referenced
595 */
596 for (j = 0; j < dependency->nattributes; j++)
597 {
598 int attnum = dependency->attributes[j];
599
600 if (!bms_is_member(attnum, attnums))
601 return false;
602 }
603
604 return true;
605}
606
607/*
608 * statext_dependencies_load
609 * Load the functional dependencies for the indicated pg_statistic_ext tuple
610 */
613{
614 MVDependencies *result;
615 bool isnull;
616 Datum deps;
617 HeapTuple htup;
618
619 htup = SearchSysCache2(STATEXTDATASTXOID,
620 ObjectIdGetDatum(mvoid),
621 BoolGetDatum(inh));
622 if (!HeapTupleIsValid(htup))
623 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
624
625 deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
626 Anum_pg_statistic_ext_data_stxddependencies, &isnull);
627 if (isnull)
628 elog(ERROR,
629 "requested statistics kind \"%c\" is not yet built for statistics object %u",
630 STATS_EXT_DEPENDENCIES, mvoid);
631
633
634 ReleaseSysCache(htup);
635
636 return result;
637}
638
639/*
640 * dependency_is_compatible_clause
641 * Determines if the clause is compatible with functional dependencies
642 *
643 * Only clauses that have the form of equality to a pseudoconstant, or can be
644 * interpreted that way, are currently accepted. Furthermore the variable
645 * part of the clause must be a simple Var belonging to the specified
646 * relation, whose attribute number we return in *attnum on success.
647 */
648static bool
650{
651 Var *var;
652 Node *clause_expr;
653
654 if (IsA(clause, RestrictInfo))
655 {
656 RestrictInfo *rinfo = (RestrictInfo *) clause;
657
658 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
659 if (rinfo->pseudoconstant)
660 return false;
661
662 /* Clauses referencing multiple, or no, varnos are incompatible */
663 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
664 return false;
665
666 clause = (Node *) rinfo->clause;
667 }
668
669 if (is_opclause(clause))
670 {
671 /* If it's an opclause, check for Var = Const or Const = Var. */
672 OpExpr *expr = (OpExpr *) clause;
673
674 /* Only expressions with two arguments are candidates. */
675 if (list_length(expr->args) != 2)
676 return false;
677
678 /* Make sure non-selected argument is a pseudoconstant. */
680 clause_expr = linitial(expr->args);
681 else if (is_pseudo_constant_clause(linitial(expr->args)))
682 clause_expr = lsecond(expr->args);
683 else
684 return false;
685
686 /*
687 * If it's not an "=" operator, just ignore the clause, as it's not
688 * compatible with functional dependencies.
689 *
690 * This uses the function for estimating selectivity, not the operator
691 * directly (a bit awkward, but well ...).
692 *
693 * XXX this is pretty dubious; probably it'd be better to check btree
694 * or hash opclass membership, so as not to be fooled by custom
695 * selectivity functions, and to be more consistent with decisions
696 * elsewhere in the planner.
697 */
698 if (get_oprrest(expr->opno) != F_EQSEL)
699 return false;
700
701 /* OK to proceed with checking "var" */
702 }
703 else if (IsA(clause, ScalarArrayOpExpr))
704 {
705 /* If it's a scalar array operator, check for Var IN Const. */
706 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
707
708 /*
709 * Reject ALL() variant, we only care about ANY/IN.
710 *
711 * XXX Maybe we should check if all the values are the same, and allow
712 * ALL in that case? Doesn't seem very practical, though.
713 */
714 if (!expr->useOr)
715 return false;
716
717 /* Only expressions with two arguments are candidates. */
718 if (list_length(expr->args) != 2)
719 return false;
720
721 /*
722 * We know it's always (Var IN Const), so we assume the var is the
723 * first argument, and pseudoconstant is the second one.
724 */
726 return false;
727
728 clause_expr = linitial(expr->args);
729
730 /*
731 * If it's not an "=" operator, just ignore the clause, as it's not
732 * compatible with functional dependencies. The operator is identified
733 * simply by looking at which function it uses to estimate
734 * selectivity. That's a bit strange, but it's what other similar
735 * places do.
736 */
737 if (get_oprrest(expr->opno) != F_EQSEL)
738 return false;
739
740 /* OK to proceed with checking "var" */
741 }
742 else if (is_orclause(clause))
743 {
744 BoolExpr *bool_expr = (BoolExpr *) clause;
745 ListCell *lc;
746
747 /* start with no attribute number */
749
750 foreach(lc, bool_expr->args)
751 {
752 AttrNumber clause_attnum;
753
754 /*
755 * Had we found incompatible clause in the arguments, treat the
756 * whole clause as incompatible.
757 */
759 relid, &clause_attnum))
760 return false;
761
763 *attnum = clause_attnum;
764
765 /* ensure all the variables are the same (same attnum) */
766 if (*attnum != clause_attnum)
767 return false;
768 }
769
770 /* the Var is already checked by the recursive call */
771 return true;
772 }
773 else if (is_notclause(clause))
774 {
775 /*
776 * "NOT x" can be interpreted as "x = false", so get the argument and
777 * proceed with seeing if it's a suitable Var.
778 */
779 clause_expr = (Node *) get_notclausearg(clause);
780 }
781 else
782 {
783 /*
784 * A boolean expression "x" can be interpreted as "x = true", so
785 * proceed with seeing if it's a suitable Var.
786 */
787 clause_expr = clause;
788 }
789
790 /*
791 * We may ignore any RelabelType node above the operand. (There won't be
792 * more than one, since eval_const_expressions has been applied already.)
793 */
794 if (IsA(clause_expr, RelabelType))
795 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
796
797 /* We only support plain Vars for now */
798 if (!IsA(clause_expr, Var))
799 return false;
800
801 /* OK, we know we have a Var */
802 var = (Var *) clause_expr;
803
804 /* Ensure Var is from the correct relation */
805 if (var->varno != relid)
806 return false;
807
808 /* We also better ensure the Var is from the current level */
809 if (var->varlevelsup != 0)
810 return false;
811
812 /* Also ignore system attributes (we don't allow stats on those) */
814 return false;
815
816 *attnum = var->varattno;
817 return true;
818}
819
820/*
821 * find_strongest_dependency
822 * find the strongest dependency on the attributes
823 *
824 * When applying functional dependencies, we start with the strongest
825 * dependencies. That is, we select the dependency that:
826 *
827 * (a) has all attributes covered by equality clauses
828 *
829 * (b) has the most attributes
830 *
831 * (c) has the highest degree of validity
832 *
833 * This guarantees that we eliminate the most redundant conditions first
834 * (see the comment in dependencies_clauselist_selectivity).
835 */
836static MVDependency *
837find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
838 Bitmapset *attnums)
839{
840 int i,
841 j;
842 MVDependency *strongest = NULL;
843
844 /* number of attnums in clauses */
845 int nattnums = bms_num_members(attnums);
846
847 /*
848 * Iterate over the MVDependency items and find the strongest one from the
849 * fully-matched dependencies. We do the cheap checks first, before
850 * matching it against the attnums.
851 */
852 for (i = 0; i < ndependencies; i++)
853 {
854 for (j = 0; j < dependencies[i]->ndeps; j++)
855 {
856 MVDependency *dependency = dependencies[i]->deps[j];
857
858 /*
859 * Skip dependencies referencing more attributes than available
860 * clauses, as those can't be fully matched.
861 */
862 if (dependency->nattributes > nattnums)
863 continue;
864
865 if (strongest)
866 {
867 /* skip dependencies on fewer attributes than the strongest. */
868 if (dependency->nattributes < strongest->nattributes)
869 continue;
870
871 /* also skip weaker dependencies when attribute count matches */
872 if (strongest->nattributes == dependency->nattributes &&
873 strongest->degree > dependency->degree)
874 continue;
875 }
876
877 /*
878 * this dependency is stronger, but we must still check that it's
879 * fully matched to these attnums. We perform this check last as
880 * it's slightly more expensive than the previous checks.
881 */
882 if (dependency_is_fully_matched(dependency, attnums))
883 strongest = dependency; /* save new best match */
884 }
885 }
886
887 return strongest;
888}
889
890/*
891 * clauselist_apply_dependencies
892 * Apply the specified functional dependencies to a list of clauses and
893 * return the estimated selectivity of the clauses that are compatible
894 * with any of the given dependencies.
895 *
896 * This will estimate all not-already-estimated clauses that are compatible
897 * with functional dependencies, and which have an attribute mentioned by any
898 * of the given dependencies (either as an implying or implied attribute).
899 *
900 * Given (lists of) clauses on attributes (a,b) and a functional dependency
901 * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
902 * using the formula
903 *
904 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
905 *
906 * where 'f' is the degree of dependency. This reflects the fact that we
907 * expect a fraction f of all rows to be consistent with the dependency
908 * (a=>b), and so have a selectivity of P(a), while the remaining rows are
909 * treated as independent.
910 *
911 * In practice, we use a slightly modified version of this formula, which uses
912 * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
913 * should obviously not exceed either column's individual selectivity. I.e.,
914 * we actually combine selectivities using the formula
915 *
916 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
917 *
918 * This can make quite a difference if the specific values matching the
919 * clauses are not consistent with the functional dependency.
920 */
921static Selectivity
923 int varRelid, JoinType jointype,
924 SpecialJoinInfo *sjinfo,
925 MVDependency **dependencies, int ndependencies,
926 AttrNumber *list_attnums,
927 Bitmapset **estimatedclauses)
928{
929 Bitmapset *attnums;
930 int i;
931 int j;
932 int nattrs;
933 Selectivity *attr_sel;
934 int attidx;
935 int listidx;
936 ListCell *l;
938
939 /*
940 * Extract the attnums of all implying and implied attributes from all the
941 * given dependencies. Each of these attributes is expected to have at
942 * least 1 not-already-estimated compatible clause that we will estimate
943 * here.
944 */
945 attnums = NULL;
946 for (i = 0; i < ndependencies; i++)
947 {
948 for (j = 0; j < dependencies[i]->nattributes; j++)
949 {
950 AttrNumber attnum = dependencies[i]->attributes[j];
951
952 attnums = bms_add_member(attnums, attnum);
953 }
954 }
955
956 /*
957 * Compute per-column selectivity estimates for each of these attributes,
958 * and mark all the corresponding clauses as estimated.
959 */
960 nattrs = bms_num_members(attnums);
961 attr_sel = palloc_array(Selectivity, nattrs);
962
963 attidx = 0;
964 i = -1;
965 while ((i = bms_next_member(attnums, i)) >= 0)
966 {
967 List *attr_clauses = NIL;
968 Selectivity simple_sel;
969
970 listidx = -1;
971 foreach(l, clauses)
972 {
973 Node *clause = (Node *) lfirst(l);
974
975 listidx++;
976 if (list_attnums[listidx] == i)
977 {
978 attr_clauses = lappend(attr_clauses, clause);
979 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
980 }
981 }
982
983 simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
984 jointype, sjinfo, false);
985 attr_sel[attidx++] = simple_sel;
986 }
987
988 /*
989 * Now combine these selectivities using the dependency information. For
990 * chains of dependencies such as a -> b -> c, the b -> c dependency will
991 * come before the a -> b dependency in the array, so we traverse the
992 * array backwards to ensure such chains are computed in the right order.
993 *
994 * As explained above, pairs of selectivities are combined using the
995 * formula
996 *
997 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
998 *
999 * to ensure that the combined selectivity is never greater than either
1000 * individual selectivity.
1001 *
1002 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1003 * conditional probabilities to compute the overall result as follows:
1004 *
1005 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1006 *
1007 * so we replace the selectivities of all implied attributes with
1008 * conditional probabilities, that are conditional on all their implying
1009 * attributes. The selectivities of all other non-implied attributes are
1010 * left as they are.
1011 */
1012 for (i = ndependencies - 1; i >= 0; i--)
1013 {
1014 MVDependency *dependency = dependencies[i];
1017 double f;
1018
1019 /* Selectivity of all the implying attributes */
1020 s1 = 1.0;
1021 for (j = 0; j < dependency->nattributes - 1; j++)
1022 {
1023 attnum = dependency->attributes[j];
1024 attidx = bms_member_index(attnums, attnum);
1025 s1 *= attr_sel[attidx];
1026 }
1027
1028 /* Original selectivity of the implied attribute */
1029 attnum = dependency->attributes[j];
1030 attidx = bms_member_index(attnums, attnum);
1031 s2 = attr_sel[attidx];
1032
1033 /*
1034 * Replace s2 with the conditional probability s2 given s1, computed
1035 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1036 *
1037 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1038 *
1039 * where P(a) = s1, the selectivity of the implying attributes, and
1040 * P(b) = s2, the selectivity of the implied attribute.
1041 */
1042 f = dependency->degree;
1043
1044 if (s1 <= s2)
1045 attr_sel[attidx] = f + (1 - f) * s2;
1046 else
1047 attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1048 }
1049
1050 /*
1051 * The overall selectivity of all the clauses on all these attributes is
1052 * then the product of all the original (non-implied) probabilities and
1053 * the new conditional (implied) probabilities.
1054 */
1055 s1 = 1.0;
1056 for (i = 0; i < nattrs; i++)
1057 s1 *= attr_sel[i];
1058
1060
1061 pfree(attr_sel);
1062 bms_free(attnums);
1063
1064 return s1;
1065}
1066
1067/*
1068 * dependency_is_compatible_expression
1069 * Determines if the expression is compatible with functional dependencies
1070 *
1071 * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1072 * expression is a simple Var. On success, return the matching statistics
1073 * expression into *expr.
1074 */
1075static bool
1076dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1077{
1078 ListCell *lc,
1079 *lc2;
1080 Node *clause_expr;
1081
1082 if (IsA(clause, RestrictInfo))
1083 {
1084 RestrictInfo *rinfo = (RestrictInfo *) clause;
1085
1086 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1087 if (rinfo->pseudoconstant)
1088 return false;
1089
1090 /* Clauses referencing multiple, or no, varnos are incompatible */
1091 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1092 return false;
1093
1094 clause = (Node *) rinfo->clause;
1095 }
1096
1097 if (is_opclause(clause))
1098 {
1099 /* If it's an opclause, check for Var = Const or Const = Var. */
1100 OpExpr *expr = (OpExpr *) clause;
1101
1102 /* Only expressions with two arguments are candidates. */
1103 if (list_length(expr->args) != 2)
1104 return false;
1105
1106 /* Make sure non-selected argument is a pseudoconstant. */
1108 clause_expr = linitial(expr->args);
1109 else if (is_pseudo_constant_clause(linitial(expr->args)))
1110 clause_expr = lsecond(expr->args);
1111 else
1112 return false;
1113
1114 /*
1115 * If it's not an "=" operator, just ignore the clause, as it's not
1116 * compatible with functional dependencies.
1117 *
1118 * This uses the function for estimating selectivity, not the operator
1119 * directly (a bit awkward, but well ...).
1120 *
1121 * XXX this is pretty dubious; probably it'd be better to check btree
1122 * or hash opclass membership, so as not to be fooled by custom
1123 * selectivity functions, and to be more consistent with decisions
1124 * elsewhere in the planner.
1125 */
1126 if (get_oprrest(expr->opno) != F_EQSEL)
1127 return false;
1128
1129 /* OK to proceed with checking "var" */
1130 }
1131 else if (IsA(clause, ScalarArrayOpExpr))
1132 {
1133 /* If it's a scalar array operator, check for Var IN Const. */
1134 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1135
1136 /*
1137 * Reject ALL() variant, we only care about ANY/IN.
1138 *
1139 * FIXME Maybe we should check if all the values are the same, and
1140 * allow ALL in that case? Doesn't seem very practical, though.
1141 */
1142 if (!expr->useOr)
1143 return false;
1144
1145 /* Only expressions with two arguments are candidates. */
1146 if (list_length(expr->args) != 2)
1147 return false;
1148
1149 /*
1150 * We know it's always (Var IN Const), so we assume the var is the
1151 * first argument, and pseudoconstant is the second one.
1152 */
1154 return false;
1155
1156 clause_expr = linitial(expr->args);
1157
1158 /*
1159 * If it's not an "=" operator, just ignore the clause, as it's not
1160 * compatible with functional dependencies. The operator is identified
1161 * simply by looking at which function it uses to estimate
1162 * selectivity. That's a bit strange, but it's what other similar
1163 * places do.
1164 */
1165 if (get_oprrest(expr->opno) != F_EQSEL)
1166 return false;
1167
1168 /* OK to proceed with checking "var" */
1169 }
1170 else if (is_orclause(clause))
1171 {
1172 BoolExpr *bool_expr = (BoolExpr *) clause;
1173
1174 /* start with no expression (we'll use the first match) */
1175 *expr = NULL;
1176
1177 foreach(lc, bool_expr->args)
1178 {
1179 Node *or_expr = NULL;
1180
1181 /*
1182 * Had we found incompatible expression in the arguments, treat
1183 * the whole expression as incompatible.
1184 */
1186 statlist, &or_expr))
1187 return false;
1188
1189 if (*expr == NULL)
1190 *expr = or_expr;
1191
1192 /* ensure all the expressions are the same */
1193 if (!equal(or_expr, *expr))
1194 return false;
1195 }
1196
1197 /* the expression is already checked by the recursive call */
1198 return true;
1199 }
1200 else if (is_notclause(clause))
1201 {
1202 /*
1203 * "NOT x" can be interpreted as "x = false", so get the argument and
1204 * proceed with seeing if it's a suitable Var.
1205 */
1206 clause_expr = (Node *) get_notclausearg(clause);
1207 }
1208 else
1209 {
1210 /*
1211 * A boolean expression "x" can be interpreted as "x = true", so
1212 * proceed with seeing if it's a suitable Var.
1213 */
1214 clause_expr = clause;
1215 }
1216
1217 /*
1218 * We may ignore any RelabelType node above the operand. (There won't be
1219 * more than one, since eval_const_expressions has been applied already.)
1220 */
1221 if (IsA(clause_expr, RelabelType))
1222 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1223
1224 /*
1225 * Search for a matching statistics expression.
1226 */
1227 foreach(lc, statlist)
1228 {
1230
1231 /* ignore stats without dependencies */
1232 if (info->kind != STATS_EXT_DEPENDENCIES)
1233 continue;
1234
1235 foreach(lc2, info->exprs)
1236 {
1237 Node *stat_expr = (Node *) lfirst(lc2);
1238
1239 if (equal(clause_expr, stat_expr))
1240 {
1241 *expr = stat_expr;
1242 return true;
1243 }
1244 }
1245 }
1246
1247 return false;
1248}
1249
1250/*
1251 * dependencies_clauselist_selectivity
1252 * Return the estimated selectivity of (a subset of) the given clauses
1253 * using functional dependency statistics, or 1.0 if no useful functional
1254 * dependency statistic exists.
1255 *
1256 * 'estimatedclauses' is an input/output argument that gets a bit set
1257 * corresponding to the (zero-based) list index of each clause that is included
1258 * in the estimated selectivity.
1259 *
1260 * Given equality clauses on attributes (a,b) we find the strongest dependency
1261 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1262 * dependency, we then combine the per-clause selectivities using the formula
1263 *
1264 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1265 *
1266 * where 'f' is the degree of the dependency. (Actually we use a slightly
1267 * modified version of this formula -- see clauselist_apply_dependencies()).
1268 *
1269 * With clauses on more than two attributes, the dependencies are applied
1270 * recursively, starting with the widest/strongest dependencies. For example
1271 * P(a,b,c) is first split like this:
1272 *
1273 * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1274 *
1275 * assuming (a,b=>c) is the strongest dependency.
1276 */
1279 List *clauses,
1280 int varRelid,
1281 JoinType jointype,
1282 SpecialJoinInfo *sjinfo,
1283 RelOptInfo *rel,
1284 Bitmapset **estimatedclauses)
1285{
1286 Selectivity s1 = 1.0;
1287 ListCell *l;
1288 Bitmapset *clauses_attnums = NULL;
1289 AttrNumber *list_attnums;
1290 int listidx;
1291 MVDependencies **func_dependencies;
1292 int nfunc_dependencies;
1293 int total_ndeps;
1294 MVDependency **dependencies;
1295 int ndependencies;
1296 int i;
1297 AttrNumber attnum_offset;
1299
1300 /* unique expressions */
1301 Node **unique_exprs;
1302 int unique_exprs_cnt;
1303
1304 /* check if there's any stats that might be useful for us. */
1305 if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1306 return 1.0;
1307
1308 list_attnums = palloc_array(AttrNumber, list_length(clauses));
1309
1310 /*
1311 * We allocate space as if every clause was a unique expression, although
1312 * that's probably overkill. Some will be simple column references that
1313 * we'll translate to attnums, and there might be duplicates. But it's
1314 * easier and cheaper to just do one allocation than repalloc later.
1315 */
1316 unique_exprs = palloc_array(Node *, list_length(clauses));
1317 unique_exprs_cnt = 0;
1318
1319 /*
1320 * Pre-process the clauses list to extract the attnums seen in each item.
1321 * We need to determine if there's any clauses which will be useful for
1322 * dependency selectivity estimations. Along the way we'll record all of
1323 * the attnums for each clause in a list which we'll reference later so we
1324 * don't need to repeat the same work again. We'll also keep track of all
1325 * attnums seen.
1326 *
1327 * We also skip clauses that we already estimated using different types of
1328 * statistics (we treat them as incompatible).
1329 *
1330 * To handle expressions, we assign them negative attnums, as if it was a
1331 * system attribute (this is fine, as we only allow extended stats on user
1332 * attributes). And then we offset everything by the number of
1333 * expressions, so that we can store the values in a bitmapset.
1334 */
1335 listidx = 0;
1336 foreach(l, clauses)
1337 {
1338 Node *clause = (Node *) lfirst(l);
1340 Node *expr = NULL;
1341
1342 /* ignore clause by default */
1343 list_attnums[listidx] = InvalidAttrNumber;
1344
1345 if (!bms_is_member(listidx, *estimatedclauses))
1346 {
1347 /*
1348 * If it's a simple column reference, just extract the attnum. If
1349 * it's an expression, assign a negative attnum as if it was a
1350 * system attribute.
1351 */
1352 if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1353 {
1354 list_attnums[listidx] = attnum;
1355 }
1356 else if (dependency_is_compatible_expression(clause, rel->relid,
1357 rel->statlist,
1358 &expr))
1359 {
1360 /* special attnum assigned to this expression */
1362
1363 Assert(expr != NULL);
1364
1365 /* If the expression is duplicate, use the same attnum. */
1366 for (i = 0; i < unique_exprs_cnt; i++)
1367 {
1368 if (equal(unique_exprs[i], expr))
1369 {
1370 /* negative attribute number to expression */
1371 attnum = -(i + 1);
1372 break;
1373 }
1374 }
1375
1376 /* not found in the list, so add it */
1378 {
1379 unique_exprs[unique_exprs_cnt++] = expr;
1380
1381 /* after incrementing the value, to get -1, -2, ... */
1382 attnum = (-unique_exprs_cnt);
1383 }
1384
1385 /* remember which attnum was assigned to this clause */
1386 list_attnums[listidx] = attnum;
1387 }
1388 }
1389
1390 listidx++;
1391 }
1392
1393 Assert(listidx == list_length(clauses));
1394
1395 /*
1396 * How much we need to offset the attnums? If there are no expressions,
1397 * then no offset is needed. Otherwise we need to offset enough for the
1398 * lowest value (-unique_exprs_cnt) to become 1.
1399 */
1400 if (unique_exprs_cnt > 0)
1401 attnum_offset = (unique_exprs_cnt + 1);
1402 else
1403 attnum_offset = 0;
1404
1405 /*
1406 * Now that we know how many expressions there are, we can offset the
1407 * values just enough to build the bitmapset.
1408 */
1409 for (i = 0; i < list_length(clauses); i++)
1410 {
1412
1413 /* ignore incompatible or already estimated clauses */
1414 if (list_attnums[i] == InvalidAttrNumber)
1415 continue;
1416
1417 /* make sure the attnum is in the expected range */
1418 Assert(list_attnums[i] >= (-unique_exprs_cnt));
1419 Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1420
1421 /* make sure the attnum is positive (valid AttrNumber) */
1422 attnum = list_attnums[i] + attnum_offset;
1423
1424 /*
1425 * Either it's a regular attribute, or it's an expression, in which
1426 * case we must not have seen it before (expressions are unique).
1427 *
1428 * XXX Check whether it's a regular attribute has to be done using the
1429 * original attnum, while the second check has to use the value with
1430 * an offset.
1431 */
1432 Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1433 !bms_is_member(attnum, clauses_attnums));
1434
1435 /*
1436 * Remember the offset attnum, both for attributes and expressions.
1437 * We'll pass list_attnums to clauselist_apply_dependencies, which
1438 * uses it to identify clauses in a bitmap. We could also pass the
1439 * offset, but this is more convenient.
1440 */
1441 list_attnums[i] = attnum;
1442
1443 clauses_attnums = bms_add_member(clauses_attnums, attnum);
1444 }
1445
1446 /*
1447 * If there's not at least two distinct attnums and expressions, then
1448 * reject the whole list of clauses. We must return 1.0 so the calling
1449 * function's selectivity is unaffected.
1450 */
1451 if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1452 {
1453 bms_free(clauses_attnums);
1454 pfree(list_attnums);
1455 return 1.0;
1456 }
1457
1458 /*
1459 * Load all functional dependencies matching at least two parameters. We
1460 * can simply consider all dependencies at once, without having to search
1461 * for the best statistics object.
1462 *
1463 * To not waste cycles and memory, we deserialize dependencies only for
1464 * statistics that match at least two attributes. The array is allocated
1465 * with the assumption that all objects match - we could grow the array to
1466 * make it just the right size, but it's likely wasteful anyway thanks to
1467 * moving the freed chunks to freelists etc.
1468 */
1469 func_dependencies = palloc_array(MVDependencies *, list_length(rel->statlist));
1470 nfunc_dependencies = 0;
1471 total_ndeps = 0;
1472
1473 foreach(l, rel->statlist)
1474 {
1476 int nmatched;
1477 int nexprs;
1478 int k;
1479 MVDependencies *deps;
1480
1481 /* skip statistics that are not of the correct type */
1482 if (stat->kind != STATS_EXT_DEPENDENCIES)
1483 continue;
1484
1485 /* skip statistics with mismatching stxdinherit value */
1486 if (stat->inherit != rte->inh)
1487 continue;
1488
1489 /*
1490 * Count matching attributes - we have to undo the attnum offsets. The
1491 * input attribute numbers are not offset (expressions are not
1492 * included in stat->keys, so it's not necessary). But we need to
1493 * offset it before checking against clauses_attnums.
1494 */
1495 nmatched = 0;
1496 k = -1;
1497 while ((k = bms_next_member(stat->keys, k)) >= 0)
1498 {
1500
1501 /* skip expressions */
1503 continue;
1504
1505 /* apply the same offset as above */
1506 attnum += attnum_offset;
1507
1508 if (bms_is_member(attnum, clauses_attnums))
1509 nmatched++;
1510 }
1511
1512 /* count matching expressions */
1513 nexprs = 0;
1514 for (i = 0; i < unique_exprs_cnt; i++)
1515 {
1516 ListCell *lc;
1517
1518 foreach(lc, stat->exprs)
1519 {
1520 Node *stat_expr = (Node *) lfirst(lc);
1521
1522 /* try to match it */
1523 if (equal(stat_expr, unique_exprs[i]))
1524 nexprs++;
1525 }
1526 }
1527
1528 /*
1529 * Skip objects matching fewer than two attributes/expressions from
1530 * clauses.
1531 */
1532 if (nmatched + nexprs < 2)
1533 continue;
1534
1535 deps = statext_dependencies_load(stat->statOid, rte->inh);
1536
1537 /*
1538 * The expressions may be represented by different attnums in the
1539 * stats, we need to remap them to be consistent with the clauses.
1540 * That will make the later steps (e.g. picking the strongest item and
1541 * so on) much simpler and cheaper, because it won't need to care
1542 * about the offset at all.
1543 *
1544 * When we're at it, we can ignore dependencies that are not fully
1545 * matched by clauses (i.e. referencing attributes or expressions that
1546 * are not in the clauses).
1547 *
1548 * We have to do this for all statistics, as long as there are any
1549 * expressions - we need to shift the attnums in all dependencies.
1550 *
1551 * XXX Maybe we should do this always, because it also eliminates some
1552 * of the dependencies early. It might be cheaper than having to walk
1553 * the longer list in find_strongest_dependency later, especially as
1554 * we need to do that repeatedly?
1555 *
1556 * XXX We have to do this even when there are no expressions in
1557 * clauses, otherwise find_strongest_dependency may fail for stats
1558 * with expressions (due to lookup of negative value in bitmap). So we
1559 * need to at least filter out those dependencies. Maybe we could do
1560 * it in a cheaper way (if there are no expr clauses, we can just
1561 * discard all negative attnums without any lookups).
1562 */
1563 if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1564 {
1565 int ndeps = 0;
1566
1567 for (i = 0; i < deps->ndeps; i++)
1568 {
1569 bool skip = false;
1570 MVDependency *dep = deps->deps[i];
1571 int j;
1572
1573 for (j = 0; j < dep->nattributes; j++)
1574 {
1575 int idx;
1576 Node *expr;
1577 AttrNumber unique_attnum = InvalidAttrNumber;
1579
1580 /* undo the per-statistics offset */
1581 attnum = dep->attributes[j];
1582
1583 /*
1584 * For regular attributes we can simply check if it
1585 * matches any clause. If there's no matching clause, we
1586 * can just ignore it. We need to offset the attnum
1587 * though.
1588 */
1590 {
1591 dep->attributes[j] = attnum + attnum_offset;
1592
1593 if (!bms_is_member(dep->attributes[j], clauses_attnums))
1594 {
1595 skip = true;
1596 break;
1597 }
1598
1599 continue;
1600 }
1601
1602 /*
1603 * the attnum should be a valid system attnum (-1, -2,
1604 * ...)
1605 */
1607
1608 /*
1609 * For expressions, we need to do two translations. First
1610 * we have to translate the negative attnum to index in
1611 * the list of expressions (in the statistics object).
1612 * Then we need to see if there's a matching clause. The
1613 * index of the unique expression determines the attnum
1614 * (and we offset it).
1615 */
1616 idx = -(1 + attnum);
1617
1618 /* Is the expression index is valid? */
1619 Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1620
1621 expr = (Node *) list_nth(stat->exprs, idx);
1622
1623 /* try to find the expression in the unique list */
1624 for (int m = 0; m < unique_exprs_cnt; m++)
1625 {
1626 /*
1627 * found a matching unique expression, use the attnum
1628 * (derived from index of the unique expression)
1629 */
1630 if (equal(unique_exprs[m], expr))
1631 {
1632 unique_attnum = -(m + 1) + attnum_offset;
1633 break;
1634 }
1635 }
1636
1637 /*
1638 * Found no matching expression, so we can simply skip
1639 * this dependency, because there's no chance it will be
1640 * fully covered.
1641 */
1642 if (unique_attnum == InvalidAttrNumber)
1643 {
1644 skip = true;
1645 break;
1646 }
1647
1648 /* otherwise remap it to the new attnum */
1649 dep->attributes[j] = unique_attnum;
1650 }
1651
1652 /* if found a matching dependency, keep it */
1653 if (!skip)
1654 {
1655 /* maybe we've skipped something earlier, so move it */
1656 if (ndeps != i)
1657 deps->deps[ndeps] = deps->deps[i];
1658
1659 ndeps++;
1660 }
1661 }
1662
1663 deps->ndeps = ndeps;
1664 }
1665
1666 /*
1667 * It's possible we've removed all dependencies, in which case we
1668 * don't bother adding it to the list.
1669 */
1670 if (deps->ndeps > 0)
1671 {
1672 func_dependencies[nfunc_dependencies] = deps;
1673 total_ndeps += deps->ndeps;
1674 nfunc_dependencies++;
1675 }
1676 }
1677
1678 /* if no matching stats could be found then we've nothing to do */
1679 if (nfunc_dependencies == 0)
1680 {
1681 pfree(func_dependencies);
1682 bms_free(clauses_attnums);
1683 pfree(list_attnums);
1684 pfree(unique_exprs);
1685 return 1.0;
1686 }
1687
1688 /*
1689 * Work out which dependencies we can apply, starting with the
1690 * widest/strongest ones, and proceeding to smaller/weaker ones.
1691 */
1692 dependencies = palloc_array(MVDependency *, total_ndeps);
1693 ndependencies = 0;
1694
1695 while (true)
1696 {
1697 MVDependency *dependency;
1699
1700 /* the widest/strongest dependency, fully matched by clauses */
1701 dependency = find_strongest_dependency(func_dependencies,
1702 nfunc_dependencies,
1703 clauses_attnums);
1704 if (!dependency)
1705 break;
1706
1707 dependencies[ndependencies++] = dependency;
1708
1709 /* Ignore dependencies using this implied attribute in later loops */
1710 attnum = dependency->attributes[dependency->nattributes - 1];
1711 clauses_attnums = bms_del_member(clauses_attnums, attnum);
1712 }
1713
1714 /*
1715 * If we found applicable dependencies, use them to estimate all
1716 * compatible clauses on attributes that they refer to.
1717 */
1718 if (ndependencies != 0)
1719 s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1720 sjinfo, dependencies, ndependencies,
1721 list_attnums, estimatedclauses);
1722
1723 /* free deserialized functional dependencies (and then the array) */
1724 for (i = 0; i < nfunc_dependencies; i++)
1725 pfree(func_dependencies[i]);
1726
1727 pfree(dependencies);
1728 pfree(func_dependencies);
1729 bms_free(clauses_attnums);
1730 pfree(list_attnums);
1731 pfree(unique_exprs);
1732
1733 return s1;
1734}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
int16 AttrNumber
Definition: attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition: attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition: attnum.h:41
#define InvalidAttrNumber
Definition: attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition: bitmapset.c:1305
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:867
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition: bitmapset.c:750
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition: bitmapset.c:814
BMS_Membership bms_membership(const Bitmapset *a)
Definition: bitmapset.c:780
int bms_member_index(Bitmapset *a, int x)
Definition: bitmapset.c:539
@ BMS_SINGLETON
Definition: bitmapset.h:72
@ BMS_MULTIPLE
Definition: bitmapset.h:73
#define VARHDRSZ
Definition: c.h:711
uint32_t uint32
Definition: c.h:552
unsigned int Index
Definition: c.h:633
size_t Size
Definition: c.h:624
bool is_pseudo_constant_clause(Node *clause)
Definition: clauses.c:2103
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition: clausesel.c:117
static void generate_dependencies(DependencyGenerator state)
Definition: dependencies.c:151
#define SizeOfHeader
Definition: dependencies.c:32
MVDependencies * statext_dependencies_deserialize(bytea *data)
Definition: dependencies.c:492
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
Definition: dependencies.c:612
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
Definition: dependencies.c:198
MVDependencies * statext_dependencies_build(StatsBuildData *data)
Definition: dependencies.c:342
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
Definition: dependencies.c:649
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
Definition: dependencies.c:588
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
Definition: dependencies.c:437
static void DependencyGenerator_free(DependencyGenerator state)
Definition: dependencies.c:190
struct DependencyGeneratorData DependencyGeneratorData
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
Definition: dependencies.c:922
static DependencyGenerator DependencyGenerator_init(int n, int k)
Definition: dependencies.c:167
#define SizeOfItem(natts)
Definition: dependencies.c:35
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
Definition: dependencies.c:215
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
Definition: dependencies.c:85
DependencyGeneratorData * DependencyGenerator
Definition: dependencies.c:59
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
Definition: dependencies.c:837
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define palloc_array(type, count)
Definition: fe_memutils.h:76
#define palloc0_array(type, count)
Definition: fe_memutils.h:77
#define palloc0_object(type)
Definition: fe_memutils.h:75
#define DatumGetByteaPP(X)
Definition: fmgr.h:291
Assert(PointerIsAligned(start, uint64))
return str start
for(;;)
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define MaxHeapAttributeNumber
Definition: htup_details.h:48
#define nitems(x)
Definition: indent.h:31
FILE * output
int j
Definition: isn.c:78
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
List * lappend(List *list, void *datum)
Definition: list.c:339
RegProcedure get_oprrest(Oid opno)
Definition: lsyscache.c:1722
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:400
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc0(Size size)
Definition: mcxt.c:1395
MemoryContext CurrentMemoryContext
Definition: mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:469
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition: nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:134
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
double Selectivity
Definition: nodes.h:260
JoinType
Definition: nodes.h:298
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define planner_rt_fetch(rti, root)
Definition: pathnodes.h:610
int16 attnum
Definition: pg_attribute.h:74
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
char * s1
char * s2
tree ctl root
Definition: radixtree.h:1857
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition: statistics.h:19
#define STATS_DEPS_MAGIC
Definition: statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition: statistics.h:44
List * args
Definition: primnodes.h:972
AttrNumber * dependencies
Definition: dependencies.c:56
Definition: pg_list.h:54
uint32 ndeps
Definition: statistics.h:61
uint32 magic
Definition: statistics.h:59
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:62
AttrNumber nattributes
Definition: statistics.h:53
double degree
Definition: statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition: statistics.h:54
Definition: nodes.h:135
Oid opno
Definition: primnodes.h:850
List * args
Definition: primnodes.h:868
Index relid
Definition: pathnodes.h:973
List * statlist
Definition: pathnodes.h:997
Expr * clause
Definition: pathnodes.h:2792
Oid attrtypid
Definition: vacuum.h:126
Oid attrcollid
Definition: vacuum.h:129
Definition: primnodes.h:262
AttrNumber varattno
Definition: primnodes.h:274
int varno
Definition: primnodes.h:269
Index varlevelsup
Definition: primnodes.h:294
Definition: type.h:96
Definition: regguts.h:323
Definition: c.h:706
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition: syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition: syscache.c:230
static ItemArray items
Definition: test_tidstore.c:48
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define TYPECACHE_LT_OPR
Definition: typcache.h:139
static Size VARSIZE_ANY(const void *PTR)
Definition: varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition: varatt.h:472
static char * VARDATA(const void *PTR)
Definition: varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition: varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition: varatt.h:432
const char * type