PostgreSQL Source Code git master
Loading...
Searching...
No Matches
dependencies.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
#include "parser/parsetree.h"
#include "statistics/extended_stats_internal.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
Include dependency graph for dependencies.c:

Go to the source code of this file.

Data Structures

struct  DependencyGeneratorData
 

Macros

#define SizeOfHeader   (3 * sizeof(uint32))
 
#define SizeOfItem(natts)    (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
 
#define MinSizeOfItem   SizeOfItem(2)
 
#define MinSizeOfItems(ndeps)    (SizeOfHeader + (ndeps) * MinSizeOfItem)
 

Typedefs

typedef struct DependencyGeneratorData DependencyGeneratorData
 
typedef DependencyGeneratorDataDependencyGenerator
 

Functions

static void generate_dependencies_recurse (DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
 
static void generate_dependencies (DependencyGenerator state)
 
static DependencyGenerator DependencyGenerator_init (int n, int k)
 
static void DependencyGenerator_free (DependencyGenerator state)
 
static AttrNumberDependencyGenerator_next (DependencyGenerator state)
 
static double dependency_degree (StatsBuildData *data, int k, AttrNumber *dependency)
 
static bool dependency_is_fully_matched (MVDependency *dependency, Bitmapset *attnums)
 
static bool dependency_is_compatible_clause (Node *clause, Index relid, AttrNumber *attnum)
 
static bool dependency_is_compatible_expression (Node *clause, Index relid, List *statlist, Node **expr)
 
static MVDependencyfind_strongest_dependency (MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
 
static Selectivity clauselist_apply_dependencies (PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
 
MVDependenciesstatext_dependencies_build (StatsBuildData *data)
 
byteastatext_dependencies_serialize (MVDependencies *dependencies)
 
MVDependenciesstatext_dependencies_deserialize (bytea *data)
 
void statext_dependencies_free (MVDependencies *dependencies)
 
bool statext_dependencies_validate (const MVDependencies *dependencies, const int2vector *stxkeys, int numexprs, int elevel)
 
MVDependenciesstatext_dependencies_load (Oid mvoid, bool inh)
 
Selectivity dependencies_clauselist_selectivity (PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
 

Macro Definition Documentation

◆ MinSizeOfItem

#define MinSizeOfItem   SizeOfItem(2)

Definition at line 39 of file dependencies.c.

◆ MinSizeOfItems

#define MinSizeOfItems (   ndeps)     (SizeOfHeader + (ndeps) * MinSizeOfItem)

Definition at line 42 of file dependencies.c.

50{
51 int k; /* size of the dependency */
52 int n; /* number of possible attributes */
53 int current; /* next dependency to return (index) */
54 AttrNumber ndependencies; /* number of dependencies generated */
55 AttrNumber *dependencies; /* array of pre-generated dependencies */
57
59
61 int index, AttrNumber start, AttrNumber *current);
66static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
67static bool dependency_is_fully_matched(MVDependency *dependency,
68 Bitmapset *attnums);
69static bool dependency_is_compatible_clause(Node *clause, Index relid,
71static bool dependency_is_compatible_expression(Node *clause, Index relid,
72 List *statlist, Node **expr);
74 int ndependencies, Bitmapset *attnums);
76 int varRelid, JoinType jointype,
77 SpecialJoinInfo *sjinfo,
78 MVDependency **dependencies,
79 int ndependencies,
82
83static void
85 AttrNumber start, AttrNumber *current)
86{
87 /*
88 * The generator handles the first (k-1) elements differently from the
89 * last element.
90 */
91 if (index < (state->k - 1))
92 {
94
95 /*
96 * The first (k-1) values have to be in ascending order, which we
97 * generate recursively.
98 */
99
100 for (i = start; i < state->n; i++)
101 {
102 current[index] = i;
103 generate_dependencies_recurse(state, (index + 1), (i + 1), current);
104 }
105 }
106 else
107 {
108 int i;
109
110 /*
111 * the last element is the implied value, which does not respect the
112 * ascending order. We just need to check that the value is not in the
113 * first (k-1) elements.
114 */
115
116 for (i = 0; i < state->n; i++)
117 {
118 int j;
119 bool match = false;
120
121 current[index] = i;
122
123 for (j = 0; j < index; j++)
124 {
125 if (current[j] == i)
126 {
127 match = true;
128 break;
129 }
130 }
131
132 /*
133 * If the value is not found in the first part of the dependency,
134 * we're done.
135 */
136 if (!match)
137 {
138 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
139 state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
140 memcpy(&state->dependencies[(state->k * state->ndependencies)],
141 current, state->k * sizeof(AttrNumber));
142 state->ndependencies++;
143 }
144 }
145 }
146}
147
148/* generate all dependencies (k-permutations of n elements) */
149static void
151{
153
154 generate_dependencies_recurse(state, 0, 0, current);
155
156 pfree(current);
157}
158
159/*
160 * initialize the DependencyGenerator of variations, and prebuild the variations
161 *
162 * This pre-builds all the variations. We could also generate them in
163 * DependencyGenerator_next(), but this seems simpler.
164 */
166DependencyGenerator_init(int n, int k)
167{
169
170 Assert((n >= k) && (k > 0));
171
172 /* allocate the DependencyGenerator state */
174 state->dependencies = palloc_array(AttrNumber, k);
175
176 state->ndependencies = 0;
177 state->current = 0;
178 state->k = k;
179 state->n = n;
180
181 /* now actually pre-generate all the variations */
183
184 return state;
185}
186
187/* free the DependencyGenerator state */
188static void
190{
191 pfree(state->dependencies);
192 pfree(state);
193}
194
195/* generate next combination */
196static AttrNumber *
198{
199 if (state->current == state->ndependencies)
200 return NULL;
201
202 return &state->dependencies[state->k * state->current++];
203}
204
205
206/*
207 * validates functional dependency on the data
208 *
209 * An actual work horse of detecting functional dependencies. Given a variation
210 * of k attributes, it checks that the first (k-1) are sufficient to determine
211 * the last one.
212 */
213static double
215{
216 int i,
217 nitems;
221
222 /* counters valid within a group */
223 int group_size = 0;
224 int n_violations = 0;
225
226 /* total number of rows supporting (consistent with) the dependency */
227 int n_supporting_rows = 0;
228
229 /* Make sure we have at least two input attributes. */
230 Assert(k >= 2);
231
232 /* sort info for all attributes columns */
233 mss = multi_sort_init(k);
234
235 /*
236 * Translate the array of indexes to regular attnums for the dependency
237 * (we will need this to identify the columns in StatsBuildData).
238 */
240 for (i = 0; i < k; i++)
241 attnums_dep[i] = data->attnums[dependency[i]];
242
243 /*
244 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
245 *
246 * (a) sort the data lexicographically
247 *
248 * (b) split the data into groups by first (k-1) columns
249 *
250 * (c) for each group count different values in the last column
251 *
252 * We use the column data types' default sort operators and collations;
253 * perhaps at some point it'd be worth using column-specific collations?
254 */
255
256 /* prepare the sort function for the dimensions */
257 for (i = 0; i < k; i++)
258 {
259 VacAttrStats *colstat = data->stats[dependency[i]];
261
263 if (type->lt_opr == InvalidOid) /* shouldn't happen */
264 elog(ERROR, "cache lookup failed for ordering operator for type %u",
265 colstat->attrtypid);
266
267 /* prepare the sort function for this dimension */
268 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
269 }
270
271 /*
272 * build an array of SortItem(s) sorted using the multi-sort support
273 *
274 * XXX This relies on all stats entries pointing to the same tuple
275 * descriptor. For now that assumption holds, but it might change in the
276 * future for example if we support statistics on multiple tables.
277 */
279
280 /*
281 * Walk through the sorted array, split it into rows according to the
282 * first (k-1) columns. If there's a single value in the last column, we
283 * count the group as 'supporting' the functional dependency. Otherwise we
284 * count it as contradicting.
285 */
286
287 /* start with the first row forming a group */
288 group_size = 1;
289
290 /* loop 1 beyond the end of the array so that we count the final group */
291 for (i = 1; i <= nitems; i++)
292 {
293 /*
294 * Check if the group ended, which may be either because we processed
295 * all the items (i==nitems), or because the i-th item is not equal to
296 * the preceding one.
297 */
298 if (i == nitems ||
299 multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
300 {
301 /*
302 * If no violations were found in the group then track the rows of
303 * the group as supporting the functional dependency.
304 */
305 if (n_violations == 0)
307
308 /* Reset counters for the new group */
309 n_violations = 0;
310 group_size = 1;
311 continue;
312 }
313 /* first columns match, but the last one does not (so contradicting) */
314 else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
315 n_violations++;
316
317 group_size++;
318 }
319
320 /* Compute the 'degree of validity' as (supporting/total). */
321 return (n_supporting_rows * 1.0 / data->numrows);
322}
323
324/*
325 * detects functional dependencies between groups of columns
326 *
327 * Generates all possible subsets of columns (variations) and computes
328 * the degree of validity for each one. For example when creating statistics
329 * on three columns (a,b,c) there are 9 possible dependencies
330 *
331 * two columns three columns
332 * ----------- -------------
333 * (a) -> b (a,b) -> c
334 * (a) -> c (a,c) -> b
335 * (b) -> a (b,c) -> a
336 * (b) -> c
337 * (c) -> a
338 * (c) -> b
339 */
342{
343 int i,
344 k;
345
346 /* result */
347 MVDependencies *dependencies = NULL;
348 MemoryContext cxt;
349
350 Assert(data->nattnums >= 2);
351
352 /* tracks memory allocated by dependency_degree calls */
354 "dependency_degree cxt",
356
357 /*
358 * We'll try build functional dependencies starting from the smallest ones
359 * covering just 2 columns, to the largest ones, covering all columns
360 * included in the statistics object. We start from the smallest ones
361 * because we want to be able to skip already implied ones.
362 */
363 for (k = 2; k <= data->nattnums; k++)
364 {
365 AttrNumber *dependency; /* array with k elements */
366
367 /* prepare a DependencyGenerator of variation */
369
370 /* generate all possible variations of k values (out of n) */
371 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
372 {
373 double degree;
374 MVDependency *d;
376
377 /* release memory used by dependency degree calculation */
379
380 /* compute how valid the dependency seems */
381 degree = dependency_degree(data, k, dependency);
382
385
386 /*
387 * if the dependency seems entirely invalid, don't store it
388 */
389 if (degree == 0.0)
390 continue;
391
392 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
393 + k * sizeof(AttrNumber));
394
395 /* copy the dependency (and keep the indexes into stxkeys) */
396 d->degree = degree;
397 d->nattributes = k;
398 for (i = 0; i < k; i++)
399 d->attributes[i] = data->attnums[dependency[i]];
400
401 /* initialize the list of dependencies */
402 if (dependencies == NULL)
403 {
404 dependencies = palloc0_object(MVDependencies);
405
406 dependencies->magic = STATS_DEPS_MAGIC;
407 dependencies->type = STATS_DEPS_TYPE_BASIC;
408 dependencies->ndeps = 0;
409 }
410
411 dependencies->ndeps++;
412 dependencies = (MVDependencies *) repalloc(dependencies,
414 + dependencies->ndeps * sizeof(MVDependency *));
415
416 dependencies->deps[dependencies->ndeps - 1] = d;
417 }
418
419 /*
420 * we're done with variations of k elements, so free the
421 * DependencyGenerator
422 */
424 }
425
427
428 return dependencies;
429}
430
431
432/*
433 * Serialize list of dependencies into a bytea value.
434 */
435bytea *
437{
438 int i;
439 bytea *output;
440 char *tmp;
441 Size len;
442
443 /* we need to store ndeps, with a number of attributes for each one */
445
446 /* and also include space for the actual attribute numbers and degrees */
447 for (i = 0; i < dependencies->ndeps; i++)
448 len += SizeOfItem(dependencies->deps[i]->nattributes);
449
450 output = (bytea *) palloc0(len);
452
453 tmp = VARDATA(output);
454
455 /* Store the base struct values (magic, type, ndeps) */
456 memcpy(tmp, &dependencies->magic, sizeof(uint32));
457 tmp += sizeof(uint32);
458 memcpy(tmp, &dependencies->type, sizeof(uint32));
459 tmp += sizeof(uint32);
460 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
461 tmp += sizeof(uint32);
462
463 /* store number of attributes and attribute numbers for each dependency */
464 for (i = 0; i < dependencies->ndeps; i++)
465 {
466 MVDependency *d = dependencies->deps[i];
467
468 memcpy(tmp, &d->degree, sizeof(double));
469 tmp += sizeof(double);
470
471 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
472 tmp += sizeof(AttrNumber);
473
474 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
475 tmp += sizeof(AttrNumber) * d->nattributes;
476
477 /* protect against overflow */
478 Assert(tmp <= ((char *) output + len));
479 }
480
481 /* make sure we've produced exactly the right amount of data */
482 Assert(tmp == ((char *) output + len));
483
484 return output;
485}
486
487/*
488 * Reads serialized dependencies into MVDependencies structure.
489 */
492{
493 int i;
495 MVDependencies *dependencies;
496 char *tmp;
497
498 if (data == NULL)
499 return NULL;
500
502 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
504
505 /* read the MVDependencies header */
506 dependencies = palloc0_object(MVDependencies);
507
508 /* initialize pointer to the data part (skip the varlena header) */
509 tmp = VARDATA_ANY(data);
510
511 /* read the header fields and perform basic sanity checks */
512 memcpy(&dependencies->magic, tmp, sizeof(uint32));
513 tmp += sizeof(uint32);
514 memcpy(&dependencies->type, tmp, sizeof(uint32));
515 tmp += sizeof(uint32);
516 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
517 tmp += sizeof(uint32);
518
519 if (dependencies->magic != STATS_DEPS_MAGIC)
520 elog(ERROR, "invalid dependency magic %d (expected %d)",
521 dependencies->magic, STATS_DEPS_MAGIC);
522
523 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
524 elog(ERROR, "invalid dependency type %d (expected %d)",
525 dependencies->type, STATS_DEPS_TYPE_BASIC);
526
527 if (dependencies->ndeps == 0)
528 elog(ERROR, "invalid zero-length item array in MVDependencies");
529
530 /* what minimum bytea size do we expect for those parameters */
531 min_expected_size = SizeOfItem(dependencies->ndeps);
532
534 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
536
537 /* allocate space for the MCV items */
538 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
539 + (dependencies->ndeps * sizeof(MVDependency *)));
540
541 for (i = 0; i < dependencies->ndeps; i++)
542 {
543 double degree;
544 AttrNumber k;
545 MVDependency *d;
546
547 /* degree of validity */
548 memcpy(&degree, tmp, sizeof(double));
549 tmp += sizeof(double);
550
551 /* number of attributes */
552 memcpy(&k, tmp, sizeof(AttrNumber));
553 tmp += sizeof(AttrNumber);
554
555 /* is the number of attributes valid? */
556 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
557
558 /* now that we know the number of attributes, allocate the dependency */
559 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
560 + (k * sizeof(AttrNumber)));
561
562 d->degree = degree;
563 d->nattributes = k;
564
565 /* copy attribute numbers */
566 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
567 tmp += sizeof(AttrNumber) * d->nattributes;
568
569 dependencies->deps[i] = d;
570
571 /* still within the bytea */
572 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
573 }
574
575 /* we should have consumed the whole bytea exactly */
576 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
577
578 return dependencies;
579}
580
581/*
582 * Free allocations of a MVDependencies.
583 */
584void
586{
587 for (int i = 0; i < dependencies->ndeps; i++)
588 pfree(dependencies->deps[i]);
589 pfree(dependencies);
590}
591
592/*
593 * Validate a set of MVDependencies against the extended statistics object
594 * definition.
595 *
596 * Every MVDependencies must be checked to ensure that the attnums in the
597 * attributes list correspond to attnums/expressions defined by the
598 * extended statistics object.
599 *
600 * Positive attnums are attributes which must be found in the stxkeys, while
601 * negative attnums correspond to an expression number, no attribute number
602 * can be below (0 - numexprs).
603 */
604bool
606 const int2vector *stxkeys,
607 int numexprs, int elevel)
608{
610
611 /* Scan through each dependency entry */
612 for (int i = 0; i < dependencies->ndeps; i++)
613 {
614 const MVDependency *dep = dependencies->deps[i];
615
616 /*
617 * Cross-check each attribute in a dependency entry with the extended
618 * stats object definition.
619 */
620 for (int j = 0; j < dep->nattributes; j++)
621 {
623 bool ok = false;
624
625 if (attnum > 0)
626 {
627 /* attribute number in stxkeys */
628 for (int k = 0; k < stxkeys->dim1; k++)
629 {
630 if (attnum == stxkeys->values[k])
631 {
632 ok = true;
633 break;
634 }
635 }
636 }
637 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
638 {
639 /* attribute number for an expression */
640 ok = true;
641 }
642
643 if (!ok)
644 {
645 ereport(elevel,
647 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
648 "pg_dependencies", attnum)));
649 return false;
650 }
651 }
652 }
653
654 return true;
655}
656
657/*
658 * dependency_is_fully_matched
659 * checks that a functional dependency is fully matched given clauses on
660 * attributes (assuming the clauses are suitable equality clauses)
661 */
662static bool
664{
665 int j;
666
667 /*
668 * Check that the dependency actually is fully covered by clauses. We have
669 * to translate all attribute numbers, as those are referenced
670 */
671 for (j = 0; j < dependency->nattributes; j++)
672 {
673 int attnum = dependency->attributes[j];
674
675 if (!bms_is_member(attnum, attnums))
676 return false;
677 }
678
679 return true;
680}
681
682/*
683 * statext_dependencies_load
684 * Load the functional dependencies for the indicated pg_statistic_ext tuple
685 */
688{
689 MVDependencies *result;
690 bool isnull;
691 Datum deps;
692 HeapTuple htup;
693
696 BoolGetDatum(inh));
697 if (!HeapTupleIsValid(htup))
698 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
699
702 if (isnull)
703 elog(ERROR,
704 "requested statistics kind \"%c\" is not yet built for statistics object %u",
706
708
709 ReleaseSysCache(htup);
710
711 return result;
712}
713
714/*
715 * dependency_is_compatible_clause
716 * Determines if the clause is compatible with functional dependencies
717 *
718 * Only clauses that have the form of equality to a pseudoconstant, or can be
719 * interpreted that way, are currently accepted. Furthermore the variable
720 * part of the clause must be a simple Var belonging to the specified
721 * relation, whose attribute number we return in *attnum on success.
722 */
723static bool
725{
726 Var *var;
728
729 if (IsA(clause, RestrictInfo))
730 {
731 RestrictInfo *rinfo = (RestrictInfo *) clause;
732
733 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
734 if (rinfo->pseudoconstant)
735 return false;
736
737 /* Clauses referencing multiple, or no, varnos are incompatible */
738 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
739 return false;
740
741 clause = (Node *) rinfo->clause;
742 }
743
744 if (is_opclause(clause))
745 {
746 /* If it's an opclause, check for Var = Const or Const = Var. */
747 OpExpr *expr = (OpExpr *) clause;
748
749 /* Only expressions with two arguments are candidates. */
750 if (list_length(expr->args) != 2)
751 return false;
752
753 /* Make sure non-selected argument is a pseudoconstant. */
755 clause_expr = linitial(expr->args);
756 else if (is_pseudo_constant_clause(linitial(expr->args)))
757 clause_expr = lsecond(expr->args);
758 else
759 return false;
760
761 /*
762 * If it's not an "=" operator, just ignore the clause, as it's not
763 * compatible with functional dependencies.
764 *
765 * This uses the function for estimating selectivity, not the operator
766 * directly (a bit awkward, but well ...).
767 *
768 * XXX this is pretty dubious; probably it'd be better to check btree
769 * or hash opclass membership, so as not to be fooled by custom
770 * selectivity functions, and to be more consistent with decisions
771 * elsewhere in the planner.
772 */
773 if (get_oprrest(expr->opno) != F_EQSEL)
774 return false;
775
776 /* OK to proceed with checking "var" */
777 }
778 else if (IsA(clause, ScalarArrayOpExpr))
779 {
780 /* If it's a scalar array operator, check for Var IN Const. */
781 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
782
783 /*
784 * Reject ALL() variant, we only care about ANY/IN.
785 *
786 * XXX Maybe we should check if all the values are the same, and allow
787 * ALL in that case? Doesn't seem very practical, though.
788 */
789 if (!expr->useOr)
790 return false;
791
792 /* Only expressions with two arguments are candidates. */
793 if (list_length(expr->args) != 2)
794 return false;
795
796 /*
797 * We know it's always (Var IN Const), so we assume the var is the
798 * first argument, and pseudoconstant is the second one.
799 */
801 return false;
802
803 clause_expr = linitial(expr->args);
804
805 /*
806 * If it's not an "=" operator, just ignore the clause, as it's not
807 * compatible with functional dependencies. The operator is identified
808 * simply by looking at which function it uses to estimate
809 * selectivity. That's a bit strange, but it's what other similar
810 * places do.
811 */
812 if (get_oprrest(expr->opno) != F_EQSEL)
813 return false;
814
815 /* OK to proceed with checking "var" */
816 }
817 else if (is_orclause(clause))
818 {
819 BoolExpr *bool_expr = (BoolExpr *) clause;
820 ListCell *lc;
821
822 /* start with no attribute number */
824
825 foreach(lc, bool_expr->args)
826 {
828
829 /*
830 * Had we found incompatible clause in the arguments, treat the
831 * whole clause as incompatible.
832 */
834 relid, &clause_attnum))
835 return false;
836
839
840 /* ensure all the variables are the same (same attnum) */
841 if (*attnum != clause_attnum)
842 return false;
843 }
844
845 /* the Var is already checked by the recursive call */
846 return true;
847 }
848 else if (is_notclause(clause))
849 {
850 /*
851 * "NOT x" can be interpreted as "x = false", so get the argument and
852 * proceed with seeing if it's a suitable Var.
853 */
854 clause_expr = (Node *) get_notclausearg(clause);
855 }
856 else
857 {
858 /*
859 * A boolean expression "x" can be interpreted as "x = true", so
860 * proceed with seeing if it's a suitable Var.
861 */
862 clause_expr = clause;
863 }
864
865 /*
866 * We may ignore any RelabelType node above the operand. (There won't be
867 * more than one, since eval_const_expressions has been applied already.)
868 */
870 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
871
872 /* We only support plain Vars for now */
873 if (!IsA(clause_expr, Var))
874 return false;
875
876 /* OK, we know we have a Var */
877 var = (Var *) clause_expr;
878
879 /* Ensure Var is from the correct relation */
880 if (var->varno != relid)
881 return false;
882
883 /* We also better ensure the Var is from the current level */
884 if (var->varlevelsup != 0)
885 return false;
886
887 /* Also ignore system attributes (we don't allow stats on those) */
889 return false;
890
891 *attnum = var->varattno;
892 return true;
893}
894
895/*
896 * find_strongest_dependency
897 * find the strongest dependency on the attributes
898 *
899 * When applying functional dependencies, we start with the strongest
900 * dependencies. That is, we select the dependency that:
901 *
902 * (a) has all attributes covered by equality clauses
903 *
904 * (b) has the most attributes
905 *
906 * (c) has the highest degree of validity
907 *
908 * This guarantees that we eliminate the most redundant conditions first
909 * (see the comment in dependencies_clauselist_selectivity).
910 */
911static MVDependency *
912find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
913 Bitmapset *attnums)
914{
915 int i,
916 j;
918
919 /* number of attnums in clauses */
920 int nattnums = bms_num_members(attnums);
921
922 /*
923 * Iterate over the MVDependency items and find the strongest one from the
924 * fully-matched dependencies. We do the cheap checks first, before
925 * matching it against the attnums.
926 */
927 for (i = 0; i < ndependencies; i++)
928 {
929 for (j = 0; j < dependencies[i]->ndeps; j++)
930 {
931 MVDependency *dependency = dependencies[i]->deps[j];
932
933 /*
934 * Skip dependencies referencing more attributes than available
935 * clauses, as those can't be fully matched.
936 */
937 if (dependency->nattributes > nattnums)
938 continue;
939
940 if (strongest)
941 {
942 /* skip dependencies on fewer attributes than the strongest. */
943 if (dependency->nattributes < strongest->nattributes)
944 continue;
945
946 /* also skip weaker dependencies when attribute count matches */
947 if (strongest->nattributes == dependency->nattributes &&
948 strongest->degree > dependency->degree)
949 continue;
950 }
951
952 /*
953 * this dependency is stronger, but we must still check that it's
954 * fully matched to these attnums. We perform this check last as
955 * it's slightly more expensive than the previous checks.
956 */
957 if (dependency_is_fully_matched(dependency, attnums))
958 strongest = dependency; /* save new best match */
959 }
960 }
961
962 return strongest;
963}
964
965/*
966 * clauselist_apply_dependencies
967 * Apply the specified functional dependencies to a list of clauses and
968 * return the estimated selectivity of the clauses that are compatible
969 * with any of the given dependencies.
970 *
971 * This will estimate all not-already-estimated clauses that are compatible
972 * with functional dependencies, and which have an attribute mentioned by any
973 * of the given dependencies (either as an implying or implied attribute).
974 *
975 * Given (lists of) clauses on attributes (a,b) and a functional dependency
976 * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
977 * using the formula
978 *
979 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
980 *
981 * where 'f' is the degree of dependency. This reflects the fact that we
982 * expect a fraction f of all rows to be consistent with the dependency
983 * (a=>b), and so have a selectivity of P(a), while the remaining rows are
984 * treated as independent.
985 *
986 * In practice, we use a slightly modified version of this formula, which uses
987 * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
988 * should obviously not exceed either column's individual selectivity. I.e.,
989 * we actually combine selectivities using the formula
990 *
991 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
992 *
993 * This can make quite a difference if the specific values matching the
994 * clauses are not consistent with the functional dependency.
995 */
996static Selectivity
998 int varRelid, JoinType jointype,
999 SpecialJoinInfo *sjinfo,
1000 MVDependency **dependencies, int ndependencies,
1003{
1004 Bitmapset *attnums;
1005 int i;
1006 int j;
1007 int nattrs;
1009 int attidx;
1010 int listidx;
1011 ListCell *l;
1013
1014 /*
1015 * Extract the attnums of all implying and implied attributes from all the
1016 * given dependencies. Each of these attributes is expected to have at
1017 * least 1 not-already-estimated compatible clause that we will estimate
1018 * here.
1019 */
1020 attnums = NULL;
1021 for (i = 0; i < ndependencies; i++)
1022 {
1023 for (j = 0; j < dependencies[i]->nattributes; j++)
1024 {
1025 AttrNumber attnum = dependencies[i]->attributes[j];
1026
1027 attnums = bms_add_member(attnums, attnum);
1028 }
1029 }
1030
1031 /*
1032 * Compute per-column selectivity estimates for each of these attributes,
1033 * and mark all the corresponding clauses as estimated.
1034 */
1035 nattrs = bms_num_members(attnums);
1037
1038 attidx = 0;
1039 i = -1;
1040 while ((i = bms_next_member(attnums, i)) >= 0)
1041 {
1044
1045 listidx = -1;
1046 foreach(l, clauses)
1047 {
1048 Node *clause = (Node *) lfirst(l);
1049
1050 listidx++;
1051 if (list_attnums[listidx] == i)
1052 {
1055 }
1056 }
1057
1059 jointype, sjinfo, false);
1060 attr_sel[attidx++] = simple_sel;
1061 }
1062
1063 /*
1064 * Now combine these selectivities using the dependency information. For
1065 * chains of dependencies such as a -> b -> c, the b -> c dependency will
1066 * come before the a -> b dependency in the array, so we traverse the
1067 * array backwards to ensure such chains are computed in the right order.
1068 *
1069 * As explained above, pairs of selectivities are combined using the
1070 * formula
1071 *
1072 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1073 *
1074 * to ensure that the combined selectivity is never greater than either
1075 * individual selectivity.
1076 *
1077 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1078 * conditional probabilities to compute the overall result as follows:
1079 *
1080 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1081 *
1082 * so we replace the selectivities of all implied attributes with
1083 * conditional probabilities, that are conditional on all their implying
1084 * attributes. The selectivities of all other non-implied attributes are
1085 * left as they are.
1086 */
1087 for (i = ndependencies - 1; i >= 0; i--)
1088 {
1089 MVDependency *dependency = dependencies[i];
1092 double f;
1093
1094 /* Selectivity of all the implying attributes */
1095 s1 = 1.0;
1096 for (j = 0; j < dependency->nattributes - 1; j++)
1097 {
1098 attnum = dependency->attributes[j];
1099 attidx = bms_member_index(attnums, attnum);
1100 s1 *= attr_sel[attidx];
1101 }
1102
1103 /* Original selectivity of the implied attribute */
1104 attnum = dependency->attributes[j];
1105 attidx = bms_member_index(attnums, attnum);
1106 s2 = attr_sel[attidx];
1107
1108 /*
1109 * Replace s2 with the conditional probability s2 given s1, computed
1110 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1111 *
1112 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1113 *
1114 * where P(a) = s1, the selectivity of the implying attributes, and
1115 * P(b) = s2, the selectivity of the implied attribute.
1116 */
1117 f = dependency->degree;
1118
1119 if (s1 <= s2)
1120 attr_sel[attidx] = f + (1 - f) * s2;
1121 else
1122 attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1123 }
1124
1125 /*
1126 * The overall selectivity of all the clauses on all these attributes is
1127 * then the product of all the original (non-implied) probabilities and
1128 * the new conditional (implied) probabilities.
1129 */
1130 s1 = 1.0;
1131 for (i = 0; i < nattrs; i++)
1132 s1 *= attr_sel[i];
1133
1135
1136 pfree(attr_sel);
1137 bms_free(attnums);
1138
1139 return s1;
1140}
1141
1142/*
1143 * dependency_is_compatible_expression
1144 * Determines if the expression is compatible with functional dependencies
1145 *
1146 * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1147 * expression is a simple Var. On success, return the matching statistics
1148 * expression into *expr.
1149 */
1150static bool
1151dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1152{
1153 ListCell *lc,
1154 *lc2;
1156
1157 if (IsA(clause, RestrictInfo))
1158 {
1159 RestrictInfo *rinfo = (RestrictInfo *) clause;
1160
1161 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1162 if (rinfo->pseudoconstant)
1163 return false;
1164
1165 /* Clauses referencing multiple, or no, varnos are incompatible */
1166 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1167 return false;
1168
1169 clause = (Node *) rinfo->clause;
1170 }
1171
1172 if (is_opclause(clause))
1173 {
1174 /* If it's an opclause, check for Var = Const or Const = Var. */
1175 OpExpr *expr = (OpExpr *) clause;
1176
1177 /* Only expressions with two arguments are candidates. */
1178 if (list_length(expr->args) != 2)
1179 return false;
1180
1181 /* Make sure non-selected argument is a pseudoconstant. */
1183 clause_expr = linitial(expr->args);
1184 else if (is_pseudo_constant_clause(linitial(expr->args)))
1185 clause_expr = lsecond(expr->args);
1186 else
1187 return false;
1188
1189 /*
1190 * If it's not an "=" operator, just ignore the clause, as it's not
1191 * compatible with functional dependencies.
1192 *
1193 * This uses the function for estimating selectivity, not the operator
1194 * directly (a bit awkward, but well ...).
1195 *
1196 * XXX this is pretty dubious; probably it'd be better to check btree
1197 * or hash opclass membership, so as not to be fooled by custom
1198 * selectivity functions, and to be more consistent with decisions
1199 * elsewhere in the planner.
1200 */
1201 if (get_oprrest(expr->opno) != F_EQSEL)
1202 return false;
1203
1204 /* OK to proceed with checking "var" */
1205 }
1206 else if (IsA(clause, ScalarArrayOpExpr))
1207 {
1208 /* If it's a scalar array operator, check for Var IN Const. */
1209 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1210
1211 /*
1212 * Reject ALL() variant, we only care about ANY/IN.
1213 *
1214 * FIXME Maybe we should check if all the values are the same, and
1215 * allow ALL in that case? Doesn't seem very practical, though.
1216 */
1217 if (!expr->useOr)
1218 return false;
1219
1220 /* Only expressions with two arguments are candidates. */
1221 if (list_length(expr->args) != 2)
1222 return false;
1223
1224 /*
1225 * We know it's always (Var IN Const), so we assume the var is the
1226 * first argument, and pseudoconstant is the second one.
1227 */
1229 return false;
1230
1231 clause_expr = linitial(expr->args);
1232
1233 /*
1234 * If it's not an "=" operator, just ignore the clause, as it's not
1235 * compatible with functional dependencies. The operator is identified
1236 * simply by looking at which function it uses to estimate
1237 * selectivity. That's a bit strange, but it's what other similar
1238 * places do.
1239 */
1240 if (get_oprrest(expr->opno) != F_EQSEL)
1241 return false;
1242
1243 /* OK to proceed with checking "var" */
1244 }
1245 else if (is_orclause(clause))
1246 {
1247 BoolExpr *bool_expr = (BoolExpr *) clause;
1248
1249 /* start with no expression (we'll use the first match) */
1250 *expr = NULL;
1251
1252 foreach(lc, bool_expr->args)
1253 {
1254 Node *or_expr = NULL;
1255
1256 /*
1257 * Had we found incompatible expression in the arguments, treat
1258 * the whole expression as incompatible.
1259 */
1261 statlist, &or_expr))
1262 return false;
1263
1264 if (*expr == NULL)
1265 *expr = or_expr;
1266
1267 /* ensure all the expressions are the same */
1268 if (!equal(or_expr, *expr))
1269 return false;
1270 }
1271
1272 /* the expression is already checked by the recursive call */
1273 return true;
1274 }
1275 else if (is_notclause(clause))
1276 {
1277 /*
1278 * "NOT x" can be interpreted as "x = false", so get the argument and
1279 * proceed with seeing if it's a suitable Var.
1280 */
1281 clause_expr = (Node *) get_notclausearg(clause);
1282 }
1283 else
1284 {
1285 /*
1286 * A boolean expression "x" can be interpreted as "x = true", so
1287 * proceed with seeing if it's a suitable Var.
1288 */
1289 clause_expr = clause;
1290 }
1291
1292 /*
1293 * We may ignore any RelabelType node above the operand. (There won't be
1294 * more than one, since eval_const_expressions has been applied already.)
1295 */
1297 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1298
1299 /*
1300 * Search for a matching statistics expression.
1301 */
1302 foreach(lc, statlist)
1303 {
1305
1306 /* ignore stats without dependencies */
1307 if (info->kind != STATS_EXT_DEPENDENCIES)
1308 continue;
1309
1310 foreach(lc2, info->exprs)
1311 {
1312 Node *stat_expr = (Node *) lfirst(lc2);
1313
1315 {
1316 *expr = stat_expr;
1317 return true;
1318 }
1319 }
1320 }
1321
1322 return false;
1323}
1324
1325/*
1326 * dependencies_clauselist_selectivity
1327 * Return the estimated selectivity of (a subset of) the given clauses
1328 * using functional dependency statistics, or 1.0 if no useful functional
1329 * dependency statistic exists.
1330 *
1331 * 'estimatedclauses' is an input/output argument that gets a bit set
1332 * corresponding to the (zero-based) list index of each clause that is included
1333 * in the estimated selectivity.
1334 *
1335 * Given equality clauses on attributes (a,b) we find the strongest dependency
1336 * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1337 * dependency, we then combine the per-clause selectivities using the formula
1338 *
1339 * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1340 *
1341 * where 'f' is the degree of the dependency. (Actually we use a slightly
1342 * modified version of this formula -- see clauselist_apply_dependencies()).
1343 *
1344 * With clauses on more than two attributes, the dependencies are applied
1345 * recursively, starting with the widest/strongest dependencies. For example
1346 * P(a,b,c) is first split like this:
1347 *
1348 * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1349 *
1350 * assuming (a,b=>c) is the strongest dependency.
1351 */
1354 List *clauses,
1355 int varRelid,
1356 JoinType jointype,
1357 SpecialJoinInfo *sjinfo,
1358 RelOptInfo *rel,
1360{
1361 Selectivity s1 = 1.0;
1362 ListCell *l;
1365 int listidx;
1368 int total_ndeps;
1369 MVDependency **dependencies;
1370 int ndependencies;
1371 int i;
1373 RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1374
1375 /* unique expressions */
1377 int unique_exprs_cnt;
1378
1379 /* check if there's any stats that might be useful for us. */
1380 if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1381 return 1.0;
1382
1384
1385 /*
1386 * We allocate space as if every clause was a unique expression, although
1387 * that's probably overkill. Some will be simple column references that
1388 * we'll translate to attnums, and there might be duplicates. But it's
1389 * easier and cheaper to just do one allocation than repalloc later.
1390 */
1392 unique_exprs_cnt = 0;
1393
1394 /*
1395 * Pre-process the clauses list to extract the attnums seen in each item.
1396 * We need to determine if there's any clauses which will be useful for
1397 * dependency selectivity estimations. Along the way we'll record all of
1398 * the attnums for each clause in a list which we'll reference later so we
1399 * don't need to repeat the same work again. We'll also keep track of all
1400 * attnums seen.
1401 *
1402 * We also skip clauses that we already estimated using different types of
1403 * statistics (we treat them as incompatible).
1404 *
1405 * To handle expressions, we assign them negative attnums, as if it was a
1406 * system attribute (this is fine, as we only allow extended stats on user
1407 * attributes). And then we offset everything by the number of
1408 * expressions, so that we can store the values in a bitmapset.
1409 */
1410 listidx = 0;
1411 foreach(l, clauses)
1412 {
1413 Node *clause = (Node *) lfirst(l);
1415 Node *expr = NULL;
1416
1417 /* ignore clause by default */
1419
1421 {
1422 /*
1423 * If it's a simple column reference, just extract the attnum. If
1424 * it's an expression, assign a negative attnum as if it was a
1425 * system attribute.
1426 */
1427 if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1428 {
1430 }
1431 else if (dependency_is_compatible_expression(clause, rel->relid,
1432 rel->statlist,
1433 &expr))
1434 {
1435 /* special attnum assigned to this expression */
1437
1438 Assert(expr != NULL);
1439
1440 /* If the expression is duplicate, use the same attnum. */
1441 for (i = 0; i < unique_exprs_cnt; i++)
1442 {
1443 if (equal(unique_exprs[i], expr))
1444 {
1445 /* negative attribute number to expression */
1446 attnum = -(i + 1);
1447 break;
1448 }
1449 }
1450
1451 /* not found in the list, so add it */
1453 {
1455
1456 /* after incrementing the value, to get -1, -2, ... */
1458 }
1459
1460 /* remember which attnum was assigned to this clause */
1462 }
1463 }
1464
1465 listidx++;
1466 }
1467
1468 Assert(listidx == list_length(clauses));
1469
1470 /*
1471 * How much we need to offset the attnums? If there are no expressions,
1472 * then no offset is needed. Otherwise we need to offset enough for the
1473 * lowest value (-unique_exprs_cnt) to become 1.
1474 */
1475 if (unique_exprs_cnt > 0)
1477 else
1478 attnum_offset = 0;
1479
1480 /*
1481 * Now that we know how many expressions there are, we can offset the
1482 * values just enough to build the bitmapset.
1483 */
1484 for (i = 0; i < list_length(clauses); i++)
1485 {
1487
1488 /* ignore incompatible or already estimated clauses */
1490 continue;
1491
1492 /* make sure the attnum is in the expected range */
1495
1496 /* make sure the attnum is positive (valid AttrNumber) */
1498
1499 /*
1500 * Either it's a regular attribute, or it's an expression, in which
1501 * case we must not have seen it before (expressions are unique).
1502 *
1503 * XXX Check whether it's a regular attribute has to be done using the
1504 * original attnum, while the second check has to use the value with
1505 * an offset.
1506 */
1509
1510 /*
1511 * Remember the offset attnum, both for attributes and expressions.
1512 * We'll pass list_attnums to clauselist_apply_dependencies, which
1513 * uses it to identify clauses in a bitmap. We could also pass the
1514 * offset, but this is more convenient.
1515 */
1517
1519 }
1520
1521 /*
1522 * If there's not at least two distinct attnums and expressions, then
1523 * reject the whole list of clauses. We must return 1.0 so the calling
1524 * function's selectivity is unaffected.
1525 */
1527 {
1530 return 1.0;
1531 }
1532
1533 /*
1534 * Load all functional dependencies matching at least two parameters. We
1535 * can simply consider all dependencies at once, without having to search
1536 * for the best statistics object.
1537 *
1538 * To not waste cycles and memory, we deserialize dependencies only for
1539 * statistics that match at least two attributes. The array is allocated
1540 * with the assumption that all objects match - we could grow the array to
1541 * make it just the right size, but it's likely wasteful anyway thanks to
1542 * moving the freed chunks to freelists etc.
1543 */
1546 total_ndeps = 0;
1547
1548 foreach(l, rel->statlist)
1549 {
1551 int nmatched;
1552 int nexprs;
1553 int k;
1554 MVDependencies *deps;
1555
1556 /* skip statistics that are not of the correct type */
1557 if (stat->kind != STATS_EXT_DEPENDENCIES)
1558 continue;
1559
1560 /* skip statistics with mismatching stxdinherit value */
1561 if (stat->inherit != rte->inh)
1562 continue;
1563
1564 /*
1565 * Count matching attributes - we have to undo the attnum offsets. The
1566 * input attribute numbers are not offset (expressions are not
1567 * included in stat->keys, so it's not necessary). But we need to
1568 * offset it before checking against clauses_attnums.
1569 */
1570 nmatched = 0;
1571 k = -1;
1572 while ((k = bms_next_member(stat->keys, k)) >= 0)
1573 {
1575
1576 /* skip expressions */
1578 continue;
1579
1580 /* apply the same offset as above */
1582
1584 nmatched++;
1585 }
1586
1587 /* count matching expressions */
1588 nexprs = 0;
1589 for (i = 0; i < unique_exprs_cnt; i++)
1590 {
1591 ListCell *lc;
1592
1593 foreach(lc, stat->exprs)
1594 {
1595 Node *stat_expr = (Node *) lfirst(lc);
1596
1597 /* try to match it */
1599 nexprs++;
1600 }
1601 }
1602
1603 /*
1604 * Skip objects matching fewer than two attributes/expressions from
1605 * clauses.
1606 */
1607 if (nmatched + nexprs < 2)
1608 continue;
1609
1610 deps = statext_dependencies_load(stat->statOid, rte->inh);
1611
1612 /*
1613 * The expressions may be represented by different attnums in the
1614 * stats, we need to remap them to be consistent with the clauses.
1615 * That will make the later steps (e.g. picking the strongest item and
1616 * so on) much simpler and cheaper, because it won't need to care
1617 * about the offset at all.
1618 *
1619 * When we're at it, we can ignore dependencies that are not fully
1620 * matched by clauses (i.e. referencing attributes or expressions that
1621 * are not in the clauses).
1622 *
1623 * We have to do this for all statistics, as long as there are any
1624 * expressions - we need to shift the attnums in all dependencies.
1625 *
1626 * XXX Maybe we should do this always, because it also eliminates some
1627 * of the dependencies early. It might be cheaper than having to walk
1628 * the longer list in find_strongest_dependency later, especially as
1629 * we need to do that repeatedly?
1630 *
1631 * XXX We have to do this even when there are no expressions in
1632 * clauses, otherwise find_strongest_dependency may fail for stats
1633 * with expressions (due to lookup of negative value in bitmap). So we
1634 * need to at least filter out those dependencies. Maybe we could do
1635 * it in a cheaper way (if there are no expr clauses, we can just
1636 * discard all negative attnums without any lookups).
1637 */
1638 if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1639 {
1640 int ndeps = 0;
1641
1642 for (i = 0; i < deps->ndeps; i++)
1643 {
1644 bool skip = false;
1645 MVDependency *dep = deps->deps[i];
1646 int j;
1647
1648 for (j = 0; j < dep->nattributes; j++)
1649 {
1650 int idx;
1651 Node *expr;
1654
1655 /* undo the per-statistics offset */
1656 attnum = dep->attributes[j];
1657
1658 /*
1659 * For regular attributes we can simply check if it
1660 * matches any clause. If there's no matching clause, we
1661 * can just ignore it. We need to offset the attnum
1662 * though.
1663 */
1665 {
1666 dep->attributes[j] = attnum + attnum_offset;
1667
1668 if (!bms_is_member(dep->attributes[j], clauses_attnums))
1669 {
1670 skip = true;
1671 break;
1672 }
1673
1674 continue;
1675 }
1676
1677 /*
1678 * the attnum should be a valid system attnum (-1, -2,
1679 * ...)
1680 */
1682
1683 /*
1684 * For expressions, we need to do two translations. First
1685 * we have to translate the negative attnum to index in
1686 * the list of expressions (in the statistics object).
1687 * Then we need to see if there's a matching clause. The
1688 * index of the unique expression determines the attnum
1689 * (and we offset it).
1690 */
1691 idx = -(1 + attnum);
1692
1693 /* Is the expression index is valid? */
1694 Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1695
1696 expr = (Node *) list_nth(stat->exprs, idx);
1697
1698 /* try to find the expression in the unique list */
1699 for (int m = 0; m < unique_exprs_cnt; m++)
1700 {
1701 /*
1702 * found a matching unique expression, use the attnum
1703 * (derived from index of the unique expression)
1704 */
1705 if (equal(unique_exprs[m], expr))
1706 {
1707 unique_attnum = -(m + 1) + attnum_offset;
1708 break;
1709 }
1710 }
1711
1712 /*
1713 * Found no matching expression, so we can simply skip
1714 * this dependency, because there's no chance it will be
1715 * fully covered.
1716 */
1718 {
1719 skip = true;
1720 break;
1721 }
1722
1723 /* otherwise remap it to the new attnum */
1724 dep->attributes[j] = unique_attnum;
1725 }
1726
1727 /* if found a matching dependency, keep it */
1728 if (!skip)
1729 {
1730 /* maybe we've skipped something earlier, so move it */
1731 if (ndeps != i)
1732 deps->deps[ndeps] = deps->deps[i];
1733
1734 ndeps++;
1735 }
1736 }
1737
1738 deps->ndeps = ndeps;
1739 }
1740
1741 /*
1742 * It's possible we've removed all dependencies, in which case we
1743 * don't bother adding it to the list.
1744 */
1745 if (deps->ndeps > 0)
1746 {
1748 total_ndeps += deps->ndeps;
1750 }
1751 }
1752
1753 /* if no matching stats could be found then we've nothing to do */
1754 if (nfunc_dependencies == 0)
1755 {
1760 return 1.0;
1761 }
1762
1763 /*
1764 * Work out which dependencies we can apply, starting with the
1765 * widest/strongest ones, and proceeding to smaller/weaker ones.
1766 */
1767 dependencies = palloc_array(MVDependency *, total_ndeps);
1768 ndependencies = 0;
1769
1770 while (true)
1771 {
1772 MVDependency *dependency;
1774
1775 /* the widest/strongest dependency, fully matched by clauses */
1779 if (!dependency)
1780 break;
1781
1782 dependencies[ndependencies++] = dependency;
1783
1784 /* Ignore dependencies using this implied attribute in later loops */
1785 attnum = dependency->attributes[dependency->nattributes - 1];
1787 }
1788
1789 /*
1790 * If we found applicable dependencies, use them to estimate all
1791 * compatible clauses on attributes that they refer to.
1792 */
1793 if (ndependencies != 0)
1794 s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1795 sjinfo, dependencies, ndependencies,
1797
1798 /* free deserialized functional dependencies (and then the array) */
1799 for (i = 0; i < nfunc_dependencies; i++)
1801
1802 pfree(dependencies);
1807
1808 return s1;
1809}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
int16 AttrNumber
Definition attnum.h:21
#define AttributeNumberIsValid(attributeNumber)
Definition attnum.h:34
#define AttrNumberIsForUserDefinedAttr(attributeNumber)
Definition attnum.h:41
#define InvalidAttrNumber
Definition attnum.h:23
int bms_next_member(const Bitmapset *a, int prevbit)
Definition bitmapset.c:1305
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition bitmapset.c:867
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
int bms_num_members(const Bitmapset *a)
Definition bitmapset.c:750
bool bms_is_member(int x, const Bitmapset *a)
Definition bitmapset.c:510
Bitmapset * bms_add_member(Bitmapset *a, int x)
Definition bitmapset.c:814
BMS_Membership bms_membership(const Bitmapset *a)
Definition bitmapset.c:780
int bms_member_index(Bitmapset *a, int x)
Definition bitmapset.c:539
@ BMS_SINGLETON
Definition bitmapset.h:72
@ BMS_MULTIPLE
Definition bitmapset.h:73
#define VARHDRSZ
Definition c.h:711
#define Assert(condition)
Definition c.h:873
uint32_t uint32
Definition c.h:546
unsigned int Index
Definition c.h:628
size_t Size
Definition c.h:619
bool is_pseudo_constant_clause(Node *clause)
Definition clauses.c:2100
Selectivity clauselist_selectivity_ext(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_extended_stats)
Definition clausesel.c:117
static void generate_dependencies(DependencyGenerator state)
#define SizeOfHeader
MVDependencies * statext_dependencies_deserialize(bytea *data)
MVDependencies * statext_dependencies_load(Oid mvoid, bool inh)
static bool dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
static AttrNumber * DependencyGenerator_next(DependencyGenerator state)
MVDependencies * statext_dependencies_build(StatsBuildData *data)
static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
bool statext_dependencies_validate(const MVDependencies *dependencies, const int2vector *stxkeys, int numexprs, int elevel)
static bool dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
bytea * statext_dependencies_serialize(MVDependencies *dependencies)
void statext_dependencies_free(MVDependencies *dependencies)
static void DependencyGenerator_free(DependencyGenerator state)
static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, MVDependency **dependencies, int ndependencies, AttrNumber *list_attnums, Bitmapset **estimatedclauses)
static DependencyGenerator DependencyGenerator_init(int n, int k)
#define SizeOfItem(natts)
Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses)
static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
static void generate_dependencies_recurse(DependencyGenerator state, int index, AttrNumber start, AttrNumber *current)
DependencyGeneratorData * DependencyGenerator
static MVDependency * find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums)
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
bool equal(const void *a, const void *b)
Definition equalfuncs.c:223
bool has_stats_of_kind(List *stats, char requiredkind)
int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss)
int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss)
SortItem * build_sorted_items(StatsBuildData *data, int *nitems, MultiSortSupport mss, int numattrs, AttrNumber *attnums)
MultiSortSupport multi_sort_init(int ndims)
void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation)
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
#define DatumGetByteaPP(X)
Definition fmgr.h:292
return str start
#define HeapTupleIsValid(tuple)
Definition htup.h:78
#define MaxHeapAttributeNumber
#define nitems(x)
Definition indent.h:31
FILE * output
int j
Definition isn.c:78
int i
Definition isn.c:77
List * lappend(List *list, void *datum)
Definition list.c:339
RegProcedure get_oprrest(Oid opno)
Definition lsyscache.c:1707
void MemoryContextReset(MemoryContext context)
Definition mcxt.c:403
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
MemoryContext CurrentMemoryContext
Definition mcxt.c:160
void MemoryContextDelete(MemoryContext context)
Definition mcxt.c:472
#define AllocSetContextCreate
Definition memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition memutils.h:160
static bool is_orclause(const void *clause)
Definition nodeFuncs.h:116
static bool is_opclause(const void *clause)
Definition nodeFuncs.h:76
static bool is_notclause(const void *clause)
Definition nodeFuncs.h:125
static Expr * get_notclausearg(const void *notclause)
Definition nodeFuncs.h:134
#define IsA(nodeptr, _type_)
Definition nodes.h:164
double Selectivity
Definition nodes.h:260
JoinType
Definition nodes.h:298
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
#define planner_rt_fetch(rti, root)
Definition pathnodes.h:686
int16 attnum
static const struct exclude_list_item skip[]
const void size_t len
const void * data
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
static void * list_nth(const List *list, int n)
Definition pg_list.h:299
#define linitial(l)
Definition pg_list.h:178
#define lsecond(l)
Definition pg_list.h:183
static Datum BoolGetDatum(bool X)
Definition postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition postgres.h:262
uint64_t Datum
Definition postgres.h:70
#define InvalidOid
unsigned int Oid
static int fb(int x)
char * s1
char * s2
tree ctl root
Definition radixtree.h:1857
#define CLAMP_PROBABILITY(p)
Definition selfuncs.h:63
#define STATS_MAX_DIMENSIONS
Definition statistics.h:19
#define STATS_DEPS_MAGIC
Definition statistics.h:43
#define STATS_DEPS_TYPE_BASIC
Definition statistics.h:44
Definition pg_list.h:54
MVDependency * deps[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:62
AttrNumber nattributes
Definition statistics.h:53
double degree
Definition statistics.h:52
AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]
Definition statistics.h:54
Definition nodes.h:135
Oid opno
Definition primnodes.h:850
List * args
Definition primnodes.h:868
Expr * clause
Definition pathnodes.h:2870
AttrNumber varattno
Definition primnodes.h:274
int varno
Definition primnodes.h:269
Index varlevelsup
Definition primnodes.h:294
Definition type.h:96
Definition c.h:706
void ReleaseSysCache(HeapTuple tuple)
Definition syscache.c:264
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
Definition syscache.c:595
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
Definition syscache.c:230
static ItemArray items
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:386
#define TYPECACHE_LT_OPR
Definition typcache.h:139
static Size VARSIZE_ANY(const void *PTR)
Definition varatt.h:460
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
const char * type

◆ SizeOfHeader

#define SizeOfHeader   (3 * sizeof(uint32))

Definition at line 32 of file dependencies.c.

◆ SizeOfItem

#define SizeOfItem (   natts)     (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))

Definition at line 35 of file dependencies.c.

Typedef Documentation

◆ DependencyGenerator

◆ DependencyGeneratorData

Function Documentation

◆ clauselist_apply_dependencies()

static Selectivity clauselist_apply_dependencies ( PlannerInfo root,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
MVDependency **  dependencies,
int  ndependencies,
AttrNumber list_attnums,
Bitmapset **  estimatedclauses 
)
static

Definition at line 998 of file dependencies.c.

1004{
1005 Bitmapset *attnums;
1006 int i;
1007 int j;
1008 int nattrs;
1010 int attidx;
1011 int listidx;
1012 ListCell *l;
1014
1015 /*
1016 * Extract the attnums of all implying and implied attributes from all the
1017 * given dependencies. Each of these attributes is expected to have at
1018 * least 1 not-already-estimated compatible clause that we will estimate
1019 * here.
1020 */
1021 attnums = NULL;
1022 for (i = 0; i < ndependencies; i++)
1023 {
1024 for (j = 0; j < dependencies[i]->nattributes; j++)
1025 {
1026 AttrNumber attnum = dependencies[i]->attributes[j];
1027
1028 attnums = bms_add_member(attnums, attnum);
1029 }
1030 }
1031
1032 /*
1033 * Compute per-column selectivity estimates for each of these attributes,
1034 * and mark all the corresponding clauses as estimated.
1035 */
1036 nattrs = bms_num_members(attnums);
1038
1039 attidx = 0;
1040 i = -1;
1041 while ((i = bms_next_member(attnums, i)) >= 0)
1042 {
1045
1046 listidx = -1;
1047 foreach(l, clauses)
1048 {
1049 Node *clause = (Node *) lfirst(l);
1050
1051 listidx++;
1052 if (list_attnums[listidx] == i)
1053 {
1056 }
1057 }
1058
1060 jointype, sjinfo, false);
1061 attr_sel[attidx++] = simple_sel;
1062 }
1063
1064 /*
1065 * Now combine these selectivities using the dependency information. For
1066 * chains of dependencies such as a -> b -> c, the b -> c dependency will
1067 * come before the a -> b dependency in the array, so we traverse the
1068 * array backwards to ensure such chains are computed in the right order.
1069 *
1070 * As explained above, pairs of selectivities are combined using the
1071 * formula
1072 *
1073 * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1074 *
1075 * to ensure that the combined selectivity is never greater than either
1076 * individual selectivity.
1077 *
1078 * Where multiple dependencies apply (e.g., a -> b -> c), we use
1079 * conditional probabilities to compute the overall result as follows:
1080 *
1081 * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1082 *
1083 * so we replace the selectivities of all implied attributes with
1084 * conditional probabilities, that are conditional on all their implying
1085 * attributes. The selectivities of all other non-implied attributes are
1086 * left as they are.
1087 */
1088 for (i = ndependencies - 1; i >= 0; i--)
1089 {
1090 MVDependency *dependency = dependencies[i];
1093 double f;
1094
1095 /* Selectivity of all the implying attributes */
1096 s1 = 1.0;
1097 for (j = 0; j < dependency->nattributes - 1; j++)
1098 {
1099 attnum = dependency->attributes[j];
1100 attidx = bms_member_index(attnums, attnum);
1101 s1 *= attr_sel[attidx];
1102 }
1103
1104 /* Original selectivity of the implied attribute */
1105 attnum = dependency->attributes[j];
1106 attidx = bms_member_index(attnums, attnum);
1107 s2 = attr_sel[attidx];
1108
1109 /*
1110 * Replace s2 with the conditional probability s2 given s1, computed
1111 * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1112 *
1113 * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1114 *
1115 * where P(a) = s1, the selectivity of the implying attributes, and
1116 * P(b) = s2, the selectivity of the implied attribute.
1117 */
1118 f = dependency->degree;
1119
1120 if (s1 <= s2)
1121 attr_sel[attidx] = f + (1 - f) * s2;
1122 else
1123 attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1124 }
1125
1126 /*
1127 * The overall selectivity of all the clauses on all these attributes is
1128 * then the product of all the original (non-implied) probabilities and
1129 * the new conditional (implied) probabilities.
1130 */
1131 s1 = 1.0;
1132 for (i = 0; i < nattrs; i++)
1133 s1 *= attr_sel[i];
1134
1136
1137 pfree(attr_sel);
1138 bms_free(attnums);
1139
1140 return s1;
1141}

References attnum, MVDependency::attributes, bms_add_member(), bms_free(), bms_member_index(), bms_next_member(), bms_num_members(), CLAMP_PROBABILITY, clauselist_selectivity_ext(), MVDependency::degree, fb(), i, j, lappend(), lfirst, MVDependency::nattributes, NIL, palloc_array, pfree(), root, s1, and s2.

Referenced by dependencies_clauselist_selectivity().

◆ dependencies_clauselist_selectivity()

Selectivity dependencies_clauselist_selectivity ( PlannerInfo root,
List clauses,
int  varRelid,
JoinType  jointype,
SpecialJoinInfo sjinfo,
RelOptInfo rel,
Bitmapset **  estimatedclauses 
)

Definition at line 1354 of file dependencies.c.

1361{
1362 Selectivity s1 = 1.0;
1363 ListCell *l;
1366 int listidx;
1369 int total_ndeps;
1370 MVDependency **dependencies;
1371 int ndependencies;
1372 int i;
1375
1376 /* unique expressions */
1378 int unique_exprs_cnt;
1379
1380 /* check if there's any stats that might be useful for us. */
1382 return 1.0;
1383
1385
1386 /*
1387 * We allocate space as if every clause was a unique expression, although
1388 * that's probably overkill. Some will be simple column references that
1389 * we'll translate to attnums, and there might be duplicates. But it's
1390 * easier and cheaper to just do one allocation than repalloc later.
1391 */
1393 unique_exprs_cnt = 0;
1394
1395 /*
1396 * Pre-process the clauses list to extract the attnums seen in each item.
1397 * We need to determine if there's any clauses which will be useful for
1398 * dependency selectivity estimations. Along the way we'll record all of
1399 * the attnums for each clause in a list which we'll reference later so we
1400 * don't need to repeat the same work again. We'll also keep track of all
1401 * attnums seen.
1402 *
1403 * We also skip clauses that we already estimated using different types of
1404 * statistics (we treat them as incompatible).
1405 *
1406 * To handle expressions, we assign them negative attnums, as if it was a
1407 * system attribute (this is fine, as we only allow extended stats on user
1408 * attributes). And then we offset everything by the number of
1409 * expressions, so that we can store the values in a bitmapset.
1410 */
1411 listidx = 0;
1412 foreach(l, clauses)
1413 {
1414 Node *clause = (Node *) lfirst(l);
1416 Node *expr = NULL;
1417
1418 /* ignore clause by default */
1420
1422 {
1423 /*
1424 * If it's a simple column reference, just extract the attnum. If
1425 * it's an expression, assign a negative attnum as if it was a
1426 * system attribute.
1427 */
1428 if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1429 {
1431 }
1432 else if (dependency_is_compatible_expression(clause, rel->relid,
1433 rel->statlist,
1434 &expr))
1435 {
1436 /* special attnum assigned to this expression */
1438
1439 Assert(expr != NULL);
1440
1441 /* If the expression is duplicate, use the same attnum. */
1442 for (i = 0; i < unique_exprs_cnt; i++)
1443 {
1444 if (equal(unique_exprs[i], expr))
1445 {
1446 /* negative attribute number to expression */
1447 attnum = -(i + 1);
1448 break;
1449 }
1450 }
1451
1452 /* not found in the list, so add it */
1454 {
1456
1457 /* after incrementing the value, to get -1, -2, ... */
1459 }
1460
1461 /* remember which attnum was assigned to this clause */
1463 }
1464 }
1465
1466 listidx++;
1467 }
1468
1469 Assert(listidx == list_length(clauses));
1470
1471 /*
1472 * How much we need to offset the attnums? If there are no expressions,
1473 * then no offset is needed. Otherwise we need to offset enough for the
1474 * lowest value (-unique_exprs_cnt) to become 1.
1475 */
1476 if (unique_exprs_cnt > 0)
1478 else
1479 attnum_offset = 0;
1480
1481 /*
1482 * Now that we know how many expressions there are, we can offset the
1483 * values just enough to build the bitmapset.
1484 */
1485 for (i = 0; i < list_length(clauses); i++)
1486 {
1488
1489 /* ignore incompatible or already estimated clauses */
1491 continue;
1492
1493 /* make sure the attnum is in the expected range */
1496
1497 /* make sure the attnum is positive (valid AttrNumber) */
1499
1500 /*
1501 * Either it's a regular attribute, or it's an expression, in which
1502 * case we must not have seen it before (expressions are unique).
1503 *
1504 * XXX Check whether it's a regular attribute has to be done using the
1505 * original attnum, while the second check has to use the value with
1506 * an offset.
1507 */
1510
1511 /*
1512 * Remember the offset attnum, both for attributes and expressions.
1513 * We'll pass list_attnums to clauselist_apply_dependencies, which
1514 * uses it to identify clauses in a bitmap. We could also pass the
1515 * offset, but this is more convenient.
1516 */
1518
1520 }
1521
1522 /*
1523 * If there's not at least two distinct attnums and expressions, then
1524 * reject the whole list of clauses. We must return 1.0 so the calling
1525 * function's selectivity is unaffected.
1526 */
1528 {
1531 return 1.0;
1532 }
1533
1534 /*
1535 * Load all functional dependencies matching at least two parameters. We
1536 * can simply consider all dependencies at once, without having to search
1537 * for the best statistics object.
1538 *
1539 * To not waste cycles and memory, we deserialize dependencies only for
1540 * statistics that match at least two attributes. The array is allocated
1541 * with the assumption that all objects match - we could grow the array to
1542 * make it just the right size, but it's likely wasteful anyway thanks to
1543 * moving the freed chunks to freelists etc.
1544 */
1547 total_ndeps = 0;
1548
1549 foreach(l, rel->statlist)
1550 {
1552 int nmatched;
1553 int nexprs;
1554 int k;
1555 MVDependencies *deps;
1556
1557 /* skip statistics that are not of the correct type */
1558 if (stat->kind != STATS_EXT_DEPENDENCIES)
1559 continue;
1560
1561 /* skip statistics with mismatching stxdinherit value */
1562 if (stat->inherit != rte->inh)
1563 continue;
1564
1565 /*
1566 * Count matching attributes - we have to undo the attnum offsets. The
1567 * input attribute numbers are not offset (expressions are not
1568 * included in stat->keys, so it's not necessary). But we need to
1569 * offset it before checking against clauses_attnums.
1570 */
1571 nmatched = 0;
1572 k = -1;
1573 while ((k = bms_next_member(stat->keys, k)) >= 0)
1574 {
1576
1577 /* skip expressions */
1579 continue;
1580
1581 /* apply the same offset as above */
1583
1585 nmatched++;
1586 }
1587
1588 /* count matching expressions */
1589 nexprs = 0;
1590 for (i = 0; i < unique_exprs_cnt; i++)
1591 {
1592 ListCell *lc;
1593
1594 foreach(lc, stat->exprs)
1595 {
1596 Node *stat_expr = (Node *) lfirst(lc);
1597
1598 /* try to match it */
1600 nexprs++;
1601 }
1602 }
1603
1604 /*
1605 * Skip objects matching fewer than two attributes/expressions from
1606 * clauses.
1607 */
1608 if (nmatched + nexprs < 2)
1609 continue;
1610
1611 deps = statext_dependencies_load(stat->statOid, rte->inh);
1612
1613 /*
1614 * The expressions may be represented by different attnums in the
1615 * stats, we need to remap them to be consistent with the clauses.
1616 * That will make the later steps (e.g. picking the strongest item and
1617 * so on) much simpler and cheaper, because it won't need to care
1618 * about the offset at all.
1619 *
1620 * When we're at it, we can ignore dependencies that are not fully
1621 * matched by clauses (i.e. referencing attributes or expressions that
1622 * are not in the clauses).
1623 *
1624 * We have to do this for all statistics, as long as there are any
1625 * expressions - we need to shift the attnums in all dependencies.
1626 *
1627 * XXX Maybe we should do this always, because it also eliminates some
1628 * of the dependencies early. It might be cheaper than having to walk
1629 * the longer list in find_strongest_dependency later, especially as
1630 * we need to do that repeatedly?
1631 *
1632 * XXX We have to do this even when there are no expressions in
1633 * clauses, otherwise find_strongest_dependency may fail for stats
1634 * with expressions (due to lookup of negative value in bitmap). So we
1635 * need to at least filter out those dependencies. Maybe we could do
1636 * it in a cheaper way (if there are no expr clauses, we can just
1637 * discard all negative attnums without any lookups).
1638 */
1639 if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1640 {
1641 int ndeps = 0;
1642
1643 for (i = 0; i < deps->ndeps; i++)
1644 {
1645 bool skip = false;
1646 MVDependency *dep = deps->deps[i];
1647 int j;
1648
1649 for (j = 0; j < dep->nattributes; j++)
1650 {
1651 int idx;
1652 Node *expr;
1655
1656 /* undo the per-statistics offset */
1657 attnum = dep->attributes[j];
1658
1659 /*
1660 * For regular attributes we can simply check if it
1661 * matches any clause. If there's no matching clause, we
1662 * can just ignore it. We need to offset the attnum
1663 * though.
1664 */
1666 {
1667 dep->attributes[j] = attnum + attnum_offset;
1668
1669 if (!bms_is_member(dep->attributes[j], clauses_attnums))
1670 {
1671 skip = true;
1672 break;
1673 }
1674
1675 continue;
1676 }
1677
1678 /*
1679 * the attnum should be a valid system attnum (-1, -2,
1680 * ...)
1681 */
1683
1684 /*
1685 * For expressions, we need to do two translations. First
1686 * we have to translate the negative attnum to index in
1687 * the list of expressions (in the statistics object).
1688 * Then we need to see if there's a matching clause. The
1689 * index of the unique expression determines the attnum
1690 * (and we offset it).
1691 */
1692 idx = -(1 + attnum);
1693
1694 /* Is the expression index is valid? */
1695 Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1696
1697 expr = (Node *) list_nth(stat->exprs, idx);
1698
1699 /* try to find the expression in the unique list */
1700 for (int m = 0; m < unique_exprs_cnt; m++)
1701 {
1702 /*
1703 * found a matching unique expression, use the attnum
1704 * (derived from index of the unique expression)
1705 */
1706 if (equal(unique_exprs[m], expr))
1707 {
1708 unique_attnum = -(m + 1) + attnum_offset;
1709 break;
1710 }
1711 }
1712
1713 /*
1714 * Found no matching expression, so we can simply skip
1715 * this dependency, because there's no chance it will be
1716 * fully covered.
1717 */
1719 {
1720 skip = true;
1721 break;
1722 }
1723
1724 /* otherwise remap it to the new attnum */
1725 dep->attributes[j] = unique_attnum;
1726 }
1727
1728 /* if found a matching dependency, keep it */
1729 if (!skip)
1730 {
1731 /* maybe we've skipped something earlier, so move it */
1732 if (ndeps != i)
1733 deps->deps[ndeps] = deps->deps[i];
1734
1735 ndeps++;
1736 }
1737 }
1738
1739 deps->ndeps = ndeps;
1740 }
1741
1742 /*
1743 * It's possible we've removed all dependencies, in which case we
1744 * don't bother adding it to the list.
1745 */
1746 if (deps->ndeps > 0)
1747 {
1749 total_ndeps += deps->ndeps;
1751 }
1752 }
1753
1754 /* if no matching stats could be found then we've nothing to do */
1755 if (nfunc_dependencies == 0)
1756 {
1761 return 1.0;
1762 }
1763
1764 /*
1765 * Work out which dependencies we can apply, starting with the
1766 * widest/strongest ones, and proceeding to smaller/weaker ones.
1767 */
1768 dependencies = palloc_array(MVDependency *, total_ndeps);
1769 ndependencies = 0;
1770
1771 while (true)
1772 {
1773 MVDependency *dependency;
1775
1776 /* the widest/strongest dependency, fully matched by clauses */
1780 if (!dependency)
1781 break;
1782
1783 dependencies[ndependencies++] = dependency;
1784
1785 /* Ignore dependencies using this implied attribute in later loops */
1786 attnum = dependency->attributes[dependency->nattributes - 1];
1788 }
1789
1790 /*
1791 * If we found applicable dependencies, use them to estimate all
1792 * compatible clauses on attributes that they refer to.
1793 */
1794 if (ndependencies != 0)
1795 s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1796 sjinfo, dependencies, ndependencies,
1798
1799 /* free deserialized functional dependencies (and then the array) */
1800 for (i = 0; i < nfunc_dependencies; i++)
1802
1803 pfree(dependencies);
1808
1809 return s1;
1810}
Index relid
Definition pathnodes.h:1051
List * statlist
Definition pathnodes.h:1075

References Assert, attnum, AttributeNumberIsValid, MVDependency::attributes, AttrNumberIsForUserDefinedAttr, bms_add_member(), bms_del_member(), bms_free(), bms_is_member(), bms_membership(), BMS_MULTIPLE, bms_next_member(), clauselist_apply_dependencies(), dependency_is_compatible_clause(), dependency_is_compatible_expression(), MVDependencies::deps, equal(), fb(), find_strongest_dependency(), has_stats_of_kind(), i, idx(), InvalidAttrNumber, j, lfirst, list_length(), list_nth(), MaxHeapAttributeNumber, MVDependency::nattributes, MVDependencies::ndeps, NIL, palloc_array, pfree(), planner_rt_fetch, RelOptInfo::relid, root, s1, skip, statext_dependencies_load(), and RelOptInfo::statlist.

Referenced by statext_clauselist_selectivity().

◆ dependency_degree()

static double dependency_degree ( StatsBuildData data,
int  k,
AttrNumber dependency 
)
static

Definition at line 215 of file dependencies.c.

216{
217 int i,
218 nitems;
222
223 /* counters valid within a group */
224 int group_size = 0;
225 int n_violations = 0;
226
227 /* total number of rows supporting (consistent with) the dependency */
228 int n_supporting_rows = 0;
229
230 /* Make sure we have at least two input attributes. */
231 Assert(k >= 2);
232
233 /* sort info for all attributes columns */
234 mss = multi_sort_init(k);
235
236 /*
237 * Translate the array of indexes to regular attnums for the dependency
238 * (we will need this to identify the columns in StatsBuildData).
239 */
241 for (i = 0; i < k; i++)
242 attnums_dep[i] = data->attnums[dependency[i]];
243
244 /*
245 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 *
247 * (a) sort the data lexicographically
248 *
249 * (b) split the data into groups by first (k-1) columns
250 *
251 * (c) for each group count different values in the last column
252 *
253 * We use the column data types' default sort operators and collations;
254 * perhaps at some point it'd be worth using column-specific collations?
255 */
256
257 /* prepare the sort function for the dimensions */
258 for (i = 0; i < k; i++)
259 {
260 VacAttrStats *colstat = data->stats[dependency[i]];
262
264 if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 colstat->attrtypid);
267
268 /* prepare the sort function for this dimension */
269 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 }
271
272 /*
273 * build an array of SortItem(s) sorted using the multi-sort support
274 *
275 * XXX This relies on all stats entries pointing to the same tuple
276 * descriptor. For now that assumption holds, but it might change in the
277 * future for example if we support statistics on multiple tables.
278 */
280
281 /*
282 * Walk through the sorted array, split it into rows according to the
283 * first (k-1) columns. If there's a single value in the last column, we
284 * count the group as 'supporting' the functional dependency. Otherwise we
285 * count it as contradicting.
286 */
287
288 /* start with the first row forming a group */
289 group_size = 1;
290
291 /* loop 1 beyond the end of the array so that we count the final group */
292 for (i = 1; i <= nitems; i++)
293 {
294 /*
295 * Check if the group ended, which may be either because we processed
296 * all the items (i==nitems), or because the i-th item is not equal to
297 * the preceding one.
298 */
299 if (i == nitems ||
300 multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 {
302 /*
303 * If no violations were found in the group then track the rows of
304 * the group as supporting the functional dependency.
305 */
306 if (n_violations == 0)
308
309 /* Reset counters for the new group */
310 n_violations = 0;
311 group_size = 1;
312 continue;
313 }
314 /* first columns match, but the last one does not (so contradicting) */
315 else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 n_violations++;
317
318 group_size++;
319 }
320
321 /* Compute the 'degree of validity' as (supporting/total). */
322 return (n_supporting_rows * 1.0 / data->numrows);
323}

References Assert, build_sorted_items(), data, elog, ERROR, fb(), i, InvalidOid, items, lookup_type_cache(), multi_sort_add_dimension(), multi_sort_compare_dim(), multi_sort_compare_dims(), multi_sort_init(), nitems, palloc_array, type, and TYPECACHE_LT_OPR.

Referenced by statext_dependencies_build().

◆ dependency_is_compatible_clause()

static bool dependency_is_compatible_clause ( Node clause,
Index  relid,
AttrNumber attnum 
)
static

Definition at line 725 of file dependencies.c.

726{
727 Var *var;
729
730 if (IsA(clause, RestrictInfo))
731 {
732 RestrictInfo *rinfo = (RestrictInfo *) clause;
733
734 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
735 if (rinfo->pseudoconstant)
736 return false;
737
738 /* Clauses referencing multiple, or no, varnos are incompatible */
739 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
740 return false;
741
742 clause = (Node *) rinfo->clause;
743 }
744
745 if (is_opclause(clause))
746 {
747 /* If it's an opclause, check for Var = Const or Const = Var. */
748 OpExpr *expr = (OpExpr *) clause;
749
750 /* Only expressions with two arguments are candidates. */
751 if (list_length(expr->args) != 2)
752 return false;
753
754 /* Make sure non-selected argument is a pseudoconstant. */
756 clause_expr = linitial(expr->args);
757 else if (is_pseudo_constant_clause(linitial(expr->args)))
758 clause_expr = lsecond(expr->args);
759 else
760 return false;
761
762 /*
763 * If it's not an "=" operator, just ignore the clause, as it's not
764 * compatible with functional dependencies.
765 *
766 * This uses the function for estimating selectivity, not the operator
767 * directly (a bit awkward, but well ...).
768 *
769 * XXX this is pretty dubious; probably it'd be better to check btree
770 * or hash opclass membership, so as not to be fooled by custom
771 * selectivity functions, and to be more consistent with decisions
772 * elsewhere in the planner.
773 */
774 if (get_oprrest(expr->opno) != F_EQSEL)
775 return false;
776
777 /* OK to proceed with checking "var" */
778 }
779 else if (IsA(clause, ScalarArrayOpExpr))
780 {
781 /* If it's a scalar array operator, check for Var IN Const. */
782 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
783
784 /*
785 * Reject ALL() variant, we only care about ANY/IN.
786 *
787 * XXX Maybe we should check if all the values are the same, and allow
788 * ALL in that case? Doesn't seem very practical, though.
789 */
790 if (!expr->useOr)
791 return false;
792
793 /* Only expressions with two arguments are candidates. */
794 if (list_length(expr->args) != 2)
795 return false;
796
797 /*
798 * We know it's always (Var IN Const), so we assume the var is the
799 * first argument, and pseudoconstant is the second one.
800 */
802 return false;
803
804 clause_expr = linitial(expr->args);
805
806 /*
807 * If it's not an "=" operator, just ignore the clause, as it's not
808 * compatible with functional dependencies. The operator is identified
809 * simply by looking at which function it uses to estimate
810 * selectivity. That's a bit strange, but it's what other similar
811 * places do.
812 */
813 if (get_oprrest(expr->opno) != F_EQSEL)
814 return false;
815
816 /* OK to proceed with checking "var" */
817 }
818 else if (is_orclause(clause))
819 {
820 BoolExpr *bool_expr = (BoolExpr *) clause;
821 ListCell *lc;
822
823 /* start with no attribute number */
825
826 foreach(lc, bool_expr->args)
827 {
829
830 /*
831 * Had we found incompatible clause in the arguments, treat the
832 * whole clause as incompatible.
833 */
835 relid, &clause_attnum))
836 return false;
837
840
841 /* ensure all the variables are the same (same attnum) */
842 if (*attnum != clause_attnum)
843 return false;
844 }
845
846 /* the Var is already checked by the recursive call */
847 return true;
848 }
849 else if (is_notclause(clause))
850 {
851 /*
852 * "NOT x" can be interpreted as "x = false", so get the argument and
853 * proceed with seeing if it's a suitable Var.
854 */
855 clause_expr = (Node *) get_notclausearg(clause);
856 }
857 else
858 {
859 /*
860 * A boolean expression "x" can be interpreted as "x = true", so
861 * proceed with seeing if it's a suitable Var.
862 */
863 clause_expr = clause;
864 }
865
866 /*
867 * We may ignore any RelabelType node above the operand. (There won't be
868 * more than one, since eval_const_expressions has been applied already.)
869 */
871 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
872
873 /* We only support plain Vars for now */
874 if (!IsA(clause_expr, Var))
875 return false;
876
877 /* OK, we know we have a Var */
878 var = (Var *) clause_expr;
879
880 /* Ensure Var is from the correct relation */
881 if (var->varno != relid)
882 return false;
883
884 /* We also better ensure the Var is from the current level */
885 if (var->varlevelsup != 0)
886 return false;
887
888 /* Also ignore system attributes (we don't allow stats on those) */
890 return false;
891
892 *attnum = var->varattno;
893 return true;
894}

References OpExpr::args, ScalarArrayOpExpr::args, attnum, AttrNumberIsForUserDefinedAttr, bms_membership(), BMS_SINGLETON, RestrictInfo::clause, dependency_is_compatible_clause(), fb(), get_notclausearg(), get_oprrest(), InvalidAttrNumber, is_notclause(), is_opclause(), is_orclause(), is_pseudo_constant_clause(), IsA, lfirst, linitial, list_length(), lsecond, OpExpr::opno, ScalarArrayOpExpr::opno, ScalarArrayOpExpr::useOr, Var::varattno, Var::varlevelsup, and Var::varno.

Referenced by dependencies_clauselist_selectivity(), and dependency_is_compatible_clause().

◆ dependency_is_compatible_expression()

static bool dependency_is_compatible_expression ( Node clause,
Index  relid,
List statlist,
Node **  expr 
)
static

Definition at line 1152 of file dependencies.c.

1153{
1154 ListCell *lc,
1155 *lc2;
1157
1158 if (IsA(clause, RestrictInfo))
1159 {
1160 RestrictInfo *rinfo = (RestrictInfo *) clause;
1161
1162 /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1163 if (rinfo->pseudoconstant)
1164 return false;
1165
1166 /* Clauses referencing multiple, or no, varnos are incompatible */
1167 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1168 return false;
1169
1170 clause = (Node *) rinfo->clause;
1171 }
1172
1173 if (is_opclause(clause))
1174 {
1175 /* If it's an opclause, check for Var = Const or Const = Var. */
1176 OpExpr *expr = (OpExpr *) clause;
1177
1178 /* Only expressions with two arguments are candidates. */
1179 if (list_length(expr->args) != 2)
1180 return false;
1181
1182 /* Make sure non-selected argument is a pseudoconstant. */
1184 clause_expr = linitial(expr->args);
1185 else if (is_pseudo_constant_clause(linitial(expr->args)))
1186 clause_expr = lsecond(expr->args);
1187 else
1188 return false;
1189
1190 /*
1191 * If it's not an "=" operator, just ignore the clause, as it's not
1192 * compatible with functional dependencies.
1193 *
1194 * This uses the function for estimating selectivity, not the operator
1195 * directly (a bit awkward, but well ...).
1196 *
1197 * XXX this is pretty dubious; probably it'd be better to check btree
1198 * or hash opclass membership, so as not to be fooled by custom
1199 * selectivity functions, and to be more consistent with decisions
1200 * elsewhere in the planner.
1201 */
1202 if (get_oprrest(expr->opno) != F_EQSEL)
1203 return false;
1204
1205 /* OK to proceed with checking "var" */
1206 }
1207 else if (IsA(clause, ScalarArrayOpExpr))
1208 {
1209 /* If it's a scalar array operator, check for Var IN Const. */
1210 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1211
1212 /*
1213 * Reject ALL() variant, we only care about ANY/IN.
1214 *
1215 * FIXME Maybe we should check if all the values are the same, and
1216 * allow ALL in that case? Doesn't seem very practical, though.
1217 */
1218 if (!expr->useOr)
1219 return false;
1220
1221 /* Only expressions with two arguments are candidates. */
1222 if (list_length(expr->args) != 2)
1223 return false;
1224
1225 /*
1226 * We know it's always (Var IN Const), so we assume the var is the
1227 * first argument, and pseudoconstant is the second one.
1228 */
1230 return false;
1231
1232 clause_expr = linitial(expr->args);
1233
1234 /*
1235 * If it's not an "=" operator, just ignore the clause, as it's not
1236 * compatible with functional dependencies. The operator is identified
1237 * simply by looking at which function it uses to estimate
1238 * selectivity. That's a bit strange, but it's what other similar
1239 * places do.
1240 */
1241 if (get_oprrest(expr->opno) != F_EQSEL)
1242 return false;
1243
1244 /* OK to proceed with checking "var" */
1245 }
1246 else if (is_orclause(clause))
1247 {
1248 BoolExpr *bool_expr = (BoolExpr *) clause;
1249
1250 /* start with no expression (we'll use the first match) */
1251 *expr = NULL;
1252
1253 foreach(lc, bool_expr->args)
1254 {
1255 Node *or_expr = NULL;
1256
1257 /*
1258 * Had we found incompatible expression in the arguments, treat
1259 * the whole expression as incompatible.
1260 */
1262 statlist, &or_expr))
1263 return false;
1264
1265 if (*expr == NULL)
1266 *expr = or_expr;
1267
1268 /* ensure all the expressions are the same */
1269 if (!equal(or_expr, *expr))
1270 return false;
1271 }
1272
1273 /* the expression is already checked by the recursive call */
1274 return true;
1275 }
1276 else if (is_notclause(clause))
1277 {
1278 /*
1279 * "NOT x" can be interpreted as "x = false", so get the argument and
1280 * proceed with seeing if it's a suitable Var.
1281 */
1282 clause_expr = (Node *) get_notclausearg(clause);
1283 }
1284 else
1285 {
1286 /*
1287 * A boolean expression "x" can be interpreted as "x = true", so
1288 * proceed with seeing if it's a suitable Var.
1289 */
1290 clause_expr = clause;
1291 }
1292
1293 /*
1294 * We may ignore any RelabelType node above the operand. (There won't be
1295 * more than one, since eval_const_expressions has been applied already.)
1296 */
1298 clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1299
1300 /*
1301 * Search for a matching statistics expression.
1302 */
1303 foreach(lc, statlist)
1304 {
1306
1307 /* ignore stats without dependencies */
1308 if (info->kind != STATS_EXT_DEPENDENCIES)
1309 continue;
1310
1311 foreach(lc2, info->exprs)
1312 {
1313 Node *stat_expr = (Node *) lfirst(lc2);
1314
1316 {
1317 *expr = stat_expr;
1318 return true;
1319 }
1320 }
1321 }
1322
1323 return false;
1324}

References OpExpr::args, ScalarArrayOpExpr::args, bms_membership(), BMS_SINGLETON, RestrictInfo::clause, dependency_is_compatible_expression(), equal(), StatisticExtInfo::exprs, fb(), get_notclausearg(), get_oprrest(), is_notclause(), is_opclause(), is_orclause(), is_pseudo_constant_clause(), IsA, StatisticExtInfo::kind, lfirst, linitial, list_length(), lsecond, OpExpr::opno, ScalarArrayOpExpr::opno, and ScalarArrayOpExpr::useOr.

Referenced by dependencies_clauselist_selectivity(), and dependency_is_compatible_expression().

◆ dependency_is_fully_matched()

static bool dependency_is_fully_matched ( MVDependency dependency,
Bitmapset attnums 
)
static

Definition at line 664 of file dependencies.c.

665{
666 int j;
667
668 /*
669 * Check that the dependency actually is fully covered by clauses. We have
670 * to translate all attribute numbers, as those are referenced
671 */
672 for (j = 0; j < dependency->nattributes; j++)
673 {
674 int attnum = dependency->attributes[j];
675
676 if (!bms_is_member(attnum, attnums))
677 return false;
678 }
679
680 return true;
681}

References attnum, MVDependency::attributes, bms_is_member(), j, and MVDependency::nattributes.

Referenced by find_strongest_dependency().

◆ DependencyGenerator_free()

static void DependencyGenerator_free ( DependencyGenerator  state)
static

Definition at line 190 of file dependencies.c.

191{
192 pfree(state->dependencies);
193 pfree(state);
194}

References pfree().

Referenced by statext_dependencies_build().

◆ DependencyGenerator_init()

static DependencyGenerator DependencyGenerator_init ( int  n,
int  k 
)
static

Definition at line 167 of file dependencies.c.

168{
170
171 Assert((n >= k) && (k > 0));
172
173 /* allocate the DependencyGenerator state */
175 state->dependencies = palloc_array(AttrNumber, k);
176
177 state->ndependencies = 0;
178 state->current = 0;
179 state->k = k;
180 state->n = n;
181
182 /* now actually pre-generate all the variations */
184
185 return state;
186}

References Assert, generate_dependencies(), palloc0_object, and palloc_array.

Referenced by statext_dependencies_build().

◆ DependencyGenerator_next()

static AttrNumber * DependencyGenerator_next ( DependencyGenerator  state)
static

Definition at line 198 of file dependencies.c.

199{
200 if (state->current == state->ndependencies)
201 return NULL;
202
203 return &state->dependencies[state->k * state->current++];
204}

References fb().

Referenced by statext_dependencies_build().

◆ find_strongest_dependency()

static MVDependency * find_strongest_dependency ( MVDependencies **  dependencies,
int  ndependencies,
Bitmapset attnums 
)
static

Definition at line 913 of file dependencies.c.

915{
916 int i,
917 j;
919
920 /* number of attnums in clauses */
921 int nattnums = bms_num_members(attnums);
922
923 /*
924 * Iterate over the MVDependency items and find the strongest one from the
925 * fully-matched dependencies. We do the cheap checks first, before
926 * matching it against the attnums.
927 */
928 for (i = 0; i < ndependencies; i++)
929 {
930 for (j = 0; j < dependencies[i]->ndeps; j++)
931 {
932 MVDependency *dependency = dependencies[i]->deps[j];
933
934 /*
935 * Skip dependencies referencing more attributes than available
936 * clauses, as those can't be fully matched.
937 */
938 if (dependency->nattributes > nattnums)
939 continue;
940
941 if (strongest)
942 {
943 /* skip dependencies on fewer attributes than the strongest. */
944 if (dependency->nattributes < strongest->nattributes)
945 continue;
946
947 /* also skip weaker dependencies when attribute count matches */
948 if (strongest->nattributes == dependency->nattributes &&
949 strongest->degree > dependency->degree)
950 continue;
951 }
952
953 /*
954 * this dependency is stronger, but we must still check that it's
955 * fully matched to these attnums. We perform this check last as
956 * it's slightly more expensive than the previous checks.
957 */
958 if (dependency_is_fully_matched(dependency, attnums))
959 strongest = dependency; /* save new best match */
960 }
961 }
962
963 return strongest;
964}

References bms_num_members(), MVDependency::degree, dependency_is_fully_matched(), MVDependencies::deps, fb(), i, j, MVDependency::nattributes, and MVDependencies::ndeps.

Referenced by dependencies_clauselist_selectivity().

◆ generate_dependencies()

static void generate_dependencies ( DependencyGenerator  state)
static

Definition at line 151 of file dependencies.c.

152{
154
155 generate_dependencies_recurse(state, 0, 0, current);
156
157 pfree(current);
158}

References generate_dependencies_recurse(), palloc0_array, and pfree().

Referenced by DependencyGenerator_init().

◆ generate_dependencies_recurse()

static void generate_dependencies_recurse ( DependencyGenerator  state,
int  index,
AttrNumber  start,
AttrNumber current 
)
static

Definition at line 85 of file dependencies.c.

87{
88 /*
89 * The generator handles the first (k-1) elements differently from the
90 * last element.
91 */
92 if (index < (state->k - 1))
93 {
95
96 /*
97 * The first (k-1) values have to be in ascending order, which we
98 * generate recursively.
99 */
100
101 for (i = start; i < state->n; i++)
102 {
103 current[index] = i;
104 generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 }
106 }
107 else
108 {
109 int i;
110
111 /*
112 * the last element is the implied value, which does not respect the
113 * ascending order. We just need to check that the value is not in the
114 * first (k-1) elements.
115 */
116
117 for (i = 0; i < state->n; i++)
118 {
119 int j;
120 bool match = false;
121
122 current[index] = i;
123
124 for (j = 0; j < index; j++)
125 {
126 if (current[j] == i)
127 {
128 match = true;
129 break;
130 }
131 }
132
133 /*
134 * If the value is not found in the first part of the dependency,
135 * we're done.
136 */
137 if (!match)
138 {
139 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
140 state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
141 memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 current, state->k * sizeof(AttrNumber));
143 state->ndependencies++;
144 }
145 }
146 }
147}

References fb(), generate_dependencies_recurse(), i, j, repalloc(), and start.

Referenced by generate_dependencies(), and generate_dependencies_recurse().

◆ statext_dependencies_build()

MVDependencies * statext_dependencies_build ( StatsBuildData data)

Definition at line 342 of file dependencies.c.

343{
344 int i,
345 k;
346
347 /* result */
348 MVDependencies *dependencies = NULL;
349 MemoryContext cxt;
350
351 Assert(data->nattnums >= 2);
352
353 /* tracks memory allocated by dependency_degree calls */
355 "dependency_degree cxt",
357
358 /*
359 * We'll try build functional dependencies starting from the smallest ones
360 * covering just 2 columns, to the largest ones, covering all columns
361 * included in the statistics object. We start from the smallest ones
362 * because we want to be able to skip already implied ones.
363 */
364 for (k = 2; k <= data->nattnums; k++)
365 {
366 AttrNumber *dependency; /* array with k elements */
367
368 /* prepare a DependencyGenerator of variation */
370
371 /* generate all possible variations of k values (out of n) */
372 while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 {
374 double degree;
375 MVDependency *d;
377
378 /* release memory used by dependency degree calculation */
380
381 /* compute how valid the dependency seems */
382 degree = dependency_degree(data, k, dependency);
383
386
387 /*
388 * if the dependency seems entirely invalid, don't store it
389 */
390 if (degree == 0.0)
391 continue;
392
393 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
394 + k * sizeof(AttrNumber));
395
396 /* copy the dependency (and keep the indexes into stxkeys) */
397 d->degree = degree;
398 d->nattributes = k;
399 for (i = 0; i < k; i++)
400 d->attributes[i] = data->attnums[dependency[i]];
401
402 /* initialize the list of dependencies */
403 if (dependencies == NULL)
404 {
405 dependencies = palloc0_object(MVDependencies);
406
407 dependencies->magic = STATS_DEPS_MAGIC;
408 dependencies->type = STATS_DEPS_TYPE_BASIC;
409 dependencies->ndeps = 0;
410 }
411
412 dependencies->ndeps++;
413 dependencies = (MVDependencies *) repalloc(dependencies,
415 + dependencies->ndeps * sizeof(MVDependency *));
416
417 dependencies->deps[dependencies->ndeps - 1] = d;
418 }
419
420 /*
421 * we're done with variations of k elements, so free the
422 * DependencyGenerator
423 */
425 }
426
428
429 return dependencies;
430}

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert, MVDependency::attributes, CurrentMemoryContext, data, MVDependency::degree, dependency_degree(), DependencyGenerator_free(), DependencyGenerator_init(), DependencyGenerator_next(), MVDependencies::deps, fb(), i, MVDependencies::magic, MemoryContextDelete(), MemoryContextReset(), MemoryContextSwitchTo(), MVDependency::nattributes, MVDependencies::ndeps, palloc0(), palloc0_object, repalloc(), STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, and MVDependencies::type.

Referenced by BuildRelationExtStatistics().

◆ statext_dependencies_deserialize()

MVDependencies * statext_dependencies_deserialize ( bytea data)

Definition at line 492 of file dependencies.c.

493{
494 int i;
496 MVDependencies *dependencies;
497 char *tmp;
498
499 if (data == NULL)
500 return NULL;
501
503 elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
505
506 /* read the MVDependencies header */
507 dependencies = palloc0_object(MVDependencies);
508
509 /* initialize pointer to the data part (skip the varlena header) */
510 tmp = VARDATA_ANY(data);
511
512 /* read the header fields and perform basic sanity checks */
513 memcpy(&dependencies->magic, tmp, sizeof(uint32));
514 tmp += sizeof(uint32);
515 memcpy(&dependencies->type, tmp, sizeof(uint32));
516 tmp += sizeof(uint32);
517 memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
518 tmp += sizeof(uint32);
519
520 if (dependencies->magic != STATS_DEPS_MAGIC)
521 elog(ERROR, "invalid dependency magic %d (expected %d)",
522 dependencies->magic, STATS_DEPS_MAGIC);
523
524 if (dependencies->type != STATS_DEPS_TYPE_BASIC)
525 elog(ERROR, "invalid dependency type %d (expected %d)",
526 dependencies->type, STATS_DEPS_TYPE_BASIC);
527
528 if (dependencies->ndeps == 0)
529 elog(ERROR, "invalid zero-length item array in MVDependencies");
530
531 /* what minimum bytea size do we expect for those parameters */
532 min_expected_size = SizeOfItem(dependencies->ndeps);
533
535 elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
537
538 /* allocate space for the MCV items */
539 dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
540 + (dependencies->ndeps * sizeof(MVDependency *)));
541
542 for (i = 0; i < dependencies->ndeps; i++)
543 {
544 double degree;
545 AttrNumber k;
546 MVDependency *d;
547
548 /* degree of validity */
549 memcpy(&degree, tmp, sizeof(double));
550 tmp += sizeof(double);
551
552 /* number of attributes */
553 memcpy(&k, tmp, sizeof(AttrNumber));
554 tmp += sizeof(AttrNumber);
555
556 /* is the number of attributes valid? */
557 Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
558
559 /* now that we know the number of attributes, allocate the dependency */
560 d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
561 + (k * sizeof(AttrNumber)));
562
563 d->degree = degree;
564 d->nattributes = k;
565
566 /* copy attribute numbers */
567 memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
568 tmp += sizeof(AttrNumber) * d->nattributes;
569
570 dependencies->deps[i] = d;
571
572 /* still within the bytea */
573 Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
574 }
575
576 /* we should have consumed the whole bytea exactly */
577 Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
578
579 return dependencies;
580}

References Assert, MVDependency::attributes, data, MVDependency::degree, MVDependencies::deps, elog, ERROR, fb(), i, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, palloc0(), palloc0_object, repalloc(), SizeOfHeader, SizeOfItem, STATS_DEPS_MAGIC, STATS_DEPS_TYPE_BASIC, STATS_MAX_DIMENSIONS, MVDependencies::type, VARDATA_ANY(), VARSIZE_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by extended_statistics_update(), pg_dependencies_out(), and statext_dependencies_load().

◆ statext_dependencies_free()

void statext_dependencies_free ( MVDependencies dependencies)

Definition at line 586 of file dependencies.c.

587{
588 for (int i = 0; i < dependencies->ndeps; i++)
589 pfree(dependencies->deps[i]);
590 pfree(dependencies);
591}

References MVDependencies::deps, i, MVDependencies::ndeps, and pfree().

Referenced by extended_statistics_update().

◆ statext_dependencies_load()

MVDependencies * statext_dependencies_load ( Oid  mvoid,
bool  inh 
)

Definition at line 688 of file dependencies.c.

689{
690 MVDependencies *result;
691 bool isnull;
692 Datum deps;
693 HeapTuple htup;
694
697 BoolGetDatum(inh));
698 if (!HeapTupleIsValid(htup))
699 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
700
703 if (isnull)
704 elog(ERROR,
705 "requested statistics kind \"%c\" is not yet built for statistics object %u",
707
709
710 ReleaseSysCache(htup);
711
712 return result;
713}

References BoolGetDatum(), DatumGetByteaPP, elog, ERROR, fb(), HeapTupleIsValid, ObjectIdGetDatum(), ReleaseSysCache(), SearchSysCache2(), statext_dependencies_deserialize(), and SysCacheGetAttr().

Referenced by dependencies_clauselist_selectivity().

◆ statext_dependencies_serialize()

bytea * statext_dependencies_serialize ( MVDependencies dependencies)

Definition at line 437 of file dependencies.c.

438{
439 int i;
440 bytea *output;
441 char *tmp;
442 Size len;
443
444 /* we need to store ndeps, with a number of attributes for each one */
446
447 /* and also include space for the actual attribute numbers and degrees */
448 for (i = 0; i < dependencies->ndeps; i++)
449 len += SizeOfItem(dependencies->deps[i]->nattributes);
450
451 output = (bytea *) palloc0(len);
453
454 tmp = VARDATA(output);
455
456 /* Store the base struct values (magic, type, ndeps) */
457 memcpy(tmp, &dependencies->magic, sizeof(uint32));
458 tmp += sizeof(uint32);
459 memcpy(tmp, &dependencies->type, sizeof(uint32));
460 tmp += sizeof(uint32);
461 memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
462 tmp += sizeof(uint32);
463
464 /* store number of attributes and attribute numbers for each dependency */
465 for (i = 0; i < dependencies->ndeps; i++)
466 {
467 MVDependency *d = dependencies->deps[i];
468
469 memcpy(tmp, &d->degree, sizeof(double));
470 tmp += sizeof(double);
471
472 memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
473 tmp += sizeof(AttrNumber);
474
475 memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
476 tmp += sizeof(AttrNumber) * d->nattributes;
477
478 /* protect against overflow */
479 Assert(tmp <= ((char *) output + len));
480 }
481
482 /* make sure we've produced exactly the right amount of data */
483 Assert(tmp == ((char *) output + len));
484
485 return output;
486}

References Assert, MVDependency::attributes, MVDependency::degree, MVDependencies::deps, fb(), i, len, MVDependencies::magic, MVDependency::nattributes, MVDependencies::ndeps, output, palloc0(), SET_VARSIZE(), SizeOfHeader, SizeOfItem, MVDependencies::type, VARDATA(), and VARHDRSZ.

Referenced by build_mvdependencies(), and statext_store().

◆ statext_dependencies_validate()

bool statext_dependencies_validate ( const MVDependencies dependencies,
const int2vector stxkeys,
int  numexprs,
int  elevel 
)

Definition at line 606 of file dependencies.c.

609{
611
612 /* Scan through each dependency entry */
613 for (int i = 0; i < dependencies->ndeps; i++)
614 {
615 const MVDependency *dep = dependencies->deps[i];
616
617 /*
618 * Cross-check each attribute in a dependency entry with the extended
619 * stats object definition.
620 */
621 for (int j = 0; j < dep->nattributes; j++)
622 {
624 bool ok = false;
625
626 if (attnum > 0)
627 {
628 /* attribute number in stxkeys */
629 for (int k = 0; k < stxkeys->dim1; k++)
630 {
631 if (attnum == stxkeys->values[k])
632 {
633 ok = true;
634 break;
635 }
636 }
637 }
638 else if ((attnum < 0) && (attnum >= attnum_expr_lowbound))
639 {
640 /* attribute number for an expression */
641 ok = true;
642 }
643
644 if (!ok)
645 {
646 ereport(elevel,
648 errmsg("could not validate \"%s\" object: invalid attribute number %d found",
649 "pg_dependencies", attnum)));
650 return false;
651 }
652 }
653 }
654
655 return true;
656}

References attnum, MVDependency::attributes, MVDependencies::deps, ereport, errcode(), errmsg(), fb(), i, j, and MVDependencies::ndeps.

Referenced by extended_statistics_update().