PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
network_selfuncs.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * network_selfuncs.c
4 * Functions for selectivity estimation of inet/cidr operators
5 *
6 * This module provides estimators for the subnet inclusion and overlap
7 * operators. Estimates are based on null fraction, most common values,
8 * and histogram of inet/cidr columns.
9 *
10 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 *
14 * IDENTIFICATION
15 * src/backend/utils/adt/network_selfuncs.c
16 *
17 *-------------------------------------------------------------------------
18 */
19#include "postgres.h"
20
21#include <math.h>
22
23#include "access/htup_details.h"
24#include "catalog/pg_operator.h"
26#include "utils/fmgrprotos.h"
27#include "utils/inet.h"
28#include "utils/lsyscache.h"
29#include "utils/selfuncs.h"
30
31
32/* Default selectivity for the inet overlap operator */
33#define DEFAULT_OVERLAP_SEL 0.01
34
35/* Default selectivity for the various inclusion operators */
36#define DEFAULT_INCLUSION_SEL 0.005
37
38/* Default selectivity for specified operator */
39#define DEFAULT_SEL(operator) \
40 ((operator) == OID_INET_OVERLAP_OP ? \
41 DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
42
43/* Maximum number of items to consider in join selectivity calculations */
44#define MAX_CONSIDERED_ELEMS 1024
45
47 VariableStatData *vardata1, VariableStatData *vardata2);
48static Selectivity networkjoinsel_semi(Oid operator,
49 VariableStatData *vardata1, VariableStatData *vardata2);
50static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
51static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
52 Datum constvalue, int opr_codenum);
53static Selectivity inet_mcv_join_sel(Datum *mcv1_values,
54 float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
55 float4 *mcv2_numbers, int mcv2_nvalues, Oid operator);
56static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers,
57 int mcv_nvalues, Datum *hist_values, int hist_nvalues,
58 int opr_codenum);
60 int hist1_nvalues,
61 Datum *hist2_values, int hist2_nvalues,
62 int opr_codenum);
63static Selectivity inet_semi_join_sel(Datum lhs_value,
64 bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
65 bool hist_exists, Datum *hist_values, int hist_nvalues,
66 double hist_weight,
67 FmgrInfo *proc, int opr_codenum);
68static int inet_opr_codenum(Oid operator);
69static int inet_inclusion_cmp(inet *left, inet *right, int opr_codenum);
70static int inet_masklen_inclusion_cmp(inet *left, inet *right,
71 int opr_codenum);
72static int inet_hist_match_divider(inet *boundary, inet *query,
73 int opr_codenum);
74
75/*
76 * Selectivity estimation for the subnet inclusion/overlap operators
77 */
80{
82 Oid operator = PG_GETARG_OID(1);
84 int varRelid = PG_GETARG_INT32(3);
85 VariableStatData vardata;
86 Node *other;
87 bool varonleft;
88 Selectivity selec,
89 mcv_selec,
90 non_mcv_selec;
91 Datum constvalue;
93 AttStatsSlot hslot;
94 double sumcommon,
95 nullfrac;
96 FmgrInfo proc;
97
98 /*
99 * If expression is not (variable op something) or (something op
100 * variable), then punt and return a default estimate.
101 */
102 if (!get_restriction_variable(root, args, varRelid,
103 &vardata, &other, &varonleft))
104 PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
105
106 /*
107 * Can't do anything useful if the something is not a constant, either.
108 */
109 if (!IsA(other, Const))
110 {
111 ReleaseVariableStats(vardata);
112 PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
113 }
114
115 /* All of the operators handled here are strict. */
116 if (((Const *) other)->constisnull)
117 {
118 ReleaseVariableStats(vardata);
119 PG_RETURN_FLOAT8(0.0);
120 }
121 constvalue = ((Const *) other)->constvalue;
122
123 /* Otherwise, we need stats in order to produce a non-default estimate. */
124 if (!HeapTupleIsValid(vardata.statsTuple))
125 {
126 ReleaseVariableStats(vardata);
127 PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
128 }
129
130 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
131 nullfrac = stats->stanullfrac;
132
133 /*
134 * If we have most-common-values info, add up the fractions of the MCV
135 * entries that satisfy MCV OP CONST. These fractions contribute directly
136 * to the result selectivity. Also add up the total fraction represented
137 * by MCV entries.
138 */
139 fmgr_info(get_opcode(operator), &proc);
140 mcv_selec = mcv_selectivity(&vardata, &proc, InvalidOid,
141 constvalue, varonleft,
142 &sumcommon);
143
144 /*
145 * If we have a histogram, use it to estimate the proportion of the
146 * non-MCV population that satisfies the clause. If we don't, apply the
147 * default selectivity to that population.
148 */
149 if (get_attstatsslot(&hslot, vardata.statsTuple,
150 STATISTIC_KIND_HISTOGRAM, InvalidOid,
152 {
153 int opr_codenum = inet_opr_codenum(operator);
154
155 /* Commute if needed, so we can consider histogram to be on the left */
156 if (!varonleft)
157 opr_codenum = -opr_codenum;
158 non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
159 constvalue, opr_codenum);
160
161 free_attstatsslot(&hslot);
162 }
163 else
164 non_mcv_selec = DEFAULT_SEL(operator);
165
166 /* Combine selectivities for MCV and non-MCV populations */
167 selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;
168
169 /* Result should be in range, but make sure... */
170 CLAMP_PROBABILITY(selec);
171
172 ReleaseVariableStats(vardata);
173
174 PG_RETURN_FLOAT8(selec);
175}
176
177/*
178 * Join selectivity estimation for the subnet inclusion/overlap operators
179 *
180 * This function has the same structure as eqjoinsel() in selfuncs.c.
181 *
182 * Throughout networkjoinsel and its subroutines, we have a performance issue
183 * in that the amount of work to be done is O(N^2) in the length of the MCV
184 * and histogram arrays. To keep the runtime from getting out of hand when
185 * large statistics targets have been set, we arbitrarily limit the number of
186 * values considered to 1024 (MAX_CONSIDERED_ELEMS). For the MCV arrays, this
187 * is easy: just consider at most the first N elements. (Since the MCVs are
188 * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
189 * For the histogram arrays, we decimate; that is consider only every k'th
190 * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
191 * elements are considered. This should still give us a good random sample of
192 * the non-MCV population. Decimation is done on-the-fly in the loops that
193 * iterate over the histogram arrays.
194 */
195Datum
197{
199 Oid operator = PG_GETARG_OID(1);
201#ifdef NOT_USED
202 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
203#endif
205 double selec;
206 VariableStatData vardata1;
207 VariableStatData vardata2;
208 bool join_is_reversed;
209
211 &vardata1, &vardata2, &join_is_reversed);
212
213 switch (sjinfo->jointype)
214 {
215 case JOIN_INNER:
216 case JOIN_LEFT:
217 case JOIN_FULL:
218
219 /*
220 * Selectivity for left/full join is not exactly the same as inner
221 * join, but we neglect the difference, as eqjoinsel does.
222 */
223 selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
224 break;
225 case JOIN_SEMI:
226 case JOIN_ANTI:
227 /* Here, it's important that we pass the outer var on the left. */
228 if (!join_is_reversed)
229 selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
230 else
231 selec = networkjoinsel_semi(get_commutator(operator),
232 &vardata2, &vardata1);
233 break;
234 default:
235 /* other values not expected here */
236 elog(ERROR, "unrecognized join type: %d",
237 (int) sjinfo->jointype);
238 selec = 0; /* keep compiler quiet */
239 break;
240 }
241
242 ReleaseVariableStats(vardata1);
243 ReleaseVariableStats(vardata2);
244
245 CLAMP_PROBABILITY(selec);
246
247 PG_RETURN_FLOAT8((float8) selec);
248}
249
250/*
251 * Inner join selectivity estimation for subnet inclusion/overlap operators
252 *
253 * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
254 * selectivity for join using the subnet inclusion operators. Unlike the
255 * join selectivity function for the equality operator, eqjoinsel_inner(),
256 * one to one matching of the values is not enough. Network inclusion
257 * operators are likely to match many to many, so we must check all pairs.
258 * (Note: it might be possible to exploit understanding of the histogram's
259 * btree ordering to reduce the work needed, but we don't currently try.)
260 * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
261 */
262static Selectivity
264 VariableStatData *vardata1, VariableStatData *vardata2)
265{
266 Form_pg_statistic stats;
267 double nullfrac1 = 0.0,
268 nullfrac2 = 0.0;
269 Selectivity selec = 0.0,
270 sumcommon1 = 0.0,
271 sumcommon2 = 0.0;
272 bool mcv1_exists = false,
273 mcv2_exists = false,
274 hist1_exists = false,
275 hist2_exists = false;
276 int opr_codenum;
277 int mcv1_length = 0,
278 mcv2_length = 0;
279 AttStatsSlot mcv1_slot;
280 AttStatsSlot mcv2_slot;
281 AttStatsSlot hist1_slot;
282 AttStatsSlot hist2_slot;
283
284 if (HeapTupleIsValid(vardata1->statsTuple))
285 {
286 stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
287 nullfrac1 = stats->stanullfrac;
288
289 mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
290 STATISTIC_KIND_MCV, InvalidOid,
292 hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
293 STATISTIC_KIND_HISTOGRAM, InvalidOid,
295 /* Arbitrarily limit number of MCVs considered */
296 mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
297 if (mcv1_exists)
298 sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
299 }
300 else
301 {
302 memset(&mcv1_slot, 0, sizeof(mcv1_slot));
303 memset(&hist1_slot, 0, sizeof(hist1_slot));
304 }
305
306 if (HeapTupleIsValid(vardata2->statsTuple))
307 {
308 stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
309 nullfrac2 = stats->stanullfrac;
310
311 mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
312 STATISTIC_KIND_MCV, InvalidOid,
314 hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
315 STATISTIC_KIND_HISTOGRAM, InvalidOid,
317 /* Arbitrarily limit number of MCVs considered */
318 mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
319 if (mcv2_exists)
320 sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
321 }
322 else
323 {
324 memset(&mcv2_slot, 0, sizeof(mcv2_slot));
325 memset(&hist2_slot, 0, sizeof(hist2_slot));
326 }
327
328 opr_codenum = inet_opr_codenum(operator);
329
330 /*
331 * Calculate selectivity for MCV vs MCV matches.
332 */
333 if (mcv1_exists && mcv2_exists)
334 selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers,
335 mcv1_length,
336 mcv2_slot.values, mcv2_slot.numbers,
337 mcv2_length,
338 operator);
339
340 /*
341 * Add in selectivities for MCV vs histogram matches, scaling according to
342 * the fractions of the populations represented by the histograms. Note
343 * that the second case needs to commute the operator.
344 */
345 if (mcv1_exists && hist2_exists)
346 selec += (1.0 - nullfrac2 - sumcommon2) *
347 inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length,
348 hist2_slot.values, hist2_slot.nvalues,
349 opr_codenum);
350 if (mcv2_exists && hist1_exists)
351 selec += (1.0 - nullfrac1 - sumcommon1) *
352 inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length,
353 hist1_slot.values, hist1_slot.nvalues,
354 -opr_codenum);
355
356 /*
357 * Add in selectivity for histogram vs histogram matches, again scaling
358 * appropriately.
359 */
360 if (hist1_exists && hist2_exists)
361 selec += (1.0 - nullfrac1 - sumcommon1) *
362 (1.0 - nullfrac2 - sumcommon2) *
363 inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues,
364 hist2_slot.values, hist2_slot.nvalues,
365 opr_codenum);
366
367 /*
368 * If useful statistics are not available then use the default estimate.
369 * We can apply null fractions if known, though.
370 */
371 if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
372 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
373
374 /* Release stats. */
375 free_attstatsslot(&mcv1_slot);
376 free_attstatsslot(&mcv2_slot);
377 free_attstatsslot(&hist1_slot);
378 free_attstatsslot(&hist2_slot);
379
380 return selec;
381}
382
383/*
384 * Semi join selectivity estimation for subnet inclusion/overlap operators
385 *
386 * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
387 * histogram selectivity for semi/anti join cases.
388 */
389static Selectivity
391 VariableStatData *vardata1, VariableStatData *vardata2)
392{
393 Form_pg_statistic stats;
394 Selectivity selec = 0.0,
395 sumcommon1 = 0.0,
396 sumcommon2 = 0.0;
397 double nullfrac1 = 0.0,
398 nullfrac2 = 0.0,
399 hist2_weight = 0.0;
400 bool mcv1_exists = false,
401 mcv2_exists = false,
402 hist1_exists = false,
403 hist2_exists = false;
404 int opr_codenum;
405 FmgrInfo proc;
406 int i,
407 mcv1_length = 0,
408 mcv2_length = 0;
409 AttStatsSlot mcv1_slot;
410 AttStatsSlot mcv2_slot;
411 AttStatsSlot hist1_slot;
412 AttStatsSlot hist2_slot;
413
414 if (HeapTupleIsValid(vardata1->statsTuple))
415 {
416 stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
417 nullfrac1 = stats->stanullfrac;
418
419 mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
420 STATISTIC_KIND_MCV, InvalidOid,
422 hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
423 STATISTIC_KIND_HISTOGRAM, InvalidOid,
425 /* Arbitrarily limit number of MCVs considered */
426 mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
427 if (mcv1_exists)
428 sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
429 }
430 else
431 {
432 memset(&mcv1_slot, 0, sizeof(mcv1_slot));
433 memset(&hist1_slot, 0, sizeof(hist1_slot));
434 }
435
436 if (HeapTupleIsValid(vardata2->statsTuple))
437 {
438 stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
439 nullfrac2 = stats->stanullfrac;
440
441 mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
442 STATISTIC_KIND_MCV, InvalidOid,
444 hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
445 STATISTIC_KIND_HISTOGRAM, InvalidOid,
447 /* Arbitrarily limit number of MCVs considered */
448 mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
449 if (mcv2_exists)
450 sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
451 }
452 else
453 {
454 memset(&mcv2_slot, 0, sizeof(mcv2_slot));
455 memset(&hist2_slot, 0, sizeof(hist2_slot));
456 }
457
458 opr_codenum = inet_opr_codenum(operator);
459 fmgr_info(get_opcode(operator), &proc);
460
461 /* Estimate number of input rows represented by RHS histogram. */
462 if (hist2_exists && vardata2->rel)
463 hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;
464
465 /*
466 * Consider each element of the LHS MCV list, matching it to whatever RHS
467 * stats we have. Scale according to the known frequency of the MCV.
468 */
469 if (mcv1_exists && (mcv2_exists || hist2_exists))
470 {
471 for (i = 0; i < mcv1_length; i++)
472 {
473 selec += mcv1_slot.numbers[i] *
474 inet_semi_join_sel(mcv1_slot.values[i],
475 mcv2_exists, mcv2_slot.values, mcv2_length,
476 hist2_exists,
477 hist2_slot.values, hist2_slot.nvalues,
478 hist2_weight,
479 &proc, opr_codenum);
480 }
481 }
482
483 /*
484 * Consider each element of the LHS histogram, except for the first and
485 * last elements, which we exclude on the grounds that they're outliers
486 * and thus not very representative. Scale on the assumption that each
487 * such histogram element represents an equal share of the LHS histogram
488 * population (which is a bit bogus, because the members of its bucket may
489 * not all act the same with respect to the join clause, but it's hard to
490 * do better).
491 *
492 * If there are too many histogram elements, decimate to limit runtime.
493 */
494 if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists))
495 {
496 double hist_selec_sum = 0.0;
497 int k,
498 n;
499
500 k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
501
502 n = 0;
503 for (i = 1; i < hist1_slot.nvalues - 1; i += k)
504 {
505 hist_selec_sum +=
506 inet_semi_join_sel(hist1_slot.values[i],
507 mcv2_exists, mcv2_slot.values, mcv2_length,
508 hist2_exists,
509 hist2_slot.values, hist2_slot.nvalues,
510 hist2_weight,
511 &proc, opr_codenum);
512 n++;
513 }
514
515 selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
516 }
517
518 /*
519 * If useful statistics are not available then use the default estimate.
520 * We can apply null fractions if known, though.
521 */
522 if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
523 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
524
525 /* Release stats. */
526 free_attstatsslot(&mcv1_slot);
527 free_attstatsslot(&mcv2_slot);
528 free_attstatsslot(&hist1_slot);
529 free_attstatsslot(&hist2_slot);
530
531 return selec;
532}
533
534/*
535 * Compute the fraction of a relation's population that is represented
536 * by the MCV list.
537 */
538static Selectivity
539mcv_population(float4 *mcv_numbers, int mcv_nvalues)
540{
541 Selectivity sumcommon = 0.0;
542 int i;
543
544 for (i = 0; i < mcv_nvalues; i++)
545 {
546 sumcommon += mcv_numbers[i];
547 }
548
549 return sumcommon;
550}
551
552/*
553 * Inet histogram vs single value selectivity estimation
554 *
555 * Estimate the fraction of the histogram population that satisfies
556 * "value OPR CONST". (The result needs to be scaled to reflect the
557 * proportion of the total population represented by the histogram.)
558 *
559 * The histogram is originally for the inet btree comparison operators.
560 * Only the common bits of the network part and the length of the network part
561 * (masklen) are interesting for the subnet inclusion operators. Fortunately,
562 * btree comparison treats the network part as the major sort key. Even so,
563 * the length of the network part would not really be significant in the
564 * histogram. This would lead to big mistakes for data sets with uneven
565 * masklen distribution. To reduce this problem, comparisons with the left
566 * and the right sides of the buckets are used together.
567 *
568 * Histogram bucket matches are calculated in two forms. If the constant
569 * matches both bucket endpoints the bucket is considered as fully matched.
570 * The second form is to match the bucket partially; we recognize this when
571 * the constant matches just one endpoint, or the two endpoints fall on
572 * opposite sides of the constant. (Note that when the constant matches an
573 * interior histogram element, it gets credit for partial matches to the
574 * buckets on both sides, while a match to a histogram endpoint gets credit
575 * for only one partial match. This is desirable.)
576 *
577 * The divider in the partial bucket match is imagined as the distance
578 * between the decisive bits and the common bits of the addresses. It will
579 * be used as a power of two as it is the natural scale for the IP network
580 * inclusion. This partial bucket match divider calculation is an empirical
581 * formula and subject to change with more experiment.
582 *
583 * For a partial match, we try to calculate dividers for both of the
584 * boundaries. If the address family of a boundary value does not match the
585 * constant or comparison of the length of the network parts is not correct
586 * for the operator, the divider for that boundary will not be taken into
587 * account. If both of the dividers are valid, the greater one will be used
588 * to minimize the mistake in buckets that have disparate masklens. This
589 * calculation is unfair when dividers can be calculated for both of the
590 * boundaries but they are far from each other; but it is not a common
591 * situation as the boundaries are expected to share most of their significant
592 * bits of their masklens. The mistake would be greater, if we would use the
593 * minimum instead of the maximum, and we don't know a sensible way to combine
594 * them.
595 *
596 * For partial match in buckets that have different address families on the
597 * left and right sides, only the boundary with the same address family is
598 * taken into consideration. This can cause more mistakes for these buckets
599 * if the masklens of their boundaries are also disparate. But this can only
600 * happen in one bucket, since only two address families exist. It seems a
601 * better option than not considering these buckets at all.
602 */
603static Selectivity
604inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue,
605 int opr_codenum)
606{
607 Selectivity match = 0.0;
608 inet *query,
609 *left,
610 *right;
611 int i,
612 k,
613 n;
614 int left_order,
615 right_order,
616 left_divider,
617 right_divider;
618
619 /* guard against zero-divide below */
620 if (nvalues <= 1)
621 return 0.0;
622
623 /* if there are too many histogram elements, decimate to limit runtime */
624 k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;
625
626 query = DatumGetInetPP(constvalue);
627
628 /* "left" is the left boundary value of the current bucket ... */
629 left = DatumGetInetPP(values[0]);
630 left_order = inet_inclusion_cmp(left, query, opr_codenum);
631
632 n = 0;
633 for (i = k; i < nvalues; i += k)
634 {
635 /* ... and "right" is the right boundary value */
636 right = DatumGetInetPP(values[i]);
637 right_order = inet_inclusion_cmp(right, query, opr_codenum);
638
639 if (left_order == 0 && right_order == 0)
640 {
641 /* The whole bucket matches, since both endpoints do. */
642 match += 1.0;
643 }
644 else if ((left_order <= 0 && right_order >= 0) ||
645 (left_order >= 0 && right_order <= 0))
646 {
647 /* Partial bucket match. */
648 left_divider = inet_hist_match_divider(left, query, opr_codenum);
649 right_divider = inet_hist_match_divider(right, query, opr_codenum);
650
651 if (left_divider >= 0 || right_divider >= 0)
652 match += 1.0 / pow(2.0, Max(left_divider, right_divider));
653 }
654
655 /* Shift the variables. */
656 left = right;
657 left_order = right_order;
658
659 /* Count the number of buckets considered. */
660 n++;
661 }
662
663 return match / n;
664}
665
666/*
667 * Inet MCV vs MCV join selectivity estimation
668 *
669 * We simply add up the fractions of the populations that satisfy the clause.
670 * The result is exact and does not need to be scaled further.
671 */
672static Selectivity
673inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
674 Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
675 Oid operator)
676{
677 Selectivity selec = 0.0;
678 FmgrInfo proc;
679 int i,
680 j;
681
682 fmgr_info(get_opcode(operator), &proc);
683
684 for (i = 0; i < mcv1_nvalues; i++)
685 {
686 for (j = 0; j < mcv2_nvalues; j++)
687 if (DatumGetBool(FunctionCall2(&proc,
688 mcv1_values[i],
689 mcv2_values[j])))
690 selec += mcv1_numbers[i] * mcv2_numbers[j];
691 }
692 return selec;
693}
694
695/*
696 * Inet MCV vs histogram join selectivity estimation
697 *
698 * For each MCV on the lefthand side, estimate the fraction of the righthand's
699 * histogram population that satisfies the join clause, and add those up,
700 * scaling by the MCV's frequency. The result still needs to be scaled
701 * according to the fraction of the righthand's population represented by
702 * the histogram.
703 */
704static Selectivity
705inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
706 Datum *hist_values, int hist_nvalues,
707 int opr_codenum)
708{
709 Selectivity selec = 0.0;
710 int i;
711
712 /*
713 * We'll call inet_hist_value_selec with the histogram on the left, so we
714 * must commute the operator.
715 */
716 opr_codenum = -opr_codenum;
717
718 for (i = 0; i < mcv_nvalues; i++)
719 {
720 selec += mcv_numbers[i] *
721 inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i],
722 opr_codenum);
723 }
724 return selec;
725}
726
727/*
728 * Inet histogram vs histogram join selectivity estimation
729 *
730 * Here, we take all values listed in the second histogram (except for the
731 * first and last elements, which are excluded on the grounds of possibly
732 * not being very representative) and treat them as a uniform sample of
733 * the non-MCV population for that relation. For each one, we apply
734 * inet_hist_value_selec to see what fraction of the first histogram
735 * it matches.
736 *
737 * We could alternatively do this the other way around using the operator's
738 * commutator. XXX would it be worthwhile to do it both ways and take the
739 * average? That would at least avoid non-commutative estimation results.
740 */
741static Selectivity
742inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues,
743 Datum *hist2_values, int hist2_nvalues,
744 int opr_codenum)
745{
746 double match = 0.0;
747 int i,
748 k,
749 n;
750
751 if (hist2_nvalues <= 2)
752 return 0.0; /* no interior histogram elements */
753
754 /* if there are too many histogram elements, decimate to limit runtime */
755 k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
756
757 n = 0;
758 for (i = 1; i < hist2_nvalues - 1; i += k)
759 {
760 match += inet_hist_value_sel(hist1_values, hist1_nvalues,
761 hist2_values[i], opr_codenum);
762 n++;
763 }
764
765 return match / n;
766}
767
768/*
769 * Inet semi join selectivity estimation for one value
770 *
771 * The function calculates the probability that there is at least one row
772 * in the RHS table that satisfies the "lhs_value op column" condition.
773 * It is used in semi join estimation to check a sample from the left hand
774 * side table.
775 *
776 * The MCV and histogram from the right hand side table should be provided as
777 * arguments with the lhs_value from the left hand side table for the join.
778 * hist_weight is the total number of rows represented by the histogram.
779 * For example, if the table has 1000 rows, and 10% of the rows are in the MCV
780 * list, and another 10% are NULLs, hist_weight would be 800.
781 *
782 * First, the lhs_value will be matched to the most common values. If it
783 * matches any of them, 1.0 will be returned, because then there is surely
784 * a match.
785 *
786 * Otherwise, the histogram will be used to estimate the number of rows in
787 * the second table that match the condition. If the estimate is greater
788 * than 1.0, 1.0 will be returned, because it means there is a greater chance
789 * that the lhs_value will match more than one row in the table. If it is
790 * between 0.0 and 1.0, it will be returned as the probability.
791 */
792static Selectivity
794 bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
795 bool hist_exists, Datum *hist_values, int hist_nvalues,
796 double hist_weight,
797 FmgrInfo *proc, int opr_codenum)
798{
799 if (mcv_exists)
800 {
801 int i;
802
803 for (i = 0; i < mcv_nvalues; i++)
804 {
806 lhs_value,
807 mcv_values[i])))
808 return 1.0;
809 }
810 }
811
812 if (hist_exists && hist_weight > 0)
813 {
814 Selectivity hist_selec;
815
816 /* Commute operator, since we're passing lhs_value on the right */
817 hist_selec = inet_hist_value_sel(hist_values, hist_nvalues,
818 lhs_value, -opr_codenum);
819
820 if (hist_selec > 0)
821 return Min(1.0, hist_weight * hist_selec);
822 }
823
824 return 0.0;
825}
826
827/*
828 * Assign useful code numbers for the subnet inclusion/overlap operators
829 *
830 * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
831 * on the exact codes assigned here; but many other places in this file
832 * know that they can negate a code to obtain the code for the commutator
833 * operator.
834 */
835static int
837{
838 switch (operator)
839 {
840 case OID_INET_SUP_OP:
841 return -2;
842 case OID_INET_SUPEQ_OP:
843 return -1;
844 case OID_INET_OVERLAP_OP:
845 return 0;
846 case OID_INET_SUBEQ_OP:
847 return 1;
848 case OID_INET_SUB_OP:
849 return 2;
850 default:
851 elog(ERROR, "unrecognized operator %u for inet selectivity",
852 operator);
853 }
854 return 0; /* unreached, but keep compiler quiet */
855}
856
857/*
858 * Comparison function for the subnet inclusion/overlap operators
859 *
860 * If the comparison is okay for the specified inclusion operator, the return
861 * value will be 0. Otherwise the return value will be less than or greater
862 * than 0 as appropriate for the operator.
863 *
864 * Comparison is compatible with the basic comparison function for the inet
865 * type. See network_cmp_internal() in network.c for the original. Basic
866 * comparison operators are implemented with the network_cmp_internal()
867 * function. It is possible to implement the subnet inclusion operators with
868 * this function.
869 *
870 * Comparison is first on the common bits of the network part, then on the
871 * length of the network part (masklen) as in the network_cmp_internal()
872 * function. Only the first part is in this function. The second part is
873 * separated to another function for reusability. The difference between the
874 * second part and the original network_cmp_internal() is that the inclusion
875 * operator is considered while comparing the lengths of the network parts.
876 * See the inet_masklen_inclusion_cmp() function below.
877 */
878static int
879inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
880{
881 if (ip_family(left) == ip_family(right))
882 {
883 int order;
884
885 order = bitncmp(ip_addr(left), ip_addr(right),
886 Min(ip_bits(left), ip_bits(right)));
887 if (order != 0)
888 return order;
889
890 return inet_masklen_inclusion_cmp(left, right, opr_codenum);
891 }
892
893 return ip_family(left) - ip_family(right);
894}
895
896/*
897 * Masklen comparison function for the subnet inclusion/overlap operators
898 *
899 * Compares the lengths of the network parts of the inputs. If the comparison
900 * is okay for the specified inclusion operator, the return value will be 0.
901 * Otherwise the return value will be less than or greater than 0 as
902 * appropriate for the operator.
903 */
904static int
905inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
906{
907 int order;
908
909 order = (int) ip_bits(left) - (int) ip_bits(right);
910
911 /*
912 * Return 0 if the operator would accept this combination of masklens.
913 * Note that opr_codenum zero (overlaps) will accept all cases.
914 */
915 if ((order > 0 && opr_codenum >= 0) ||
916 (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) ||
917 (order < 0 && opr_codenum <= 0))
918 return 0;
919
920 /*
921 * Otherwise, return a negative value for sup/supeq (notionally, the RHS
922 * needs to have a larger masklen than it has, which would make it sort
923 * later), or a positive value for sub/subeq (vice versa).
924 */
925 return opr_codenum;
926}
927
928/*
929 * Inet histogram partial match divider calculation
930 *
931 * First the families and the lengths of the network parts are compared using
932 * the subnet inclusion operator. If those are acceptable for the operator,
933 * the divider will be calculated using the masklens and the common bits of
934 * the addresses. -1 will be returned if it cannot be calculated.
935 *
936 * See commentary for inet_hist_value_sel() for some rationale for this.
937 */
938static int
939inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
940{
941 if (ip_family(boundary) == ip_family(query) &&
942 inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0)
943 {
944 int min_bits,
945 decisive_bits;
946
947 min_bits = Min(ip_bits(boundary), ip_bits(query));
948
949 /*
950 * Set decisive_bits to the masklen of the one that should contain the
951 * other according to the operator.
952 */
953 if (opr_codenum < 0)
954 decisive_bits = ip_bits(boundary);
955 else if (opr_codenum > 0)
956 decisive_bits = ip_bits(query);
957 else
958 decisive_bits = min_bits;
959
960 /*
961 * Now return the number of non-common decisive bits. (This will be
962 * zero if the boundary and query in fact match, else positive.)
963 */
964 if (min_bits > 0)
965 return decisive_bits - bitncommon(ip_addr(boundary),
966 ip_addr(query),
967 min_bits);
968 return decisive_bits;
969 }
970
971 return -1;
972}
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define Min(x, y)
Definition: c.h:958
#define Max(x, y)
Definition: c.h:952
double float8
Definition: c.h:584
float float4
Definition: c.h:583
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define FunctionCall2(flinfo, arg1, arg2)
Definition: fmgr.h:661
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
int j
Definition: isn.c:73
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3344
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1285
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3234
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1509
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
int bitncommon(const unsigned char *l, const unsigned char *r, int n)
Definition: network.c:1597
int bitncmp(const unsigned char *l, const unsigned char *r, int n)
Definition: network.c:1563
static int inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
static int inet_opr_codenum(Oid operator)
static Selectivity networkjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2)
static Selectivity inet_semi_join_sel(Datum lhs_value, bool mcv_exists, Datum *mcv_values, int mcv_nvalues, bool hist_exists, Datum *hist_values, int hist_nvalues, double hist_weight, FmgrInfo *proc, int opr_codenum)
static int inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
Datum networksel(PG_FUNCTION_ARGS)
static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues)
static Selectivity inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues, Oid operator)
Datum networkjoinsel(PG_FUNCTION_ARGS)
static Selectivity inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue, int opr_codenum)
static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues, Datum *hist_values, int hist_nvalues, int opr_codenum)
#define MAX_CONSIDERED_ELEMS
static int inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
static Selectivity networkjoinsel_inner(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2)
#define DEFAULT_SEL(operator)
static Selectivity inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues, Datum *hist2_values, int hist2_nvalues, int opr_codenum)
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
JoinType
Definition: nodes.h:288
@ JOIN_SEMI
Definition: nodes.h:307
@ JOIN_FULL
Definition: nodes.h:295
@ JOIN_INNER
Definition: nodes.h:293
@ JOIN_LEFT
Definition: nodes.h:294
@ JOIN_ANTI
Definition: nodes.h:308
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
tree ctl root
Definition: radixtree.h:1888
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4894
double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, Datum constval, bool varonleft, double *sumcommonp)
Definition: selfuncs.c:732
void get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, VariableStatData *vardata1, VariableStatData *vardata2, bool *join_is_reversed)
Definition: selfuncs.c:4954
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
Datum * values
Definition: lsyscache.h:53
float4 * numbers
Definition: lsyscache.h:56
Definition: fmgr.h:57
Definition: pg_list.h:54
Definition: nodes.h:129
Cardinality rows
Definition: pathnodes.h:877
JoinType jointype
Definition: pathnodes.h:2909
HeapTuple statsTuple
Definition: selfuncs.h:89
RelOptInfo * rel
Definition: selfuncs.h:88
Definition: inet.h:53
static inet * DatumGetInetPP(Datum X)
Definition: inet.h:123
#define ip_addr(inetptr)
Definition: inet.h:77
#define ip_family(inetptr)
Definition: inet.h:71
#define ip_bits(inetptr)
Definition: inet.h:74