PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
rangetypes_selfuncs.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
#include "utils/selfuncs.h"
#include "utils/typcache.h"
Include dependency graph for rangetypes_selfuncs.c:

Go to the source code of this file.

Functions

static double calc_rangesel (TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
 
static double default_range_selectivity (Oid operator)
 
static double calc_hist_selectivity (TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
 
static double calc_hist_selectivity_scalar (TypeCacheEntry *typcache, const RangeBound *constbound, const RangeBound *hist, int hist_nvalues, bool equal)
 
static int rbound_bsearch (TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist, int hist_length, bool equal)
 
static float8 get_position (TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, const RangeBound *hist2)
 
static float8 get_len_position (double value, double hist1, double hist2)
 
static float8 get_distance (TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
 
static int length_hist_bsearch (Datum *length_hist_values, int length_hist_nvalues, double value, bool equal)
 
static double calc_length_hist_frac (Datum *length_hist_values, int length_hist_nvalues, double length1, double length2, bool equal)
 
static double calc_hist_selectivity_contained (TypeCacheEntry *typcache, const RangeBound *lower, RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
 
static double calc_hist_selectivity_contains (TypeCacheEntry *typcache, const RangeBound *lower, const RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
 
Datum rangesel (PG_FUNCTION_ARGS)
 

Function Documentation

◆ calc_hist_selectivity()

static double calc_hist_selectivity ( TypeCacheEntry typcache,
VariableStatData vardata,
const RangeType constval,
Oid  operator 
)
static

Definition at line 373 of file rangetypes_selfuncs.c.

375{
376 AttStatsSlot hslot;
377 AttStatsSlot lslot;
378 int nhist;
379 RangeBound *hist_lower;
380 RangeBound *hist_upper;
381 int i;
382 RangeBound const_lower;
383 RangeBound const_upper;
384 bool empty;
385 double hist_selec;
386
387 /* Can't use the histogram with insecure range support functions */
389 typcache->rng_cmp_proc_finfo.fn_oid))
390 return -1;
391 if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
393 typcache->rng_subdiff_finfo.fn_oid))
394 return -1;
395
396 /* Try to get histogram of ranges */
397 if (!(HeapTupleIsValid(vardata->statsTuple) &&
398 get_attstatsslot(&hslot, vardata->statsTuple,
399 STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
401 return -1.0;
402
403 /* check that it's a histogram, not just a dummy entry */
404 if (hslot.nvalues < 2)
405 {
406 free_attstatsslot(&hslot);
407 return -1.0;
408 }
409
410 /*
411 * Convert histogram of ranges into histograms of its lower and upper
412 * bounds.
413 */
414 nhist = hslot.nvalues;
415 hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
416 hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
417 for (i = 0; i < nhist; i++)
418 {
420 &hist_lower[i], &hist_upper[i], &empty);
421 /* The histogram should not contain any empty ranges */
422 if (empty)
423 elog(ERROR, "bounds histogram contains an empty range");
424 }
425
426 /* @> and @< also need a histogram of range lengths */
427 if (operator == OID_RANGE_CONTAINS_OP ||
428 operator == OID_RANGE_CONTAINED_OP)
429 {
430 if (!(HeapTupleIsValid(vardata->statsTuple) &&
431 get_attstatsslot(&lslot, vardata->statsTuple,
432 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
435 {
436 free_attstatsslot(&hslot);
437 return -1.0;
438 }
439
440 /* check that it's a histogram, not just a dummy entry */
441 if (lslot.nvalues < 2)
442 {
443 free_attstatsslot(&lslot);
444 free_attstatsslot(&hslot);
445 return -1.0;
446 }
447 }
448 else
449 memset(&lslot, 0, sizeof(lslot));
450
451 /* Extract the bounds of the constant value. */
452 range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
453 Assert(!empty);
454
455 /*
456 * Calculate selectivity comparing the lower or upper bound of the
457 * constant with the histogram of lower or upper bounds.
458 */
459 switch (operator)
460 {
461 case OID_RANGE_LESS_OP:
462
463 /*
464 * The regular b-tree comparison operators (<, <=, >, >=) compare
465 * the lower bounds first, and the upper bounds for values with
466 * equal lower bounds. Estimate that by comparing the lower bounds
467 * only. This gives a fairly accurate estimate assuming there
468 * aren't many rows with a lower bound equal to the constant's
469 * lower bound.
470 */
471 hist_selec =
472 calc_hist_selectivity_scalar(typcache, &const_lower,
473 hist_lower, nhist, false);
474 break;
475
476 case OID_RANGE_LESS_EQUAL_OP:
477 hist_selec =
478 calc_hist_selectivity_scalar(typcache, &const_lower,
479 hist_lower, nhist, true);
480 break;
481
482 case OID_RANGE_GREATER_OP:
483 hist_selec =
484 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
485 hist_lower, nhist, false);
486 break;
487
488 case OID_RANGE_GREATER_EQUAL_OP:
489 hist_selec =
490 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
491 hist_lower, nhist, true);
492 break;
493
494 case OID_RANGE_LEFT_OP:
495 /* var << const when upper(var) < lower(const) */
496 hist_selec =
497 calc_hist_selectivity_scalar(typcache, &const_lower,
498 hist_upper, nhist, false);
499 break;
500
501 case OID_RANGE_RIGHT_OP:
502 /* var >> const when lower(var) > upper(const) */
503 hist_selec =
504 1 - calc_hist_selectivity_scalar(typcache, &const_upper,
505 hist_lower, nhist, true);
506 break;
507
508 case OID_RANGE_OVERLAPS_RIGHT_OP:
509 /* compare lower bounds */
510 hist_selec =
511 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
512 hist_lower, nhist, false);
513 break;
514
515 case OID_RANGE_OVERLAPS_LEFT_OP:
516 /* compare upper bounds */
517 hist_selec =
518 calc_hist_selectivity_scalar(typcache, &const_upper,
519 hist_upper, nhist, true);
520 break;
521
522 case OID_RANGE_OVERLAP_OP:
523 case OID_RANGE_CONTAINS_ELEM_OP:
524
525 /*
526 * A && B <=> NOT (A << B OR A >> B).
527 *
528 * Since A << B and A >> B are mutually exclusive events we can
529 * sum their probabilities to find probability of (A << B OR A >>
530 * B).
531 *
532 * "range @> elem" is equivalent to "range && [elem,elem]". The
533 * caller already constructed the singular range from the element
534 * constant, so just treat it the same as &&.
535 */
536 hist_selec =
537 calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
538 nhist, false);
539 hist_selec +=
540 (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
541 nhist, true));
542 hist_selec = 1.0 - hist_selec;
543 break;
544
545 case OID_RANGE_CONTAINS_OP:
546 hist_selec =
547 calc_hist_selectivity_contains(typcache, &const_lower,
548 &const_upper, hist_lower, nhist,
549 lslot.values, lslot.nvalues);
550 break;
551
552 case OID_RANGE_CONTAINED_OP:
553 if (const_lower.infinite)
554 {
555 /*
556 * Lower bound no longer matters. Just estimate the fraction
557 * with an upper bound <= const upper bound
558 */
559 hist_selec =
560 calc_hist_selectivity_scalar(typcache, &const_upper,
561 hist_upper, nhist, true);
562 }
563 else if (const_upper.infinite)
564 {
565 hist_selec =
566 1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
567 hist_lower, nhist, false);
568 }
569 else
570 {
571 hist_selec =
572 calc_hist_selectivity_contained(typcache, &const_lower,
573 &const_upper, hist_lower, nhist,
574 lslot.values, lslot.nvalues);
575 }
576 break;
577
578 default:
579 elog(ERROR, "unknown range operator %u", operator);
580 hist_selec = -1.0; /* keep compiler quiet */
581 break;
582 }
583
584 free_attstatsslot(&lslot);
585 free_attstatsslot(&hslot);
586
587 return hist_selec;
588}
#define Assert(condition)
Definition: c.h:815
#define OidIsValid(objectId)
Definition: c.h:732
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
int i
Definition: isn.c:72
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3344
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3234
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
void * palloc(Size size)
Definition: mcxt.c:1317
#define InvalidOid
Definition: postgres_ext.h:37
void range_deserialize(TypeCacheEntry *typcache, const RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty)
Definition: rangetypes.c:1856
static RangeType * DatumGetRangeTypeP(Datum X)
Definition: rangetypes.h:73
static double calc_hist_selectivity_contained(TypeCacheEntry *typcache, const RangeBound *lower, RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound, const RangeBound *hist, int hist_nvalues, bool equal)
static double calc_hist_selectivity_contains(TypeCacheEntry *typcache, const RangeBound *lower, const RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5769
Datum * values
Definition: lsyscache.h:53
Oid fn_oid
Definition: fmgr.h:59
bool infinite
Definition: rangetypes.h:64
FmgrInfo rng_cmp_proc_finfo
Definition: typcache.h:101
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:103
HeapTuple statsTuple
Definition: selfuncs.h:89

References Assert, ATTSTATSSLOT_VALUES, calc_hist_selectivity_contained(), calc_hist_selectivity_contains(), calc_hist_selectivity_scalar(), DatumGetRangeTypeP(), elog, ERROR, FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), HeapTupleIsValid, i, RangeBound::infinite, InvalidOid, AttStatsSlot::nvalues, OidIsValid, palloc(), range_deserialize(), TypeCacheEntry::rng_cmp_proc_finfo, TypeCacheEntry::rng_subdiff_finfo, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by calc_rangesel().

◆ calc_hist_selectivity_contained()

static double calc_hist_selectivity_contained ( TypeCacheEntry typcache,
const RangeBound lower,
RangeBound upper,
const RangeBound hist_lower,
int  hist_nvalues,
Datum length_hist_values,
int  length_hist_nvalues 
)
static

Definition at line 1018 of file rangetypes_selfuncs.c.

1022{
1023 int i,
1024 upper_index;
1025 float8 prev_dist;
1026 double bin_width;
1027 double upper_bin_width;
1028 double sum_frac;
1029
1030 /*
1031 * Begin by finding the bin containing the upper bound, in the lower bound
1032 * histogram. Any range with a lower bound > constant upper bound can't
1033 * match, ie. there are no matches in bins greater than upper_index.
1034 */
1035 upper->inclusive = !upper->inclusive;
1036 upper->lower = true;
1037 upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
1038 false);
1039
1040 /*
1041 * If the upper bound value is below the histogram's lower limit, there
1042 * are no matches.
1043 */
1044 if (upper_index < 0)
1045 return 0.0;
1046
1047 /*
1048 * If the upper bound value is at or beyond the histogram's upper limit,
1049 * start our loop at the last actual bin, as though the upper bound were
1050 * within that bin; get_position will clamp its result to 1.0 anyway.
1051 * (This corresponds to assuming that the data population above the
1052 * histogram's upper limit is empty, exactly like what we just assumed for
1053 * the lower limit.)
1054 */
1055 upper_index = Min(upper_index, hist_nvalues - 2);
1056
1057 /*
1058 * Calculate upper_bin_width, ie. the fraction of the (upper_index,
1059 * upper_index + 1) bin which is greater than upper bound of query range
1060 * using linear interpolation of subdiff function.
1061 */
1062 upper_bin_width = get_position(typcache, upper,
1063 &hist_lower[upper_index],
1064 &hist_lower[upper_index + 1]);
1065
1066 /*
1067 * In the loop, dist and prev_dist are the distance of the "current" bin's
1068 * lower and upper bounds from the constant upper bound.
1069 *
1070 * bin_width represents the width of the current bin. Normally it is 1.0,
1071 * meaning a full width bin, but can be less in the corner cases: start
1072 * and end of the loop. We start with bin_width = upper_bin_width, because
1073 * we begin at the bin containing the upper bound.
1074 */
1075 prev_dist = 0.0;
1076 bin_width = upper_bin_width;
1077
1078 sum_frac = 0.0;
1079 for (i = upper_index; i >= 0; i--)
1080 {
1081 double dist;
1082 double length_hist_frac;
1083 bool final_bin = false;
1084
1085 /*
1086 * dist -- distance from upper bound of query range to lower bound of
1087 * the current bin in the lower bound histogram. Or to the lower bound
1088 * of the constant range, if this is the final bin, containing the
1089 * constant lower bound.
1090 */
1091 if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
1092 {
1093 dist = get_distance(typcache, lower, upper);
1094
1095 /*
1096 * Subtract from bin_width the portion of this bin that we want to
1097 * ignore.
1098 */
1099 bin_width -= get_position(typcache, lower, &hist_lower[i],
1100 &hist_lower[i + 1]);
1101 if (bin_width < 0.0)
1102 bin_width = 0.0;
1103 final_bin = true;
1104 }
1105 else
1106 dist = get_distance(typcache, &hist_lower[i], upper);
1107
1108 /*
1109 * Estimate the fraction of tuples in this bin that are narrow enough
1110 * to not exceed the distance to the upper bound of the query range.
1111 */
1112 length_hist_frac = calc_length_hist_frac(length_hist_values,
1113 length_hist_nvalues,
1114 prev_dist, dist, true);
1115
1116 /*
1117 * Add the fraction of tuples in this bin, with a suitable length, to
1118 * the total.
1119 */
1120 sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
1121
1122 if (final_bin)
1123 break;
1124
1125 bin_width = 1.0;
1126 prev_dist = dist;
1127 }
1128
1129 return sum_frac;
1130}
#define Min(x, y)
Definition: c.h:961
double float8
Definition: c.h:587
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:2016
static double calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues, double length1, double length2, bool equal)
static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, const RangeBound *hist2)
static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist, int hist_length, bool equal)
static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)

References calc_length_hist_frac(), get_distance(), get_position(), i, lower(), Min, range_cmp_bounds(), rbound_bsearch(), and upper().

Referenced by calc_hist_selectivity().

◆ calc_hist_selectivity_contains()

static double calc_hist_selectivity_contains ( TypeCacheEntry typcache,
const RangeBound lower,
const RangeBound upper,
const RangeBound hist_lower,
int  hist_nvalues,
Datum length_hist_values,
int  length_hist_nvalues 
)
static

Definition at line 1139 of file rangetypes_selfuncs.c.

1143{
1144 int i,
1145 lower_index;
1146 double bin_width,
1147 lower_bin_width;
1148 double sum_frac;
1149 float8 prev_dist;
1150
1151 /* Find the bin containing the lower bound of query range. */
1152 lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
1153 true);
1154
1155 /*
1156 * If the lower bound value is below the histogram's lower limit, there
1157 * are no matches.
1158 */
1159 if (lower_index < 0)
1160 return 0.0;
1161
1162 /*
1163 * If the lower bound value is at or beyond the histogram's upper limit,
1164 * start our loop at the last actual bin, as though the upper bound were
1165 * within that bin; get_position will clamp its result to 1.0 anyway.
1166 * (This corresponds to assuming that the data population above the
1167 * histogram's upper limit is empty, exactly like what we just assumed for
1168 * the lower limit.)
1169 */
1170 lower_index = Min(lower_index, hist_nvalues - 2);
1171
1172 /*
1173 * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
1174 * lower_index + 1) bin which is greater than lower bound of query range
1175 * using linear interpolation of subdiff function.
1176 */
1177 lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
1178 &hist_lower[lower_index + 1]);
1179
1180 /*
1181 * Loop through all the lower bound bins, smaller than the query lower
1182 * bound. In the loop, dist and prev_dist are the distance of the
1183 * "current" bin's lower and upper bounds from the constant upper bound.
1184 * We begin from query lower bound, and walk backwards, so the first bin's
1185 * upper bound is the query lower bound, and its distance to the query
1186 * upper bound is the length of the query range.
1187 *
1188 * bin_width represents the width of the current bin. Normally it is 1.0,
1189 * meaning a full width bin, except for the first bin, which is only
1190 * counted up to the constant lower bound.
1191 */
1192 prev_dist = get_distance(typcache, lower, upper);
1193 sum_frac = 0.0;
1194 bin_width = lower_bin_width;
1195 for (i = lower_index; i >= 0; i--)
1196 {
1197 float8 dist;
1198 double length_hist_frac;
1199
1200 /*
1201 * dist -- distance from upper bound of query range to current value
1202 * of lower bound histogram or lower bound of query range (if we've
1203 * reach it).
1204 */
1205 dist = get_distance(typcache, &hist_lower[i], upper);
1206
1207 /*
1208 * Get average fraction of length histogram which covers intervals
1209 * longer than (or equal to) distance to upper bound of query range.
1210 */
1211 length_hist_frac =
1212 1.0 - calc_length_hist_frac(length_hist_values,
1213 length_hist_nvalues,
1214 prev_dist, dist, false);
1215
1216 sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
1217
1218 bin_width = 1.0;
1219 prev_dist = dist;
1220 }
1221
1222 return sum_frac;
1223}

References calc_length_hist_frac(), get_distance(), get_position(), i, lower(), Min, rbound_bsearch(), and upper().

Referenced by calc_hist_selectivity().

◆ calc_hist_selectivity_scalar()

static double calc_hist_selectivity_scalar ( TypeCacheEntry typcache,
const RangeBound constbound,
const RangeBound hist,
int  hist_nvalues,
bool  equal 
)
static

Definition at line 596 of file rangetypes_selfuncs.c.

598{
599 Selectivity selec;
600 int index;
601
602 /*
603 * Find the histogram bin the given constant falls into. Estimate
604 * selectivity as the number of preceding whole bins.
605 */
606 index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
607 selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
608
609 /* Adjust using linear interpolation within the bin */
610 if (index >= 0 && index < hist_nvalues - 1)
611 selec += get_position(typcache, constbound, &hist[index],
612 &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
613
614 return selec;
615}
#define Max(x, y)
Definition: c.h:955
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
double Selectivity
Definition: nodes.h:250
Definition: type.h:96

References equal(), get_position(), Max, and rbound_bsearch().

Referenced by calc_hist_selectivity().

◆ calc_length_hist_frac()

static double calc_length_hist_frac ( Datum length_hist_values,
int  length_hist_nvalues,
double  length1,
double  length2,
bool  equal 
)
static

Definition at line 855 of file rangetypes_selfuncs.c.

857{
858 double frac;
859 double A,
860 B,
861 PA,
862 PB;
863 double pos;
864 int i;
865 double area;
866
867 Assert(length2 >= length1);
868
869 if (length2 < 0.0)
870 return 0.0; /* shouldn't happen, but doesn't hurt to check */
871
872 /* All lengths in the table are <= infinite. */
873 if (isinf(length2) && equal)
874 return 1.0;
875
876 /*----------
877 * The average of a function between A and B can be calculated by the
878 * formula:
879 *
880 * B
881 * 1 /
882 * ------- | P(x)dx
883 * B - A /
884 * A
885 *
886 * The geometrical interpretation of the integral is the area under the
887 * graph of P(x). P(x) is defined by the length histogram. We calculate
888 * the area in a piecewise fashion, iterating through the length histogram
889 * bins. Each bin is a trapezoid:
890 *
891 * P(x2)
892 * /|
893 * / |
894 * P(x1)/ |
895 * | |
896 * | |
897 * ---+---+--
898 * x1 x2
899 *
900 * where x1 and x2 are the boundaries of the current histogram, and P(x1)
901 * and P(x1) are the cumulative fraction of tuples at the boundaries.
902 *
903 * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
904 *
905 * The first bin contains the lower bound passed by the caller, so we
906 * use linear interpolation between the previous and next histogram bin
907 * boundary to calculate P(x1). Likewise for the last bin: we use linear
908 * interpolation to calculate P(x2). For the bins in between, x1 and x2
909 * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
910 * P(x1) = (bin index) / (number of bins)
911 * P(x2) = (bin index + 1 / (number of bins)
912 */
913
914 /* First bin, the one that contains lower bound */
915 i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
916 if (i >= length_hist_nvalues - 1)
917 return 1.0;
918
919 if (i < 0)
920 {
921 i = 0;
922 pos = 0.0;
923 }
924 else
925 {
926 /* interpolate length1's position in the bin */
927 pos = get_len_position(length1,
928 DatumGetFloat8(length_hist_values[i]),
929 DatumGetFloat8(length_hist_values[i + 1]));
930 }
931 PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
932 B = length1;
933
934 /*
935 * In the degenerate case that length1 == length2, simply return
936 * P(length1). This is not merely an optimization: if length1 == length2,
937 * we'd divide by zero later on.
938 */
939 if (length2 == length1)
940 return PB;
941
942 /*
943 * Loop through all the bins, until we hit the last bin, the one that
944 * contains the upper bound. (if lower and upper bounds are in the same
945 * bin, this falls out immediately)
946 */
947 area = 0.0;
948 for (; i < length_hist_nvalues - 1; i++)
949 {
950 double bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
951
952 /* check if we've reached the last bin */
953 if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
954 break;
955
956 /* the upper bound of previous bin is the lower bound of this bin */
957 A = B;
958 PA = PB;
959
960 B = bin_upper;
961 PB = (double) i / (double) (length_hist_nvalues - 1);
962
963 /*
964 * Add the area of this trapezoid to the total. The point of the
965 * if-check is to avoid NaN, in the corner case that PA == PB == 0,
966 * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
967 * 0) is zero, regardless of the width (B - A).
968 */
969 if (PA > 0 || PB > 0)
970 area += 0.5 * (PB + PA) * (B - A);
971 }
972
973 /* Last bin */
974 A = B;
975 PA = PB;
976
977 B = length2; /* last bin ends at the query upper bound */
978 if (i >= length_hist_nvalues - 1)
979 pos = 0.0;
980 else
981 {
982 if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
983 pos = 0.0;
984 else
985 pos = get_len_position(length2, DatumGetFloat8(length_hist_values[i]), DatumGetFloat8(length_hist_values[i + 1]));
986 }
987 PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
988
989 if (PA > 0 || PB > 0)
990 area += 0.5 * (PB + PA) * (B - A);
991
992 /*
993 * Ok, we have calculated the area, ie. the integral. Divide by width to
994 * get the requested average.
995 *
996 * Avoid NaN arising from infinite / infinite. This happens at least if
997 * length2 is infinite. It's not clear what the correct value would be in
998 * that case, so 0.5 seems as good as any value.
999 */
1000 if (isinf(area) && isinf(length2))
1001 frac = 0.5;
1002 else
1003 frac = area / (length2 - length1);
1004
1005 return frac;
1006}
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:499
static int length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues, double value, bool equal)
static float8 get_len_position(double value, double hist1, double hist2)

References Assert, DatumGetFloat8(), equal(), get_len_position(), i, and length_hist_bsearch().

Referenced by calc_hist_selectivity_contained(), and calc_hist_selectivity_contains().

◆ calc_rangesel()

static double calc_rangesel ( TypeCacheEntry typcache,
VariableStatData vardata,
const RangeType constval,
Oid  operator 
)
static

Definition at line 231 of file rangetypes_selfuncs.c.

233{
234 double hist_selec;
235 double selec;
236 float4 empty_frac,
237 null_frac;
238
239 /*
240 * First look up the fraction of NULLs and empty ranges from pg_statistic.
241 */
242 if (HeapTupleIsValid(vardata->statsTuple))
243 {
244 Form_pg_statistic stats;
245 AttStatsSlot sslot;
246
247 stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
248 null_frac = stats->stanullfrac;
249
250 /* Try to get fraction of empty ranges */
251 if (get_attstatsslot(&sslot, vardata->statsTuple,
252 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
255 {
256 if (sslot.nnumbers != 1)
257 elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */
258 empty_frac = sslot.numbers[0];
259 free_attstatsslot(&sslot);
260 }
261 else
262 {
263 /* No empty fraction statistic. Assume no empty ranges. */
264 empty_frac = 0.0;
265 }
266 }
267 else
268 {
269 /*
270 * No stats are available. Follow through the calculations below
271 * anyway, assuming no NULLs and no empty ranges. This still allows us
272 * to give a better-than-nothing estimate based on whether the
273 * constant is an empty range or not.
274 */
275 null_frac = 0.0;
276 empty_frac = 0.0;
277 }
278
279 if (RangeIsEmpty(constval))
280 {
281 /*
282 * An empty range matches all ranges, all empty ranges, or nothing,
283 * depending on the operator
284 */
285 switch (operator)
286 {
287 /* these return false if either argument is empty */
288 case OID_RANGE_OVERLAP_OP:
289 case OID_RANGE_OVERLAPS_LEFT_OP:
290 case OID_RANGE_OVERLAPS_RIGHT_OP:
291 case OID_RANGE_LEFT_OP:
292 case OID_RANGE_RIGHT_OP:
293 /* nothing is less than an empty range */
294 case OID_RANGE_LESS_OP:
295 selec = 0.0;
296 break;
297
298 /* only empty ranges can be contained by an empty range */
299 case OID_RANGE_CONTAINED_OP:
300 /* only empty ranges are <= an empty range */
301 case OID_RANGE_LESS_EQUAL_OP:
302 selec = empty_frac;
303 break;
304
305 /* everything contains an empty range */
306 case OID_RANGE_CONTAINS_OP:
307 /* everything is >= an empty range */
308 case OID_RANGE_GREATER_EQUAL_OP:
309 selec = 1.0;
310 break;
311
312 /* all non-empty ranges are > an empty range */
313 case OID_RANGE_GREATER_OP:
314 selec = 1.0 - empty_frac;
315 break;
316
317 /* an element cannot be empty */
318 case OID_RANGE_CONTAINS_ELEM_OP:
319 default:
320 elog(ERROR, "unexpected operator %u", operator);
321 selec = 0.0; /* keep compiler quiet */
322 break;
323 }
324 }
325 else
326 {
327 /*
328 * Calculate selectivity using bound histograms. If that fails for
329 * some reason, e.g no histogram in pg_statistic, use the default
330 * constant estimate for the fraction of non-empty values. This is
331 * still somewhat better than just returning the default estimate,
332 * because this still takes into account the fraction of empty and
333 * NULL tuples, if we had statistics for them.
334 */
335 hist_selec = calc_hist_selectivity(typcache, vardata, constval,
336 operator);
337 if (hist_selec < 0.0)
338 hist_selec = default_range_selectivity(operator);
339
340 /*
341 * Now merge the results for the empty ranges and histogram
342 * calculations, realizing that the histogram covers only the
343 * non-null, non-empty values.
344 */
345 if (operator == OID_RANGE_CONTAINED_OP)
346 {
347 /* empty is contained by anything non-empty */
348 selec = (1.0 - empty_frac) * hist_selec + empty_frac;
349 }
350 else
351 {
352 /* with any other operator, empty Op non-empty matches nothing */
353 selec = (1.0 - empty_frac) * hist_selec;
354 }
355 }
356
357 /* all range operators are strict */
358 selec *= (1.0 - null_frac);
359
360 /* result should be in range, but make sure... */
361 CLAMP_PROBABILITY(selec);
362
363 return selec;
364}
float float4
Definition: c.h:586
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define RangeIsEmpty(r)
Definition: rangetypes.h:55
static double calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
static double default_range_selectivity(Oid operator)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57

References ATTSTATSSLOT_NUMBERS, calc_hist_selectivity(), CLAMP_PROBABILITY, default_range_selectivity(), elog, ERROR, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, AttStatsSlot::nnumbers, AttStatsSlot::numbers, RangeIsEmpty, and VariableStatData::statsTuple.

Referenced by rangesel().

◆ default_range_selectivity()

static double default_range_selectivity ( Oid  operator)
static

Definition at line 67 of file rangetypes_selfuncs.c.

68{
69 switch (operator)
70 {
71 case OID_RANGE_OVERLAP_OP:
72 return 0.01;
73
74 case OID_RANGE_CONTAINS_OP:
75 case OID_RANGE_CONTAINED_OP:
76 return 0.005;
77
78 case OID_RANGE_CONTAINS_ELEM_OP:
79 case OID_RANGE_ELEM_CONTAINED_OP:
80
81 /*
82 * "range @> elem" is more or less identical to a scalar
83 * inequality "A >= b AND A <= c".
84 */
86
87 case OID_RANGE_LESS_OP:
88 case OID_RANGE_LESS_EQUAL_OP:
89 case OID_RANGE_GREATER_OP:
90 case OID_RANGE_GREATER_EQUAL_OP:
91 case OID_RANGE_LEFT_OP:
92 case OID_RANGE_RIGHT_OP:
93 case OID_RANGE_OVERLAPS_LEFT_OP:
94 case OID_RANGE_OVERLAPS_RIGHT_OP:
95 /* these are similar to regular scalar inequalities */
96 return DEFAULT_INEQ_SEL;
97
98 default:
99 /* all range operators should be handled above, but just in case */
100 return 0.01;
101 }
102}
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37

References DEFAULT_INEQ_SEL, and DEFAULT_RANGE_INEQ_SEL.

Referenced by calc_rangesel(), and rangesel().

◆ get_distance()

static float8 get_distance ( TypeCacheEntry typcache,
const RangeBound bound1,
const RangeBound bound2 
)
static

Definition at line 807 of file rangetypes_selfuncs.c.

808{
809 bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
810
811 if (!bound1->infinite && !bound2->infinite)
812 {
813 /*
814 * Neither bound is infinite, use subdiff function or return default
815 * value of 1.0 if no subdiff is available.
816 */
817 if (has_subdiff)
818 {
819 float8 res;
820
822 typcache->rng_collation,
823 bound2->val,
824 bound1->val));
825 /* Reject possible NaN result, also negative result */
826 if (isnan(res) || res < 0.0)
827 return 1.0;
828 else
829 return res;
830 }
831 else
832 return 1.0;
833 }
834 else if (bound1->infinite && bound2->infinite)
835 {
836 /* Both bounds are infinite */
837 if (bound1->lower == bound2->lower)
838 return 0.0;
839 else
840 return get_float8_infinity();
841 }
842 else
843 {
844 /* One bound is infinite, the other is not */
845 return get_float8_infinity();
846 }
847}
static float8 get_float8_infinity(void)
Definition: float.h:94
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
bool lower
Definition: rangetypes.h:66
Datum val
Definition: rangetypes.h:63
Oid rng_collation
Definition: typcache.h:100

References DatumGetFloat8(), FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), RangeBound::infinite, RangeBound::lower, OidIsValid, res, TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, and RangeBound::val.

Referenced by calc_hist_selectivity_contained(), and calc_hist_selectivity_contains().

◆ get_len_position()

static double get_len_position ( double  value,
double  hist1,
double  hist2 
)
static

Definition at line 762 of file rangetypes_selfuncs.c.

763{
764 if (!isinf(hist1) && !isinf(hist2))
765 {
766 /*
767 * Both bounds are finite. The value should be finite too, because it
768 * lies somewhere between the bounds. If it doesn't, just return
769 * something.
770 */
771 if (isinf(value))
772 return 0.5;
773
774 return 1.0 - (hist2 - value) / (hist2 - hist1);
775 }
776 else if (isinf(hist1) && !isinf(hist2))
777 {
778 /*
779 * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
780 * indicate the value is infinitely far from the lower bound.
781 */
782 return 1.0;
783 }
784 else if (isinf(hist1) && isinf(hist2))
785 {
786 /* same as above, but in reverse */
787 return 0.0;
788 }
789 else
790 {
791 /*
792 * If both bin boundaries are infinite, they should be equal to each
793 * other, and the value should also be infinite and equal to both
794 * bounds. (But don't Assert that, to avoid crashing unnecessarily if
795 * the caller messes up)
796 *
797 * Assume the value to lie in the middle of the infinite bounds.
798 */
799 return 0.5;
800 }
801}
static struct @162 value

References value.

Referenced by calc_length_hist_frac().

◆ get_position()

static float8 get_position ( TypeCacheEntry typcache,
const RangeBound value,
const RangeBound hist1,
const RangeBound hist2 
)
static

Definition at line 683 of file rangetypes_selfuncs.c.

685{
686 bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
687 float8 position;
688
689 if (!hist1->infinite && !hist2->infinite)
690 {
691 float8 bin_width;
692
693 /*
694 * Both bounds are finite. Assuming the subtype's comparison function
695 * works sanely, the value must be finite, too, because it lies
696 * somewhere between the bounds. If it doesn't, arbitrarily return
697 * 0.5.
698 */
699 if (value->infinite)
700 return 0.5;
701
702 /* Can't interpolate without subdiff function */
703 if (!has_subdiff)
704 return 0.5;
705
706 /* Calculate relative position using subdiff function. */
708 typcache->rng_collation,
709 hist2->val,
710 hist1->val));
711 if (isnan(bin_width) || bin_width <= 0.0)
712 return 0.5; /* punt for NaN or zero-width bin */
713
715 typcache->rng_collation,
716 value->val,
717 hist1->val))
718 / bin_width;
719
720 if (isnan(position))
721 return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */
722
723 /* Relative position must be in [0,1] range */
724 position = Max(position, 0.0);
725 position = Min(position, 1.0);
726 return position;
727 }
728 else if (hist1->infinite && !hist2->infinite)
729 {
730 /*
731 * Lower bin boundary is -infinite, upper is finite. If the value is
732 * -infinite, return 0.0 to indicate it's equal to the lower bound.
733 * Otherwise return 1.0 to indicate it's infinitely far from the lower
734 * bound.
735 */
736 return ((value->infinite && value->lower) ? 0.0 : 1.0);
737 }
738 else if (!hist1->infinite && hist2->infinite)
739 {
740 /* same as above, but in reverse */
741 return ((value->infinite && !value->lower) ? 1.0 : 0.0);
742 }
743 else
744 {
745 /*
746 * If both bin boundaries are infinite, they should be equal to each
747 * other, and the value should also be infinite and equal to both
748 * bounds. (But don't Assert that, to avoid crashing if a user creates
749 * a datatype with a broken comparison function).
750 *
751 * Assume the value to lie in the middle of the infinite bounds.
752 */
753 return 0.5;
754 }
755}

References DatumGetFloat8(), FmgrInfo::fn_oid, FunctionCall2Coll(), RangeBound::infinite, Max, Min, OidIsValid, TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, RangeBound::val, and value.

Referenced by calc_hist_selectivity_contained(), calc_hist_selectivity_contains(), and calc_hist_selectivity_scalar().

◆ length_hist_bsearch()

static int length_hist_bsearch ( Datum length_hist_values,
int  length_hist_nvalues,
double  value,
bool  equal 
)
static

Definition at line 657 of file rangetypes_selfuncs.c.

659{
660 int lower = -1,
661 upper = length_hist_nvalues - 1,
662 middle;
663
664 while (lower < upper)
665 {
666 double middleval;
667
668 middle = (lower + upper + 1) / 2;
669
670 middleval = DatumGetFloat8(length_hist_values[middle]);
671 if (middleval < value || (equal && middleval <= value))
672 lower = middle;
673 else
674 upper = middle - 1;
675 }
676 return lower;
677}

References DatumGetFloat8(), equal(), lower(), upper(), and value.

Referenced by calc_length_hist_frac().

◆ rangesel()

Datum rangesel ( PG_FUNCTION_ARGS  )

Definition at line 108 of file rangetypes_selfuncs.c.

109{
111 Oid operator = PG_GETARG_OID(1);
113 int varRelid = PG_GETARG_INT32(3);
114 VariableStatData vardata;
115 Node *other;
116 bool varonleft;
117 Selectivity selec;
118 TypeCacheEntry *typcache = NULL;
119 RangeType *constrange = NULL;
120
121 /*
122 * If expression is not (variable op something) or (something op
123 * variable), then punt and return a default estimate.
124 */
125 if (!get_restriction_variable(root, args, varRelid,
126 &vardata, &other, &varonleft))
128
129 /*
130 * Can't do anything useful if the something is not a constant, either.
131 */
132 if (!IsA(other, Const))
133 {
134 ReleaseVariableStats(vardata);
136 }
137
138 /*
139 * All the range operators are strict, so we can cope with a NULL constant
140 * right away.
141 */
142 if (((Const *) other)->constisnull)
143 {
144 ReleaseVariableStats(vardata);
145 PG_RETURN_FLOAT8(0.0);
146 }
147
148 /*
149 * If var is on the right, commute the operator, so that we can assume the
150 * var is on the left in what follows.
151 */
152 if (!varonleft)
153 {
154 /* we have other Op var, commute to make var Op other */
155 operator = get_commutator(operator);
156 if (!operator)
157 {
158 /* Use default selectivity (should we raise an error instead?) */
159 ReleaseVariableStats(vardata);
161 }
162 }
163
164 /*
165 * OK, there's a Var and a Const we're dealing with here. We need the
166 * Const to be of same range type as the column, else we can't do anything
167 * useful. (Such cases will likely fail at runtime, but here we'd rather
168 * just return a default estimate.)
169 *
170 * If the operator is "range @> element", the constant should be of the
171 * element type of the range column. Convert it to a range that includes
172 * only that single point, so that we don't need special handling for that
173 * in what follows.
174 */
175 if (operator == OID_RANGE_CONTAINS_ELEM_OP)
176 {
177 typcache = range_get_typcache(fcinfo, vardata.vartype);
178
179 if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
180 {
182 upper;
183
184 lower.inclusive = true;
185 lower.val = ((Const *) other)->constvalue;
186 lower.infinite = false;
187 lower.lower = true;
188 upper.inclusive = true;
189 upper.val = ((Const *) other)->constvalue;
190 upper.infinite = false;
191 upper.lower = false;
192 constrange = range_serialize(typcache, &lower, &upper, false, NULL);
193 }
194 }
195 else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
196 {
197 /*
198 * Here, the Var is the elem, not the range. In typical cases
199 * elem_contained_by_range_support will have simplified this case, so
200 * that we won't get here. If we do get here we'll fall back on a
201 * default estimate.
202 */
203 }
204 else if (((Const *) other)->consttype == vardata.vartype)
205 {
206 /* Both sides are the same range type */
207 typcache = range_get_typcache(fcinfo, vardata.vartype);
208
209 constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
210 }
211
212 /*
213 * If we got a valid constant on one side of the operator, proceed to
214 * estimate using statistics. Otherwise punt and return a default constant
215 * estimate. Note that calc_rangesel need not handle
216 * OID_RANGE_ELEM_CONTAINED_OP.
217 */
218 if (constrange)
219 selec = calc_rangesel(typcache, &vardata, constrange, operator);
220 else
221 selec = default_range_selectivity(operator);
222
223 ReleaseVariableStats(vardata);
224
225 CLAMP_PROBABILITY(selec);
226
227 PG_RETURN_FLOAT8((float8) selec);
228}
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1509
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
unsigned int Oid
Definition: postgres_ext.h:32
tree ctl root
Definition: radixtree.h:1857
RangeType * range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty, struct Node *escontext)
Definition: rangetypes.c:1727
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1703
static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4904
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
Definition: pg_list.h:54
Definition: nodes.h:129
struct TypeCacheEntry * rngelemtype
Definition: typcache.h:98

References generate_unaccent_rules::args, calc_rangesel(), CLAMP_PROBABILITY, DatumGetRangeTypeP(), default_range_selectivity(), get_commutator(), get_restriction_variable(), IsA, lower(), PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, range_get_typcache(), range_serialize(), ReleaseVariableStats, TypeCacheEntry::rngelemtype, root, TypeCacheEntry::type_id, upper(), and VariableStatData::vartype.

◆ rbound_bsearch()

static int rbound_bsearch ( TypeCacheEntry typcache,
const RangeBound value,
const RangeBound hist,
int  hist_length,
bool  equal 
)
static

Definition at line 628 of file rangetypes_selfuncs.c.

630{
631 int lower = -1,
632 upper = hist_length - 1,
633 cmp,
634 middle;
635
636 while (lower < upper)
637 {
638 middle = (lower + upper + 1) / 2;
639 cmp = range_cmp_bounds(typcache, &hist[middle], value);
640
641 if (cmp < 0 || (equal && cmp == 0))
642 lower = middle;
643 else
644 upper = middle - 1;
645 }
646 return lower;
647}
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743

References cmp(), equal(), lower(), range_cmp_bounds(), upper(), and value.

Referenced by calc_hist_selectivity_contained(), calc_hist_selectivity_contains(), and calc_hist_selectivity_scalar().