PostgreSQL Source Code  git master
rangetypes_selfuncs.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
#include "utils/selfuncs.h"
#include "utils/typcache.h"
Include dependency graph for rangetypes_selfuncs.c:

Go to the source code of this file.

Functions

static double calc_rangesel (TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
 
static double default_range_selectivity (Oid operator)
 
static double calc_hist_selectivity (TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
 
static double calc_hist_selectivity_scalar (TypeCacheEntry *typcache, const RangeBound *constbound, const RangeBound *hist, int hist_nvalues, bool equal)
 
static int rbound_bsearch (TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist, int hist_length, bool equal)
 
static float8 get_position (TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, const RangeBound *hist2)
 
static float8 get_len_position (double value, double hist1, double hist2)
 
static float8 get_distance (TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
 
static int length_hist_bsearch (Datum *length_hist_values, int length_hist_nvalues, double value, bool equal)
 
static double calc_length_hist_frac (Datum *length_hist_values, int length_hist_nvalues, double length1, double length2, bool equal)
 
static double calc_hist_selectivity_contained (TypeCacheEntry *typcache, const RangeBound *lower, RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
 
static double calc_hist_selectivity_contains (TypeCacheEntry *typcache, const RangeBound *lower, const RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
 
Datum rangesel (PG_FUNCTION_ARGS)
 

Function Documentation

◆ calc_hist_selectivity()

static double calc_hist_selectivity ( TypeCacheEntry typcache,
VariableStatData vardata,
const RangeType constval,
Oid  operator 
)
static

Definition at line 373 of file rangetypes_selfuncs.c.

375 {
376  AttStatsSlot hslot;
377  AttStatsSlot lslot;
378  int nhist;
379  RangeBound *hist_lower;
380  RangeBound *hist_upper;
381  int i;
382  RangeBound const_lower;
383  RangeBound const_upper;
384  bool empty;
385  double hist_selec;
386 
387  /* Can't use the histogram with insecure range support functions */
388  if (!statistic_proc_security_check(vardata,
389  typcache->rng_cmp_proc_finfo.fn_oid))
390  return -1;
391  if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
393  typcache->rng_subdiff_finfo.fn_oid))
394  return -1;
395 
396  /* Try to get histogram of ranges */
397  if (!(HeapTupleIsValid(vardata->statsTuple) &&
398  get_attstatsslot(&hslot, vardata->statsTuple,
399  STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
401  return -1.0;
402 
403  /* check that it's a histogram, not just a dummy entry */
404  if (hslot.nvalues < 2)
405  {
406  free_attstatsslot(&hslot);
407  return -1.0;
408  }
409 
410  /*
411  * Convert histogram of ranges into histograms of its lower and upper
412  * bounds.
413  */
414  nhist = hslot.nvalues;
415  hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
416  hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
417  for (i = 0; i < nhist; i++)
418  {
419  range_deserialize(typcache, DatumGetRangeTypeP(hslot.values[i]),
420  &hist_lower[i], &hist_upper[i], &empty);
421  /* The histogram should not contain any empty ranges */
422  if (empty)
423  elog(ERROR, "bounds histogram contains an empty range");
424  }
425 
426  /* @> and @< also need a histogram of range lengths */
427  if (operator == OID_RANGE_CONTAINS_OP ||
428  operator == OID_RANGE_CONTAINED_OP)
429  {
430  if (!(HeapTupleIsValid(vardata->statsTuple) &&
431  get_attstatsslot(&lslot, vardata->statsTuple,
432  STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
433  InvalidOid,
435  {
436  free_attstatsslot(&hslot);
437  return -1.0;
438  }
439 
440  /* check that it's a histogram, not just a dummy entry */
441  if (lslot.nvalues < 2)
442  {
443  free_attstatsslot(&lslot);
444  free_attstatsslot(&hslot);
445  return -1.0;
446  }
447  }
448  else
449  memset(&lslot, 0, sizeof(lslot));
450 
451  /* Extract the bounds of the constant value. */
452  range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
453  Assert(!empty);
454 
455  /*
456  * Calculate selectivity comparing the lower or upper bound of the
457  * constant with the histogram of lower or upper bounds.
458  */
459  switch (operator)
460  {
461  case OID_RANGE_LESS_OP:
462 
463  /*
464  * The regular b-tree comparison operators (<, <=, >, >=) compare
465  * the lower bounds first, and the upper bounds for values with
466  * equal lower bounds. Estimate that by comparing the lower bounds
467  * only. This gives a fairly accurate estimate assuming there
468  * aren't many rows with a lower bound equal to the constant's
469  * lower bound.
470  */
471  hist_selec =
472  calc_hist_selectivity_scalar(typcache, &const_lower,
473  hist_lower, nhist, false);
474  break;
475 
476  case OID_RANGE_LESS_EQUAL_OP:
477  hist_selec =
478  calc_hist_selectivity_scalar(typcache, &const_lower,
479  hist_lower, nhist, true);
480  break;
481 
482  case OID_RANGE_GREATER_OP:
483  hist_selec =
484  1 - calc_hist_selectivity_scalar(typcache, &const_lower,
485  hist_lower, nhist, false);
486  break;
487 
488  case OID_RANGE_GREATER_EQUAL_OP:
489  hist_selec =
490  1 - calc_hist_selectivity_scalar(typcache, &const_lower,
491  hist_lower, nhist, true);
492  break;
493 
494  case OID_RANGE_LEFT_OP:
495  /* var << const when upper(var) < lower(const) */
496  hist_selec =
497  calc_hist_selectivity_scalar(typcache, &const_lower,
498  hist_upper, nhist, false);
499  break;
500 
501  case OID_RANGE_RIGHT_OP:
502  /* var >> const when lower(var) > upper(const) */
503  hist_selec =
504  1 - calc_hist_selectivity_scalar(typcache, &const_upper,
505  hist_lower, nhist, true);
506  break;
507 
508  case OID_RANGE_OVERLAPS_RIGHT_OP:
509  /* compare lower bounds */
510  hist_selec =
511  1 - calc_hist_selectivity_scalar(typcache, &const_lower,
512  hist_lower, nhist, false);
513  break;
514 
515  case OID_RANGE_OVERLAPS_LEFT_OP:
516  /* compare upper bounds */
517  hist_selec =
518  calc_hist_selectivity_scalar(typcache, &const_upper,
519  hist_upper, nhist, true);
520  break;
521 
522  case OID_RANGE_OVERLAP_OP:
523  case OID_RANGE_CONTAINS_ELEM_OP:
524 
525  /*
526  * A && B <=> NOT (A << B OR A >> B).
527  *
528  * Since A << B and A >> B are mutually exclusive events we can
529  * sum their probabilities to find probability of (A << B OR A >>
530  * B).
531  *
532  * "range @> elem" is equivalent to "range && [elem,elem]". The
533  * caller already constructed the singular range from the element
534  * constant, so just treat it the same as &&.
535  */
536  hist_selec =
537  calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
538  nhist, false);
539  hist_selec +=
540  (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
541  nhist, true));
542  hist_selec = 1.0 - hist_selec;
543  break;
544 
545  case OID_RANGE_CONTAINS_OP:
546  hist_selec =
547  calc_hist_selectivity_contains(typcache, &const_lower,
548  &const_upper, hist_lower, nhist,
549  lslot.values, lslot.nvalues);
550  break;
551 
552  case OID_RANGE_CONTAINED_OP:
553  if (const_lower.infinite)
554  {
555  /*
556  * Lower bound no longer matters. Just estimate the fraction
557  * with an upper bound <= const upper bound
558  */
559  hist_selec =
560  calc_hist_selectivity_scalar(typcache, &const_upper,
561  hist_upper, nhist, true);
562  }
563  else if (const_upper.infinite)
564  {
565  hist_selec =
566  1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
567  hist_lower, nhist, false);
568  }
569  else
570  {
571  hist_selec =
572  calc_hist_selectivity_contained(typcache, &const_lower,
573  &const_upper, hist_lower, nhist,
574  lslot.values, lslot.nvalues);
575  }
576  break;
577 
578  default:
579  elog(ERROR, "unknown range operator %u", operator);
580  hist_selec = -1.0; /* keep compiler quiet */
581  break;
582  }
583 
584  free_attstatsslot(&lslot);
585  free_attstatsslot(&hslot);
586 
587  return hist_selec;
588 }
#define Assert(condition)
Definition: c.h:858
#define OidIsValid(objectId)
Definition: c.h:775
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
int i
Definition: isn.c:73
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3344
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3234
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
void * palloc(Size size)
Definition: mcxt.c:1316
#define InvalidOid
Definition: postgres_ext.h:36
void range_deserialize(TypeCacheEntry *typcache, const RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty)
Definition: rangetypes.c:1856
static RangeType * DatumGetRangeTypeP(Datum X)
Definition: rangetypes.h:73
static double calc_hist_selectivity_contained(TypeCacheEntry *typcache, const RangeBound *lower, RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound, const RangeBound *hist, int hist_nvalues, bool equal)
static double calc_hist_selectivity_contains(TypeCacheEntry *typcache, const RangeBound *lower, const RangeBound *upper, const RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues)
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5735
Datum * values
Definition: lsyscache.h:53
Oid fn_oid
Definition: fmgr.h:59
bool infinite
Definition: rangetypes.h:64
FmgrInfo rng_cmp_proc_finfo
Definition: typcache.h:101
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:103
HeapTuple statsTuple
Definition: selfuncs.h:89

References Assert, ATTSTATSSLOT_VALUES, calc_hist_selectivity_contained(), calc_hist_selectivity_contains(), calc_hist_selectivity_scalar(), DatumGetRangeTypeP(), elog, ERROR, FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), HeapTupleIsValid, i, RangeBound::infinite, InvalidOid, AttStatsSlot::nvalues, OidIsValid, palloc(), range_deserialize(), TypeCacheEntry::rng_cmp_proc_finfo, TypeCacheEntry::rng_subdiff_finfo, statistic_proc_security_check(), VariableStatData::statsTuple, and AttStatsSlot::values.

Referenced by calc_rangesel().

◆ calc_hist_selectivity_contained()

static double calc_hist_selectivity_contained ( TypeCacheEntry typcache,
const RangeBound lower,
RangeBound upper,
const RangeBound hist_lower,
int  hist_nvalues,
Datum length_hist_values,
int  length_hist_nvalues 
)
static

Definition at line 1018 of file rangetypes_selfuncs.c.

1022 {
1023  int i,
1024  upper_index;
1025  float8 prev_dist;
1026  double bin_width;
1027  double upper_bin_width;
1028  double sum_frac;
1029 
1030  /*
1031  * Begin by finding the bin containing the upper bound, in the lower bound
1032  * histogram. Any range with a lower bound > constant upper bound can't
1033  * match, ie. there are no matches in bins greater than upper_index.
1034  */
1035  upper->inclusive = !upper->inclusive;
1036  upper->lower = true;
1037  upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
1038  false);
1039 
1040  /*
1041  * If the upper bound value is below the histogram's lower limit, there
1042  * are no matches.
1043  */
1044  if (upper_index < 0)
1045  return 0.0;
1046 
1047  /*
1048  * If the upper bound value is at or beyond the histogram's upper limit,
1049  * start our loop at the last actual bin, as though the upper bound were
1050  * within that bin; get_position will clamp its result to 1.0 anyway.
1051  * (This corresponds to assuming that the data population above the
1052  * histogram's upper limit is empty, exactly like what we just assumed for
1053  * the lower limit.)
1054  */
1055  upper_index = Min(upper_index, hist_nvalues - 2);
1056 
1057  /*
1058  * Calculate upper_bin_width, ie. the fraction of the (upper_index,
1059  * upper_index + 1) bin which is greater than upper bound of query range
1060  * using linear interpolation of subdiff function.
1061  */
1062  upper_bin_width = get_position(typcache, upper,
1063  &hist_lower[upper_index],
1064  &hist_lower[upper_index + 1]);
1065 
1066  /*
1067  * In the loop, dist and prev_dist are the distance of the "current" bin's
1068  * lower and upper bounds from the constant upper bound.
1069  *
1070  * bin_width represents the width of the current bin. Normally it is 1.0,
1071  * meaning a full width bin, but can be less in the corner cases: start
1072  * and end of the loop. We start with bin_width = upper_bin_width, because
1073  * we begin at the bin containing the upper bound.
1074  */
1075  prev_dist = 0.0;
1076  bin_width = upper_bin_width;
1077 
1078  sum_frac = 0.0;
1079  for (i = upper_index; i >= 0; i--)
1080  {
1081  double dist;
1082  double length_hist_frac;
1083  bool final_bin = false;
1084 
1085  /*
1086  * dist -- distance from upper bound of query range to lower bound of
1087  * the current bin in the lower bound histogram. Or to the lower bound
1088  * of the constant range, if this is the final bin, containing the
1089  * constant lower bound.
1090  */
1091  if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
1092  {
1093  dist = get_distance(typcache, lower, upper);
1094 
1095  /*
1096  * Subtract from bin_width the portion of this bin that we want to
1097  * ignore.
1098  */
1099  bin_width -= get_position(typcache, lower, &hist_lower[i],
1100  &hist_lower[i + 1]);
1101  if (bin_width < 0.0)
1102  bin_width = 0.0;
1103  final_bin = true;
1104  }
1105  else
1106  dist = get_distance(typcache, &hist_lower[i], upper);
1107 
1108  /*
1109  * Estimate the fraction of tuples in this bin that are narrow enough
1110  * to not exceed the distance to the upper bound of the query range.
1111  */
1112  length_hist_frac = calc_length_hist_frac(length_hist_values,
1113  length_hist_nvalues,
1114  prev_dist, dist, true);
1115 
1116  /*
1117  * Add the fraction of tuples in this bin, with a suitable length, to
1118  * the total.
1119  */
1120  sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
1121 
1122  if (final_bin)
1123  break;
1124 
1125  bin_width = 1.0;
1126  prev_dist = dist;
1127  }
1128 
1129  return sum_frac;
1130 }
#define Min(x, y)
Definition: c.h:1004
double float8
Definition: c.h:630
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:2016
static double calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues, double length1, double length2, bool equal)
static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1, const RangeBound *hist2)
static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist, int hist_length, bool equal)
static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)

References calc_length_hist_frac(), get_distance(), get_position(), i, lower(), Min, range_cmp_bounds(), rbound_bsearch(), and upper().

Referenced by calc_hist_selectivity().

◆ calc_hist_selectivity_contains()

static double calc_hist_selectivity_contains ( TypeCacheEntry typcache,
const RangeBound lower,
const RangeBound upper,
const RangeBound hist_lower,
int  hist_nvalues,
Datum length_hist_values,
int  length_hist_nvalues 
)
static

Definition at line 1139 of file rangetypes_selfuncs.c.

1143 {
1144  int i,
1145  lower_index;
1146  double bin_width,
1147  lower_bin_width;
1148  double sum_frac;
1149  float8 prev_dist;
1150 
1151  /* Find the bin containing the lower bound of query range. */
1152  lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
1153  true);
1154 
1155  /*
1156  * If the lower bound value is below the histogram's lower limit, there
1157  * are no matches.
1158  */
1159  if (lower_index < 0)
1160  return 0.0;
1161 
1162  /*
1163  * If the lower bound value is at or beyond the histogram's upper limit,
1164  * start our loop at the last actual bin, as though the upper bound were
1165  * within that bin; get_position will clamp its result to 1.0 anyway.
1166  * (This corresponds to assuming that the data population above the
1167  * histogram's upper limit is empty, exactly like what we just assumed for
1168  * the lower limit.)
1169  */
1170  lower_index = Min(lower_index, hist_nvalues - 2);
1171 
1172  /*
1173  * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
1174  * lower_index + 1) bin which is greater than lower bound of query range
1175  * using linear interpolation of subdiff function.
1176  */
1177  lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
1178  &hist_lower[lower_index + 1]);
1179 
1180  /*
1181  * Loop through all the lower bound bins, smaller than the query lower
1182  * bound. In the loop, dist and prev_dist are the distance of the
1183  * "current" bin's lower and upper bounds from the constant upper bound.
1184  * We begin from query lower bound, and walk backwards, so the first bin's
1185  * upper bound is the query lower bound, and its distance to the query
1186  * upper bound is the length of the query range.
1187  *
1188  * bin_width represents the width of the current bin. Normally it is 1.0,
1189  * meaning a full width bin, except for the first bin, which is only
1190  * counted up to the constant lower bound.
1191  */
1192  prev_dist = get_distance(typcache, lower, upper);
1193  sum_frac = 0.0;
1194  bin_width = lower_bin_width;
1195  for (i = lower_index; i >= 0; i--)
1196  {
1197  float8 dist;
1198  double length_hist_frac;
1199 
1200  /*
1201  * dist -- distance from upper bound of query range to current value
1202  * of lower bound histogram or lower bound of query range (if we've
1203  * reach it).
1204  */
1205  dist = get_distance(typcache, &hist_lower[i], upper);
1206 
1207  /*
1208  * Get average fraction of length histogram which covers intervals
1209  * longer than (or equal to) distance to upper bound of query range.
1210  */
1211  length_hist_frac =
1212  1.0 - calc_length_hist_frac(length_hist_values,
1213  length_hist_nvalues,
1214  prev_dist, dist, false);
1215 
1216  sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
1217 
1218  bin_width = 1.0;
1219  prev_dist = dist;
1220  }
1221 
1222  return sum_frac;
1223 }

References calc_length_hist_frac(), get_distance(), get_position(), i, lower(), Min, rbound_bsearch(), and upper().

Referenced by calc_hist_selectivity().

◆ calc_hist_selectivity_scalar()

static double calc_hist_selectivity_scalar ( TypeCacheEntry typcache,
const RangeBound constbound,
const RangeBound hist,
int  hist_nvalues,
bool  equal 
)
static

Definition at line 596 of file rangetypes_selfuncs.c.

598 {
599  Selectivity selec;
600  int index;
601 
602  /*
603  * Find the histogram bin the given constant falls into. Estimate
604  * selectivity as the number of preceding whole bins.
605  */
606  index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
607  selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
608 
609  /* Adjust using linear interpolation within the bin */
610  if (index >= 0 && index < hist_nvalues - 1)
611  selec += get_position(typcache, constbound, &hist[index],
612  &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
613 
614  return selec;
615 }
#define Max(x, y)
Definition: c.h:998
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
double Selectivity
Definition: nodes.h:250
Definition: type.h:95

References equal(), get_position(), Max, and rbound_bsearch().

Referenced by calc_hist_selectivity().

◆ calc_length_hist_frac()

static double calc_length_hist_frac ( Datum length_hist_values,
int  length_hist_nvalues,
double  length1,
double  length2,
bool  equal 
)
static

Definition at line 855 of file rangetypes_selfuncs.c.

857 {
858  double frac;
859  double A,
860  B,
861  PA,
862  PB;
863  double pos;
864  int i;
865  double area;
866 
867  Assert(length2 >= length1);
868 
869  if (length2 < 0.0)
870  return 0.0; /* shouldn't happen, but doesn't hurt to check */
871 
872  /* All lengths in the table are <= infinite. */
873  if (isinf(length2) && equal)
874  return 1.0;
875 
876  /*----------
877  * The average of a function between A and B can be calculated by the
878  * formula:
879  *
880  * B
881  * 1 /
882  * ------- | P(x)dx
883  * B - A /
884  * A
885  *
886  * The geometrical interpretation of the integral is the area under the
887  * graph of P(x). P(x) is defined by the length histogram. We calculate
888  * the area in a piecewise fashion, iterating through the length histogram
889  * bins. Each bin is a trapezoid:
890  *
891  * P(x2)
892  * /|
893  * / |
894  * P(x1)/ |
895  * | |
896  * | |
897  * ---+---+--
898  * x1 x2
899  *
900  * where x1 and x2 are the boundaries of the current histogram, and P(x1)
901  * and P(x1) are the cumulative fraction of tuples at the boundaries.
902  *
903  * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
904  *
905  * The first bin contains the lower bound passed by the caller, so we
906  * use linear interpolation between the previous and next histogram bin
907  * boundary to calculate P(x1). Likewise for the last bin: we use linear
908  * interpolation to calculate P(x2). For the bins in between, x1 and x2
909  * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
910  * P(x1) = (bin index) / (number of bins)
911  * P(x2) = (bin index + 1 / (number of bins)
912  */
913 
914  /* First bin, the one that contains lower bound */
915  i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
916  if (i >= length_hist_nvalues - 1)
917  return 1.0;
918 
919  if (i < 0)
920  {
921  i = 0;
922  pos = 0.0;
923  }
924  else
925  {
926  /* interpolate length1's position in the bin */
927  pos = get_len_position(length1,
928  DatumGetFloat8(length_hist_values[i]),
929  DatumGetFloat8(length_hist_values[i + 1]));
930  }
931  PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
932  B = length1;
933 
934  /*
935  * In the degenerate case that length1 == length2, simply return
936  * P(length1). This is not merely an optimization: if length1 == length2,
937  * we'd divide by zero later on.
938  */
939  if (length2 == length1)
940  return PB;
941 
942  /*
943  * Loop through all the bins, until we hit the last bin, the one that
944  * contains the upper bound. (if lower and upper bounds are in the same
945  * bin, this falls out immediately)
946  */
947  area = 0.0;
948  for (; i < length_hist_nvalues - 1; i++)
949  {
950  double bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
951 
952  /* check if we've reached the last bin */
953  if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
954  break;
955 
956  /* the upper bound of previous bin is the lower bound of this bin */
957  A = B;
958  PA = PB;
959 
960  B = bin_upper;
961  PB = (double) i / (double) (length_hist_nvalues - 1);
962 
963  /*
964  * Add the area of this trapezoid to the total. The point of the
965  * if-check is to avoid NaN, in the corner case that PA == PB == 0,
966  * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
967  * 0) is zero, regardless of the width (B - A).
968  */
969  if (PA > 0 || PB > 0)
970  area += 0.5 * (PB + PA) * (B - A);
971  }
972 
973  /* Last bin */
974  A = B;
975  PA = PB;
976 
977  B = length2; /* last bin ends at the query upper bound */
978  if (i >= length_hist_nvalues - 1)
979  pos = 0.0;
980  else
981  {
982  if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
983  pos = 0.0;
984  else
985  pos = get_len_position(length2, DatumGetFloat8(length_hist_values[i]), DatumGetFloat8(length_hist_values[i + 1]));
986  }
987  PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
988 
989  if (PA > 0 || PB > 0)
990  area += 0.5 * (PB + PA) * (B - A);
991 
992  /*
993  * Ok, we have calculated the area, ie. the integral. Divide by width to
994  * get the requested average.
995  *
996  * Avoid NaN arising from infinite / infinite. This happens at least if
997  * length2 is infinite. It's not clear what the correct value would be in
998  * that case, so 0.5 seems as good as any value.
999  */
1000  if (isinf(area) && isinf(length2))
1001  frac = 0.5;
1002  else
1003  frac = area / (length2 - length1);
1004 
1005  return frac;
1006 }
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:494
static int length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues, double value, bool equal)
static float8 get_len_position(double value, double hist1, double hist2)

References Assert, DatumGetFloat8(), equal(), get_len_position(), i, and length_hist_bsearch().

Referenced by calc_hist_selectivity_contained(), and calc_hist_selectivity_contains().

◆ calc_rangesel()

static double calc_rangesel ( TypeCacheEntry typcache,
VariableStatData vardata,
const RangeType constval,
Oid  operator 
)
static

Definition at line 231 of file rangetypes_selfuncs.c.

233 {
234  double hist_selec;
235  double selec;
236  float4 empty_frac,
237  null_frac;
238 
239  /*
240  * First look up the fraction of NULLs and empty ranges from pg_statistic.
241  */
242  if (HeapTupleIsValid(vardata->statsTuple))
243  {
244  Form_pg_statistic stats;
245  AttStatsSlot sslot;
246 
247  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
248  null_frac = stats->stanullfrac;
249 
250  /* Try to get fraction of empty ranges */
251  if (get_attstatsslot(&sslot, vardata->statsTuple,
252  STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
253  InvalidOid,
255  {
256  if (sslot.nnumbers != 1)
257  elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */
258  empty_frac = sslot.numbers[0];
259  free_attstatsslot(&sslot);
260  }
261  else
262  {
263  /* No empty fraction statistic. Assume no empty ranges. */
264  empty_frac = 0.0;
265  }
266  }
267  else
268  {
269  /*
270  * No stats are available. Follow through the calculations below
271  * anyway, assuming no NULLs and no empty ranges. This still allows us
272  * to give a better-than-nothing estimate based on whether the
273  * constant is an empty range or not.
274  */
275  null_frac = 0.0;
276  empty_frac = 0.0;
277  }
278 
279  if (RangeIsEmpty(constval))
280  {
281  /*
282  * An empty range matches all ranges, all empty ranges, or nothing,
283  * depending on the operator
284  */
285  switch (operator)
286  {
287  /* these return false if either argument is empty */
288  case OID_RANGE_OVERLAP_OP:
289  case OID_RANGE_OVERLAPS_LEFT_OP:
290  case OID_RANGE_OVERLAPS_RIGHT_OP:
291  case OID_RANGE_LEFT_OP:
292  case OID_RANGE_RIGHT_OP:
293  /* nothing is less than an empty range */
294  case OID_RANGE_LESS_OP:
295  selec = 0.0;
296  break;
297 
298  /* only empty ranges can be contained by an empty range */
299  case OID_RANGE_CONTAINED_OP:
300  /* only empty ranges are <= an empty range */
301  case OID_RANGE_LESS_EQUAL_OP:
302  selec = empty_frac;
303  break;
304 
305  /* everything contains an empty range */
306  case OID_RANGE_CONTAINS_OP:
307  /* everything is >= an empty range */
308  case OID_RANGE_GREATER_EQUAL_OP:
309  selec = 1.0;
310  break;
311 
312  /* all non-empty ranges are > an empty range */
313  case OID_RANGE_GREATER_OP:
314  selec = 1.0 - empty_frac;
315  break;
316 
317  /* an element cannot be empty */
318  case OID_RANGE_CONTAINS_ELEM_OP:
319  default:
320  elog(ERROR, "unexpected operator %u", operator);
321  selec = 0.0; /* keep compiler quiet */
322  break;
323  }
324  }
325  else
326  {
327  /*
328  * Calculate selectivity using bound histograms. If that fails for
329  * some reason, e.g no histogram in pg_statistic, use the default
330  * constant estimate for the fraction of non-empty values. This is
331  * still somewhat better than just returning the default estimate,
332  * because this still takes into account the fraction of empty and
333  * NULL tuples, if we had statistics for them.
334  */
335  hist_selec = calc_hist_selectivity(typcache, vardata, constval,
336  operator);
337  if (hist_selec < 0.0)
338  hist_selec = default_range_selectivity(operator);
339 
340  /*
341  * Now merge the results for the empty ranges and histogram
342  * calculations, realizing that the histogram covers only the
343  * non-null, non-empty values.
344  */
345  if (operator == OID_RANGE_CONTAINED_OP)
346  {
347  /* empty is contained by anything non-empty */
348  selec = (1.0 - empty_frac) * hist_selec + empty_frac;
349  }
350  else
351  {
352  /* with any other operator, empty Op non-empty matches nothing */
353  selec = (1.0 - empty_frac) * hist_selec;
354  }
355  }
356 
357  /* all range operators are strict */
358  selec *= (1.0 - null_frac);
359 
360  /* result should be in range, but make sure... */
361  CLAMP_PROBABILITY(selec);
362 
363  return selec;
364 }
float float4
Definition: c.h:629
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
#define RangeIsEmpty(r)
Definition: rangetypes.h:55
static double calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
static double default_range_selectivity(Oid operator)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57

References ATTSTATSSLOT_NUMBERS, calc_hist_selectivity(), CLAMP_PROBABILITY, default_range_selectivity(), elog, ERROR, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, AttStatsSlot::nnumbers, AttStatsSlot::numbers, RangeIsEmpty, and VariableStatData::statsTuple.

Referenced by rangesel().

◆ default_range_selectivity()

static double default_range_selectivity ( Oid  operator)
static

Definition at line 67 of file rangetypes_selfuncs.c.

68 {
69  switch (operator)
70  {
71  case OID_RANGE_OVERLAP_OP:
72  return 0.01;
73 
74  case OID_RANGE_CONTAINS_OP:
75  case OID_RANGE_CONTAINED_OP:
76  return 0.005;
77 
78  case OID_RANGE_CONTAINS_ELEM_OP:
79  case OID_RANGE_ELEM_CONTAINED_OP:
80 
81  /*
82  * "range @> elem" is more or less identical to a scalar
83  * inequality "A >= b AND A <= c".
84  */
86 
87  case OID_RANGE_LESS_OP:
88  case OID_RANGE_LESS_EQUAL_OP:
89  case OID_RANGE_GREATER_OP:
90  case OID_RANGE_GREATER_EQUAL_OP:
91  case OID_RANGE_LEFT_OP:
92  case OID_RANGE_RIGHT_OP:
93  case OID_RANGE_OVERLAPS_LEFT_OP:
94  case OID_RANGE_OVERLAPS_RIGHT_OP:
95  /* these are similar to regular scalar inequalities */
96  return DEFAULT_INEQ_SEL;
97 
98  default:
99  /* all range operators should be handled above, but just in case */
100  return 0.01;
101  }
102 }
#define DEFAULT_RANGE_INEQ_SEL
Definition: selfuncs.h:40
#define DEFAULT_INEQ_SEL
Definition: selfuncs.h:37

References DEFAULT_INEQ_SEL, and DEFAULT_RANGE_INEQ_SEL.

Referenced by calc_rangesel(), and rangesel().

◆ get_distance()

static float8 get_distance ( TypeCacheEntry typcache,
const RangeBound bound1,
const RangeBound bound2 
)
static

Definition at line 807 of file rangetypes_selfuncs.c.

808 {
809  bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
810 
811  if (!bound1->infinite && !bound2->infinite)
812  {
813  /*
814  * Neither bound is infinite, use subdiff function or return default
815  * value of 1.0 if no subdiff is available.
816  */
817  if (has_subdiff)
818  {
819  float8 res;
820 
822  typcache->rng_collation,
823  bound2->val,
824  bound1->val));
825  /* Reject possible NaN result, also negative result */
826  if (isnan(res) || res < 0.0)
827  return 1.0;
828  else
829  return res;
830  }
831  else
832  return 1.0;
833  }
834  else if (bound1->infinite && bound2->infinite)
835  {
836  /* Both bounds are infinite */
837  if (bound1->lower == bound2->lower)
838  return 0.0;
839  else
840  return get_float8_infinity();
841  }
842  else
843  {
844  /* One bound is infinite, the other is not */
845  return get_float8_infinity();
846  }
847 }
static float8 get_float8_infinity(void)
Definition: float.h:94
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
bool lower
Definition: rangetypes.h:66
Datum val
Definition: rangetypes.h:63
Oid rng_collation
Definition: typcache.h:100

References DatumGetFloat8(), FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), RangeBound::infinite, RangeBound::lower, OidIsValid, res, TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, and RangeBound::val.

Referenced by calc_hist_selectivity_contained(), and calc_hist_selectivity_contains().

◆ get_len_position()

static double get_len_position ( double  value,
double  hist1,
double  hist2 
)
static

Definition at line 762 of file rangetypes_selfuncs.c.

763 {
764  if (!isinf(hist1) && !isinf(hist2))
765  {
766  /*
767  * Both bounds are finite. The value should be finite too, because it
768  * lies somewhere between the bounds. If it doesn't, just return
769  * something.
770  */
771  if (isinf(value))
772  return 0.5;
773 
774  return 1.0 - (hist2 - value) / (hist2 - hist1);
775  }
776  else if (isinf(hist1) && !isinf(hist2))
777  {
778  /*
779  * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
780  * indicate the value is infinitely far from the lower bound.
781  */
782  return 1.0;
783  }
784  else if (isinf(hist1) && isinf(hist2))
785  {
786  /* same as above, but in reverse */
787  return 0.0;
788  }
789  else
790  {
791  /*
792  * If both bin boundaries are infinite, they should be equal to each
793  * other, and the value should also be infinite and equal to both
794  * bounds. (But don't Assert that, to avoid crashing unnecessarily if
795  * the caller messes up)
796  *
797  * Assume the value to lie in the middle of the infinite bounds.
798  */
799  return 0.5;
800  }
801 }
static struct @155 value

References value.

Referenced by calc_length_hist_frac().

◆ get_position()

static float8 get_position ( TypeCacheEntry typcache,
const RangeBound value,
const RangeBound hist1,
const RangeBound hist2 
)
static

Definition at line 683 of file rangetypes_selfuncs.c.

685 {
686  bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
687  float8 position;
688 
689  if (!hist1->infinite && !hist2->infinite)
690  {
691  float8 bin_width;
692 
693  /*
694  * Both bounds are finite. Assuming the subtype's comparison function
695  * works sanely, the value must be finite, too, because it lies
696  * somewhere between the bounds. If it doesn't, arbitrarily return
697  * 0.5.
698  */
699  if (value->infinite)
700  return 0.5;
701 
702  /* Can't interpolate without subdiff function */
703  if (!has_subdiff)
704  return 0.5;
705 
706  /* Calculate relative position using subdiff function. */
707  bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
708  typcache->rng_collation,
709  hist2->val,
710  hist1->val));
711  if (isnan(bin_width) || bin_width <= 0.0)
712  return 0.5; /* punt for NaN or zero-width bin */
713 
714  position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
715  typcache->rng_collation,
716  value->val,
717  hist1->val))
718  / bin_width;
719 
720  if (isnan(position))
721  return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */
722 
723  /* Relative position must be in [0,1] range */
724  position = Max(position, 0.0);
725  position = Min(position, 1.0);
726  return position;
727  }
728  else if (hist1->infinite && !hist2->infinite)
729  {
730  /*
731  * Lower bin boundary is -infinite, upper is finite. If the value is
732  * -infinite, return 0.0 to indicate it's equal to the lower bound.
733  * Otherwise return 1.0 to indicate it's infinitely far from the lower
734  * bound.
735  */
736  return ((value->infinite && value->lower) ? 0.0 : 1.0);
737  }
738  else if (!hist1->infinite && hist2->infinite)
739  {
740  /* same as above, but in reverse */
741  return ((value->infinite && !value->lower) ? 1.0 : 0.0);
742  }
743  else
744  {
745  /*
746  * If both bin boundaries are infinite, they should be equal to each
747  * other, and the value should also be infinite and equal to both
748  * bounds. (But don't Assert that, to avoid crashing if a user creates
749  * a datatype with a broken comparison function).
750  *
751  * Assume the value to lie in the middle of the infinite bounds.
752  */
753  return 0.5;
754  }
755 }

References DatumGetFloat8(), FmgrInfo::fn_oid, FunctionCall2Coll(), RangeBound::infinite, Max, Min, OidIsValid, TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, RangeBound::val, and value.

Referenced by calc_hist_selectivity_contained(), calc_hist_selectivity_contains(), and calc_hist_selectivity_scalar().

◆ length_hist_bsearch()

static int length_hist_bsearch ( Datum length_hist_values,
int  length_hist_nvalues,
double  value,
bool  equal 
)
static

Definition at line 657 of file rangetypes_selfuncs.c.

659 {
660  int lower = -1,
661  upper = length_hist_nvalues - 1,
662  middle;
663 
664  while (lower < upper)
665  {
666  double middleval;
667 
668  middle = (lower + upper + 1) / 2;
669 
670  middleval = DatumGetFloat8(length_hist_values[middle]);
671  if (middleval < value || (equal && middleval <= value))
672  lower = middle;
673  else
674  upper = middle - 1;
675  }
676  return lower;
677 }

References DatumGetFloat8(), equal(), lower(), upper(), and value.

Referenced by calc_length_hist_frac().

◆ rangesel()

Datum rangesel ( PG_FUNCTION_ARGS  )

Definition at line 108 of file rangetypes_selfuncs.c.

109 {
111  Oid operator = PG_GETARG_OID(1);
112  List *args = (List *) PG_GETARG_POINTER(2);
113  int varRelid = PG_GETARG_INT32(3);
114  VariableStatData vardata;
115  Node *other;
116  bool varonleft;
117  Selectivity selec;
118  TypeCacheEntry *typcache = NULL;
119  RangeType *constrange = NULL;
120 
121  /*
122  * If expression is not (variable op something) or (something op
123  * variable), then punt and return a default estimate.
124  */
125  if (!get_restriction_variable(root, args, varRelid,
126  &vardata, &other, &varonleft))
128 
129  /*
130  * Can't do anything useful if the something is not a constant, either.
131  */
132  if (!IsA(other, Const))
133  {
134  ReleaseVariableStats(vardata);
136  }
137 
138  /*
139  * All the range operators are strict, so we can cope with a NULL constant
140  * right away.
141  */
142  if (((Const *) other)->constisnull)
143  {
144  ReleaseVariableStats(vardata);
145  PG_RETURN_FLOAT8(0.0);
146  }
147 
148  /*
149  * If var is on the right, commute the operator, so that we can assume the
150  * var is on the left in what follows.
151  */
152  if (!varonleft)
153  {
154  /* we have other Op var, commute to make var Op other */
155  operator = get_commutator(operator);
156  if (!operator)
157  {
158  /* Use default selectivity (should we raise an error instead?) */
159  ReleaseVariableStats(vardata);
161  }
162  }
163 
164  /*
165  * OK, there's a Var and a Const we're dealing with here. We need the
166  * Const to be of same range type as the column, else we can't do anything
167  * useful. (Such cases will likely fail at runtime, but here we'd rather
168  * just return a default estimate.)
169  *
170  * If the operator is "range @> element", the constant should be of the
171  * element type of the range column. Convert it to a range that includes
172  * only that single point, so that we don't need special handling for that
173  * in what follows.
174  */
175  if (operator == OID_RANGE_CONTAINS_ELEM_OP)
176  {
177  typcache = range_get_typcache(fcinfo, vardata.vartype);
178 
179  if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
180  {
182  upper;
183 
184  lower.inclusive = true;
185  lower.val = ((Const *) other)->constvalue;
186  lower.infinite = false;
187  lower.lower = true;
188  upper.inclusive = true;
189  upper.val = ((Const *) other)->constvalue;
190  upper.infinite = false;
191  upper.lower = false;
192  constrange = range_serialize(typcache, &lower, &upper, false, NULL);
193  }
194  }
195  else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
196  {
197  /*
198  * Here, the Var is the elem, not the range. In typical cases
199  * elem_contained_by_range_support will have simplified this case, so
200  * that we won't get here. If we do get here we'll fall back on a
201  * default estimate.
202  */
203  }
204  else if (((Const *) other)->consttype == vardata.vartype)
205  {
206  /* Both sides are the same range type */
207  typcache = range_get_typcache(fcinfo, vardata.vartype);
208 
209  constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
210  }
211 
212  /*
213  * If we got a valid constant on one side of the operator, proceed to
214  * estimate using statistics. Otherwise punt and return a default constant
215  * estimate. Note that calc_rangesel need not handle
216  * OID_RANGE_ELEM_CONTAINED_OP.
217  */
218  if (constrange)
219  selec = calc_rangesel(typcache, &vardata, constrange, operator);
220  else
221  selec = default_range_selectivity(operator);
222 
223  ReleaseVariableStats(vardata);
224 
225  CLAMP_PROBABILITY(selec);
226 
227  PG_RETURN_FLOAT8((float8) selec);
228 }
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
Oid get_commutator(Oid opno)
Definition: lsyscache.c:1509
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
unsigned int Oid
Definition: postgres_ext.h:31
tree ctl root
Definition: radixtree.h:1880
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1703
RangeType * range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty, struct Node *escontext)
Definition: rangetypes.c:1727
static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata, const RangeType *constval, Oid operator)
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4883
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
Definition: pg_list.h:54
Definition: nodes.h:129
struct TypeCacheEntry * rngelemtype
Definition: typcache.h:98

References generate_unaccent_rules::args, calc_rangesel(), CLAMP_PROBABILITY, DatumGetRangeTypeP(), default_range_selectivity(), get_commutator(), get_restriction_variable(), IsA, lower(), PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, range_get_typcache(), range_serialize(), ReleaseVariableStats, TypeCacheEntry::rngelemtype, root, TypeCacheEntry::type_id, upper(), and VariableStatData::vartype.

◆ rbound_bsearch()

static int rbound_bsearch ( TypeCacheEntry typcache,
const RangeBound value,
const RangeBound hist,
int  hist_length,
bool  equal 
)
static

Definition at line 628 of file rangetypes_selfuncs.c.

630 {
631  int lower = -1,
632  upper = hist_length - 1,
633  cmp,
634  middle;
635 
636  while (lower < upper)
637  {
638  middle = (lower + upper + 1) / 2;
639  cmp = range_cmp_bounds(typcache, &hist[middle], value);
640 
641  if (cmp < 0 || (equal && cmp == 0))
642  lower = middle;
643  else
644  upper = middle - 1;
645  }
646  return lower;
647 }
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743

References cmp(), equal(), lower(), range_cmp_bounds(), upper(), and value.

Referenced by calc_hist_selectivity_contained(), calc_hist_selectivity_contains(), and calc_hist_selectivity_scalar().