PostgreSQL Source Code  git master
array_selfuncs.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "utils/array.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/typcache.h"
Include dependency graph for array_selfuncs.c:

Go to the source code of this file.

Macros

#define DEFAULT_CONTAIN_SEL   0.005
 
#define DEFAULT_OVERLAP_SEL   0.01
 
#define DEFAULT_SEL(operator)
 
#define EFFORT   100
 

Functions

static Selectivity calc_arraycontsel (VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator)
 
static Selectivity mcelem_array_selec (ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator)
 
static Selectivity mcelem_array_contain_overlap_selec (Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
 
static Selectivity mcelem_array_contained_selec (Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
 
static float * calc_hist (const float4 *hist, int nhist, int n)
 
static float * calc_distr (const float *p, int n, int m, float rest)
 
static int floor_log2 (uint32 n)
 
static bool find_next_mcelem (Datum *mcelem, int nmcelem, Datum value, int *index, TypeCacheEntry *typentry)
 
static int element_compare (const void *key1, const void *key2, void *arg)
 
static int float_compare_desc (const void *key1, const void *key2)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 
Datum arraycontsel (PG_FUNCTION_ARGS)
 
Datum arraycontjoinsel (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ DEFAULT_CONTAIN_SEL

#define DEFAULT_CONTAIN_SEL   0.005

Definition at line 30 of file array_selfuncs.c.

◆ DEFAULT_OVERLAP_SEL

#define DEFAULT_OVERLAP_SEL   0.01

Definition at line 33 of file array_selfuncs.c.

◆ DEFAULT_SEL

#define DEFAULT_SEL (   operator)
Value:
((operator) == OID_ARRAY_OVERLAP_OP ? \
#define DEFAULT_CONTAIN_SEL
#define DEFAULT_OVERLAP_SEL

Definition at line 36 of file array_selfuncs.c.

◆ EFFORT

#define EFFORT   100

Function Documentation

◆ arraycontjoinsel()

Datum arraycontjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 321 of file array_selfuncs.c.

322 {
323  /* For the moment this is just a stub */
324  Oid operator = PG_GETARG_OID(1);
325 
326  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
327 }
#define DEFAULT_SEL(operator)
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
unsigned int Oid
Definition: postgres_ext.h:31

References DEFAULT_SEL, PG_GETARG_OID, and PG_RETURN_FLOAT8.

Referenced by _int_contained_joinsel(), _int_contains_joinsel(), and _int_overlap_joinsel().

◆ arraycontsel()

Datum arraycontsel ( PG_FUNCTION_ARGS  )

Definition at line 241 of file array_selfuncs.c.

242 {
244  Oid operator = PG_GETARG_OID(1);
245  List *args = (List *) PG_GETARG_POINTER(2);
246  int varRelid = PG_GETARG_INT32(3);
247  VariableStatData vardata;
248  Node *other;
249  bool varonleft;
250  Selectivity selec;
251  Oid element_typeid;
252 
253  /*
254  * If expression is not (variable op something) or (something op
255  * variable), then punt and return a default estimate.
256  */
257  if (!get_restriction_variable(root, args, varRelid,
258  &vardata, &other, &varonleft))
259  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
260 
261  /*
262  * Can't do anything useful if the something is not a constant, either.
263  */
264  if (!IsA(other, Const))
265  {
266  ReleaseVariableStats(vardata);
267  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
268  }
269 
270  /*
271  * The "&&", "@>" and "<@" operators are strict, so we can cope with a
272  * NULL constant right away.
273  */
274  if (((Const *) other)->constisnull)
275  {
276  ReleaseVariableStats(vardata);
277  PG_RETURN_FLOAT8(0.0);
278  }
279 
280  /*
281  * If var is on the right, commute the operator, so that we can assume the
282  * var is on the left in what follows.
283  */
284  if (!varonleft)
285  {
286  if (operator == OID_ARRAY_CONTAINS_OP)
287  operator = OID_ARRAY_CONTAINED_OP;
288  else if (operator == OID_ARRAY_CONTAINED_OP)
289  operator = OID_ARRAY_CONTAINS_OP;
290  }
291 
292  /*
293  * OK, there's a Var and a Const we're dealing with here. We need the
294  * Const to be an array with same element type as column, else we can't do
295  * anything useful. (Such cases will likely fail at runtime, but here
296  * we'd rather just return a default estimate.)
297  */
298  element_typeid = get_base_element_type(((Const *) other)->consttype);
299  if (element_typeid != InvalidOid &&
300  element_typeid == get_base_element_type(vardata.vartype))
301  {
302  selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
303  element_typeid, operator);
304  }
305  else
306  {
307  selec = DEFAULT_SEL(operator);
308  }
309 
310  ReleaseVariableStats(vardata);
311 
312  CLAMP_PROBABILITY(selec);
313 
314  PG_RETURN_FLOAT8((float8) selec);
315 }
static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator)
double float8
Definition: c.h:630
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2832
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
#define InvalidOid
Definition: postgres_ext.h:36
tree ctl root
Definition: radixtree.h:1886
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4891
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
Definition: pg_list.h:54
Definition: nodes.h:129

References generate_unaccent_rules::args, calc_arraycontsel(), CLAMP_PROBABILITY, DEFAULT_SEL, get_base_element_type(), get_restriction_variable(), InvalidOid, IsA, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, root, and VariableStatData::vartype.

Referenced by _int_contained_sel(), _int_contains_sel(), and _int_overlap_sel().

◆ calc_arraycontsel()

static Selectivity calc_arraycontsel ( VariableStatData vardata,
Datum  constval,
Oid  elemtype,
Oid  operator 
)
static

Definition at line 337 of file array_selfuncs.c.

339 {
340  Selectivity selec;
341  TypeCacheEntry *typentry;
342  FmgrInfo *cmpfunc;
343  ArrayType *array;
344 
345  /* Get element type's default comparison function */
346  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
347  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
348  return DEFAULT_SEL(operator);
349  cmpfunc = &typentry->cmp_proc_finfo;
350 
351  /*
352  * The caller made sure the const is an array with same element type, so
353  * get it now
354  */
355  array = DatumGetArrayTypeP(constval);
356 
357  if (HeapTupleIsValid(vardata->statsTuple) &&
358  statistic_proc_security_check(vardata, cmpfunc->fn_oid))
359  {
360  Form_pg_statistic stats;
361  AttStatsSlot sslot;
362  AttStatsSlot hslot;
363 
364  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
365 
366  /* MCELEM will be an array of same type as column */
367  if (get_attstatsslot(&sslot, vardata->statsTuple,
368  STATISTIC_KIND_MCELEM, InvalidOid,
370  {
371  /*
372  * For "array <@ const" case we also need histogram of distinct
373  * element counts.
374  */
375  if (operator != OID_ARRAY_CONTAINED_OP ||
376  !get_attstatsslot(&hslot, vardata->statsTuple,
377  STATISTIC_KIND_DECHIST, InvalidOid,
379  memset(&hslot, 0, sizeof(hslot));
380 
381  /* Use the most-common-elements slot for the array Var. */
382  selec = mcelem_array_selec(array, typentry,
383  sslot.values, sslot.nvalues,
384  sslot.numbers, sslot.nnumbers,
385  hslot.numbers, hslot.nnumbers,
386  operator);
387 
388  free_attstatsslot(&hslot);
389  free_attstatsslot(&sslot);
390  }
391  else
392  {
393  /* No most-common-elements info, so do without */
394  selec = mcelem_array_selec(array, typentry,
395  NULL, 0, NULL, 0, NULL, 0,
396  operator);
397  }
398 
399  /*
400  * MCE stats count only non-null rows, so adjust for null rows.
401  */
402  selec *= (1.0 - stats->stanullfrac);
403  }
404  else
405  {
406  /* No stats at all, so do without */
407  selec = mcelem_array_selec(array, typentry,
408  NULL, 0, NULL, 0, NULL, 0,
409  operator);
410  /* we assume no nulls here, so no stanullfrac correction */
411  }
412 
413  /* If constant was toasted, release the copy we made */
414  if (PointerGetDatum(array) != constval)
415  pfree(array);
416 
417  return selec;
418 }
#define DatumGetArrayTypeP(X)
Definition: array.h:261
static Selectivity mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator)
#define OidIsValid(objectId)
Definition: c.h:775
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3344
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3234
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
void pfree(void *pointer)
Definition: mcxt.c:1521
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:5743
Datum * values
Definition: lsyscache.h:53
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57
Definition: fmgr.h:57
Oid fn_oid
Definition: fmgr.h:59
FmgrInfo cmp_proc_finfo
Definition: typcache.h:76
HeapTuple statsTuple
Definition: selfuncs.h:89
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:356
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:143

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, TypeCacheEntry::cmp_proc_finfo, DatumGetArrayTypeP, DEFAULT_SEL, FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, lookup_type_cache(), mcelem_array_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, pfree(), PointerGetDatum(), statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by arraycontsel().

◆ calc_distr()

static float * calc_distr ( const float *  p,
int  n,
int  m,
float  rest 
)
static

Definition at line 1010 of file array_selfuncs.c.

1011 {
1012  float *row,
1013  *prev_row,
1014  *tmp;
1015  int i,
1016  j;
1017 
1018  /*
1019  * Since we return only the last row of the matrix and need only the
1020  * current and previous row for calculations, allocate two rows.
1021  */
1022  row = (float *) palloc((m + 1) * sizeof(float));
1023  prev_row = (float *) palloc((m + 1) * sizeof(float));
1024 
1025  /* M[0,0] = 1 */
1026  row[0] = 1.0f;
1027  for (i = 1; i <= n; i++)
1028  {
1029  float t = p[i - 1];
1030 
1031  /* Swap rows */
1032  tmp = row;
1033  row = prev_row;
1034  prev_row = tmp;
1035 
1036  /* Calculate next row */
1037  for (j = 0; j <= i && j <= m; j++)
1038  {
1039  float val = 0.0f;
1040 
1041  if (j < i)
1042  val += prev_row[j] * (1.0f - t);
1043  if (j > 0)
1044  val += prev_row[j - 1] * t;
1045  row[j] = val;
1046  }
1047  }
1048 
1049  /*
1050  * The presence of many distinct rare (not in "p") elements materially
1051  * decreases selectivity. Model their collective occurrence with the
1052  * Poisson distribution.
1053  */
1054  if (rest > DEFAULT_CONTAIN_SEL)
1055  {
1056  float t;
1057 
1058  /* Swap rows */
1059  tmp = row;
1060  row = prev_row;
1061  prev_row = tmp;
1062 
1063  for (i = 0; i <= m; i++)
1064  row[i] = 0.0f;
1065 
1066  /* Value of Poisson distribution for 0 occurrences */
1067  t = exp(-rest);
1068 
1069  /*
1070  * Calculate convolution of previously computed distribution and the
1071  * Poisson distribution.
1072  */
1073  for (i = 0; i <= m; i++)
1074  {
1075  for (j = 0; j <= m - i; j++)
1076  row[j + i] += prev_row[j] * t;
1077 
1078  /* Get Poisson distribution value for (i + 1) occurrences */
1079  t *= rest / (float) (i + 1);
1080  }
1081  }
1082 
1083  pfree(prev_row);
1084  return row;
1085 }
long val
Definition: informix.c:689
int j
Definition: isn.c:74
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1317

References DEFAULT_CONTAIN_SEL, i, j, palloc(), pfree(), and val.

Referenced by mcelem_array_contained_selec().

◆ calc_hist()

static float * calc_hist ( const float4 hist,
int  nhist,
int  n 
)
static

Definition at line 921 of file array_selfuncs.c.

922 {
923  float *hist_part;
924  int k,
925  i = 0;
926  float prev_interval = 0,
927  next_interval;
928  float frac;
929 
930  hist_part = (float *) palloc((n + 1) * sizeof(float));
931 
932  /*
933  * frac is a probability contribution for each interval between histogram
934  * values. We have nhist - 1 intervals, so contribution of each one will
935  * be 1 / (nhist - 1).
936  */
937  frac = 1.0f / ((float) (nhist - 1));
938 
939  for (k = 0; k <= n; k++)
940  {
941  int count = 0;
942 
943  /*
944  * Count the histogram boundaries equal to k. (Although the histogram
945  * should theoretically contain only exact integers, entries are
946  * floats so there could be roundoff error in large values. Treat any
947  * fractional value as equal to the next larger k.)
948  */
949  while (i < nhist && hist[i] <= k)
950  {
951  count++;
952  i++;
953  }
954 
955  if (count > 0)
956  {
957  /* k is an exact bound for at least one histogram box. */
958  float val;
959 
960  /* Find length between current histogram value and the next one */
961  if (i < nhist)
962  next_interval = hist[i] - hist[i - 1];
963  else
964  next_interval = 0;
965 
966  /*
967  * count - 1 histogram boxes contain k exclusively. They
968  * contribute a total of (count - 1) * frac probability. Also
969  * factor in the partial histogram boxes on either side.
970  */
971  val = (float) (count - 1);
972  if (next_interval > 0)
973  val += 0.5f / next_interval;
974  if (prev_interval > 0)
975  val += 0.5f / prev_interval;
976  hist_part[k] = frac * val;
977 
978  prev_interval = next_interval;
979  }
980  else
981  {
982  /* k does not appear as an exact histogram bound. */
983  if (prev_interval > 0)
984  hist_part[k] = frac / prev_interval;
985  else
986  hist_part[k] = 0.0f;
987  }
988  }
989 
990  return hist_part;
991 }

References i, palloc(), and val.

Referenced by mcelem_array_contained_selec().

◆ element_compare()

static int element_compare ( const void *  key1,
const void *  key2,
void *  arg 
)
static

Definition at line 1165 of file array_selfuncs.c.

1166 {
1167  Datum d1 = *((const Datum *) key1);
1168  Datum d2 = *((const Datum *) key2);
1169  TypeCacheEntry *typentry = (TypeCacheEntry *) arg;
1170  FmgrInfo *cmpfunc = &typentry->cmp_proc_finfo;
1171  Datum c;
1172 
1173  c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2);
1174  return DatumGetInt32(c);
1175 }
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
void * arg
uintptr_t Datum
Definition: postgres.h:64
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202
char * c
Oid typcollation
Definition: typcache.h:47

References arg, TypeCacheEntry::cmp_proc_finfo, DatumGetInt32(), FunctionCall2Coll(), and TypeCacheEntry::typcollation.

Referenced by find_next_mcelem(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), and mcelem_array_selec().

◆ find_next_mcelem()

static bool find_next_mcelem ( Datum mcelem,
int  nmcelem,
Datum  value,
int *  index,
TypeCacheEntry typentry 
)
static

Definition at line 1130 of file array_selfuncs.c.

1132 {
1133  int l = *index,
1134  r = nmcelem - 1,
1135  i,
1136  res;
1137 
1138  while (l <= r)
1139  {
1140  i = (l + r) / 2;
1141  res = element_compare(&mcelem[i], &value, typentry);
1142  if (res == 0)
1143  {
1144  *index = i;
1145  return true;
1146  }
1147  else if (res < 0)
1148  l = i + 1;
1149  else
1150  r = i - 1;
1151  }
1152  *index = l;
1153  return false;
1154 }
static int element_compare(const void *key1, const void *key2, void *arg)
static struct @157 value
Definition: type.h:95

References element_compare(), i, res, and value.

Referenced by mcelem_array_contain_overlap_selec().

◆ float_compare_desc()

static int float_compare_desc ( const void *  key1,
const void *  key2 
)
static

Definition at line 1181 of file array_selfuncs.c.

1182 {
1183  float d1 = *((const float *) key1);
1184  float d2 = *((const float *) key2);
1185 
1186  if (d1 > d2)
1187  return -1;
1188  else if (d1 < d2)
1189  return 1;
1190  else
1191  return 0;
1192 }

Referenced by mcelem_array_contained_selec().

◆ floor_log2()

static int floor_log2 ( uint32  n)
static

Definition at line 1089 of file array_selfuncs.c.

1090 {
1091  int logval = 0;
1092 
1093  if (n == 0)
1094  return -1;
1095  if (n >= (1 << 16))
1096  {
1097  n >>= 16;
1098  logval += 16;
1099  }
1100  if (n >= (1 << 8))
1101  {
1102  n >>= 8;
1103  logval += 8;
1104  }
1105  if (n >= (1 << 4))
1106  {
1107  n >>= 4;
1108  logval += 4;
1109  }
1110  if (n >= (1 << 2))
1111  {
1112  n >>= 2;
1113  logval += 2;
1114  }
1115  if (n >= (1 << 1))
1116  {
1117  logval += 1;
1118  }
1119  return logval;
1120 }

Referenced by mcelem_array_contain_overlap_selec().

◆ mcelem_array_contain_overlap_selec()

static Selectivity mcelem_array_contain_overlap_selec ( Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
Datum array_data,
int  nitems,
Oid  operator,
TypeCacheEntry typentry 
)
static

Definition at line 521 of file array_selfuncs.c.

525 {
526  Selectivity selec,
527  elem_selec;
528  int mcelem_index,
529  i;
530  bool use_bsearch;
531  float4 minfreq;
532 
533  /*
534  * There should be three more Numbers than Values, because the last three
535  * cells should hold minimal and maximal frequency among the non-null
536  * elements, and then the frequency of null elements. Ignore the Numbers
537  * if not right.
538  */
539  if (nnumbers != nmcelem + 3)
540  {
541  numbers = NULL;
542  nnumbers = 0;
543  }
544 
545  if (numbers)
546  {
547  /* Grab the lowest observed frequency */
548  minfreq = numbers[nmcelem];
549  }
550  else
551  {
552  /* Without statistics make some default assumptions */
553  minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL;
554  }
555 
556  /* Decide whether it is faster to use binary search or not. */
557  if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems)
558  use_bsearch = true;
559  else
560  use_bsearch = false;
561 
562  if (operator == OID_ARRAY_CONTAINS_OP)
563  {
564  /*
565  * Initial selectivity for "column @> const" query is 1.0, and it will
566  * be decreased with each element of constant array.
567  */
568  selec = 1.0;
569  }
570  else
571  {
572  /*
573  * Initial selectivity for "column && const" query is 0.0, and it will
574  * be increased with each element of constant array.
575  */
576  selec = 0.0;
577  }
578 
579  /* Scan mcelem and array in parallel. */
580  mcelem_index = 0;
581  for (i = 0; i < nitems; i++)
582  {
583  bool match = false;
584 
585  /* Ignore any duplicates in the array data. */
586  if (i > 0 &&
587  element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
588  continue;
589 
590  /* Find the smallest MCELEM >= this array item. */
591  if (use_bsearch)
592  {
593  match = find_next_mcelem(mcelem, nmcelem, array_data[i],
594  &mcelem_index, typentry);
595  }
596  else
597  {
598  while (mcelem_index < nmcelem)
599  {
600  int cmp = element_compare(&mcelem[mcelem_index],
601  &array_data[i],
602  typentry);
603 
604  if (cmp < 0)
605  mcelem_index++;
606  else
607  {
608  if (cmp == 0)
609  match = true; /* mcelem is found */
610  break;
611  }
612  }
613  }
614 
615  if (match && numbers)
616  {
617  /* MCELEM matches the array item; use its frequency. */
618  elem_selec = numbers[mcelem_index];
619  mcelem_index++;
620  }
621  else
622  {
623  /*
624  * The element is not in MCELEM. Punt, but assume that the
625  * selectivity cannot be more than minfreq / 2.
626  */
627  elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
628  }
629 
630  /*
631  * Update overall selectivity using the current element's selectivity
632  * and an assumption of element occurrence independence.
633  */
634  if (operator == OID_ARRAY_CONTAINS_OP)
635  selec *= elem_selec;
636  else
637  selec = selec + elem_selec - selec * elem_selec;
638 
639  /* Clamp intermediate results to stay sane despite roundoff error */
640  CLAMP_PROBABILITY(selec);
641  }
642 
643  return selec;
644 }
static int floor_log2(uint32 n)
static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index, TypeCacheEntry *typentry)
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
float float4
Definition: c.h:629
#define nitems(x)
Definition: indent.h:31
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743

References CLAMP_PROBABILITY, cmp(), DEFAULT_CONTAIN_SEL, element_compare(), find_next_mcelem(), floor_log2(), i, Min, and nitems.

Referenced by mcelem_array_selec(), and scalararraysel_containment().

◆ mcelem_array_contained_selec()

static Selectivity mcelem_array_contained_selec ( Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
Datum array_data,
int  nitems,
float4 hist,
int  nhist,
Oid  operator,
TypeCacheEntry typentry 
)
static

Definition at line 696 of file array_selfuncs.c.

701 {
702  int mcelem_index,
703  i,
704  unique_nitems = 0;
705  float selec,
706  minfreq,
707  nullelem_freq;
708  float *dist,
709  *mcelem_dist,
710  *hist_part;
711  float avg_count,
712  mult,
713  rest;
714  float *elem_selec;
715 
716  /*
717  * There should be three more Numbers than Values in the MCELEM slot,
718  * because the last three cells should hold minimal and maximal frequency
719  * among the non-null elements, and then the frequency of null elements.
720  * Punt if not right, because we can't do much without the element freqs.
721  */
722  if (numbers == NULL || nnumbers != nmcelem + 3)
723  return DEFAULT_CONTAIN_SEL;
724 
725  /* Can't do much without a count histogram, either */
726  if (hist == NULL || nhist < 3)
727  return DEFAULT_CONTAIN_SEL;
728 
729  /*
730  * Grab some of the summary statistics that compute_array_stats() stores:
731  * lowest frequency, frequency of null elements, and average distinct
732  * element count.
733  */
734  minfreq = numbers[nmcelem];
735  nullelem_freq = numbers[nmcelem + 2];
736  avg_count = hist[nhist - 1];
737 
738  /*
739  * "rest" will be the sum of the frequencies of all elements not
740  * represented in MCELEM. The average distinct element count is the sum
741  * of the frequencies of *all* elements. Begin with that; we will proceed
742  * to subtract the MCELEM frequencies.
743  */
744  rest = avg_count;
745 
746  /*
747  * mult is a multiplier representing estimate of probability that each
748  * mcelem that is not present in constant doesn't occur.
749  */
750  mult = 1.0f;
751 
752  /*
753  * elem_selec is array of estimated frequencies for elements in the
754  * constant.
755  */
756  elem_selec = (float *) palloc(sizeof(float) * nitems);
757 
758  /* Scan mcelem and array in parallel. */
759  mcelem_index = 0;
760  for (i = 0; i < nitems; i++)
761  {
762  bool match = false;
763 
764  /* Ignore any duplicates in the array data. */
765  if (i > 0 &&
766  element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
767  continue;
768 
769  /*
770  * Iterate over MCELEM until we find an entry greater than or equal to
771  * this element of the constant. Update "rest" and "mult" for mcelem
772  * entries skipped over.
773  */
774  while (mcelem_index < nmcelem)
775  {
776  int cmp = element_compare(&mcelem[mcelem_index],
777  &array_data[i],
778  typentry);
779 
780  if (cmp < 0)
781  {
782  mult *= (1.0f - numbers[mcelem_index]);
783  rest -= numbers[mcelem_index];
784  mcelem_index++;
785  }
786  else
787  {
788  if (cmp == 0)
789  match = true; /* mcelem is found */
790  break;
791  }
792  }
793 
794  if (match)
795  {
796  /* MCELEM matches the array item. */
797  elem_selec[unique_nitems] = numbers[mcelem_index];
798  /* "rest" is decremented for all mcelems, matched or not */
799  rest -= numbers[mcelem_index];
800  mcelem_index++;
801  }
802  else
803  {
804  /*
805  * The element is not in MCELEM. Punt, but assume that the
806  * selectivity cannot be more than minfreq / 2.
807  */
808  elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
809  minfreq / 2);
810  }
811 
812  unique_nitems++;
813  }
814 
815  /*
816  * If we handled all constant elements without exhausting the MCELEM
817  * array, finish walking it to complete calculation of "rest" and "mult".
818  */
819  while (mcelem_index < nmcelem)
820  {
821  mult *= (1.0f - numbers[mcelem_index]);
822  rest -= numbers[mcelem_index];
823  mcelem_index++;
824  }
825 
826  /*
827  * The presence of many distinct rare elements materially decreases
828  * selectivity. Use the Poisson distribution to estimate the probability
829  * of a column value having zero occurrences of such elements. See above
830  * for the definition of "rest".
831  */
832  mult *= exp(-rest);
833 
834  /*----------
835  * Using the distinct element count histogram requires
836  * O(unique_nitems * (nmcelem + unique_nitems))
837  * operations. Beyond a certain computational cost threshold, it's
838  * reasonable to sacrifice accuracy for decreased planning time. We limit
839  * the number of operations to EFFORT * nmcelem; since nmcelem is limited
840  * by the column's statistics target, the work done is user-controllable.
841  *
842  * If the number of operations would be too large, we can reduce it
843  * without losing all accuracy by reducing unique_nitems and considering
844  * only the most-common elements of the constant array. To make the
845  * results exactly match what we would have gotten with only those
846  * elements to start with, we'd have to remove any discarded elements'
847  * frequencies from "mult", but since this is only an approximation
848  * anyway, we don't bother with that. Therefore it's sufficient to qsort
849  * elem_selec[] and take the largest elements. (They will no longer match
850  * up with the elements of array_data[], but we don't care.)
851  *----------
852  */
853 #define EFFORT 100
854 
855  if ((nmcelem + unique_nitems) > 0 &&
856  unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
857  {
858  /*
859  * Use the quadratic formula to solve for largest allowable N. We
860  * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
861  */
862  double b = (double) nmcelem;
863  int n;
864 
865  n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
866 
867  /* Sort, then take just the first n elements */
868  qsort(elem_selec, unique_nitems, sizeof(float),
870  unique_nitems = n;
871  }
872 
873  /*
874  * Calculate probabilities of each distinct element count for both mcelems
875  * and constant elements. At this point, assume independent element
876  * occurrence.
877  */
878  dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
879  mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
880 
881  /* ignore hist[nhist-1], which is the average not a histogram member */
882  hist_part = calc_hist(hist, nhist - 1, unique_nitems);
883 
884  selec = 0.0f;
885  for (i = 0; i <= unique_nitems; i++)
886  {
887  /*
888  * mult * dist[i] / mcelem_dist[i] gives us probability of qual
889  * matching from assumption of independent element occurrence with the
890  * condition that distinct element count = i.
891  */
892  if (mcelem_dist[i] > 0)
893  selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
894  }
895 
896  pfree(dist);
897  pfree(mcelem_dist);
898  pfree(hist_part);
899  pfree(elem_selec);
900 
901  /* Take into account occurrence of NULL element. */
902  selec *= (1.0f - nullelem_freq);
903 
904  CLAMP_PROBABILITY(selec);
905 
906  return selec;
907 }
static int float_compare_desc(const void *key1, const void *key2)
#define EFFORT
static float * calc_hist(const float4 *hist, int nhist, int n)
static float * calc_distr(const float *p, int n, int m, float rest)
int b
Definition: isn.c:70
#define qsort(a, b, c, d)
Definition: port.h:447

References b, calc_distr(), calc_hist(), CLAMP_PROBABILITY, cmp(), DEFAULT_CONTAIN_SEL, EFFORT, element_compare(), float_compare_desc(), i, Min, nitems, palloc(), pfree(), and qsort.

Referenced by mcelem_array_selec(), and scalararraysel_containment().

◆ mcelem_array_selec()

static Selectivity mcelem_array_selec ( ArrayType array,
TypeCacheEntry typentry,
Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
float4 hist,
int  nhist,
Oid  operator 
)
static

Definition at line 428 of file array_selfuncs.c.

433 {
434  Selectivity selec;
435  int num_elems;
436  Datum *elem_values;
437  bool *elem_nulls;
438  bool null_present;
439  int nonnull_nitems;
440  int i;
441 
442  /*
443  * Prepare constant array data for sorting. Sorting lets us find unique
444  * elements and efficiently merge with the MCELEM array.
445  */
446  deconstruct_array(array,
447  typentry->type_id,
448  typentry->typlen,
449  typentry->typbyval,
450  typentry->typalign,
451  &elem_values, &elem_nulls, &num_elems);
452 
453  /* Collapse out any null elements */
454  nonnull_nitems = 0;
455  null_present = false;
456  for (i = 0; i < num_elems; i++)
457  {
458  if (elem_nulls[i])
459  null_present = true;
460  else
461  elem_values[nonnull_nitems++] = elem_values[i];
462  }
463 
464  /*
465  * Query "column @> '{anything, null}'" matches nothing. For the other
466  * two operators, presence of a null in the constant can be ignored.
467  */
468  if (null_present && operator == OID_ARRAY_CONTAINS_OP)
469  {
470  pfree(elem_values);
471  pfree(elem_nulls);
472  return (Selectivity) 0.0;
473  }
474 
475  /* Sort extracted elements using their default comparison function. */
476  qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
477  element_compare, typentry);
478 
479  /* Separate cases according to operator */
480  if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
481  selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
482  numbers, nnumbers,
483  elem_values, nonnull_nitems,
484  operator, typentry);
485  else if (operator == OID_ARRAY_CONTAINED_OP)
486  selec = mcelem_array_contained_selec(mcelem, nmcelem,
487  numbers, nnumbers,
488  elem_values, nonnull_nitems,
489  hist, nhist,
490  operator, typentry);
491  else
492  {
493  elog(ERROR, "arraycontsel called for unrecognized operator %u",
494  operator);
495  selec = 0.0; /* keep compiler quiet */
496  }
497 
498  pfree(elem_values);
499  pfree(elem_nulls);
500  return selec;
501 }
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3619
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
char typalign
Definition: typcache.h:41
bool typbyval
Definition: typcache.h:40
int16 typlen
Definition: typcache.h:39

References deconstruct_array(), element_compare(), elog, ERROR, i, mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), pfree(), qsort_arg(), TypeCacheEntry::typalign, TypeCacheEntry::typbyval, TypeCacheEntry::type_id, and TypeCacheEntry::typlen.

Referenced by calc_arraycontsel().

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 81 of file array_selfuncs.c.

85 {
86  Selectivity selec;
87  VariableStatData vardata;
88  Datum constval;
89  TypeCacheEntry *typentry;
90  FmgrInfo *cmpfunc;
91 
92  /*
93  * rightop must be a variable, else punt.
94  */
95  examine_variable(root, rightop, varRelid, &vardata);
96  if (!vardata.rel)
97  {
98  ReleaseVariableStats(vardata);
99  return -1.0;
100  }
101 
102  /*
103  * leftop must be a constant, else punt.
104  */
105  if (!IsA(leftop, Const))
106  {
107  ReleaseVariableStats(vardata);
108  return -1.0;
109  }
110  if (((Const *) leftop)->constisnull)
111  {
112  /* qual can't succeed if null on left */
113  ReleaseVariableStats(vardata);
114  return (Selectivity) 0.0;
115  }
116  constval = ((Const *) leftop)->constvalue;
117 
118  /* Get element type's default comparison function */
119  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
120  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
121  {
122  ReleaseVariableStats(vardata);
123  return -1.0;
124  }
125  cmpfunc = &typentry->cmp_proc_finfo;
126 
127  /*
128  * If the operator is <>, swap ANY/ALL, then invert the result later.
129  */
130  if (!isEquality)
131  useOr = !useOr;
132 
133  /* Get array element stats for var, if available */
134  if (HeapTupleIsValid(vardata.statsTuple) &&
135  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
136  {
137  Form_pg_statistic stats;
138  AttStatsSlot sslot;
139  AttStatsSlot hslot;
140 
141  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
142 
143  /* MCELEM will be an array of same type as element */
144  if (get_attstatsslot(&sslot, vardata.statsTuple,
145  STATISTIC_KIND_MCELEM, InvalidOid,
147  {
148  /* For ALL case, also get histogram of distinct-element counts */
149  if (useOr ||
150  !get_attstatsslot(&hslot, vardata.statsTuple,
151  STATISTIC_KIND_DECHIST, InvalidOid,
153  memset(&hslot, 0, sizeof(hslot));
154 
155  /*
156  * For = ANY, estimate as var @> ARRAY[const].
157  *
158  * For = ALL, estimate as var <@ ARRAY[const].
159  */
160  if (useOr)
162  sslot.nvalues,
163  sslot.numbers,
164  sslot.nnumbers,
165  &constval, 1,
166  OID_ARRAY_CONTAINS_OP,
167  typentry);
168  else
169  selec = mcelem_array_contained_selec(sslot.values,
170  sslot.nvalues,
171  sslot.numbers,
172  sslot.nnumbers,
173  &constval, 1,
174  hslot.numbers,
175  hslot.nnumbers,
176  OID_ARRAY_CONTAINED_OP,
177  typentry);
178 
179  free_attstatsslot(&hslot);
180  free_attstatsslot(&sslot);
181  }
182  else
183  {
184  /* No most-common-elements info, so do without */
185  if (useOr)
186  selec = mcelem_array_contain_overlap_selec(NULL, 0,
187  NULL, 0,
188  &constval, 1,
189  OID_ARRAY_CONTAINS_OP,
190  typentry);
191  else
192  selec = mcelem_array_contained_selec(NULL, 0,
193  NULL, 0,
194  &constval, 1,
195  NULL, 0,
196  OID_ARRAY_CONTAINED_OP,
197  typentry);
198  }
199 
200  /*
201  * MCE stats count only non-null rows, so adjust for null rows.
202  */
203  selec *= (1.0 - stats->stanullfrac);
204  }
205  else
206  {
207  /* No stats at all, so do without */
208  if (useOr)
209  selec = mcelem_array_contain_overlap_selec(NULL, 0,
210  NULL, 0,
211  &constval, 1,
212  OID_ARRAY_CONTAINS_OP,
213  typentry);
214  else
215  selec = mcelem_array_contained_selec(NULL, 0,
216  NULL, 0,
217  &constval, 1,
218  NULL, 0,
219  OID_ARRAY_CONTAINED_OP,
220  typentry);
221  /* we assume no nulls here, so no stanullfrac correction */
222  }
223 
224  ReleaseVariableStats(vardata);
225 
226  /*
227  * If the operator is <>, invert the results.
228  */
229  if (!isEquality)
230  selec = 1.0 - selec;
231 
232  CLAMP_PROBABILITY(selec);
233 
234  return selec;
235 }
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:5020
RelOptInfo * rel
Definition: selfuncs.h:88

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, root, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().