PostgreSQL Source Code  git master
array_selfuncs.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/typcache.h"
Include dependency graph for array_selfuncs.c:

Go to the source code of this file.

Macros

#define DEFAULT_CONTAIN_SEL   0.005
 
#define DEFAULT_OVERLAP_SEL   0.01
 
#define DEFAULT_SEL(operator)
 
#define EFFORT   100
 

Functions

static Selectivity calc_arraycontsel (VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator)
 
static Selectivity mcelem_array_selec (ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator)
 
static Selectivity mcelem_array_contain_overlap_selec (Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
 
static Selectivity mcelem_array_contained_selec (Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
 
static float * calc_hist (const float4 *hist, int nhist, int n)
 
static float * calc_distr (const float *p, int n, int m, float rest)
 
static int floor_log2 (uint32 n)
 
static bool find_next_mcelem (Datum *mcelem, int nmcelem, Datum value, int *index, TypeCacheEntry *typentry)
 
static int element_compare (const void *key1, const void *key2, void *arg)
 
static int float_compare_desc (const void *key1, const void *key2)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 
Datum arraycontsel (PG_FUNCTION_ARGS)
 
Datum arraycontjoinsel (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ DEFAULT_CONTAIN_SEL

#define DEFAULT_CONTAIN_SEL   0.005

◆ DEFAULT_OVERLAP_SEL

#define DEFAULT_OVERLAP_SEL   0.01

Definition at line 34 of file array_selfuncs.c.

◆ DEFAULT_SEL

#define DEFAULT_SEL (   operator)
Value:
((operator) == OID_ARRAY_OVERLAP_OP ? \
#define DEFAULT_CONTAIN_SEL
#define DEFAULT_OVERLAP_SEL

Definition at line 37 of file array_selfuncs.c.

Referenced by arraycontjoinsel(), arraycontsel(), and calc_arraycontsel().

◆ EFFORT

#define EFFORT   100

Function Documentation

◆ arraycontjoinsel()

Datum arraycontjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 322 of file array_selfuncs.c.

References DEFAULT_SEL, PG_GETARG_OID, and PG_RETURN_FLOAT8.

Referenced by _int_contained_joinsel(), _int_contains_joinsel(), and _int_overlap_joinsel().

323 {
324  /* For the moment this is just a stub */
325  Oid operator = PG_GETARG_OID(1);
326 
327  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
328 }
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:356
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
#define DEFAULT_SEL(operator)

◆ arraycontsel()

Datum arraycontsel ( PG_FUNCTION_ARGS  )

Definition at line 242 of file array_selfuncs.c.

References generate_unaccent_rules::args, calc_arraycontsel(), CLAMP_PROBABILITY, DEFAULT_SEL, get_base_element_type(), get_restriction_variable(), InvalidOid, IsA, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and VariableStatData::vartype.

Referenced by _int_contained_sel(), _int_contains_sel(), and _int_overlap_sel().

243 {
245  Oid operator = PG_GETARG_OID(1);
246  List *args = (List *) PG_GETARG_POINTER(2);
247  int varRelid = PG_GETARG_INT32(3);
248  VariableStatData vardata;
249  Node *other;
250  bool varonleft;
251  Selectivity selec;
252  Oid element_typeid;
253 
254  /*
255  * If expression is not (variable op something) or (something op
256  * variable), then punt and return a default estimate.
257  */
258  if (!get_restriction_variable(root, args, varRelid,
259  &vardata, &other, &varonleft))
260  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
261 
262  /*
263  * Can't do anything useful if the something is not a constant, either.
264  */
265  if (!IsA(other, Const))
266  {
267  ReleaseVariableStats(vardata);
268  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
269  }
270 
271  /*
272  * The "&&", "@>" and "<@" operators are strict, so we can cope with a
273  * NULL constant right away.
274  */
275  if (((Const *) other)->constisnull)
276  {
277  ReleaseVariableStats(vardata);
278  PG_RETURN_FLOAT8(0.0);
279  }
280 
281  /*
282  * If var is on the right, commute the operator, so that we can assume the
283  * var is on the left in what follows.
284  */
285  if (!varonleft)
286  {
287  if (operator == OID_ARRAY_CONTAINS_OP)
288  operator = OID_ARRAY_CONTAINED_OP;
289  else if (operator == OID_ARRAY_CONTAINED_OP)
290  operator = OID_ARRAY_CONTAINS_OP;
291  }
292 
293  /*
294  * OK, there's a Var and a Const we're dealing with here. We need the
295  * Const to be an array with same element type as column, else we can't do
296  * anything useful. (Such cases will likely fail at runtime, but here
297  * we'd rather just return a default estimate.)
298  */
299  element_typeid = get_base_element_type(((Const *) other)->consttype);
300  if (element_typeid != InvalidOid &&
301  element_typeid == get_base_element_type(vardata.vartype))
302  {
303  selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
304  element_typeid, operator);
305  }
306  else
307  {
308  selec = DEFAULT_SEL(operator);
309  }
310 
311  ReleaseVariableStats(vardata);
312 
313  CLAMP_PROBABILITY(selec);
314 
315  PG_RETURN_FLOAT8((float8) selec);
316 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4286
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:356
Definition: nodes.h:525
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
double Selectivity
Definition: nodes.h:658
unsigned int Oid
Definition: postgres_ext.h:31
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
double float8
Definition: c.h:491
#define PG_GETARG_OID(n)
Definition: fmgr.h:270
#define InvalidOid
Definition: postgres_ext.h:36
static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator)
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2599
#define DEFAULT_SEL(operator)
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
Definition: pg_list.h:50

◆ calc_arraycontsel()

static Selectivity calc_arraycontsel ( VariableStatData vardata,
Datum  constval,
Oid  elemtype,
Oid  operator 
)
static

Definition at line 338 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, TypeCacheEntry::cmp_proc_finfo, DatumGetArrayTypeP, DEFAULT_SEL, FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, lookup_type_cache(), mcelem_array_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, pfree(), PointerGetDatum, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by arraycontsel().

340 {
341  Selectivity selec;
342  TypeCacheEntry *typentry;
343  FmgrInfo *cmpfunc;
344  ArrayType *array;
345 
346  /* Get element type's default comparison function */
347  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
348  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
349  return DEFAULT_SEL(operator);
350  cmpfunc = &typentry->cmp_proc_finfo;
351 
352  /*
353  * The caller made sure the const is an array with same element type, so
354  * get it now
355  */
356  array = DatumGetArrayTypeP(constval);
357 
358  if (HeapTupleIsValid(vardata->statsTuple) &&
359  statistic_proc_security_check(vardata, cmpfunc->fn_oid))
360  {
361  Form_pg_statistic stats;
362  AttStatsSlot sslot;
363  AttStatsSlot hslot;
364 
365  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
366 
367  /* MCELEM will be an array of same type as column */
368  if (get_attstatsslot(&sslot, vardata->statsTuple,
369  STATISTIC_KIND_MCELEM, InvalidOid,
371  {
372  /*
373  * For "array <@ const" case we also need histogram of distinct
374  * element counts.
375  */
376  if (operator != OID_ARRAY_CONTAINED_OP ||
377  !get_attstatsslot(&hslot, vardata->statsTuple,
378  STATISTIC_KIND_DECHIST, InvalidOid,
380  memset(&hslot, 0, sizeof(hslot));
381 
382  /* Use the most-common-elements slot for the array Var. */
383  selec = mcelem_array_selec(array, typentry,
384  sslot.values, sslot.nvalues,
385  sslot.numbers, sslot.nnumbers,
386  hslot.numbers, hslot.nnumbers,
387  operator);
388 
389  free_attstatsslot(&hslot);
390  free_attstatsslot(&sslot);
391  }
392  else
393  {
394  /* No most-common-elements info, so do without */
395  selec = mcelem_array_selec(array, typentry,
396  NULL, 0, NULL, 0, NULL, 0,
397  operator);
398  }
399 
400  /*
401  * MCE stats count only non-null rows, so adjust for null rows.
402  */
403  selec *= (1.0 - stats->stanullfrac);
404  }
405  else
406  {
407  /* No stats at all, so do without */
408  selec = mcelem_array_selec(array, typentry,
409  NULL, 0, NULL, 0, NULL, 0,
410  operator);
411  /* we assume no nulls here, so no stanullfrac correction */
412  }
413 
414  /* If constant was toasted, release the copy we made */
415  if (PointerGetDatum(array) != constval)
416  pfree(array);
417 
418  return selec;
419 }
Definition: fmgr.h:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
#define PointerGetDatum(X)
Definition: postgres.h:556
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:638
FmgrInfo cmp_proc_finfo
Definition: typcache.h:73
void pfree(void *pointer)
Definition: mcxt.c:1056
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
#define DEFAULT_SEL(operator)
static Selectivity mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator)
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:134
#define DatumGetArrayTypeP(X)
Definition: array.h:249
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072

◆ calc_distr()

static float * calc_distr ( const float *  p,
int  n,
int  m,
float  rest 
)
static

Definition at line 1011 of file array_selfuncs.c.

References DEFAULT_CONTAIN_SEL, i, palloc(), pfree(), and val.

Referenced by mcelem_array_contained_selec().

1012 {
1013  float *row,
1014  *prev_row,
1015  *tmp;
1016  int i,
1017  j;
1018 
1019  /*
1020  * Since we return only the last row of the matrix and need only the
1021  * current and previous row for calculations, allocate two rows.
1022  */
1023  row = (float *) palloc((m + 1) * sizeof(float));
1024  prev_row = (float *) palloc((m + 1) * sizeof(float));
1025 
1026  /* M[0,0] = 1 */
1027  row[0] = 1.0f;
1028  for (i = 1; i <= n; i++)
1029  {
1030  float t = p[i - 1];
1031 
1032  /* Swap rows */
1033  tmp = row;
1034  row = prev_row;
1035  prev_row = tmp;
1036 
1037  /* Calculate next row */
1038  for (j = 0; j <= i && j <= m; j++)
1039  {
1040  float val = 0.0f;
1041 
1042  if (j < i)
1043  val += prev_row[j] * (1.0f - t);
1044  if (j > 0)
1045  val += prev_row[j - 1] * t;
1046  row[j] = val;
1047  }
1048  }
1049 
1050  /*
1051  * The presence of many distinct rare (not in "p") elements materially
1052  * decreases selectivity. Model their collective occurrence with the
1053  * Poisson distribution.
1054  */
1055  if (rest > DEFAULT_CONTAIN_SEL)
1056  {
1057  float t;
1058 
1059  /* Swap rows */
1060  tmp = row;
1061  row = prev_row;
1062  prev_row = tmp;
1063 
1064  for (i = 0; i <= m; i++)
1065  row[i] = 0.0f;
1066 
1067  /* Value of Poisson distribution for 0 occurrences */
1068  t = exp(-rest);
1069 
1070  /*
1071  * Calculate convolution of previously computed distribution and the
1072  * Poisson distribution.
1073  */
1074  for (i = 0; i <= m; i++)
1075  {
1076  for (j = 0; j <= m - i; j++)
1077  row[j + i] += prev_row[j] * t;
1078 
1079  /* Get Poisson distribution value for (i + 1) occurrences */
1080  t *= rest / (float) (i + 1);
1081  }
1082  }
1083 
1084  pfree(prev_row);
1085  return row;
1086 }
#define DEFAULT_CONTAIN_SEL
void pfree(void *pointer)
Definition: mcxt.c:1056
void * palloc(Size size)
Definition: mcxt.c:949
int i
long val
Definition: informix.c:684

◆ calc_hist()

static float * calc_hist ( const float4 hist,
int  nhist,
int  n 
)
static

Definition at line 922 of file array_selfuncs.c.

References i, palloc(), and val.

Referenced by mcelem_array_contained_selec().

923 {
924  float *hist_part;
925  int k,
926  i = 0;
927  float prev_interval = 0,
928  next_interval;
929  float frac;
930 
931  hist_part = (float *) palloc((n + 1) * sizeof(float));
932 
933  /*
934  * frac is a probability contribution for each interval between histogram
935  * values. We have nhist - 1 intervals, so contribution of each one will
936  * be 1 / (nhist - 1).
937  */
938  frac = 1.0f / ((float) (nhist - 1));
939 
940  for (k = 0; k <= n; k++)
941  {
942  int count = 0;
943 
944  /*
945  * Count the histogram boundaries equal to k. (Although the histogram
946  * should theoretically contain only exact integers, entries are
947  * floats so there could be roundoff error in large values. Treat any
948  * fractional value as equal to the next larger k.)
949  */
950  while (i < nhist && hist[i] <= k)
951  {
952  count++;
953  i++;
954  }
955 
956  if (count > 0)
957  {
958  /* k is an exact bound for at least one histogram box. */
959  float val;
960 
961  /* Find length between current histogram value and the next one */
962  if (i < nhist)
963  next_interval = hist[i] - hist[i - 1];
964  else
965  next_interval = 0;
966 
967  /*
968  * count - 1 histogram boxes contain k exclusively. They
969  * contribute a total of (count - 1) * frac probability. Also
970  * factor in the partial histogram boxes on either side.
971  */
972  val = (float) (count - 1);
973  if (next_interval > 0)
974  val += 0.5f / next_interval;
975  if (prev_interval > 0)
976  val += 0.5f / prev_interval;
977  hist_part[k] = frac * val;
978 
979  prev_interval = next_interval;
980  }
981  else
982  {
983  /* k does not appear as an exact histogram bound. */
984  if (prev_interval > 0)
985  hist_part[k] = frac / prev_interval;
986  else
987  hist_part[k] = 0.0f;
988  }
989  }
990 
991  return hist_part;
992 }
void * palloc(Size size)
Definition: mcxt.c:949
int i
long val
Definition: informix.c:684

◆ element_compare()

static int element_compare ( const void *  key1,
const void *  key2,
void *  arg 
)
static

Definition at line 1166 of file array_selfuncs.c.

References TypeCacheEntry::cmp_proc_finfo, DatumGetInt32, FunctionCall2Coll(), and TypeCacheEntry::typcollation.

Referenced by find_next_mcelem(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), and mcelem_array_selec().

1167 {
1168  Datum d1 = *((const Datum *) key1);
1169  Datum d2 = *((const Datum *) key2);
1170  TypeCacheEntry *typentry = (TypeCacheEntry *) arg;
1171  FmgrInfo *cmpfunc = &typentry->cmp_proc_finfo;
1172  Datum c;
1173 
1174  c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2);
1175  return DatumGetInt32(c);
1176 }
Definition: fmgr.h:56
#define DatumGetInt32(X)
Definition: postgres.h:472
Oid typcollation
Definition: typcache.h:44
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
FmgrInfo cmp_proc_finfo
Definition: typcache.h:73
char * c
uintptr_t Datum
Definition: postgres.h:367
void * arg

◆ find_next_mcelem()

static bool find_next_mcelem ( Datum mcelem,
int  nmcelem,
Datum  value,
int *  index,
TypeCacheEntry typentry 
)
static

Definition at line 1131 of file array_selfuncs.c.

References element_compare(), and i.

Referenced by mcelem_array_contain_overlap_selec().

1133 {
1134  int l = *index,
1135  r = nmcelem - 1,
1136  i,
1137  res;
1138 
1139  while (l <= r)
1140  {
1141  i = (l + r) / 2;
1142  res = element_compare(&mcelem[i], &value, typentry);
1143  if (res == 0)
1144  {
1145  *index = i;
1146  return true;
1147  }
1148  else if (res < 0)
1149  l = i + 1;
1150  else
1151  r = i - 1;
1152  }
1153  *index = l;
1154  return false;
1155 }
static int element_compare(const void *key1, const void *key2, void *arg)
static struct @145 value
Definition: type.h:89
int i

◆ float_compare_desc()

static int float_compare_desc ( const void *  key1,
const void *  key2 
)
static

Definition at line 1182 of file array_selfuncs.c.

Referenced by mcelem_array_contained_selec().

1183 {
1184  float d1 = *((const float *) key1);
1185  float d2 = *((const float *) key2);
1186 
1187  if (d1 > d2)
1188  return -1;
1189  else if (d1 < d2)
1190  return 1;
1191  else
1192  return 0;
1193 }

◆ floor_log2()

static int floor_log2 ( uint32  n)
static

Definition at line 1090 of file array_selfuncs.c.

Referenced by mcelem_array_contain_overlap_selec().

1091 {
1092  int logval = 0;
1093 
1094  if (n == 0)
1095  return -1;
1096  if (n >= (1 << 16))
1097  {
1098  n >>= 16;
1099  logval += 16;
1100  }
1101  if (n >= (1 << 8))
1102  {
1103  n >>= 8;
1104  logval += 8;
1105  }
1106  if (n >= (1 << 4))
1107  {
1108  n >>= 4;
1109  logval += 4;
1110  }
1111  if (n >= (1 << 2))
1112  {
1113  n >>= 2;
1114  logval += 2;
1115  }
1116  if (n >= (1 << 1))
1117  {
1118  logval += 1;
1119  }
1120  return logval;
1121 }

◆ mcelem_array_contain_overlap_selec()

static Selectivity mcelem_array_contain_overlap_selec ( Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
Datum array_data,
int  nitems,
Oid  operator,
TypeCacheEntry typentry 
)
static

Definition at line 522 of file array_selfuncs.c.

References CLAMP_PROBABILITY, cmp(), DEFAULT_CONTAIN_SEL, element_compare(), find_next_mcelem(), floor_log2(), i, and Min.

Referenced by mcelem_array_selec(), and scalararraysel_containment().

526 {
527  Selectivity selec,
528  elem_selec;
529  int mcelem_index,
530  i;
531  bool use_bsearch;
532  float4 minfreq;
533 
534  /*
535  * There should be three more Numbers than Values, because the last three
536  * cells should hold minimal and maximal frequency among the non-null
537  * elements, and then the frequency of null elements. Ignore the Numbers
538  * if not right.
539  */
540  if (nnumbers != nmcelem + 3)
541  {
542  numbers = NULL;
543  nnumbers = 0;
544  }
545 
546  if (numbers)
547  {
548  /* Grab the lowest observed frequency */
549  minfreq = numbers[nmcelem];
550  }
551  else
552  {
553  /* Without statistics make some default assumptions */
554  minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL;
555  }
556 
557  /* Decide whether it is faster to use binary search or not. */
558  if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems)
559  use_bsearch = true;
560  else
561  use_bsearch = false;
562 
563  if (operator == OID_ARRAY_CONTAINS_OP)
564  {
565  /*
566  * Initial selectivity for "column @> const" query is 1.0, and it will
567  * be decreased with each element of constant array.
568  */
569  selec = 1.0;
570  }
571  else
572  {
573  /*
574  * Initial selectivity for "column && const" query is 0.0, and it will
575  * be increased with each element of constant array.
576  */
577  selec = 0.0;
578  }
579 
580  /* Scan mcelem and array in parallel. */
581  mcelem_index = 0;
582  for (i = 0; i < nitems; i++)
583  {
584  bool match = false;
585 
586  /* Ignore any duplicates in the array data. */
587  if (i > 0 &&
588  element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
589  continue;
590 
591  /* Find the smallest MCELEM >= this array item. */
592  if (use_bsearch)
593  {
594  match = find_next_mcelem(mcelem, nmcelem, array_data[i],
595  &mcelem_index, typentry);
596  }
597  else
598  {
599  while (mcelem_index < nmcelem)
600  {
601  int cmp = element_compare(&mcelem[mcelem_index],
602  &array_data[i],
603  typentry);
604 
605  if (cmp < 0)
606  mcelem_index++;
607  else
608  {
609  if (cmp == 0)
610  match = true; /* mcelem is found */
611  break;
612  }
613  }
614  }
615 
616  if (match && numbers)
617  {
618  /* MCELEM matches the array item; use its frequency. */
619  elem_selec = numbers[mcelem_index];
620  mcelem_index++;
621  }
622  else
623  {
624  /*
625  * The element is not in MCELEM. Punt, but assume that the
626  * selectivity cannot be more than minfreq / 2.
627  */
628  elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
629  }
630 
631  /*
632  * Update overall selectivity using the current element's selectivity
633  * and an assumption of element occurrence independence.
634  */
635  if (operator == OID_ARRAY_CONTAINS_OP)
636  selec *= elem_selec;
637  else
638  selec = selec + elem_selec - selec * elem_selec;
639 
640  /* Clamp intermediate results to stay sane despite roundoff error */
641  CLAMP_PROBABILITY(selec);
642  }
643 
644  return selec;
645 }
static int element_compare(const void *key1, const void *key2, void *arg)
#define DEFAULT_CONTAIN_SEL
#define Min(x, y)
Definition: c.h:904
double Selectivity
Definition: nodes.h:658
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
unsigned int uint32
Definition: c.h:358
static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index, TypeCacheEntry *typentry)
float float4
Definition: c.h:490
static int floor_log2(uint32 n)
int i
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742

◆ mcelem_array_contained_selec()

static Selectivity mcelem_array_contained_selec ( Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
Datum array_data,
int  nitems,
float4 hist,
int  nhist,
Oid  operator,
TypeCacheEntry typentry 
)
static

Definition at line 697 of file array_selfuncs.c.

References calc_distr(), calc_hist(), CLAMP_PROBABILITY, cmp(), DEFAULT_CONTAIN_SEL, EFFORT, element_compare(), float_compare_desc(), i, Min, palloc(), pfree(), and qsort.

Referenced by mcelem_array_selec(), and scalararraysel_containment().

702 {
703  int mcelem_index,
704  i,
705  unique_nitems = 0;
706  float selec,
707  minfreq,
708  nullelem_freq;
709  float *dist,
710  *mcelem_dist,
711  *hist_part;
712  float avg_count,
713  mult,
714  rest;
715  float *elem_selec;
716 
717  /*
718  * There should be three more Numbers than Values in the MCELEM slot,
719  * because the last three cells should hold minimal and maximal frequency
720  * among the non-null elements, and then the frequency of null elements.
721  * Punt if not right, because we can't do much without the element freqs.
722  */
723  if (numbers == NULL || nnumbers != nmcelem + 3)
724  return DEFAULT_CONTAIN_SEL;
725 
726  /* Can't do much without a count histogram, either */
727  if (hist == NULL || nhist < 3)
728  return DEFAULT_CONTAIN_SEL;
729 
730  /*
731  * Grab some of the summary statistics that compute_array_stats() stores:
732  * lowest frequency, frequency of null elements, and average distinct
733  * element count.
734  */
735  minfreq = numbers[nmcelem];
736  nullelem_freq = numbers[nmcelem + 2];
737  avg_count = hist[nhist - 1];
738 
739  /*
740  * "rest" will be the sum of the frequencies of all elements not
741  * represented in MCELEM. The average distinct element count is the sum
742  * of the frequencies of *all* elements. Begin with that; we will proceed
743  * to subtract the MCELEM frequencies.
744  */
745  rest = avg_count;
746 
747  /*
748  * mult is a multiplier representing estimate of probability that each
749  * mcelem that is not present in constant doesn't occur.
750  */
751  mult = 1.0f;
752 
753  /*
754  * elem_selec is array of estimated frequencies for elements in the
755  * constant.
756  */
757  elem_selec = (float *) palloc(sizeof(float) * nitems);
758 
759  /* Scan mcelem and array in parallel. */
760  mcelem_index = 0;
761  for (i = 0; i < nitems; i++)
762  {
763  bool match = false;
764 
765  /* Ignore any duplicates in the array data. */
766  if (i > 0 &&
767  element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
768  continue;
769 
770  /*
771  * Iterate over MCELEM until we find an entry greater than or equal to
772  * this element of the constant. Update "rest" and "mult" for mcelem
773  * entries skipped over.
774  */
775  while (mcelem_index < nmcelem)
776  {
777  int cmp = element_compare(&mcelem[mcelem_index],
778  &array_data[i],
779  typentry);
780 
781  if (cmp < 0)
782  {
783  mult *= (1.0f - numbers[mcelem_index]);
784  rest -= numbers[mcelem_index];
785  mcelem_index++;
786  }
787  else
788  {
789  if (cmp == 0)
790  match = true; /* mcelem is found */
791  break;
792  }
793  }
794 
795  if (match)
796  {
797  /* MCELEM matches the array item. */
798  elem_selec[unique_nitems] = numbers[mcelem_index];
799  /* "rest" is decremented for all mcelems, matched or not */
800  rest -= numbers[mcelem_index];
801  mcelem_index++;
802  }
803  else
804  {
805  /*
806  * The element is not in MCELEM. Punt, but assume that the
807  * selectivity cannot be more than minfreq / 2.
808  */
809  elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
810  minfreq / 2);
811  }
812 
813  unique_nitems++;
814  }
815 
816  /*
817  * If we handled all constant elements without exhausting the MCELEM
818  * array, finish walking it to complete calculation of "rest" and "mult".
819  */
820  while (mcelem_index < nmcelem)
821  {
822  mult *= (1.0f - numbers[mcelem_index]);
823  rest -= numbers[mcelem_index];
824  mcelem_index++;
825  }
826 
827  /*
828  * The presence of many distinct rare elements materially decreases
829  * selectivity. Use the Poisson distribution to estimate the probability
830  * of a column value having zero occurrences of such elements. See above
831  * for the definition of "rest".
832  */
833  mult *= exp(-rest);
834 
835  /*----------
836  * Using the distinct element count histogram requires
837  * O(unique_nitems * (nmcelem + unique_nitems))
838  * operations. Beyond a certain computational cost threshold, it's
839  * reasonable to sacrifice accuracy for decreased planning time. We limit
840  * the number of operations to EFFORT * nmcelem; since nmcelem is limited
841  * by the column's statistics target, the work done is user-controllable.
842  *
843  * If the number of operations would be too large, we can reduce it
844  * without losing all accuracy by reducing unique_nitems and considering
845  * only the most-common elements of the constant array. To make the
846  * results exactly match what we would have gotten with only those
847  * elements to start with, we'd have to remove any discarded elements'
848  * frequencies from "mult", but since this is only an approximation
849  * anyway, we don't bother with that. Therefore it's sufficient to qsort
850  * elem_selec[] and take the largest elements. (They will no longer match
851  * up with the elements of array_data[], but we don't care.)
852  *----------
853  */
854 #define EFFORT 100
855 
856  if ((nmcelem + unique_nitems) > 0 &&
857  unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
858  {
859  /*
860  * Use the quadratic formula to solve for largest allowable N. We
861  * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
862  */
863  double b = (double) nmcelem;
864  int n;
865 
866  n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
867 
868  /* Sort, then take just the first n elements */
869  qsort(elem_selec, unique_nitems, sizeof(float),
871  unique_nitems = n;
872  }
873 
874  /*
875  * Calculate probabilities of each distinct element count for both mcelems
876  * and constant elements. At this point, assume independent element
877  * occurrence.
878  */
879  dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
880  mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
881 
882  /* ignore hist[nhist-1], which is the average not a histogram member */
883  hist_part = calc_hist(hist, nhist - 1, unique_nitems);
884 
885  selec = 0.0f;
886  for (i = 0; i <= unique_nitems; i++)
887  {
888  /*
889  * mult * dist[i] / mcelem_dist[i] gives us probability of qual
890  * matching from assumption of independent element occurrence with the
891  * condition that distinct element count = i.
892  */
893  if (mcelem_dist[i] > 0)
894  selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
895  }
896 
897  pfree(dist);
898  pfree(mcelem_dist);
899  pfree(hist_part);
900  pfree(elem_selec);
901 
902  /* Take into account occurrence of NULL element. */
903  selec *= (1.0f - nullelem_freq);
904 
905  CLAMP_PROBABILITY(selec);
906 
907  return selec;
908 }
static int element_compare(const void *key1, const void *key2, void *arg)
#define DEFAULT_CONTAIN_SEL
#define Min(x, y)
Definition: c.h:904
static float * calc_hist(const float4 *hist, int nhist, int n)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
void pfree(void *pointer)
Definition: mcxt.c:1056
static float * calc_distr(const float *p, int n, int m, float rest)
#define EFFORT
void * palloc(Size size)
Definition: mcxt.c:949
int i
#define qsort(a, b, c, d)
Definition: port.h:492
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
static int float_compare_desc(const void *key1, const void *key2)

◆ mcelem_array_selec()

static Selectivity mcelem_array_selec ( ArrayType array,
TypeCacheEntry typentry,
Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
float4 hist,
int  nhist,
Oid  operator 
)
static

Definition at line 429 of file array_selfuncs.c.

References deconstruct_array(), element_compare(), elog, ERROR, i, mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), pfree(), qsort_arg(), TypeCacheEntry::typalign, TypeCacheEntry::typbyval, TypeCacheEntry::type_id, and TypeCacheEntry::typlen.

Referenced by calc_arraycontsel().

434 {
435  Selectivity selec;
436  int num_elems;
437  Datum *elem_values;
438  bool *elem_nulls;
439  bool null_present;
440  int nonnull_nitems;
441  int i;
442 
443  /*
444  * Prepare constant array data for sorting. Sorting lets us find unique
445  * elements and efficiently merge with the MCELEM array.
446  */
447  deconstruct_array(array,
448  typentry->type_id,
449  typentry->typlen,
450  typentry->typbyval,
451  typentry->typalign,
452  &elem_values, &elem_nulls, &num_elems);
453 
454  /* Collapse out any null elements */
455  nonnull_nitems = 0;
456  null_present = false;
457  for (i = 0; i < num_elems; i++)
458  {
459  if (elem_nulls[i])
460  null_present = true;
461  else
462  elem_values[nonnull_nitems++] = elem_values[i];
463  }
464 
465  /*
466  * Query "column @> '{anything, null}'" matches nothing. For the other
467  * two operators, presence of a null in the constant can be ignored.
468  */
469  if (null_present && operator == OID_ARRAY_CONTAINS_OP)
470  {
471  pfree(elem_values);
472  pfree(elem_nulls);
473  return (Selectivity) 0.0;
474  }
475 
476  /* Sort extracted elements using their default comparison function. */
477  qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
478  element_compare, typentry);
479 
480  /* Separate cases according to operator */
481  if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
482  selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
483  numbers, nnumbers,
484  elem_values, nonnull_nitems,
485  operator, typentry);
486  else if (operator == OID_ARRAY_CONTAINED_OP)
487  selec = mcelem_array_contained_selec(mcelem, nmcelem,
488  numbers, nnumbers,
489  elem_values, nonnull_nitems,
490  hist, nhist,
491  operator, typentry);
492  else
493  {
494  elog(ERROR, "arraycontsel called for unrecognized operator %u",
495  operator);
496  selec = 0.0; /* keep compiler quiet */
497  }
498 
499  pfree(elem_values);
500  pfree(elem_nulls);
501  return selec;
502 }
static int element_compare(const void *key1, const void *key2, void *arg)
double Selectivity
Definition: nodes.h:658
int16 typlen
Definition: typcache.h:37
bool typbyval
Definition: typcache.h:38
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
void pfree(void *pointer)
Definition: mcxt.c:1056
#define ERROR
Definition: elog.h:43
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
uintptr_t Datum
Definition: postgres.h:367
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3461
char typalign
Definition: typcache.h:39
#define elog(elevel,...)
Definition: elog.h:226
int i

◆ scalararraysel_containment()

Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 82 of file array_selfuncs.c.

References ATTSTATSSLOT_NUMBERS, ATTSTATSSLOT_VALUES, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), AttStatsSlot::nnumbers, AttStatsSlot::numbers, AttStatsSlot::nvalues, OidIsValid, VariableStatData::rel, ReleaseVariableStats, statistic_proc_security_check(), VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and AttStatsSlot::values.

Referenced by scalararraysel().

86 {
87  Selectivity selec;
88  VariableStatData vardata;
89  Datum constval;
90  TypeCacheEntry *typentry;
91  FmgrInfo *cmpfunc;
92 
93  /*
94  * rightop must be a variable, else punt.
95  */
96  examine_variable(root, rightop, varRelid, &vardata);
97  if (!vardata.rel)
98  {
99  ReleaseVariableStats(vardata);
100  return -1.0;
101  }
102 
103  /*
104  * leftop must be a constant, else punt.
105  */
106  if (!IsA(leftop, Const))
107  {
108  ReleaseVariableStats(vardata);
109  return -1.0;
110  }
111  if (((Const *) leftop)->constisnull)
112  {
113  /* qual can't succeed if null on left */
114  ReleaseVariableStats(vardata);
115  return (Selectivity) 0.0;
116  }
117  constval = ((Const *) leftop)->constvalue;
118 
119  /* Get element type's default comparison function */
120  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
121  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
122  {
123  ReleaseVariableStats(vardata);
124  return -1.0;
125  }
126  cmpfunc = &typentry->cmp_proc_finfo;
127 
128  /*
129  * If the operator is <>, swap ANY/ALL, then invert the result later.
130  */
131  if (!isEquality)
132  useOr = !useOr;
133 
134  /* Get array element stats for var, if available */
135  if (HeapTupleIsValid(vardata.statsTuple) &&
136  statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
137  {
138  Form_pg_statistic stats;
139  AttStatsSlot sslot;
140  AttStatsSlot hslot;
141 
142  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
143 
144  /* MCELEM will be an array of same type as element */
145  if (get_attstatsslot(&sslot, vardata.statsTuple,
146  STATISTIC_KIND_MCELEM, InvalidOid,
148  {
149  /* For ALL case, also get histogram of distinct-element counts */
150  if (useOr ||
151  !get_attstatsslot(&hslot, vardata.statsTuple,
152  STATISTIC_KIND_DECHIST, InvalidOid,
154  memset(&hslot, 0, sizeof(hslot));
155 
156  /*
157  * For = ANY, estimate as var @> ARRAY[const].
158  *
159  * For = ALL, estimate as var <@ ARRAY[const].
160  */
161  if (useOr)
163  sslot.nvalues,
164  sslot.numbers,
165  sslot.nnumbers,
166  &constval, 1,
167  OID_ARRAY_CONTAINS_OP,
168  typentry);
169  else
170  selec = mcelem_array_contained_selec(sslot.values,
171  sslot.nvalues,
172  sslot.numbers,
173  sslot.nnumbers,
174  &constval, 1,
175  hslot.numbers,
176  hslot.nnumbers,
177  OID_ARRAY_CONTAINED_OP,
178  typentry);
179 
180  free_attstatsslot(&hslot);
181  free_attstatsslot(&sslot);
182  }
183  else
184  {
185  /* No most-common-elements info, so do without */
186  if (useOr)
187  selec = mcelem_array_contain_overlap_selec(NULL, 0,
188  NULL, 0,
189  &constval, 1,
190  OID_ARRAY_CONTAINS_OP,
191  typentry);
192  else
193  selec = mcelem_array_contained_selec(NULL, 0,
194  NULL, 0,
195  &constval, 1,
196  NULL, 0,
197  OID_ARRAY_CONTAINED_OP,
198  typentry);
199  }
200 
201  /*
202  * MCE stats count only non-null rows, so adjust for null rows.
203  */
204  selec *= (1.0 - stats->stanullfrac);
205  }
206  else
207  {
208  /* No stats at all, so do without */
209  if (useOr)
210  selec = mcelem_array_contain_overlap_selec(NULL, 0,
211  NULL, 0,
212  &constval, 1,
213  OID_ARRAY_CONTAINS_OP,
214  typentry);
215  else
216  selec = mcelem_array_contained_selec(NULL, 0,
217  NULL, 0,
218  &constval, 1,
219  NULL, 0,
220  OID_ARRAY_CONTAINED_OP,
221  typentry);
222  /* we assume no nulls here, so no stanullfrac correction */
223  }
224 
225  ReleaseVariableStats(vardata);
226 
227  /*
228  * If the operator is <>, invert the results.
229  */
230  if (!isEquality)
231  selec = 1.0 - selec;
232 
233  CLAMP_PROBABILITY(selec);
234 
235  return selec;
236 }
Definition: fmgr.h:56
#define IsA(nodeptr, _type_)
Definition: nodes.h:576
#define GETSTRUCT(TUP)
Definition: htup_details.h:655
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:39
HeapTuple statsTuple
Definition: selfuncs.h:71
int nnumbers
Definition: lsyscache.h:54
bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
Definition: selfuncs.c:4812
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:658
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:134
#define OidIsValid(objectId)
Definition: c.h:638
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, TypeCacheEntry *typentry)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
FmgrInfo cmp_proc_finfo
Definition: typcache.h:73
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:40
float4 * numbers
Definition: lsyscache.h:53
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, TypeCacheEntry *typentry)
uintptr_t Datum
Definition: postgres.h:367
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:322
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4408
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:2942
Datum * values
Definition: lsyscache.h:50
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:81
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:134
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3072