PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
array_selfuncs.c File Reference
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "optimizer/clauses.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/typcache.h"
Include dependency graph for array_selfuncs.c:

Go to the source code of this file.

Macros

#define DEFAULT_CONTAIN_SEL   0.005
 
#define DEFAULT_OVERLAP_SEL   0.01
 
#define DEFAULT_SEL(operator)
 
#define EFFORT   100
 

Functions

static Selectivity calc_arraycontsel (VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator)
 
static Selectivity mcelem_array_selec (ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator, FmgrInfo *cmpfunc)
 
static Selectivity mcelem_array_contain_overlap_selec (Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, FmgrInfo *cmpfunc)
 
static Selectivity mcelem_array_contained_selec (Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, FmgrInfo *cmpfunc)
 
static float * calc_hist (const float4 *hist, int nhist, int n)
 
static float * calc_distr (const float *p, int n, int m, float rest)
 
static int floor_log2 (uint32 n)
 
static bool find_next_mcelem (Datum *mcelem, int nmcelem, Datum value, int *index, FmgrInfo *cmpfunc)
 
static int element_compare (const void *key1, const void *key2, void *arg)
 
static int float_compare_desc (const void *key1, const void *key2)
 
Selectivity scalararraysel_containment (PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid)
 
Datum arraycontsel (PG_FUNCTION_ARGS)
 
Datum arraycontjoinsel (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

#define DEFAULT_CONTAIN_SEL   0.005
#define DEFAULT_OVERLAP_SEL   0.01

Definition at line 35 of file array_selfuncs.c.

#define DEFAULT_SEL (   operator)
Value:
((operator) == OID_ARRAY_OVERLAP_OP ? \
#define DEFAULT_CONTAIN_SEL
#define DEFAULT_OVERLAP_SEL
#define OID_ARRAY_OVERLAP_OP
Definition: pg_operator.h:1545

Definition at line 38 of file array_selfuncs.c.

Referenced by arraycontjoinsel(), arraycontsel(), and calc_arraycontsel().

#define EFFORT   100

Function Documentation

Datum arraycontjoinsel ( PG_FUNCTION_ARGS  )

Definition at line 331 of file array_selfuncs.c.

References DEFAULT_SEL, PG_GETARG_OID, and PG_RETURN_FLOAT8.

Referenced by _int_contained_joinsel(), _int_contains_joinsel(), and _int_overlap_joinsel().

332 {
333  /* For the moment this is just a stub */
334  Oid operator = PG_GETARG_OID(1);
335 
336  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
337 }
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:310
unsigned int Oid
Definition: postgres_ext.h:31
#define PG_GETARG_OID(n)
Definition: fmgr.h:231
#define DEFAULT_SEL(operator)
Datum arraycontsel ( PG_FUNCTION_ARGS  )

Definition at line 251 of file array_selfuncs.c.

References generate_unaccent_rules::args, calc_arraycontsel(), CLAMP_PROBABILITY, DEFAULT_SEL, get_base_element_type(), get_restriction_variable(), InvalidOid, IsA, OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, PG_GETARG_INT32, PG_GETARG_OID, PG_GETARG_POINTER, PG_RETURN_FLOAT8, ReleaseVariableStats, and VariableStatData::vartype.

Referenced by _int_contained_sel(), _int_contains_sel(), and _int_overlap_sel().

252 {
254  Oid operator = PG_GETARG_OID(1);
255  List *args = (List *) PG_GETARG_POINTER(2);
256  int varRelid = PG_GETARG_INT32(3);
257  VariableStatData vardata;
258  Node *other;
259  bool varonleft;
260  Selectivity selec;
261  Oid element_typeid;
262 
263  /*
264  * If expression is not (variable op something) or (something op
265  * variable), then punt and return a default estimate.
266  */
267  if (!get_restriction_variable(root, args, varRelid,
268  &vardata, &other, &varonleft))
269  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
270 
271  /*
272  * Can't do anything useful if the something is not a constant, either.
273  */
274  if (!IsA(other, Const))
275  {
276  ReleaseVariableStats(vardata);
277  PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
278  }
279 
280  /*
281  * The "&&", "@>" and "<@" operators are strict, so we can cope with a
282  * NULL constant right away.
283  */
284  if (((Const *) other)->constisnull)
285  {
286  ReleaseVariableStats(vardata);
287  PG_RETURN_FLOAT8(0.0);
288  }
289 
290  /*
291  * If var is on the right, commute the operator, so that we can assume the
292  * var is on the left in what follows.
293  */
294  if (!varonleft)
295  {
296  if (operator == OID_ARRAY_CONTAINS_OP)
297  operator = OID_ARRAY_CONTAINED_OP;
298  else if (operator == OID_ARRAY_CONTAINED_OP)
299  operator = OID_ARRAY_CONTAINS_OP;
300  }
301 
302  /*
303  * OK, there's a Var and a Const we're dealing with here. We need the
304  * Const to be an array with same element type as column, else we can't do
305  * anything useful. (Such cases will likely fail at runtime, but here
306  * we'd rather just return a default estimate.)
307  */
308  element_typeid = get_base_element_type(((Const *) other)->consttype);
309  if (element_typeid != InvalidOid &&
310  element_typeid == get_base_element_type(vardata.vartype))
311  {
312  selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
313  element_typeid, operator);
314  }
315  else
316  {
317  selec = DEFAULT_SEL(operator);
318  }
319 
320  ReleaseVariableStats(vardata);
321 
322  CLAMP_PROBABILITY(selec);
323 
324  PG_RETURN_FLOAT8((float8) selec);
325 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:225
#define IsA(nodeptr, _type_)
Definition: nodes.h:559
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4290
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:310
Definition: nodes.h:508
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:232
double Selectivity
Definition: nodes.h:631
unsigned int Oid
Definition: postgres_ext.h:31
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define OID_ARRAY_CONTAINS_OP
Definition: pg_operator.h:1548
double float8
Definition: c.h:377
#define PG_GETARG_OID(n)
Definition: fmgr.h:231
#define OID_ARRAY_CONTAINED_OP
Definition: pg_operator.h:1551
#define InvalidOid
Definition: postgres_ext.h:36
static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator)
Oid get_base_element_type(Oid typid)
Definition: lsyscache.c:2525
#define DEFAULT_SEL(operator)
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
Definition: pg_list.h:45
static Selectivity calc_arraycontsel ( VariableStatData vardata,
Datum  constval,
Oid  elemtype,
Oid  operator 
)
static

Definition at line 347 of file array_selfuncs.c.

References VariableStatData::atttypmod, TypeCacheEntry::cmp_proc_finfo, DatumGetArrayTypeP, DEFAULT_SEL, FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, lookup_type_cache(), mcelem_array_selec(), NULL, OID_ARRAY_CONTAINED_OP, OidIsValid, pfree(), PointerGetDatum, STATISTIC_KIND_DECHIST, STATISTIC_KIND_MCELEM, VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and values.

Referenced by arraycontsel().

349 {
350  Selectivity selec;
351  TypeCacheEntry *typentry;
352  FmgrInfo *cmpfunc;
353  ArrayType *array;
354 
355  /* Get element type's default comparison function */
356  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
357  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
358  return DEFAULT_SEL(operator);
359  cmpfunc = &typentry->cmp_proc_finfo;
360 
361  /*
362  * The caller made sure the const is an array with same element type, so
363  * get it now
364  */
365  array = DatumGetArrayTypeP(constval);
366 
367  if (HeapTupleIsValid(vardata->statsTuple))
368  {
369  Form_pg_statistic stats;
370  Datum *values;
371  int nvalues;
372  float4 *numbers;
373  int nnumbers;
374  float4 *hist;
375  int nhist;
376 
377  stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
378 
379  /* MCELEM will be an array of same type as column */
380  if (get_attstatsslot(vardata->statsTuple,
381  elemtype, vardata->atttypmod,
383  NULL,
384  &values, &nvalues,
385  &numbers, &nnumbers))
386  {
387  /*
388  * For "array <@ const" case we also need histogram of distinct
389  * element counts.
390  */
391  if (operator != OID_ARRAY_CONTAINED_OP ||
392  !get_attstatsslot(vardata->statsTuple,
393  elemtype, vardata->atttypmod,
395  NULL,
396  NULL, NULL,
397  &hist, &nhist))
398  {
399  hist = NULL;
400  nhist = 0;
401  }
402 
403  /* Use the most-common-elements slot for the array Var. */
404  selec = mcelem_array_selec(array, typentry,
405  values, nvalues,
406  numbers, nnumbers,
407  hist, nhist,
408  operator, cmpfunc);
409 
410  if (hist)
411  free_attstatsslot(elemtype, NULL, 0, hist, nhist);
412  free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers);
413  }
414  else
415  {
416  /* No most-common-elements info, so do without */
417  selec = mcelem_array_selec(array, typentry,
418  NULL, 0, NULL, 0, NULL, 0,
419  operator, cmpfunc);
420  }
421 
422  /*
423  * MCE stats count only non-null rows, so adjust for null rows.
424  */
425  selec *= (1.0 - stats->stanullfrac);
426  }
427  else
428  {
429  /* No stats at all, so do without */
430  selec = mcelem_array_selec(array, typentry,
431  NULL, 0, NULL, 0, NULL, 0,
432  operator, cmpfunc);
433  /* we assume no nulls here, so no stanullfrac correction */
434  }
435 
436  /* If constant was toasted, release the copy we made */
437  if (PointerGetDatum(array) != constval)
438  pfree(array);
439 
440  return selec;
441 }
Definition: fmgr.h:53
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
HeapTuple statsTuple
Definition: selfuncs.h:71
#define PointerGetDatum(X)
Definition: postgres.h:564
double Selectivity
Definition: nodes.h:631
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2854
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
#define OidIsValid(objectId)
Definition: c.h:533
int32 atttypmod
Definition: selfuncs.h:76
FmgrInfo cmp_proc_finfo
Definition: typcache.h:68
void pfree(void *pointer)
Definition: mcxt.c:992
#define STATISTIC_KIND_DECHIST
Definition: pg_statistic.h:270
static Selectivity mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry, Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, float4 *hist, int nhist, Oid operator, FmgrInfo *cmpfunc)
#define OID_ARRAY_CONTAINED_OP
Definition: pg_operator.h:1551
float float4
Definition: c.h:376
uintptr_t Datum
Definition: postgres.h:374
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:191
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:56
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:226
static Datum values[MAXATTR]
Definition: bootstrap.c:162
#define DEFAULT_SEL(operator)
#define STATISTIC_KIND_MCELEM
Definition: pg_statistic.h:257
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:116
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:2978
#define DatumGetArrayTypeP(X)
Definition: array.h:242
static float * calc_distr ( const float *  p,
int  n,
int  m,
float  rest 
)
static

Definition at line 1033 of file array_selfuncs.c.

References DEFAULT_CONTAIN_SEL, i, palloc(), pfree(), and val.

Referenced by mcelem_array_contained_selec().

1034 {
1035  float *row,
1036  *prev_row,
1037  *tmp;
1038  int i,
1039  j;
1040 
1041  /*
1042  * Since we return only the last row of the matrix and need only the
1043  * current and previous row for calculations, allocate two rows.
1044  */
1045  row = (float *) palloc((m + 1) * sizeof(float));
1046  prev_row = (float *) palloc((m + 1) * sizeof(float));
1047 
1048  /* M[0,0] = 1 */
1049  row[0] = 1.0f;
1050  for (i = 1; i <= n; i++)
1051  {
1052  float t = p[i - 1];
1053 
1054  /* Swap rows */
1055  tmp = row;
1056  row = prev_row;
1057  prev_row = tmp;
1058 
1059  /* Calculate next row */
1060  for (j = 0; j <= i && j <= m; j++)
1061  {
1062  float val = 0.0f;
1063 
1064  if (j < i)
1065  val += prev_row[j] * (1.0f - t);
1066  if (j > 0)
1067  val += prev_row[j - 1] * t;
1068  row[j] = val;
1069  }
1070  }
1071 
1072  /*
1073  * The presence of many distinct rare (not in "p") elements materially
1074  * decreases selectivity. Model their collective occurrence with the
1075  * Poisson distribution.
1076  */
1077  if (rest > DEFAULT_CONTAIN_SEL)
1078  {
1079  float t;
1080 
1081  /* Swap rows */
1082  tmp = row;
1083  row = prev_row;
1084  prev_row = tmp;
1085 
1086  for (i = 0; i <= m; i++)
1087  row[i] = 0.0f;
1088 
1089  /* Value of Poisson distribution for 0 occurrences */
1090  t = exp(-rest);
1091 
1092  /*
1093  * Calculate convolution of previously computed distribution and the
1094  * Poisson distribution.
1095  */
1096  for (i = 0; i <= m; i++)
1097  {
1098  for (j = 0; j <= m - i; j++)
1099  row[j + i] += prev_row[j] * t;
1100 
1101  /* Get Poisson distribution value for (i + 1) occurrences */
1102  t *= rest / (float) (i + 1);
1103  }
1104  }
1105 
1106  pfree(prev_row);
1107  return row;
1108 }
#define DEFAULT_CONTAIN_SEL
void pfree(void *pointer)
Definition: mcxt.c:992
void * palloc(Size size)
Definition: mcxt.c:891
int i
long val
Definition: informix.c:689
static float * calc_hist ( const float4 hist,
int  nhist,
int  n 
)
static

Definition at line 944 of file array_selfuncs.c.

References i, palloc(), and val.

Referenced by mcelem_array_contained_selec().

945 {
946  float *hist_part;
947  int k,
948  i = 0;
949  float prev_interval = 0,
950  next_interval;
951  float frac;
952 
953  hist_part = (float *) palloc((n + 1) * sizeof(float));
954 
955  /*
956  * frac is a probability contribution for each interval between histogram
957  * values. We have nhist - 1 intervals, so contribution of each one will
958  * be 1 / (nhist - 1).
959  */
960  frac = 1.0f / ((float) (nhist - 1));
961 
962  for (k = 0; k <= n; k++)
963  {
964  int count = 0;
965 
966  /*
967  * Count the histogram boundaries equal to k. (Although the histogram
968  * should theoretically contain only exact integers, entries are
969  * floats so there could be roundoff error in large values. Treat any
970  * fractional value as equal to the next larger k.)
971  */
972  while (i < nhist && hist[i] <= k)
973  {
974  count++;
975  i++;
976  }
977 
978  if (count > 0)
979  {
980  /* k is an exact bound for at least one histogram box. */
981  float val;
982 
983  /* Find length between current histogram value and the next one */
984  if (i < nhist)
985  next_interval = hist[i] - hist[i - 1];
986  else
987  next_interval = 0;
988 
989  /*
990  * count - 1 histogram boxes contain k exclusively. They
991  * contribute a total of (count - 1) * frac probability. Also
992  * factor in the partial histogram boxes on either side.
993  */
994  val = (float) (count - 1);
995  if (next_interval > 0)
996  val += 0.5f / next_interval;
997  if (prev_interval > 0)
998  val += 0.5f / prev_interval;
999  hist_part[k] = frac * val;
1000 
1001  prev_interval = next_interval;
1002  }
1003  else
1004  {
1005  /* k does not appear as an exact histogram bound. */
1006  if (prev_interval > 0)
1007  hist_part[k] = frac / prev_interval;
1008  else
1009  hist_part[k] = 0.0f;
1010  }
1011  }
1012 
1013  return hist_part;
1014 }
void * palloc(Size size)
Definition: mcxt.c:891
int i
long val
Definition: informix.c:689
static int element_compare ( const void *  key1,
const void *  key2,
void *  arg 
)
static

Definition at line 1188 of file array_selfuncs.c.

References DatumGetInt32, DEFAULT_COLLATION_OID, and FunctionCall2Coll().

Referenced by find_next_mcelem(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), and mcelem_array_selec().

1189 {
1190  Datum d1 = *((const Datum *) key1);
1191  Datum d2 = *((const Datum *) key2);
1192  FmgrInfo *cmpfunc = (FmgrInfo *) arg;
1193  Datum c;
1194 
1195  c = FunctionCall2Coll(cmpfunc, DEFAULT_COLLATION_OID, d1, d2);
1196  return DatumGetInt32(c);
1197 }
Definition: fmgr.h:53
#define DatumGetInt32(X)
Definition: postgres.h:480
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1306
char * c
#define DEFAULT_COLLATION_OID
Definition: pg_collation.h:68
uintptr_t Datum
Definition: postgres.h:374
void * arg
static bool find_next_mcelem ( Datum mcelem,
int  nmcelem,
Datum  value,
int *  index,
FmgrInfo cmpfunc 
)
static

Definition at line 1153 of file array_selfuncs.c.

References element_compare(), and i.

Referenced by mcelem_array_contain_overlap_selec().

1155 {
1156  int l = *index,
1157  r = nmcelem - 1,
1158  i,
1159  res;
1160 
1161  while (l <= r)
1162  {
1163  i = (l + r) / 2;
1164  res = element_compare(&mcelem[i], &value, cmpfunc);
1165  if (res == 0)
1166  {
1167  *index = i;
1168  return true;
1169  }
1170  else if (res < 0)
1171  l = i + 1;
1172  else
1173  r = i - 1;
1174  }
1175  *index = l;
1176  return false;
1177 }
static struct @76 value
static int element_compare(const void *key1, const void *key2, void *arg)
Definition: type.h:90
int i
static int float_compare_desc ( const void *  key1,
const void *  key2 
)
static

Definition at line 1203 of file array_selfuncs.c.

Referenced by mcelem_array_contained_selec().

1204 {
1205  float d1 = *((const float *) key1);
1206  float d2 = *((const float *) key2);
1207 
1208  if (d1 > d2)
1209  return -1;
1210  else if (d1 < d2)
1211  return 1;
1212  else
1213  return 0;
1214 }
static int floor_log2 ( uint32  n)
static

Definition at line 1112 of file array_selfuncs.c.

Referenced by mcelem_array_contain_overlap_selec().

1113 {
1114  int logval = 0;
1115 
1116  if (n == 0)
1117  return -1;
1118  if (n >= (1 << 16))
1119  {
1120  n >>= 16;
1121  logval += 16;
1122  }
1123  if (n >= (1 << 8))
1124  {
1125  n >>= 8;
1126  logval += 8;
1127  }
1128  if (n >= (1 << 4))
1129  {
1130  n >>= 4;
1131  logval += 4;
1132  }
1133  if (n >= (1 << 2))
1134  {
1135  n >>= 2;
1136  logval += 2;
1137  }
1138  if (n >= (1 << 1))
1139  {
1140  logval += 1;
1141  }
1142  return logval;
1143 }
static Selectivity mcelem_array_contain_overlap_selec ( Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
Datum array_data,
int  nitems,
Oid  operator,
FmgrInfo cmpfunc 
)
static

Definition at line 544 of file array_selfuncs.c.

References CLAMP_PROBABILITY, cmp(), DEFAULT_CONTAIN_SEL, element_compare(), find_next_mcelem(), floor_log2(), i, Min, NULL, and OID_ARRAY_CONTAINS_OP.

Referenced by mcelem_array_selec(), and scalararraysel_containment().

548 {
549  Selectivity selec,
550  elem_selec;
551  int mcelem_index,
552  i;
553  bool use_bsearch;
554  float4 minfreq;
555 
556  /*
557  * There should be three more Numbers than Values, because the last three
558  * cells should hold minimal and maximal frequency among the non-null
559  * elements, and then the frequency of null elements. Ignore the Numbers
560  * if not right.
561  */
562  if (nnumbers != nmcelem + 3)
563  {
564  numbers = NULL;
565  nnumbers = 0;
566  }
567 
568  if (numbers)
569  {
570  /* Grab the lowest observed frequency */
571  minfreq = numbers[nmcelem];
572  }
573  else
574  {
575  /* Without statistics make some default assumptions */
576  minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL;
577  }
578 
579  /* Decide whether it is faster to use binary search or not. */
580  if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems)
581  use_bsearch = true;
582  else
583  use_bsearch = false;
584 
585  if (operator == OID_ARRAY_CONTAINS_OP)
586  {
587  /*
588  * Initial selectivity for "column @> const" query is 1.0, and it will
589  * be decreased with each element of constant array.
590  */
591  selec = 1.0;
592  }
593  else
594  {
595  /*
596  * Initial selectivity for "column && const" query is 0.0, and it will
597  * be increased with each element of constant array.
598  */
599  selec = 0.0;
600  }
601 
602  /* Scan mcelem and array in parallel. */
603  mcelem_index = 0;
604  for (i = 0; i < nitems; i++)
605  {
606  bool match = false;
607 
608  /* Ignore any duplicates in the array data. */
609  if (i > 0 &&
610  element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0)
611  continue;
612 
613  /* Find the smallest MCELEM >= this array item. */
614  if (use_bsearch)
615  {
616  match = find_next_mcelem(mcelem, nmcelem, array_data[i],
617  &mcelem_index, cmpfunc);
618  }
619  else
620  {
621  while (mcelem_index < nmcelem)
622  {
623  int cmp = element_compare(&mcelem[mcelem_index],
624  &array_data[i],
625  cmpfunc);
626 
627  if (cmp < 0)
628  mcelem_index++;
629  else
630  {
631  if (cmp == 0)
632  match = true; /* mcelem is found */
633  break;
634  }
635  }
636  }
637 
638  if (match && numbers)
639  {
640  /* MCELEM matches the array item; use its frequency. */
641  elem_selec = numbers[mcelem_index];
642  mcelem_index++;
643  }
644  else
645  {
646  /*
647  * The element is not in MCELEM. Punt, but assume that the
648  * selectivity cannot be more than minfreq / 2.
649  */
650  elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
651  }
652 
653  /*
654  * Update overall selectivity using the current element's selectivity
655  * and an assumption of element occurrence independence.
656  */
657  if (operator == OID_ARRAY_CONTAINS_OP)
658  selec *= elem_selec;
659  else
660  selec = selec + elem_selec - selec * elem_selec;
661 
662  /* Clamp intermediate results to stay sane despite roundoff error */
663  CLAMP_PROBABILITY(selec);
664  }
665 
666  return selec;
667 }
static int element_compare(const void *key1, const void *key2, void *arg)
#define DEFAULT_CONTAIN_SEL
#define Min(x, y)
Definition: c.h:801
double Selectivity
Definition: nodes.h:631
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define OID_ARRAY_CONTAINS_OP
Definition: pg_operator.h:1548
unsigned int uint32
Definition: c.h:265
float float4
Definition: c.h:376
static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index, FmgrInfo *cmpfunc)
#define NULL
Definition: c.h:226
static int floor_log2(uint32 n)
int i
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
static Selectivity mcelem_array_contained_selec ( Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
Datum array_data,
int  nitems,
float4 hist,
int  nhist,
Oid  operator,
FmgrInfo cmpfunc 
)
static

Definition at line 719 of file array_selfuncs.c.

References calc_distr(), calc_hist(), CLAMP_PROBABILITY, cmp(), DEFAULT_CONTAIN_SEL, EFFORT, element_compare(), float_compare_desc(), i, Min, NULL, palloc(), pfree(), and qsort.

Referenced by mcelem_array_selec(), and scalararraysel_containment().

724 {
725  int mcelem_index,
726  i,
727  unique_nitems = 0;
728  float selec,
729  minfreq,
730  nullelem_freq;
731  float *dist,
732  *mcelem_dist,
733  *hist_part;
734  float avg_count,
735  mult,
736  rest;
737  float *elem_selec;
738 
739  /*
740  * There should be three more Numbers than Values in the MCELEM slot,
741  * because the last three cells should hold minimal and maximal frequency
742  * among the non-null elements, and then the frequency of null elements.
743  * Punt if not right, because we can't do much without the element freqs.
744  */
745  if (numbers == NULL || nnumbers != nmcelem + 3)
746  return DEFAULT_CONTAIN_SEL;
747 
748  /* Can't do much without a count histogram, either */
749  if (hist == NULL || nhist < 3)
750  return DEFAULT_CONTAIN_SEL;
751 
752  /*
753  * Grab some of the summary statistics that compute_array_stats() stores:
754  * lowest frequency, frequency of null elements, and average distinct
755  * element count.
756  */
757  minfreq = numbers[nmcelem];
758  nullelem_freq = numbers[nmcelem + 2];
759  avg_count = hist[nhist - 1];
760 
761  /*
762  * "rest" will be the sum of the frequencies of all elements not
763  * represented in MCELEM. The average distinct element count is the sum
764  * of the frequencies of *all* elements. Begin with that; we will proceed
765  * to subtract the MCELEM frequencies.
766  */
767  rest = avg_count;
768 
769  /*
770  * mult is a multiplier representing estimate of probability that each
771  * mcelem that is not present in constant doesn't occur.
772  */
773  mult = 1.0f;
774 
775  /*
776  * elem_selec is array of estimated frequencies for elements in the
777  * constant.
778  */
779  elem_selec = (float *) palloc(sizeof(float) * nitems);
780 
781  /* Scan mcelem and array in parallel. */
782  mcelem_index = 0;
783  for (i = 0; i < nitems; i++)
784  {
785  bool match = false;
786 
787  /* Ignore any duplicates in the array data. */
788  if (i > 0 &&
789  element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0)
790  continue;
791 
792  /*
793  * Iterate over MCELEM until we find an entry greater than or equal to
794  * this element of the constant. Update "rest" and "mult" for mcelem
795  * entries skipped over.
796  */
797  while (mcelem_index < nmcelem)
798  {
799  int cmp = element_compare(&mcelem[mcelem_index],
800  &array_data[i],
801  cmpfunc);
802 
803  if (cmp < 0)
804  {
805  mult *= (1.0f - numbers[mcelem_index]);
806  rest -= numbers[mcelem_index];
807  mcelem_index++;
808  }
809  else
810  {
811  if (cmp == 0)
812  match = true; /* mcelem is found */
813  break;
814  }
815  }
816 
817  if (match)
818  {
819  /* MCELEM matches the array item. */
820  elem_selec[unique_nitems] = numbers[mcelem_index];
821  /* "rest" is decremented for all mcelems, matched or not */
822  rest -= numbers[mcelem_index];
823  mcelem_index++;
824  }
825  else
826  {
827  /*
828  * The element is not in MCELEM. Punt, but assume that the
829  * selectivity cannot be more than minfreq / 2.
830  */
831  elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
832  minfreq / 2);
833  }
834 
835  unique_nitems++;
836  }
837 
838  /*
839  * If we handled all constant elements without exhausting the MCELEM
840  * array, finish walking it to complete calculation of "rest" and "mult".
841  */
842  while (mcelem_index < nmcelem)
843  {
844  mult *= (1.0f - numbers[mcelem_index]);
845  rest -= numbers[mcelem_index];
846  mcelem_index++;
847  }
848 
849  /*
850  * The presence of many distinct rare elements materially decreases
851  * selectivity. Use the Poisson distribution to estimate the probability
852  * of a column value having zero occurrences of such elements. See above
853  * for the definition of "rest".
854  */
855  mult *= exp(-rest);
856 
857  /*----------
858  * Using the distinct element count histogram requires
859  * O(unique_nitems * (nmcelem + unique_nitems))
860  * operations. Beyond a certain computational cost threshold, it's
861  * reasonable to sacrifice accuracy for decreased planning time. We limit
862  * the number of operations to EFFORT * nmcelem; since nmcelem is limited
863  * by the column's statistics target, the work done is user-controllable.
864  *
865  * If the number of operations would be too large, we can reduce it
866  * without losing all accuracy by reducing unique_nitems and considering
867  * only the most-common elements of the constant array. To make the
868  * results exactly match what we would have gotten with only those
869  * elements to start with, we'd have to remove any discarded elements'
870  * frequencies from "mult", but since this is only an approximation
871  * anyway, we don't bother with that. Therefore it's sufficient to qsort
872  * elem_selec[] and take the largest elements. (They will no longer match
873  * up with the elements of array_data[], but we don't care.)
874  *----------
875  */
876 #define EFFORT 100
877 
878  if ((nmcelem + unique_nitems) > 0 &&
879  unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
880  {
881  /*
882  * Use the quadratic formula to solve for largest allowable N. We
883  * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
884  */
885  double b = (double) nmcelem;
886  int n;
887 
888  n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
889 
890  /* Sort, then take just the first n elements */
891  qsort(elem_selec, unique_nitems, sizeof(float),
893  unique_nitems = n;
894  }
895 
896  /*
897  * Calculate probabilities of each distinct element count for both mcelems
898  * and constant elements. At this point, assume independent element
899  * occurrence.
900  */
901  dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
902  mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
903 
904  /* ignore hist[nhist-1], which is the average not a histogram member */
905  hist_part = calc_hist(hist, nhist - 1, unique_nitems);
906 
907  selec = 0.0f;
908  for (i = 0; i <= unique_nitems; i++)
909  {
910  /*
911  * mult * dist[i] / mcelem_dist[i] gives us probability of qual
912  * matching from assumption of independent element occurrence with the
913  * condition that distinct element count = i.
914  */
915  if (mcelem_dist[i] > 0)
916  selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
917  }
918 
919  pfree(dist);
920  pfree(mcelem_dist);
921  pfree(hist_part);
922  pfree(elem_selec);
923 
924  /* Take into account occurrence of NULL element. */
925  selec *= (1.0f - nullelem_freq);
926 
927  CLAMP_PROBABILITY(selec);
928 
929  return selec;
930 }
static int element_compare(const void *key1, const void *key2, void *arg)
#define DEFAULT_CONTAIN_SEL
#define Min(x, y)
Definition: c.h:801
static float * calc_hist(const float4 *hist, int nhist, int n)
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
void pfree(void *pointer)
Definition: mcxt.c:992
static float * calc_distr(const float *p, int n, int m, float rest)
#define EFFORT
#define NULL
Definition: c.h:226
void * palloc(Size size)
Definition: mcxt.c:891
int i
#define qsort(a, b, c, d)
Definition: port.h:440
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
static int float_compare_desc(const void *key1, const void *key2)
static Selectivity mcelem_array_selec ( ArrayType array,
TypeCacheEntry typentry,
Datum mcelem,
int  nmcelem,
float4 numbers,
int  nnumbers,
float4 hist,
int  nhist,
Oid  operator,
FmgrInfo cmpfunc 
)
static

Definition at line 451 of file array_selfuncs.c.

References deconstruct_array(), element_compare(), elog, ERROR, i, mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, OID_ARRAY_OVERLAP_OP, pfree(), qsort_arg(), TypeCacheEntry::typalign, TypeCacheEntry::typbyval, TypeCacheEntry::type_id, and TypeCacheEntry::typlen.

Referenced by calc_arraycontsel().

456 {
457  Selectivity selec;
458  int num_elems;
459  Datum *elem_values;
460  bool *elem_nulls;
461  bool null_present;
462  int nonnull_nitems;
463  int i;
464 
465  /*
466  * Prepare constant array data for sorting. Sorting lets us find unique
467  * elements and efficiently merge with the MCELEM array.
468  */
469  deconstruct_array(array,
470  typentry->type_id,
471  typentry->typlen,
472  typentry->typbyval,
473  typentry->typalign,
474  &elem_values, &elem_nulls, &num_elems);
475 
476  /* Collapse out any null elements */
477  nonnull_nitems = 0;
478  null_present = false;
479  for (i = 0; i < num_elems; i++)
480  {
481  if (elem_nulls[i])
482  null_present = true;
483  else
484  elem_values[nonnull_nitems++] = elem_values[i];
485  }
486 
487  /*
488  * Query "column @> '{anything, null}'" matches nothing. For the other
489  * two operators, presence of a null in the constant can be ignored.
490  */
491  if (null_present && operator == OID_ARRAY_CONTAINS_OP)
492  {
493  pfree(elem_values);
494  pfree(elem_nulls);
495  return (Selectivity) 0.0;
496  }
497 
498  /* Sort extracted elements using their default comparison function. */
499  qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
500  element_compare, cmpfunc);
501 
502  /* Separate cases according to operator */
503  if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
504  selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
505  numbers, nnumbers,
506  elem_values, nonnull_nitems,
507  operator, cmpfunc);
508  else if (operator == OID_ARRAY_CONTAINED_OP)
509  selec = mcelem_array_contained_selec(mcelem, nmcelem,
510  numbers, nnumbers,
511  elem_values, nonnull_nitems,
512  hist, nhist,
513  operator, cmpfunc);
514  else
515  {
516  elog(ERROR, "arraycontsel called for unrecognized operator %u",
517  operator);
518  selec = 0.0; /* keep compiler quiet */
519  }
520 
521  pfree(elem_values);
522  pfree(elem_nulls);
523  return selec;
524 }
static int element_compare(const void *key1, const void *key2, void *arg)
double Selectivity
Definition: nodes.h:631
int16 typlen
Definition: typcache.h:35
bool typbyval
Definition: typcache.h:36
#define OID_ARRAY_CONTAINS_OP
Definition: pg_operator.h:1548
void pfree(void *pointer)
Definition: mcxt.c:992
#define ERROR
Definition: elog.h:43
#define OID_ARRAY_OVERLAP_OP
Definition: pg_operator.h:1545
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
#define OID_ARRAY_CONTAINED_OP
Definition: pg_operator.h:1551
uintptr_t Datum
Definition: postgres.h:374
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, FmgrInfo *cmpfunc)
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3475
char typalign
Definition: typcache.h:37
int i
#define elog
Definition: elog.h:219
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, FmgrInfo *cmpfunc)
Selectivity scalararraysel_containment ( PlannerInfo root,
Node leftop,
Node rightop,
Oid  elemtype,
bool  isEquality,
bool  useOr,
int  varRelid 
)

Definition at line 83 of file array_selfuncs.c.

References VariableStatData::atttypmod, CLAMP_PROBABILITY, TypeCacheEntry::cmp_proc_finfo, examine_variable(), FmgrInfo::fn_oid, free_attstatsslot(), get_attstatsslot(), GETSTRUCT, HeapTupleIsValid, InvalidOid, IsA, lookup_type_cache(), mcelem_array_contain_overlap_selec(), mcelem_array_contained_selec(), NULL, OID_ARRAY_CONTAINED_OP, OID_ARRAY_CONTAINS_OP, OidIsValid, VariableStatData::rel, ReleaseVariableStats, STATISTIC_KIND_DECHIST, STATISTIC_KIND_MCELEM, VariableStatData::statsTuple, TYPECACHE_CMP_PROC_FINFO, and values.

Referenced by scalararraysel().

87 {
88  Selectivity selec;
89  VariableStatData vardata;
90  Datum constval;
91  TypeCacheEntry *typentry;
92  FmgrInfo *cmpfunc;
93 
94  /*
95  * rightop must be a variable, else punt.
96  */
97  examine_variable(root, rightop, varRelid, &vardata);
98  if (!vardata.rel)
99  {
100  ReleaseVariableStats(vardata);
101  return -1.0;
102  }
103 
104  /*
105  * leftop must be a constant, else punt.
106  */
107  if (!IsA(leftop, Const))
108  {
109  ReleaseVariableStats(vardata);
110  return -1.0;
111  }
112  if (((Const *) leftop)->constisnull)
113  {
114  /* qual can't succeed if null on left */
115  ReleaseVariableStats(vardata);
116  return (Selectivity) 0.0;
117  }
118  constval = ((Const *) leftop)->constvalue;
119 
120  /* Get element type's default comparison function */
121  typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
122  if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
123  {
124  ReleaseVariableStats(vardata);
125  return -1.0;
126  }
127  cmpfunc = &typentry->cmp_proc_finfo;
128 
129  /*
130  * If the operator is <>, swap ANY/ALL, then invert the result later.
131  */
132  if (!isEquality)
133  useOr = !useOr;
134 
135  /* Get array element stats for var, if available */
136  if (HeapTupleIsValid(vardata.statsTuple))
137  {
138  Form_pg_statistic stats;
139  Datum *values;
140  int nvalues;
141  float4 *numbers;
142  int nnumbers;
143  float4 *hist;
144  int nhist;
145 
146  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
147 
148  /* MCELEM will be an array of same type as element */
149  if (get_attstatsslot(vardata.statsTuple,
150  elemtype, vardata.atttypmod,
152  NULL,
153  &values, &nvalues,
154  &numbers, &nnumbers))
155  {
156  /* For ALL case, also get histogram of distinct-element counts */
157  if (useOr ||
158  !get_attstatsslot(vardata.statsTuple,
159  elemtype, vardata.atttypmod,
161  NULL,
162  NULL, NULL,
163  &hist, &nhist))
164  {
165  hist = NULL;
166  nhist = 0;
167  }
168 
169  /*
170  * For = ANY, estimate as var @> ARRAY[const].
171  *
172  * For = ALL, estimate as var <@ ARRAY[const].
173  */
174  if (useOr)
175  selec = mcelem_array_contain_overlap_selec(values, nvalues,
176  numbers, nnumbers,
177  &constval, 1,
179  cmpfunc);
180  else
181  selec = mcelem_array_contained_selec(values, nvalues,
182  numbers, nnumbers,
183  &constval, 1,
184  hist, nhist,
186  cmpfunc);
187 
188  if (hist)
189  free_attstatsslot(elemtype, NULL, 0, hist, nhist);
190  free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers);
191  }
192  else
193  {
194  /* No most-common-elements info, so do without */
195  if (useOr)
197  NULL, 0,
198  &constval, 1,
200  cmpfunc);
201  else
203  NULL, 0,
204  &constval, 1,
205  NULL, 0,
207  cmpfunc);
208  }
209 
210  /*
211  * MCE stats count only non-null rows, so adjust for null rows.
212  */
213  selec *= (1.0 - stats->stanullfrac);
214  }
215  else
216  {
217  /* No stats at all, so do without */
218  if (useOr)
220  NULL, 0,
221  &constval, 1,
223  cmpfunc);
224  else
226  NULL, 0,
227  &constval, 1,
228  NULL, 0,
230  cmpfunc);
231  /* we assume no nulls here, so no stanullfrac correction */
232  }
233 
234  ReleaseVariableStats(vardata);
235 
236  /*
237  * If the operator is <>, invert the results.
238  */
239  if (!isEquality)
240  selec = 1.0 - selec;
241 
242  CLAMP_PROBABILITY(selec);
243 
244  return selec;
245 }
Definition: fmgr.h:53
#define IsA(nodeptr, _type_)
Definition: nodes.h:559
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
HeapTuple statsTuple
Definition: selfuncs.h:71
RelOptInfo * rel
Definition: selfuncs.h:70
double Selectivity
Definition: nodes.h:631
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2854
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
#define OidIsValid(objectId)
Definition: c.h:533
int32 atttypmod
Definition: selfuncs.h:76
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
FmgrInfo cmp_proc_finfo
Definition: typcache.h:68
#define OID_ARRAY_CONTAINS_OP
Definition: pg_operator.h:1548
#define STATISTIC_KIND_DECHIST
Definition: pg_statistic.h:270
#define OID_ARRAY_CONTAINED_OP
Definition: pg_operator.h:1551
float float4
Definition: c.h:376
uintptr_t Datum
Definition: postgres.h:374
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:191
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:56
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
void examine_variable(PlannerInfo *root, Node *node, int varRelid, VariableStatData *vardata)
Definition: selfuncs.c:4409
#define NULL
Definition: c.h:226
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, float4 *hist, int nhist, Oid operator, FmgrInfo *cmpfunc)
static Datum values[MAXATTR]
Definition: bootstrap.c:162
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
#define STATISTIC_KIND_MCELEM
Definition: pg_statistic.h:257
#define TYPECACHE_CMP_PROC_FINFO
Definition: typcache.h:116
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:2978
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, FmgrInfo *cmpfunc)