PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
_int_selfuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * _int_selfuncs.c
4  * Functions for selectivity estimation of intarray operators
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * contrib/intarray/_int_selfuncs.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 #include "_int.h"
17 
18 #include "access/htup_details.h"
19 #include "catalog/pg_operator.h"
20 #include "catalog/pg_statistic.h"
21 #include "catalog/pg_type.h"
22 #include "utils/builtins.h"
23 #include "utils/selfuncs.h"
24 #include "utils/syscache.h"
25 #include "utils/lsyscache.h"
26 #include "miscadmin.h"
27 
35 
36 
37 static Selectivity int_query_opr_selec(ITEM *item, Datum *values, float4 *freqs,
38  int nmncelems, float4 minfreq);
39 static int compare_val_int4(const void *a, const void *b);
40 
/*
 * Wrappers around the default array selectivity estimation functions.
 *
 * The default array selectivity operators for the @>, && and <@ operators
 * work fine for integer arrays. However, if we tried to just use arraycontsel
 * and arraycontjoinsel directly as the cost estimator functions for our
 * operators, they would not work as intended, because they look at the
 * operator's OID. Our operators behave exactly like the built-in anyarray
 * versions, but we must tell the cost estimator functions which built-in
 * operators they correspond to. These wrappers just replace the operator
 * OID with the corresponding built-in operator's OID, and call the built-in
 * function.
 */
54 
55 Datum
57 {
59  PG_GETARG_DATUM(0),
61  PG_GETARG_DATUM(2),
62  PG_GETARG_DATUM(3)));
63 }
64 
65 Datum
67 {
69  PG_GETARG_DATUM(0),
71  PG_GETARG_DATUM(2),
72  PG_GETARG_DATUM(3)));
73 }
74 
75 Datum
77 {
79  PG_GETARG_DATUM(0),
81  PG_GETARG_DATUM(2),
82  PG_GETARG_DATUM(3)));
83 }
84 
85 Datum
87 {
89  PG_GETARG_DATUM(0),
91  PG_GETARG_DATUM(2),
92  PG_GETARG_DATUM(3),
93  PG_GETARG_DATUM(4)));
94 }
95 
96 Datum
98 {
100  PG_GETARG_DATUM(0),
102  PG_GETARG_DATUM(2),
103  PG_GETARG_DATUM(3),
104  PG_GETARG_DATUM(4)));
105 }
106 
107 Datum
109 {
111  PG_GETARG_DATUM(0),
113  PG_GETARG_DATUM(2),
114  PG_GETARG_DATUM(3),
115  PG_GETARG_DATUM(4)));
116 }
117 
118 
119 /*
120  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
121  */
122 Datum
124 {
126 
127  List *args = (List *) PG_GETARG_POINTER(2);
128  int varRelid = PG_GETARG_INT32(3);
129  VariableStatData vardata;
130  Node *other;
131  bool varonleft;
132  Selectivity selec;
133  QUERYTYPE *query;
134  Datum *mcelems = NULL;
135  float4 *mcefreqs = NULL;
136  int nmcelems = 0;
137  float4 minfreq = 0.0;
138  float4 nullfrac = 0.0;
139  Form_pg_statistic stats;
140  Datum *values = NULL;
141  int nvalues = 0;
142  float4 *numbers = NULL;
143  int nnumbers = 0;
144 
145  /*
146  * If expression is not "variable @@ something" or "something @@ variable"
147  * then punt and return a default estimate.
148  */
149  if (!get_restriction_variable(root, args, varRelid,
150  &vardata, &other, &varonleft))
152 
153  /*
154  * Variable should be int[]. We don't support cases where variable is
155  * query_int.
156  */
157  if (vardata.vartype != INT4ARRAYOID)
159 
160  /*
161  * Can't do anything useful if the something is not a constant, either.
162  */
163  if (!IsA(other, Const))
164  {
165  ReleaseVariableStats(vardata);
167  }
168 
169  /*
170  * The "@@" operator is strict, so we can cope with NULL right away.
171  */
172  if (((Const *) other)->constisnull)
173  {
174  ReleaseVariableStats(vardata);
175  PG_RETURN_FLOAT8(0.0);
176  }
177 
178  /* The caller made sure the const is a query, so get it now */
179  query = DatumGetQueryTypeP(((Const *) other)->constvalue);
180 
181  /* Empty query matches nothing */
182  if (query->size == 0)
183  {
184  ReleaseVariableStats(vardata);
185  return (Selectivity) 0.0;
186  }
187 
188  /*
189  * Get the statistics for the intarray column.
190  *
191  * We're interested in the Most-Common-Elements list, and the NULL
192  * fraction.
193  */
194  if (HeapTupleIsValid(vardata.statsTuple))
195  {
196  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
197  nullfrac = stats->stanullfrac;
198 
199  /*
200  * For an int4 array, the default array type analyze function will
201  * collect a Most Common Elements list, which is an array of int4s.
202  */
203  if (get_attstatsslot(vardata.statsTuple,
204  INT4OID, -1,
206  NULL,
207  &values, &nvalues,
208  &numbers, &nnumbers))
209  {
210  /*
211  * There should be three more Numbers than Values, because the
212  * last three (for intarray) cells are taken for minimal, maximal
213  * and nulls frequency. Punt if not.
214  */
215  if (nnumbers == nvalues + 3)
216  {
217  /* Grab the lowest frequency. */
218  minfreq = numbers[nnumbers - (nnumbers - nvalues)];
219 
220  mcelems = values;
221  mcefreqs = numbers;
222  nmcelems = nvalues;
223  }
224  }
225  }
226 
227  /* Process the logical expression in the query, using the stats */
228  selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
229  mcelems, mcefreqs, nmcelems, minfreq);
230 
231  /* MCE stats count only non-null rows, so adjust for null rows. */
232  selec *= (1.0 - nullfrac);
233 
234  free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers);
235  ReleaseVariableStats(vardata);
236 
237  CLAMP_PROBABILITY(selec);
238 
239  PG_RETURN_FLOAT8((float8) selec);
240 }
241 
242 /*
243  * Estimate selectivity of single intquery operator
244  */
245 static Selectivity
246 int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
247  int nmcelems, float4 minfreq)
248 {
249  Selectivity selec;
250 
251  /* since this function recurses, it could be driven to stack overflow */
253 
254  if (item->type == VAL)
255  {
256  Datum *searchres;
257 
258  if (mcelems == NULL)
259  return (Selectivity) DEFAULT_EQ_SEL;
260 
261  searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
262  sizeof(Datum), compare_val_int4);
263  if (searchres)
264  {
265  /*
266  * The element is in MCELEM. Return precise selectivity (or at
267  * least as precise as ANALYZE could find out).
268  */
269  selec = mcefreqs[searchres - mcelems];
270  }
271  else
272  {
273  /*
274  * The element is not in MCELEM. Punt, but assume that the
275  * selectivity cannot be more than minfreq / 2.
276  */
277  selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
278  }
279  }
280  else if (item->type == OPR)
281  {
282  /* Current query node is an operator */
283  Selectivity s1,
284  s2;
285 
286  s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
287  minfreq);
288  switch (item->val)
289  {
290  case (int32) '!':
291  selec = 1.0 - s1;
292  break;
293 
294  case (int32) '&':
295  s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
296  nmcelems, minfreq);
297  selec = s1 * s2;
298  break;
299 
300  case (int32) '|':
301  s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
302  nmcelems, minfreq);
303  selec = s1 + s2 - s1 * s2;
304  break;
305 
306  default:
307  elog(ERROR, "unrecognized operator: %d", item->val);
308  selec = 0; /* keep compiler quiet */
309  break;
310  }
311  }
312  else
313  {
314  elog(ERROR, "unrecognized int query item type: %u", item->type);
315  selec = 0; /* keep compiler quiet */
316  }
317 
318  /* Clamp intermediate results to stay sane despite roundoff error */
319  CLAMP_PROBABILITY(selec);
320 
321  return selec;
322 }
323 
324 /*
325  * Comparison function for binary search in mcelem array.
326  */
327 static int
328 compare_val_int4(const void *a, const void *b)
329 {
330  int32 key = *(int32 *) a;
331  const Datum *t = (const Datum *) b;
332 
333  return key - DatumGetInt32(*t);
334 }
Definition: _int.h:125
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define IsA(nodeptr, _type_)
Definition: nodes.h:555
Datum _int_overlap_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:56
#define GETSTRUCT(TUP)
Definition: htup_details.h:656
Datum _int_matchsel(PG_FUNCTION_ARGS)
Datum _int_overlap_joinsel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:86
#define DatumGetInt32(X)
Definition: postgres.h:478
HeapTuple statsTuple
Definition: selfuncs.h:71
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4441
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:326
#define Min(x, y)
Definition: c.h:806
static Selectivity int_query_opr_selec(ITEM *item, Datum *values, float4 *freqs, int nmncelems, float4 minfreq)
#define INT4OID
Definition: pg_type.h:316
Definition: nodes.h:504
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
double Selectivity
Definition: nodes.h:627
bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers)
Definition: lsyscache.c:2854
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:129
#define GETQUERY(x)
Definition: _int.h:142
signed int int32
Definition: c.h:256
Datum arraycontsel(PG_FUNCTION_ARGS)
int16 type
Definition: _int.h:127
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:57
#define OID_ARRAY_CONTAINS_OP
Definition: pg_operator.h:1569
#define ObjectIdGetDatum(X)
Definition: postgres.h:513
#define ERROR
Definition: elog.h:43
double float8
Definition: c.h:381
char * s1
#define OID_ARRAY_OVERLAP_OP
Definition: pg_operator.h:1566
#define DirectFunctionCall4(func, arg1, arg2, arg3, arg4)
Definition: fmgr.h:590
Datum _int_contains_joinsel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:97
void check_stack_depth(void)
Definition: postgres.c:3098
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34
int32 val
Definition: _int.h:129
static int compare_val_int4(const void *a, const void *b)
#define DirectFunctionCall5(func, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:592
#define OID_ARRAY_CONTAINED_OP
Definition: pg_operator.h:1572
PG_FUNCTION_INFO_V1(_int_overlap_sel)
float float4
Definition: c.h:380
char * s2
uintptr_t Datum
Definition: postgres.h:372
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:313
#define InvalidOid
Definition: postgres_ext.h:36
int16 left
Definition: _int.h:128
#define VAL
Definition: _int.h:147
#define HeapTupleIsValid(tuple)
Definition: htup.h:77
#define NULL
Definition: c.h:229
#define OPR
Definition: _int.h:148
Datum _int_contained_joinsel(PG_FUNCTION_ARGS)
static Datum values[MAXATTR]
Definition: bootstrap.c:162
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:80
#define STATISTIC_KIND_MCELEM
Definition: pg_statistic.h:257
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
Datum _int_contained_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:76
Datum _int_contains_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:66
#define elog
Definition: elog.h:219
#define INT4ARRAYOID
Definition: pg_type.h:463
#define DatumGetQueryTypeP(X)
Definition: _int.h:153
int32 size
Definition: _int.h:135
Definition: pg_list.h:45
void free_attstatsslot(Oid atttype, Datum *values, int nvalues, float4 *numbers, int nnumbers)
Definition: lsyscache.c:2978
Datum arraycontjoinsel(PG_FUNCTION_ARGS)