PostgreSQL Source Code  git master
_int_selfuncs.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * _int_selfuncs.c
4  * Functions for selectivity estimation of intarray operators
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * contrib/intarray/_int_selfuncs.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "_int.h"
18 #include "access/htup_details.h"
19 #include "catalog/pg_operator.h"
20 #include "catalog/pg_statistic.h"
21 #include "catalog/pg_type.h"
22 #include "miscadmin.h"
23 #include "utils/builtins.h"
24 #include "utils/lsyscache.h"
25 #include "utils/selfuncs.h"
26 #include "utils/syscache.h"
27 
35 
36 
37 static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
38  int nmcelems, float4 minfreq);
39 static int compare_val_int4(const void *a, const void *b);
40 
41 /*
42  * Wrappers around the default array selectivity estimation functions.
43  *
44  * The default array selectivity operators for the @>, && and @< operators
45  * work fine for integer arrays. However, if we tried to just use arraycontsel
46  * and arraycontjoinsel directly as the cost estimator functions for our
47  * operators, they would not work as intended, because they look at the
48  * operator's OID. Our operators behave exactly like the built-in anyarray
49  * versions, but we must tell the cost estimator functions which built-in
50  * operators they correspond to. These wrappers just replace the operator
51  * OID with the corresponding built-in operator's OID, and call the built-in
52  * function.
53  */
54 
55 Datum
57 {
59  PG_GETARG_DATUM(0),
60  ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
61  PG_GETARG_DATUM(2),
62  PG_GETARG_DATUM(3)));
63 }
64 
65 Datum
67 {
69  PG_GETARG_DATUM(0),
70  ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
71  PG_GETARG_DATUM(2),
72  PG_GETARG_DATUM(3)));
73 }
74 
75 Datum
77 {
79  PG_GETARG_DATUM(0),
80  ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
81  PG_GETARG_DATUM(2),
82  PG_GETARG_DATUM(3)));
83 }
84 
85 Datum
87 {
89  PG_GETARG_DATUM(0),
90  ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
91  PG_GETARG_DATUM(2),
92  PG_GETARG_DATUM(3),
93  PG_GETARG_DATUM(4)));
94 }
95 
96 Datum
98 {
100  PG_GETARG_DATUM(0),
101  ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
102  PG_GETARG_DATUM(2),
103  PG_GETARG_DATUM(3),
104  PG_GETARG_DATUM(4)));
105 }
106 
107 Datum
109 {
111  PG_GETARG_DATUM(0),
112  ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
113  PG_GETARG_DATUM(2),
114  PG_GETARG_DATUM(3),
115  PG_GETARG_DATUM(4)));
116 }
117 
118 
119 /*
120  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
121  */
122 Datum
124 {
126 
127  List *args = (List *) PG_GETARG_POINTER(2);
128  int varRelid = PG_GETARG_INT32(3);
129  VariableStatData vardata;
130  Node *other;
131  bool varonleft;
132  Selectivity selec;
133  QUERYTYPE *query;
134  Datum *mcelems = NULL;
135  float4 *mcefreqs = NULL;
136  int nmcelems = 0;
137  float4 minfreq = 0.0;
138  float4 nullfrac = 0.0;
139  AttStatsSlot sslot;
140 
141  /*
142  * If expression is not "variable @@ something" or "something @@ variable"
143  * then punt and return a default estimate.
144  */
145  if (!get_restriction_variable(root, args, varRelid,
146  &vardata, &other, &varonleft))
148 
149  /*
150  * Variable should be int[]. We don't support cases where variable is
151  * query_int.
152  */
153  if (vardata.vartype != INT4ARRAYOID)
155 
156  /*
157  * Can't do anything useful if the something is not a constant, either.
158  */
159  if (!IsA(other, Const))
160  {
161  ReleaseVariableStats(vardata);
163  }
164 
165  /*
166  * The "@@" operator is strict, so we can cope with NULL right away.
167  */
168  if (((Const *) other)->constisnull)
169  {
170  ReleaseVariableStats(vardata);
171  PG_RETURN_FLOAT8(0.0);
172  }
173 
174  /* The caller made sure the const is a query, so get it now */
175  query = DatumGetQueryTypeP(((Const *) other)->constvalue);
176 
177  /* Empty query matches nothing */
178  if (query->size == 0)
179  {
180  ReleaseVariableStats(vardata);
181  return (Selectivity) 0.0;
182  }
183 
184  /*
185  * Get the statistics for the intarray column.
186  *
187  * We're interested in the Most-Common-Elements list, and the NULL
188  * fraction.
189  */
190  if (HeapTupleIsValid(vardata.statsTuple))
191  {
192  Form_pg_statistic stats;
193 
194  stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
195  nullfrac = stats->stanullfrac;
196 
197  /*
198  * For an int4 array, the default array type analyze function will
199  * collect a Most Common Elements list, which is an array of int4s.
200  */
201  if (get_attstatsslot(&sslot, vardata.statsTuple,
202  STATISTIC_KIND_MCELEM, InvalidOid,
204  {
205  Assert(sslot.valuetype == INT4OID);
206 
207  /*
208  * There should be three more Numbers than Values, because the
209  * last three (for intarray) cells are taken for minimal, maximal
210  * and nulls frequency. Punt if not.
211  */
212  if (sslot.nnumbers == sslot.nvalues + 3)
213  {
214  /* Grab the lowest frequency. */
215  minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
216 
217  mcelems = sslot.values;
218  mcefreqs = sslot.numbers;
219  nmcelems = sslot.nvalues;
220  }
221  }
222  }
223  else
224  memset(&sslot, 0, sizeof(sslot));
225 
226  /* Process the logical expression in the query, using the stats */
227  selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
228  mcelems, mcefreqs, nmcelems, minfreq);
229 
230  /* MCE stats count only non-null rows, so adjust for null rows. */
231  selec *= (1.0 - nullfrac);
232 
233  free_attstatsslot(&sslot);
234  ReleaseVariableStats(vardata);
235 
236  CLAMP_PROBABILITY(selec);
237 
238  PG_RETURN_FLOAT8((float8) selec);
239 }
240 
241 /*
242  * Estimate selectivity of single intquery operator
243  */
244 static Selectivity
245 int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
246  int nmcelems, float4 minfreq)
247 {
248  Selectivity selec;
249 
250  /* since this function recurses, it could be driven to stack overflow */
252 
253  if (item->type == VAL)
254  {
255  Datum *searchres;
256 
257  if (mcelems == NULL)
258  return (Selectivity) DEFAULT_EQ_SEL;
259 
260  searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
261  sizeof(Datum), compare_val_int4);
262  if (searchres)
263  {
264  /*
265  * The element is in MCELEM. Return precise selectivity (or at
266  * least as precise as ANALYZE could find out).
267  */
268  selec = mcefreqs[searchres - mcelems];
269  }
270  else
271  {
272  /*
273  * The element is not in MCELEM. Punt, but assume that the
274  * selectivity cannot be more than minfreq / 2.
275  */
276  selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
277  }
278  }
279  else if (item->type == OPR)
280  {
281  /* Current query node is an operator */
282  Selectivity s1,
283  s2;
284 
285  s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
286  minfreq);
287  switch (item->val)
288  {
289  case (int32) '!':
290  selec = 1.0 - s1;
291  break;
292 
293  case (int32) '&':
294  s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
295  nmcelems, minfreq);
296  selec = s1 * s2;
297  break;
298 
299  case (int32) '|':
300  s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
301  nmcelems, minfreq);
302  selec = s1 + s2 - s1 * s2;
303  break;
304 
305  default:
306  elog(ERROR, "unrecognized operator: %d", item->val);
307  selec = 0; /* keep compiler quiet */
308  break;
309  }
310  }
311  else
312  {
313  elog(ERROR, "unrecognized int query item type: %u", item->type);
314  selec = 0; /* keep compiler quiet */
315  }
316 
317  /* Clamp intermediate results to stay sane despite roundoff error */
318  CLAMP_PROBABILITY(selec);
319 
320  return selec;
321 }
322 
323 /*
324  * Comparison function for binary search in mcelem array.
325  */
326 static int
327 compare_val_int4(const void *a, const void *b)
328 {
329  int32 key = *(int32 *) a;
330  const Datum *t = (const Datum *) b;
331 
332  return key - DatumGetInt32(*t);
333 }
#define OPR
Definition: _int.h:163
#define VAL
Definition: _int.h:162
#define GETQUERY(x)
Definition: _int.h:157
#define DatumGetQueryTypeP(X)
Definition: _int.h:168
static int compare_val_int4(const void *a, const void *b)
Datum _int_overlap_joinsel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:86
static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs, int nmcelems, float4 minfreq)
Datum _int_matchsel(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(_int_overlap_sel)
Datum _int_contains_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:66
Datum _int_contained_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:76
Datum _int_contains_joinsel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:97
Datum _int_overlap_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:56
Datum _int_contained_joinsel(PG_FUNCTION_ARGS)
Datum arraycontsel(PG_FUNCTION_ARGS)
Datum arraycontjoinsel(PG_FUNCTION_ARGS)
#define Min(x, y)
Definition: c.h:991
signed int int32
Definition: c.h:481
double float8
Definition: c.h:617
float float4
Definition: c.h:616
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define DirectFunctionCall4(func, arg1, arg2, arg3, arg4)
Definition: fmgr.h:648
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall5(func, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:650
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
int b
Definition: isn.c:70
int a
Definition: isn.c:69
Assert(fmt[strlen(fmt) - 1] !='\n')
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3322
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3212
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
void check_stack_depth(void)
Definition: postgres.c:3531
uintptr_t Datum
Definition: postgres.h:64
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202
#define InvalidOid
Definition: postgres_ext.h:36
char * s1
char * s2
tree ctl root
Definition: radixtree.h:1840
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4883
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34
Oid valuetype
Definition: lsyscache.h:52
Datum * values
Definition: lsyscache.h:53
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57
Definition: _int.h:141
int16 left
Definition: _int.h:143
int32 val
Definition: _int.h:144
int16 type
Definition: _int.h:142
Definition: pg_list.h:54
Definition: nodes.h:129
int32 size
Definition: _int.h:150
HeapTuple statsTuple
Definition: selfuncs.h:89