PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
_int_selfuncs.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * _int_selfuncs.c
4 * Functions for selectivity estimation of intarray operators
5 *
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * contrib/intarray/_int_selfuncs.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "_int.h"
18#include "access/htup_details.h"
19#include "catalog/pg_operator.h"
21#include "catalog/pg_type.h"
22#include "miscadmin.h"
23#include "utils/fmgrprotos.h"
24#include "utils/lsyscache.h"
25#include "utils/selfuncs.h"
26
34
35
36static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
37 int nmcelems, float4 minfreq);
38static int compare_val_int4(const void *a, const void *b);
39
40/*
41 * Wrappers around the default array selectivity estimation functions.
42 *
43 * The default array selectivity operators for the @>, && and @< operators
44 * work fine for integer arrays. However, if we tried to just use arraycontsel
45 * and arraycontjoinsel directly as the cost estimator functions for our
46 * operators, they would not work as intended, because they look at the
47 * operator's OID. Our operators behave exactly like the built-in anyarray
48 * versions, but we must tell the cost estimator functions which built-in
49 * operators they correspond to. These wrappers just replace the operator
50 * OID with the corresponding built-in operator's OID, and call the built-in
51 * function.
52 */
53
56{
59 ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
61 PG_GETARG_DATUM(3)));
62}
63
66{
69 ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
71 PG_GETARG_DATUM(3)));
72}
73
76{
79 ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
81 PG_GETARG_DATUM(3)));
82}
83
86{
89 ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
92 PG_GETARG_DATUM(4)));
93}
94
97{
100 ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
103 PG_GETARG_DATUM(4)));
104}
105
106Datum
108{
111 ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
114 PG_GETARG_DATUM(4)));
115}
116
117
118/*
119 * _int_matchsel -- restriction selectivity function for intarray @@ query_int
120 */
121Datum
123{
125
127 int varRelid = PG_GETARG_INT32(3);
128 VariableStatData vardata;
129 Node *other;
130 bool varonleft;
131 Selectivity selec;
132 QUERYTYPE *query;
133 Datum *mcelems = NULL;
134 float4 *mcefreqs = NULL;
135 int nmcelems = 0;
136 float4 minfreq = 0.0;
137 float4 nullfrac = 0.0;
138 AttStatsSlot sslot;
139
140 /*
141 * If expression is not "variable @@ something" or "something @@ variable"
142 * then punt and return a default estimate.
143 */
144 if (!get_restriction_variable(root, args, varRelid,
145 &vardata, &other, &varonleft))
147
148 /*
149 * Variable should be int[]. We don't support cases where variable is
150 * query_int.
151 */
152 if (vardata.vartype != INT4ARRAYOID)
154
155 /*
156 * Can't do anything useful if the something is not a constant, either.
157 */
158 if (!IsA(other, Const))
159 {
160 ReleaseVariableStats(vardata);
162 }
163
164 /*
165 * The "@@" operator is strict, so we can cope with NULL right away.
166 */
167 if (((Const *) other)->constisnull)
168 {
169 ReleaseVariableStats(vardata);
170 PG_RETURN_FLOAT8(0.0);
171 }
172
173 /* The caller made sure the const is a query, so get it now */
174 query = DatumGetQueryTypeP(((Const *) other)->constvalue);
175
176 /* Empty query matches nothing */
177 if (query->size == 0)
178 {
179 ReleaseVariableStats(vardata);
180 return (Selectivity) 0.0;
181 }
182
183 /*
184 * Get the statistics for the intarray column.
185 *
186 * We're interested in the Most-Common-Elements list, and the NULL
187 * fraction.
188 */
189 if (HeapTupleIsValid(vardata.statsTuple))
190 {
191 Form_pg_statistic stats;
192
193 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
194 nullfrac = stats->stanullfrac;
195
196 /*
197 * For an int4 array, the default array type analyze function will
198 * collect a Most Common Elements list, which is an array of int4s.
199 */
200 if (get_attstatsslot(&sslot, vardata.statsTuple,
201 STATISTIC_KIND_MCELEM, InvalidOid,
203 {
204 Assert(sslot.valuetype == INT4OID);
205
206 /*
207 * There should be three more Numbers than Values, because the
208 * last three (for intarray) cells are taken for minimal, maximal
209 * and nulls frequency. Punt if not.
210 */
211 if (sslot.nnumbers == sslot.nvalues + 3)
212 {
213 /* Grab the lowest frequency. */
214 minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
215
216 mcelems = sslot.values;
217 mcefreqs = sslot.numbers;
218 nmcelems = sslot.nvalues;
219 }
220 }
221 }
222 else
223 memset(&sslot, 0, sizeof(sslot));
224
225 /* Process the logical expression in the query, using the stats */
226 selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
227 mcelems, mcefreqs, nmcelems, minfreq);
228
229 /* MCE stats count only non-null rows, so adjust for null rows. */
230 selec *= (1.0 - nullfrac);
231
232 free_attstatsslot(&sslot);
233 ReleaseVariableStats(vardata);
234
235 CLAMP_PROBABILITY(selec);
236
237 PG_RETURN_FLOAT8((float8) selec);
238}
239
240/*
241 * Estimate selectivity of single intquery operator
242 */
243static Selectivity
244int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
245 int nmcelems, float4 minfreq)
246{
247 Selectivity selec;
248
249 /* since this function recurses, it could be driven to stack overflow */
251
252 if (item->type == VAL)
253 {
254 Datum *searchres;
255
256 if (mcelems == NULL)
258
259 searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
260 sizeof(Datum), compare_val_int4);
261 if (searchres)
262 {
263 /*
264 * The element is in MCELEM. Return precise selectivity (or at
265 * least as precise as ANALYZE could find out).
266 */
267 selec = mcefreqs[searchres - mcelems];
268 }
269 else
270 {
271 /*
272 * The element is not in MCELEM. Punt, but assume that the
273 * selectivity cannot be more than minfreq / 2.
274 */
275 selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
276 }
277 }
278 else if (item->type == OPR)
279 {
280 /* Current query node is an operator */
282 s2;
283
284 s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
285 minfreq);
286 switch (item->val)
287 {
288 case (int32) '!':
289 selec = 1.0 - s1;
290 break;
291
292 case (int32) '&':
293 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
294 nmcelems, minfreq);
295 selec = s1 * s2;
296 break;
297
298 case (int32) '|':
299 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
300 nmcelems, minfreq);
301 selec = s1 + s2 - s1 * s2;
302 break;
303
304 default:
305 elog(ERROR, "unrecognized operator: %d", item->val);
306 selec = 0; /* keep compiler quiet */
307 break;
308 }
309 }
310 else
311 {
312 elog(ERROR, "unrecognized int query item type: %u", item->type);
313 selec = 0; /* keep compiler quiet */
314 }
315
316 /* Clamp intermediate results to stay sane despite roundoff error */
317 CLAMP_PROBABILITY(selec);
318
319 return selec;
320}
321
322/*
323 * Comparison function for binary search in mcelem array.
324 */
325static int
326compare_val_int4(const void *a, const void *b)
327{
328 int32 key = *(int32 *) a;
329 const Datum *t = (const Datum *) b;
330
331 return key - DatumGetInt32(*t);
332}
#define OPR
Definition: _int.h:163
#define VAL
Definition: _int.h:162
#define GETQUERY(x)
Definition: _int.h:157
#define DatumGetQueryTypeP(X)
Definition: _int.h:168
static int compare_val_int4(const void *a, const void *b)
Datum _int_overlap_joinsel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:85
static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs, int nmcelems, float4 minfreq)
Datum _int_matchsel(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(_int_overlap_sel)
Datum _int_contains_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:65
Datum _int_contained_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:75
Datum _int_contains_joinsel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:96
Datum _int_overlap_sel(PG_FUNCTION_ARGS)
Definition: _int_selfuncs.c:55
Datum _int_contained_joinsel(PG_FUNCTION_ARGS)
Datum arraycontsel(PG_FUNCTION_ARGS)
Datum arraycontjoinsel(PG_FUNCTION_ARGS)
#define Min(x, y)
Definition: c.h:958
#define Assert(condition)
Definition: c.h:812
double float8
Definition: c.h:584
int32_t int32
Definition: c.h:481
float float4
Definition: c.h:583
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define DirectFunctionCall4(func, arg1, arg2, arg3, arg4)
Definition: fmgr.h:647
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall5(func, arg1, arg2, arg3, arg4, arg5)
Definition: fmgr.h:649
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define GETSTRUCT(TUP)
Definition: htup_details.h:653
int b
Definition: isn.c:69
int a
Definition: isn.c:68
void free_attstatsslot(AttStatsSlot *sslot)
Definition: lsyscache.c:3344
bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple, int reqkind, Oid reqop, int flags)
Definition: lsyscache.c:3234
#define ATTSTATSSLOT_NUMBERS
Definition: lsyscache.h:43
#define ATTSTATSSLOT_VALUES
Definition: lsyscache.h:42
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
double Selectivity
Definition: nodes.h:250
FormData_pg_statistic * Form_pg_statistic
Definition: pg_statistic.h:135
uintptr_t Datum
Definition: postgres.h:64
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static int32 DatumGetInt32(Datum X)
Definition: postgres.h:202
#define InvalidOid
Definition: postgres_ext.h:36
char * s1
char * s2
tree ctl root
Definition: radixtree.h:1888
bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft)
Definition: selfuncs.c:4894
#define ReleaseVariableStats(vardata)
Definition: selfuncs.h:99
#define CLAMP_PROBABILITY(p)
Definition: selfuncs.h:63
#define DEFAULT_EQ_SEL
Definition: selfuncs.h:34
void check_stack_depth(void)
Definition: stack_depth.c:95
Oid valuetype
Definition: lsyscache.h:52
Datum * values
Definition: lsyscache.h:53
float4 * numbers
Definition: lsyscache.h:56
int nnumbers
Definition: lsyscache.h:57
Definition: _int.h:141
int16 left
Definition: _int.h:143
int32 val
Definition: _int.h:144
int16 type
Definition: _int.h:142
Definition: pg_list.h:54
Definition: nodes.h:129
int32 size
Definition: _int.h:150
HeapTuple statsTuple
Definition: selfuncs.h:89