PostgreSQL Source Code git master
Loading...
Searching...
No Matches
_int_gist.c
Go to the documentation of this file.
1/*
2 * contrib/intarray/_int_gist.c
3 */
4#include "postgres.h"
5
6#include <limits.h>
7#include <math.h>
8
9#include "_int.h"
10#include "access/gist.h"
11#include "access/reloptions.h"
12#include "access/stratnum.h"
13
14#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
15
16/*
17 * Control the maximum sparseness of compressed keys.
18 *
19 * The upper safe bound for this limit is half the maximum allocatable array
20 * size. A lower bound would give more guarantees that pathological data
21 * wouldn't eat excessive CPU and memory, but at the expense of breaking
22 * possibly working (after a fashion) indexes.
23 */
24#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
25/* or: #define MAXNUMELTS 1000000 */
26
27/*
28** GiST support methods
29*/
38
39
40/*
41** The GiST Consistent method for _intments
42** Should return false if for all data items x below entry,
43** the predicate x op query == false, where op is the oper
44** corresponding to strategy in the pg_amop table.
45*/
48{
52#ifdef NOT_USED
53 Oid subtype = PG_GETARG_OID(3);
54#endif
55 bool *recheck = (bool *) PG_GETARG_POINTER(4);
56 bool retval = false; /* silence compiler warning */
57
58 /* this is exact except for RTSameStrategyNumber */
59 *recheck = (strategy == RTSameStrategyNumber);
60
61 if (strategy == BooleanSearchStrategy)
62 {
63 retval = execconsistent((QUERYTYPE *) query,
64 (ArrayType *) DatumGetPointer(entry->key),
65 GIST_LEAF(entry));
66
67 pfree(query);
68 PG_RETURN_BOOL(retval);
69 }
70
71 /* sort query for fast search, key is already sorted */
72 CHECKARRVALID(query);
73 PREPAREARR(query);
74
75 switch (strategy)
76 {
79 query);
80 break;
82 if (GIST_LEAF(entry))
84 entry->key,
85 PointerGetDatum(query),
86 PointerGetDatum(&retval));
87 else
89 query);
90 break;
94 query);
95 break;
98
99 /*
100 * This code is unreachable as of intarray 1.4, because the <@
101 * operator has been removed from the opclass. We keep it for now
102 * to support older versions of the SQL definitions.
103 */
104 if (GIST_LEAF(entry))
105 retval = inner_int_contains(query,
106 (ArrayType *) DatumGetPointer(entry->key));
107 else
108 {
109 /*
110 * Unfortunately, because empty arrays could be anywhere in
111 * the index, we must search the whole tree.
112 */
113 retval = true;
114 }
115 break;
116 default:
117 retval = false;
118 }
119 pfree(query);
120 PG_RETURN_BOOL(retval);
121}
122
123Datum
125{
127 int *size = (int *) PG_GETARG_POINTER(1);
128 int32 i,
129 *ptr;
130 ArrayType *res;
131 int totlen = 0;
132
133 for (i = 0; i < entryvec->n; i++)
134 {
136
138 totlen += ARRNELEMS(ent);
139 }
140
142 ptr = ARRPTR(res);
143
144 for (i = 0; i < entryvec->n; i++)
145 {
147 int nel;
148
149 nel = ARRNELEMS(ent);
150 memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
151 ptr += nel;
152 }
153
154 QSORT(res, 1);
155 res = _int_unique(res);
156 *size = VARSIZE(res);
158}
159
160/*
161** GiST Compress and Decompress methods
162*/
163Datum
165{
166 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
167 GISTENTRY *retval;
168 ArrayType *r;
169 int num_ranges = G_INT_GET_NUMRANGES();
170 int len,
171 lenr;
172 int *dr;
173 int i,
174 j,
175 cand;
176 int64 min;
177
178 if (entry->leafkey)
179 {
180 r = DatumGetArrayTypePCopy(entry->key);
181 CHECKARRVALID(r);
182 PREPAREARR(r);
183
184 if (ARRNELEMS(r) >= 2 * num_ranges)
187 errmsg("input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
188 2 * num_ranges - 1, ARRNELEMS(r))));
189
190 retval = palloc_object(GISTENTRY);
191 gistentryinit(*retval, PointerGetDatum(r),
192 entry->rel, entry->page, entry->offset, false);
193
194 PG_RETURN_POINTER(retval);
195 }
196
197 /*
198 * leaf entries never compress one more time, only when entry->leafkey
199 * ==true, so now we work only with internal keys
200 */
201
202 r = DatumGetArrayTypeP(entry->key);
203 CHECKARRVALID(r);
204 if (ARRISEMPTY(r))
205 {
206 if (r != (ArrayType *) DatumGetPointer(entry->key))
207 pfree(r);
208 PG_RETURN_POINTER(entry);
209 }
210
211 if ((len = ARRNELEMS(r)) >= 2 * num_ranges)
212 { /* compress */
213 if (r == (ArrayType *) DatumGetPointer(entry->key))
214 r = DatumGetArrayTypePCopy(entry->key);
215 r = resize_intArrayType(r, 2 * (len));
216
217 dr = ARRPTR(r);
218
219 /*
220 * "len" at this point is the number of ranges we will construct.
221 * "lenr" is the number of ranges we must eventually remove by
222 * merging, we must be careful to remove no more than this number.
223 */
224 lenr = len - num_ranges;
225
226 /*
227 * Initially assume we can merge consecutive ints into a range. but we
228 * must count every value removed and stop when lenr runs out
229 */
230 for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
231 {
232 int r_end = dr[i];
233 int r_start = r_end;
234
235 while (i > 0 && lenr > 0 && dr[i - 1] == r_start - 1)
236 --r_start, --i, --lenr;
237 dr[2 * j] = r_start;
238 dr[2 * j + 1] = r_end;
239 }
240 /* just copy the rest, if any, as trivial ranges */
241 for (; i >= 0; i--, j--)
242 dr[2 * j] = dr[2 * j + 1] = dr[i];
243
244 if (++j)
245 {
246 /*
247 * shunt everything down to start at the right place
248 */
249 memmove(&dr[0], &dr[2 * j], 2 * (len - j) * sizeof(int32));
250 }
251
252 /*
253 * make "len" be number of array elements, not ranges
254 */
255 len = 2 * (len - j);
256 cand = 1;
257 while (len > num_ranges * 2)
258 {
259 min = PG_INT64_MAX;
260 for (i = 2; i < len; i += 2)
261 if (min > ((int64) dr[i] - (int64) dr[i - 1]))
262 {
263 min = ((int64) dr[i] - (int64) dr[i - 1]);
264 cand = i;
265 }
266 memmove(&dr[cand - 1], &dr[cand + 1], (len - cand - 1) * sizeof(int32));
267 len -= 2;
268 }
269
270 /*
271 * check sparseness of result
272 */
277 errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
278
279 r = resize_intArrayType(r, len);
280 retval = palloc_object(GISTENTRY);
281 gistentryinit(*retval, PointerGetDatum(r),
282 entry->rel, entry->page, entry->offset, false);
283 PG_RETURN_POINTER(retval);
284 }
285 else
286 PG_RETURN_POINTER(entry);
287}
288
289Datum
291{
292 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
293 GISTENTRY *retval;
294 ArrayType *r;
295 int num_ranges = G_INT_GET_NUMRANGES();
296 int *dr,
297 lenr;
298 ArrayType *in;
299 int lenin;
300 int *din;
301 int i;
302
303 in = DatumGetArrayTypeP(entry->key);
304
305 CHECKARRVALID(in);
306 if (ARRISEMPTY(in))
307 {
308 if (in != (ArrayType *) DatumGetPointer(entry->key))
309 {
310 retval = palloc_object(GISTENTRY);
311 gistentryinit(*retval, PointerGetDatum(in),
312 entry->rel, entry->page, entry->offset, false);
313 PG_RETURN_POINTER(retval);
314 }
315
316 PG_RETURN_POINTER(entry);
317 }
318
319 lenin = ARRNELEMS(in);
320
321 if (lenin < 2 * num_ranges)
322 { /* not compressed value */
323 if (in != (ArrayType *) DatumGetPointer(entry->key))
324 {
325 retval = palloc_object(GISTENTRY);
326 gistentryinit(*retval, PointerGetDatum(in),
327 entry->rel, entry->page, entry->offset, false);
328
329 PG_RETURN_POINTER(retval);
330 }
331 PG_RETURN_POINTER(entry);
332 }
333
334 din = ARRPTR(in);
339 errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
340
342 dr = ARRPTR(r);
343
344 for (i = 0; i < lenin; i += 2)
345 {
346 /* use int64 for j in case din[i + 1] is INT_MAX */
347 for (int64 j = din[i]; j <= din[i + 1]; j++)
348 if ((!i) || *(dr - 1) != j)
349 *dr++ = (int) j;
350 }
351
352 if (in != (ArrayType *) DatumGetPointer(entry->key))
353 pfree(in);
354 retval = palloc_object(GISTENTRY);
355 gistentryinit(*retval, PointerGetDatum(r),
356 entry->rel, entry->page, entry->offset, false);
357
358 PG_RETURN_POINTER(retval);
359}
360
361/*
362** The GiST Penalty method for _intments
363*/
364Datum
366{
369 float *result = (float *) PG_GETARG_POINTER(2);
370 ArrayType *ud;
371 float tmp1,
372 tmp2;
373
378 *result = tmp1 - tmp2;
379 pfree(ud);
380
381 PG_RETURN_POINTER(result);
382}
383
384
385
386Datum
388{
391 bool *result = (bool *) PG_GETARG_POINTER(2);
392 int32 n = ARRNELEMS(a);
393 int32 *da,
394 *db;
395
398
399 if (n != ARRNELEMS(b))
400 {
401 *result = false;
402 PG_RETURN_POINTER(result);
403 }
404 *result = true;
405 da = ARRPTR(a);
406 db = ARRPTR(b);
407 while (n--)
408 {
409 if (*da++ != *db++)
410 {
411 *result = false;
412 break;
413 }
414 }
415
416 PG_RETURN_POINTER(result);
417}
418
419/*****************************************************************
420** Common GiST Method
421*****************************************************************/
422
423typedef struct
424{
425 OffsetNumber pos;
426 float cost;
427} SPLITCOST;
428
429static int
430comparecost(const void *a, const void *b)
431{
432 if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
433 return 0;
434 else
435 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
436}
437
438/*
439** The GiST PickSplit method for _intments
440** We use Guttman's poly time split algorithm
441*/
442Datum
444{
448 j;
450 *datum_beta;
452 *datum_r;
454 *union_dl,
455 *union_dr;
457 bool firsttime;
458 float size_alpha,
459 size_beta,
462 float size_waste,
463 waste;
464 float size_l,
465 size_r;
466 int nbytes;
468 seed_2 = 0;
469 OffsetNumber *left,
470 *right;
471 OffsetNumber maxoff;
473
474#ifdef GIST_DEBUG
475 elog(DEBUG3, "--------picksplit %d", entryvec->n);
476#endif
477
478 maxoff = entryvec->n - 2;
479 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
480 v->spl_left = (OffsetNumber *) palloc(nbytes);
481 v->spl_right = (OffsetNumber *) palloc(nbytes);
482
483 firsttime = true;
484 waste = 0.0;
485 for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
486 {
488 for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
489 {
491
492 /* compute the wasted space by unioning these guys */
493 /* size_waste = size_union - size_inter; */
499
500 pfree(union_d);
501 pfree(inter_d);
502
503 /*
504 * are these a more promising split that what we've already seen?
505 */
506
507 if (size_waste > waste || firsttime)
508 {
509 waste = size_waste;
510 seed_1 = i;
511 seed_2 = j;
512 firsttime = false;
513 }
514 }
515 }
516
517 left = v->spl_left;
518 v->spl_nleft = 0;
519 right = v->spl_right;
520 v->spl_nright = 0;
521 if (seed_1 == 0 || seed_2 == 0)
522 {
523 seed_1 = 1;
524 seed_2 = 2;
525 }
526
533
534 maxoff = OffsetNumberNext(maxoff);
535
536 /*
537 * sort entries
538 */
540 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
541 {
542 costvector[i - 1].pos = i;
546 pfree(union_d);
549 pfree(union_d);
550 costvector[i - 1].cost = fabsf((size_alpha - size_l) - (size_beta - size_r));
551 }
552 qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
553
554 /*
555 * Now split up the regions between the two seeds. An important property
556 * of this split algorithm is that the split vector v has the indices of
557 * items to be split in order in its left and right vectors. We exploit
558 * this property by doing a merge in the code that actually splits the
559 * page.
560 *
561 * For efficiency, we also place the new index tuple in this loop. This is
562 * handled at the very end, when we have placed all the existing tuples
563 * and i == maxoff + 1.
564 */
565
566
567 for (j = 0; j < maxoff; j++)
568 {
569 i = costvector[j].pos;
570
571 /*
572 * If we've already decided where to place this item, just put it on
573 * the right list. Otherwise, we need to figure out which page needs
574 * the least enlargement in order to store the item.
575 */
576
577 if (i == seed_1)
578 {
579 *left++ = i;
580 v->spl_nleft++;
581 continue;
582 }
583 else if (i == seed_2)
584 {
585 *right++ = i;
586 v->spl_nright++;
587 continue;
588 }
589
590 /* okay, which page needs least enlargement? */
596
597 /* pick which page to add it to */
598 if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
599 {
600 pfree(datum_l);
604 *left++ = i;
605 v->spl_nleft++;
606 }
607 else
608 {
609 pfree(datum_r);
613 *right++ = i;
614 v->spl_nright++;
615 }
616 }
618 *right = *left = FirstOffsetNumber;
619
622
624}
625
626Datum
bool inner_int_overlap(ArrayType *a, ArrayType *b)
Definition _int_tool.c:50
ArrayType * new_intArrayType(int num)
Definition _int_tool.c:224
ArrayType * inner_int_union(ArrayType *a, ArrayType *b)
Definition _int_tool.c:79
bool inner_int_contains(ArrayType *a, ArrayType *b)
Definition _int_tool.c:15
#define G_INT_GET_NUMRANGES()
Definition _int.h:14
int internal_size(int *a, int len)
Definition _int_tool.c:295
#define PREPAREARR(x)
Definition _int.h:49
ArrayType * copy_intArrayType(ArrayType *a)
Definition _int_tool.c:283
ArrayType * inner_int_inter(ArrayType *a, ArrayType *b)
Definition _int_tool.c:136
void rt__int_size(ArrayType *a, float *size)
Definition _int_tool.c:184
#define QSORT(a, direction)
Definition _int.h:180
#define CHECKARRVALID(x)
Definition _int.h:30
ArrayType * resize_intArrayType(ArrayType *a, int num)
Definition _int_tool.c:252
#define BooleanSearchStrategy
Definition _int.h:134
bool execconsistent(QUERYTYPE *query, ArrayType *array, bool calcnot)
Definition _int_bool.c:307
#define ARRISEMPTY(x)
Definition _int.h:38
#define G_INT_NUMRANGES_DEFAULT
Definition _int.h:11
#define G_INT_NUMRANGES_MAX
Definition _int.h:12
ArrayType * _int_unique(ArrayType *r)
Definition _int_tool.c:313
Datum g_int_penalty(PG_FUNCTION_ARGS)
Definition _int_gist.c:365
Datum g_int_picksplit(PG_FUNCTION_ARGS)
Definition _int_gist.c:443
#define MAXNUMELTS
Definition _int_gist.c:24
#define GETENTRY(vec, pos)
Definition _int_gist.c:14
Datum g_int_options(PG_FUNCTION_ARGS)
Definition _int_gist.c:627
Datum g_int_same(PG_FUNCTION_ARGS)
Definition _int_gist.c:387
Datum g_int_compress(PG_FUNCTION_ARGS)
Definition _int_gist.c:164
Datum g_int_union(PG_FUNCTION_ARGS)
Definition _int_gist.c:124
Datum g_int_decompress(PG_FUNCTION_ARGS)
Definition _int_gist.c:290
Datum g_int_consistent(PG_FUNCTION_ARGS)
Definition _int_gist.c:47
static int comparecost(const void *a, const void *b)
Definition _int_gist.c:430
#define DatumGetArrayTypePCopy(X)
Definition array.h:262
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition array.h:264
#define PG_GETARG_ARRAYTYPE_P(n)
Definition array.h:263
#define DatumGetArrayTypeP(X)
Definition array.h:261
int64_t int64
Definition c.h:543
int32_t int32
Definition c.h:542
#define PG_INT64_MAX
Definition c.h:606
#define ARRNELEMS(x)
Definition cube.c:29
#define ARRPTR(x)
Definition cube.c:28
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define DEBUG3
Definition elog.h:28
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
#define PG_FUNCTION_INFO_V1(funcname)
Definition fmgr.h:417
#define PG_GETARG_UINT16(n)
Definition fmgr.h:272
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition fmgr.h:688
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
#define GIST_LEAF(entry)
Definition gist.h:171
#define gistentryinit(e, k, r, pg, o, l)
Definition gist.h:245
#define WISH_F(a, b, c)
Definition hstore_gist.c:77
int b
Definition isn.c:74
int a
Definition isn.c:73
int j
Definition isn.c:78
int i
Definition isn.c:77
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
const void size_t len
#define qsort(a, b, c, d)
Definition port.h:495
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
unsigned int Oid
static int fb(int x)
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition reloptions.c:782
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
#define RTOldContainsStrategyNumber
Definition stratnum.h:63
uint16 StrategyNumber
Definition stratnum.h:22
#define RTOverlapStrategyNumber
Definition stratnum.h:53
#define RTSameStrategyNumber
Definition stratnum.h:56
#define RTContainsStrategyNumber
Definition stratnum.h:57
#define RTOldContainedByStrategyNumber
Definition stratnum.h:64
#define RTContainedByStrategyNumber
Definition stratnum.h:58
OffsetNumber offset
Definition gist.h:164
Datum key
Definition gist.h:161
Page page
Definition gist.h:163
Relation rel
Definition gist.h:162
bool leafkey
Definition gist.h:165
int spl_nleft
Definition gist.h:144
OffsetNumber * spl_right
Definition gist.h:148
Datum spl_ldatum
Definition gist.h:145
Datum spl_rdatum
Definition gist.h:150
int spl_nright
Definition gist.h:149
OffsetNumber * spl_left
Definition gist.h:143
float cost
Definition _int_gist.c:426
static Size VARSIZE(const void *PTR)
Definition varatt.h:298