PostgreSQL Source Code  git master
_int_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_int_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <limits.h>
7 
8 #include "_int.h"
9 #include "access/gist.h"
10 #include "access/stratnum.h"
11 
12 #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
13 
14 /*
15  * Control the maximum sparseness of compressed keys.
16  *
17  * The upper safe bound for this limit is half the maximum allocatable array
18  * size. A lower bound would give more guarantees that pathological data
19  * wouldn't eat excessive CPU and memory, but at the expense of breaking
20  * possibly working (after a fashion) indexes.
21  */
22 #define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
23 /* or: #define MAXNUMELTS 1000000 */
24 
25 /*
26 ** GiST support methods
27 */
35 
36 
37 /*
38 ** The GiST Consistent method for _intments
39 ** Should return false if for all data items x below entry,
40 ** the predicate x op query == false, where op is the oper
41 ** corresponding to strategy in the pg_amop table.
42 */
43 Datum
45 {
46  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
49 
50  /* Oid subtype = PG_GETARG_OID(3); */
51  bool *recheck = (bool *) PG_GETARG_POINTER(4);
52  bool retval;
53 
54  /* this is exact except for RTSameStrategyNumber */
55  *recheck = (strategy == RTSameStrategyNumber);
56 
57  if (strategy == BooleanSearchStrategy)
58  {
59  retval = execconsistent((QUERYTYPE *) query,
60  (ArrayType *) DatumGetPointer(entry->key),
61  GIST_LEAF(entry));
62 
63  pfree(query);
64  PG_RETURN_BOOL(retval);
65  }
66 
67  /* sort query for fast search, key is already sorted */
68  CHECKARRVALID(query);
69  PREPAREARR(query);
70 
71  switch (strategy)
72  {
74  retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
75  query);
76  break;
78  if (GIST_LEAF(entry))
80  entry->key,
81  PointerGetDatum(query),
82  PointerGetDatum(&retval));
83  else
84  retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
85  query);
86  break;
89  retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
90  query);
91  break;
94  if (GIST_LEAF(entry))
95  retval = inner_int_contains(query,
96  (ArrayType *) DatumGetPointer(entry->key));
97  else
98  {
99  /*
100  * Unfortunately, because empty arrays could be anywhere in
101  * the index, we must search the whole tree.
102  */
103  retval = true;
104  }
105  break;
106  default:
107  retval = false;
108  }
109  pfree(query);
110  PG_RETURN_BOOL(retval);
111 }
112 
113 Datum
115 {
117  int *size = (int *) PG_GETARG_POINTER(1);
118  int32 i,
119  *ptr;
120  ArrayType *res;
121  int totlen = 0;
122 
123  for (i = 0; i < entryvec->n; i++)
124  {
125  ArrayType *ent = GETENTRY(entryvec, i);
126 
127  CHECKARRVALID(ent);
128  totlen += ARRNELEMS(ent);
129  }
130 
131  res = new_intArrayType(totlen);
132  ptr = ARRPTR(res);
133 
134  for (i = 0; i < entryvec->n; i++)
135  {
136  ArrayType *ent = GETENTRY(entryvec, i);
137  int nel;
138 
139  nel = ARRNELEMS(ent);
140  memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
141  ptr += nel;
142  }
143 
144  QSORT(res, 1);
145  res = _int_unique(res);
146  *size = VARSIZE(res);
147  PG_RETURN_POINTER(res);
148 }
149 
150 /*
151 ** GiST Compress and Decompress methods
152 */
153 Datum
155 {
156  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
157  GISTENTRY *retval;
158  ArrayType *r;
159  int len,
160  lenr;
161  int *dr;
162  int i,
163  j,
164  cand;
165  int64 min;
166 
167  if (entry->leafkey)
168  {
169  r = DatumGetArrayTypePCopy(entry->key);
170  CHECKARRVALID(r);
171  PREPAREARR(r);
172 
173  if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
174  elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
175  2 * MAXNUMRANGE - 1, ARRNELEMS(r));
176 
177  retval = palloc(sizeof(GISTENTRY));
178  gistentryinit(*retval, PointerGetDatum(r),
179  entry->rel, entry->page, entry->offset, false);
180 
181  PG_RETURN_POINTER(retval);
182  }
183 
184  /*
185  * leaf entries never compress one more time, only when entry->leafkey
186  * ==true, so now we work only with internal keys
187  */
188 
189  r = DatumGetArrayTypeP(entry->key);
190  CHECKARRVALID(r);
191  if (ARRISEMPTY(r))
192  {
193  if (r != (ArrayType *) DatumGetPointer(entry->key))
194  pfree(r);
195  PG_RETURN_POINTER(entry);
196  }
197 
198  if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
199  { /* compress */
200  if (r == (ArrayType *) DatumGetPointer(entry->key))
201  r = DatumGetArrayTypePCopy(entry->key);
202  r = resize_intArrayType(r, 2 * (len));
203 
204  dr = ARRPTR(r);
205 
206  /*
207  * "len" at this point is the number of ranges we will construct.
208  * "lenr" is the number of ranges we must eventually remove by
209  * merging, we must be careful to remove no more than this number.
210  */
211  lenr = len - MAXNUMRANGE;
212 
213  /*
214  * Initially assume we can merge consecutive ints into a range. but we
215  * must count every value removed and stop when lenr runs out
216  */
217  for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
218  {
219  int r_end = dr[i];
220  int r_start = r_end;
221 
222  while (i > 0 && lenr > 0 && dr[i - 1] == r_start - 1)
223  --r_start, --i, --lenr;
224  dr[2 * j] = r_start;
225  dr[2 * j + 1] = r_end;
226  }
227  /* just copy the rest, if any, as trivial ranges */
228  for (; i >= 0; i--, j--)
229  dr[2 * j] = dr[2 * j + 1] = dr[i];
230 
231  if (++j)
232  {
233  /*
234  * shunt everything down to start at the right place
235  */
236  memmove((void *) &dr[0], (void *) &dr[2 * j], 2 * (len - j) * sizeof(int32));
237  }
238 
239  /*
240  * make "len" be number of array elements, not ranges
241  */
242  len = 2 * (len - j);
243  cand = 1;
244  while (len > MAXNUMRANGE * 2)
245  {
246  min = PG_INT64_MAX;
247  for (i = 2; i < len; i += 2)
248  if (min > ((int64) dr[i] - (int64) dr[i - 1]))
249  {
250  min = ((int64) dr[i] - (int64) dr[i - 1]);
251  cand = i;
252  }
253  memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
254  len -= 2;
255  }
256 
257  /*
258  * check sparseness of result
259  */
260  lenr = internal_size(dr, len);
261  if (lenr < 0 || lenr > MAXNUMELTS)
262  ereport(ERROR,
263  (errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
264 
265  r = resize_intArrayType(r, len);
266  retval = palloc(sizeof(GISTENTRY));
267  gistentryinit(*retval, PointerGetDatum(r),
268  entry->rel, entry->page, entry->offset, false);
269  PG_RETURN_POINTER(retval);
270  }
271  else
272  PG_RETURN_POINTER(entry);
273 }
274 
275 Datum
277 {
278  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
279  GISTENTRY *retval;
280  ArrayType *r;
281  int *dr,
282  lenr;
283  ArrayType *in;
284  int lenin;
285  int *din;
286  int i,
287  j;
288 
289  in = DatumGetArrayTypeP(entry->key);
290 
291  CHECKARRVALID(in);
292  if (ARRISEMPTY(in))
293  {
294  if (in != (ArrayType *) DatumGetPointer(entry->key))
295  {
296  retval = palloc(sizeof(GISTENTRY));
297  gistentryinit(*retval, PointerGetDatum(in),
298  entry->rel, entry->page, entry->offset, false);
299  PG_RETURN_POINTER(retval);
300  }
301 
302  PG_RETURN_POINTER(entry);
303  }
304 
305  lenin = ARRNELEMS(in);
306 
307  if (lenin < 2 * MAXNUMRANGE)
308  { /* not compressed value */
309  if (in != (ArrayType *) DatumGetPointer(entry->key))
310  {
311  retval = palloc(sizeof(GISTENTRY));
312  gistentryinit(*retval, PointerGetDatum(in),
313  entry->rel, entry->page, entry->offset, false);
314 
315  PG_RETURN_POINTER(retval);
316  }
317  PG_RETURN_POINTER(entry);
318  }
319 
320  din = ARRPTR(in);
321  lenr = internal_size(din, lenin);
322  if (lenr < 0 || lenr > MAXNUMELTS)
323  ereport(ERROR,
324  (errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
325 
326  r = new_intArrayType(lenr);
327  dr = ARRPTR(r);
328 
329  for (i = 0; i < lenin; i += 2)
330  for (j = din[i]; j <= din[i + 1]; j++)
331  if ((!i) || *(dr - 1) != j)
332  *dr++ = j;
333 
334  if (in != (ArrayType *) DatumGetPointer(entry->key))
335  pfree(in);
336  retval = palloc(sizeof(GISTENTRY));
337  gistentryinit(*retval, PointerGetDatum(r),
338  entry->rel, entry->page, entry->offset, false);
339 
340  PG_RETURN_POINTER(retval);
341 }
342 
343 /*
344 ** The GiST Penalty method for _intments
345 */
346 Datum
348 {
349  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
350  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
351  float *result = (float *) PG_GETARG_POINTER(2);
352  ArrayType *ud;
353  float tmp1,
354  tmp2;
355 
356  ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
357  (ArrayType *) DatumGetPointer(newentry->key));
358  rt__int_size(ud, &tmp1);
359  rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
360  *result = tmp1 - tmp2;
361  pfree(ud);
362 
363  PG_RETURN_POINTER(result);
364 }
365 
366 
367 
368 Datum
370 {
373  bool *result = (bool *) PG_GETARG_POINTER(2);
374  int32 n = ARRNELEMS(a);
375  int32 *da,
376  *db;
377 
378  CHECKARRVALID(a);
379  CHECKARRVALID(b);
380 
381  if (n != ARRNELEMS(b))
382  {
383  *result = false;
384  PG_RETURN_POINTER(result);
385  }
386  *result = true;
387  da = ARRPTR(a);
388  db = ARRPTR(b);
389  while (n--)
390  {
391  if (*da++ != *db++)
392  {
393  *result = false;
394  break;
395  }
396  }
397 
398  PG_RETURN_POINTER(result);
399 }
400 
401 /*****************************************************************
402 ** Common GiST Method
403 *****************************************************************/
404 
405 typedef struct
406 {
407  OffsetNumber pos;
408  float cost;
409 } SPLITCOST;
410 
411 static int
412 comparecost(const void *a, const void *b)
413 {
414  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
415  return 0;
416  else
417  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
418 }
419 
420 /*
421 ** The GiST PickSplit method for _intments
422 ** We use Guttman's poly time split algorithm
423 */
424 Datum
426 {
429  OffsetNumber i,
430  j;
431  ArrayType *datum_alpha,
432  *datum_beta;
433  ArrayType *datum_l,
434  *datum_r;
435  ArrayType *union_d,
436  *union_dl,
437  *union_dr;
438  ArrayType *inter_d;
439  bool firsttime;
440  float size_alpha,
441  size_beta,
442  size_union,
443  size_inter;
444  float size_waste,
445  waste;
446  float size_l,
447  size_r;
448  int nbytes;
449  OffsetNumber seed_1 = 0,
450  seed_2 = 0;
452  *right;
453  OffsetNumber maxoff;
454  SPLITCOST *costvector;
455 
456 #ifdef GIST_DEBUG
457  elog(DEBUG3, "--------picksplit %d", entryvec->n);
458 #endif
459 
460  maxoff = entryvec->n - 2;
461  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
462  v->spl_left = (OffsetNumber *) palloc(nbytes);
463  v->spl_right = (OffsetNumber *) palloc(nbytes);
464 
465  firsttime = true;
466  waste = 0.0;
467  for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
468  {
469  datum_alpha = GETENTRY(entryvec, i);
470  for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
471  {
472  datum_beta = GETENTRY(entryvec, j);
473 
474  /* compute the wasted space by unioning these guys */
475  /* size_waste = size_union - size_inter; */
476  union_d = inner_int_union(datum_alpha, datum_beta);
477  rt__int_size(union_d, &size_union);
478  inter_d = inner_int_inter(datum_alpha, datum_beta);
479  rt__int_size(inter_d, &size_inter);
480  size_waste = size_union - size_inter;
481 
482  pfree(union_d);
483  pfree(inter_d);
484 
485  /*
486  * are these a more promising split that what we've already seen?
487  */
488 
489  if (size_waste > waste || firsttime)
490  {
491  waste = size_waste;
492  seed_1 = i;
493  seed_2 = j;
494  firsttime = false;
495  }
496  }
497  }
498 
499  left = v->spl_left;
500  v->spl_nleft = 0;
501  right = v->spl_right;
502  v->spl_nright = 0;
503  if (seed_1 == 0 || seed_2 == 0)
504  {
505  seed_1 = 1;
506  seed_2 = 2;
507  }
508 
509  datum_alpha = GETENTRY(entryvec, seed_1);
510  datum_l = copy_intArrayType(datum_alpha);
511  rt__int_size(datum_l, &size_l);
512  datum_beta = GETENTRY(entryvec, seed_2);
513  datum_r = copy_intArrayType(datum_beta);
514  rt__int_size(datum_r, &size_r);
515 
516  maxoff = OffsetNumberNext(maxoff);
517 
518  /*
519  * sort entries
520  */
521  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
522  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
523  {
524  costvector[i - 1].pos = i;
525  datum_alpha = GETENTRY(entryvec, i);
526  union_d = inner_int_union(datum_l, datum_alpha);
527  rt__int_size(union_d, &size_alpha);
528  pfree(union_d);
529  union_d = inner_int_union(datum_r, datum_alpha);
530  rt__int_size(union_d, &size_beta);
531  pfree(union_d);
532  costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
533  }
534  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
535 
536  /*
537  * Now split up the regions between the two seeds. An important property
538  * of this split algorithm is that the split vector v has the indices of
539  * items to be split in order in its left and right vectors. We exploit
540  * this property by doing a merge in the code that actually splits the
541  * page.
542  *
543  * For efficiency, we also place the new index tuple in this loop. This is
544  * handled at the very end, when we have placed all the existing tuples
545  * and i == maxoff + 1.
546  */
547 
548 
549  for (j = 0; j < maxoff; j++)
550  {
551  i = costvector[j].pos;
552 
553  /*
554  * If we've already decided where to place this item, just put it on
555  * the right list. Otherwise, we need to figure out which page needs
556  * the least enlargement in order to store the item.
557  */
558 
559  if (i == seed_1)
560  {
561  *left++ = i;
562  v->spl_nleft++;
563  continue;
564  }
565  else if (i == seed_2)
566  {
567  *right++ = i;
568  v->spl_nright++;
569  continue;
570  }
571 
572  /* okay, which page needs least enlargement? */
573  datum_alpha = GETENTRY(entryvec, i);
574  union_dl = inner_int_union(datum_l, datum_alpha);
575  union_dr = inner_int_union(datum_r, datum_alpha);
576  rt__int_size(union_dl, &size_alpha);
577  rt__int_size(union_dr, &size_beta);
578 
579  /* pick which page to add it to */
580  if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
581  {
582  pfree(datum_l);
583  pfree(union_dr);
584  datum_l = union_dl;
585  size_l = size_alpha;
586  *left++ = i;
587  v->spl_nleft++;
588  }
589  else
590  {
591  pfree(datum_r);
592  pfree(union_dl);
593  datum_r = union_dr;
594  size_r = size_beta;
595  *right++ = i;
596  v->spl_nright++;
597  }
598  }
599  pfree(costvector);
600  *right = *left = FirstOffsetNumber;
601 
602  v->spl_ldatum = PointerGetDatum(datum_l);
603  v->spl_rdatum = PointerGetDatum(datum_r);
604 
606 }
#define GIST_LEAF(entry)
Definition: gist.h:141
Relation rel
Definition: gist.h:132
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
OffsetNumber pos
Definition: hstore_gist.c:319
#define PG_INT64_MAX
Definition: c.h:445
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
#define DEBUG3
Definition: elog.h:23
void rt__int_size(ArrayType *a, float *size)
Definition: _int_tool.c:183
struct NODE * left
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:252
#define VARSIZE(PTR)
Definition: postgres.h:303
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum g_int_penalty(PG_FUNCTION_ARGS)
Definition: _int_gist.c:347
#define CHECKARRVALID(x)
Definition: _int.h:18
Datum g_int_consistent(PG_FUNCTION_ARGS)
Definition: _int_gist.c:44
Datum g_int_compress(PG_FUNCTION_ARGS)
Definition: _int_gist.c:154
OffsetNumber * spl_left
Definition: gist.h:113
Datum spl_rdatum
Definition: gist.h:120
#define PREPAREARR(x)
Definition: _int.h:37
int32 n
Definition: gist.h:206
#define MAXNUMELTS
Definition: _int_gist.c:22
uint16 StrategyNumber
Definition: stratnum.h:22
struct NODE * right
int internal_size(int *a, int len)
Definition: _int_tool.c:292
ArrayType * inner_int_inter(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:135
#define GETENTRY(vec, pos)
Definition: _int_gist.c:12
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Datum g_int_same(PG_FUNCTION_ARGS)
Definition: _int_gist.c:369
int spl_nleft
Definition: gist.h:114
signed int int32
Definition: c.h:347
int32 cost
Definition: hstore_gist.c:320
#define QSORT(a, direction)
Definition: _int.h:162
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:133
#define Abs(x)
Definition: c.h:917
Datum g_int_union(PG_FUNCTION_ARGS)
Definition: _int_gist.c:114
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:251
void pfree(void *pointer)
Definition: mcxt.c:1056
#define WISH_F(a, b, c)
Definition: hstore_gist.c:65
#define ERROR
Definition: elog.h:43
ArrayType * copy_intArrayType(ArrayType *a)
Definition: _int_tool.c:280
int spl_nright
Definition: gist.h:119
bool inner_int_overlap(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:49
ArrayType * resize_intArrayType(ArrayType *a, int num)
Definition: _int_tool.c:249
Datum key
Definition: gist.h:131
PG_FUNCTION_INFO_V1(g_int_consistent)
#define memmove(d, s, c)
Definition: c.h:1267
#define FirstOffsetNumber
Definition: off.h:27
#define ARRISEMPTY(x)
Definition: _int.h:26
#define RTSameStrategyNumber
Definition: stratnum.h:56
#define ereport(elevel, rest)
Definition: elog.h:141
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:619
bool leafkey
Definition: gist.h:135
bool inner_int_contains(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:14
ArrayType * new_intArrayType(int num)
Definition: _int_tool.c:221
ArrayType * _int_unique(ArrayType *a)
Definition: _int_tool.c:310
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
uintptr_t Datum
Definition: postgres.h:367
#define DatumGetArrayTypePCopy(X)
Definition: array.h:250
static int comparecost(const void *a, const void *b)
Definition: _int_gist.c:412
Datum g_int_decompress(PG_FUNCTION_ARGS)
Definition: _int_gist.c:276
#define NOTICE
Definition: elog.h:37
bool execconsistent(QUERYTYPE *query, ArrayType *array, bool calcnot)
Definition: _int_bool.c:309
ArrayType * inner_int_union(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:78
Datum spl_ldatum
Definition: gist.h:115
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:215
float cost
Definition: _int_gist.c:408
#define BooleanSearchStrategy
Definition: _int.h:113
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
OffsetNumber * spl_right
Definition: gist.h:118
#define MAXNUMRANGE
Definition: _int.h:11
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define ARRNELEMS(x)
Definition: cube.c:25
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:267
#define DatumGetPointer(X)
Definition: postgres.h:549
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:822
#define elog(elevel,...)
Definition: elog.h:228
int i
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define ARRPTR(x)
Definition: cube.c:24
#define qsort(a, b, c, d)
Definition: port.h:488
OffsetNumber offset
Definition: gist.h:134
Datum g_int_picksplit(PG_FUNCTION_ARGS)
Definition: _int_gist.c:425
#define DatumGetArrayTypeP(X)
Definition: array.h:249