PostgreSQL Source Code  git master
_int_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_int_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <limits.h>
7 
8 #include "access/gist.h"
9 #include "access/stratnum.h"
10 
11 #include "_int.h"
12 
13 #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
14 
15 /*
16  * Control the maximum sparseness of compressed keys.
17  *
18  * The upper safe bound for this limit is half the maximum allocatable array
19  * size. A lower bound would give more guarantees that pathological data
20  * wouldn't eat excessive CPU and memory, but at the expense of breaking
21  * possibly working (after a fashion) indexes.
22  */
23 #define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
24 /* or: #define MAXNUMELTS 1000000 */
25 
26 /*
27 ** GiST support methods
28 */
36 
37 
38 /*
39 ** The GiST Consistent method for _intments
40 ** Should return false if for all data items x below entry,
41 ** the predicate x op query == false, where op is the oper
42 ** corresponding to strategy in the pg_amop table.
43 */
44 Datum
46 {
47  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
50 
51  /* Oid subtype = PG_GETARG_OID(3); */
52  bool *recheck = (bool *) PG_GETARG_POINTER(4);
53  bool retval;
54 
55  /* this is exact except for RTSameStrategyNumber */
56  *recheck = (strategy == RTSameStrategyNumber);
57 
58  if (strategy == BooleanSearchStrategy)
59  {
60  retval = execconsistent((QUERYTYPE *) query,
61  (ArrayType *) DatumGetPointer(entry->key),
62  GIST_LEAF(entry));
63 
64  pfree(query);
65  PG_RETURN_BOOL(retval);
66  }
67 
68  /* sort query for fast search, key is already sorted */
69  CHECKARRVALID(query);
70  PREPAREARR(query);
71 
72  switch (strategy)
73  {
75  retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
76  query);
77  break;
79  if (GIST_LEAF(entry))
81  entry->key,
82  PointerGetDatum(query),
83  PointerGetDatum(&retval));
84  else
85  retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
86  query);
87  break;
90  retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
91  query);
92  break;
95  if (GIST_LEAF(entry))
96  retval = inner_int_contains(query,
97  (ArrayType *) DatumGetPointer(entry->key));
98  else
99  {
100  /*
101  * Unfortunately, because empty arrays could be anywhere in
102  * the index, we must search the whole tree.
103  */
104  retval = true;
105  }
106  break;
107  default:
108  retval = false;
109  }
110  pfree(query);
111  PG_RETURN_BOOL(retval);
112 }
113 
114 Datum
116 {
118  int *size = (int *) PG_GETARG_POINTER(1);
119  int32 i,
120  *ptr;
121  ArrayType *res;
122  int totlen = 0;
123 
124  for (i = 0; i < entryvec->n; i++)
125  {
126  ArrayType *ent = GETENTRY(entryvec, i);
127 
128  CHECKARRVALID(ent);
129  totlen += ARRNELEMS(ent);
130  }
131 
132  res = new_intArrayType(totlen);
133  ptr = ARRPTR(res);
134 
135  for (i = 0; i < entryvec->n; i++)
136  {
137  ArrayType *ent = GETENTRY(entryvec, i);
138  int nel;
139 
140  nel = ARRNELEMS(ent);
141  memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
142  ptr += nel;
143  }
144 
145  QSORT(res, 1);
146  res = _int_unique(res);
147  *size = VARSIZE(res);
148  PG_RETURN_POINTER(res);
149 }
150 
151 /*
152 ** GiST Compress and Decompress methods
153 */
154 Datum
156 {
157  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
158  GISTENTRY *retval;
159  ArrayType *r;
160  int len,
161  lenr;
162  int *dr;
163  int i,
164  j,
165  cand;
166  int64 min;
167 
168  if (entry->leafkey)
169  {
170  r = DatumGetArrayTypePCopy(entry->key);
171  CHECKARRVALID(r);
172  PREPAREARR(r);
173 
174  if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
175  elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
176  2 * MAXNUMRANGE - 1, ARRNELEMS(r));
177 
178  retval = palloc(sizeof(GISTENTRY));
179  gistentryinit(*retval, PointerGetDatum(r),
180  entry->rel, entry->page, entry->offset, false);
181 
182  PG_RETURN_POINTER(retval);
183  }
184 
185  /*
186  * leaf entries never compress one more time, only when entry->leafkey
187  * ==true, so now we work only with internal keys
188  */
189 
190  r = DatumGetArrayTypeP(entry->key);
191  CHECKARRVALID(r);
192  if (ARRISEMPTY(r))
193  {
194  if (r != (ArrayType *) DatumGetPointer(entry->key))
195  pfree(r);
196  PG_RETURN_POINTER(entry);
197  }
198 
199  if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
200  { /* compress */
201  if (r == (ArrayType *) DatumGetPointer(entry->key))
202  r = DatumGetArrayTypePCopy(entry->key);
203  r = resize_intArrayType(r, 2 * (len));
204 
205  dr = ARRPTR(r);
206 
207  /*
208  * "len" at this point is the number of ranges we will construct.
209  * "lenr" is the number of ranges we must eventually remove by
210  * merging, we must be careful to remove no more than this number.
211  */
212  lenr = len - MAXNUMRANGE;
213 
214  /*
215  * Initially assume we can merge consecutive ints into a range. but we
216  * must count every value removed and stop when lenr runs out
217  */
218  for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
219  {
220  int r_end = dr[i];
221  int r_start = r_end;
222 
223  while (i > 0 && lenr > 0 && dr[i - 1] == r_start - 1)
224  --r_start, --i, --lenr;
225  dr[2 * j] = r_start;
226  dr[2 * j + 1] = r_end;
227  }
228  /* just copy the rest, if any, as trivial ranges */
229  for (; i >= 0; i--, j--)
230  dr[2 * j] = dr[2 * j + 1] = dr[i];
231 
232  if (++j)
233  {
234  /*
235  * shunt everything down to start at the right place
236  */
237  memmove((void *) &dr[0], (void *) &dr[2 * j], 2 * (len - j) * sizeof(int32));
238  }
239 
240  /*
241  * make "len" be number of array elements, not ranges
242  */
243  len = 2 * (len - j);
244  cand = 1;
245  while (len > MAXNUMRANGE * 2)
246  {
247  min = PG_INT64_MAX;
248  for (i = 2; i < len; i += 2)
249  if (min > ((int64) dr[i] - (int64) dr[i - 1]))
250  {
251  min = ((int64) dr[i] - (int64) dr[i - 1]);
252  cand = i;
253  }
254  memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
255  len -= 2;
256  }
257 
258  /*
259  * check sparseness of result
260  */
261  lenr = internal_size(dr, len);
262  if (lenr < 0 || lenr > MAXNUMELTS)
263  ereport(ERROR,
264  (errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
265 
266  r = resize_intArrayType(r, len);
267  retval = palloc(sizeof(GISTENTRY));
268  gistentryinit(*retval, PointerGetDatum(r),
269  entry->rel, entry->page, entry->offset, false);
270  PG_RETURN_POINTER(retval);
271  }
272  else
273  PG_RETURN_POINTER(entry);
274 }
275 
276 Datum
278 {
279  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
280  GISTENTRY *retval;
281  ArrayType *r;
282  int *dr,
283  lenr;
284  ArrayType *in;
285  int lenin;
286  int *din;
287  int i,
288  j;
289 
290  in = DatumGetArrayTypeP(entry->key);
291 
292  CHECKARRVALID(in);
293  if (ARRISEMPTY(in))
294  {
295  if (in != (ArrayType *) DatumGetPointer(entry->key))
296  {
297  retval = palloc(sizeof(GISTENTRY));
298  gistentryinit(*retval, PointerGetDatum(in),
299  entry->rel, entry->page, entry->offset, false);
300  PG_RETURN_POINTER(retval);
301  }
302 
303  PG_RETURN_POINTER(entry);
304  }
305 
306  lenin = ARRNELEMS(in);
307 
308  if (lenin < 2 * MAXNUMRANGE)
309  { /* not compressed value */
310  if (in != (ArrayType *) DatumGetPointer(entry->key))
311  {
312  retval = palloc(sizeof(GISTENTRY));
313  gistentryinit(*retval, PointerGetDatum(in),
314  entry->rel, entry->page, entry->offset, false);
315 
316  PG_RETURN_POINTER(retval);
317  }
318  PG_RETURN_POINTER(entry);
319  }
320 
321  din = ARRPTR(in);
322  lenr = internal_size(din, lenin);
323  if (lenr < 0 || lenr > MAXNUMELTS)
324  ereport(ERROR,
325  (errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
326 
327  r = new_intArrayType(lenr);
328  dr = ARRPTR(r);
329 
330  for (i = 0; i < lenin; i += 2)
331  for (j = din[i]; j <= din[i + 1]; j++)
332  if ((!i) || *(dr - 1) != j)
333  *dr++ = j;
334 
335  if (in != (ArrayType *) DatumGetPointer(entry->key))
336  pfree(in);
337  retval = palloc(sizeof(GISTENTRY));
338  gistentryinit(*retval, PointerGetDatum(r),
339  entry->rel, entry->page, entry->offset, false);
340 
341  PG_RETURN_POINTER(retval);
342 }
343 
344 /*
345 ** The GiST Penalty method for _intments
346 */
347 Datum
349 {
350  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
351  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
352  float *result = (float *) PG_GETARG_POINTER(2);
353  ArrayType *ud;
354  float tmp1,
355  tmp2;
356 
357  ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
358  (ArrayType *) DatumGetPointer(newentry->key));
359  rt__int_size(ud, &tmp1);
360  rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
361  *result = tmp1 - tmp2;
362  pfree(ud);
363 
364  PG_RETURN_POINTER(result);
365 }
366 
367 
368 
369 Datum
371 {
374  bool *result = (bool *) PG_GETARG_POINTER(2);
375  int32 n = ARRNELEMS(a);
376  int32 *da,
377  *db;
378 
379  CHECKARRVALID(a);
380  CHECKARRVALID(b);
381 
382  if (n != ARRNELEMS(b))
383  {
384  *result = false;
385  PG_RETURN_POINTER(result);
386  }
387  *result = true;
388  da = ARRPTR(a);
389  db = ARRPTR(b);
390  while (n--)
391  {
392  if (*da++ != *db++)
393  {
394  *result = false;
395  break;
396  }
397  }
398 
399  PG_RETURN_POINTER(result);
400 }
401 
402 /*****************************************************************
403 ** Common GiST Method
404 *****************************************************************/
405 
406 typedef struct
407 {
408  OffsetNumber pos;
409  float cost;
410 } SPLITCOST;
411 
412 static int
413 comparecost(const void *a, const void *b)
414 {
415  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
416  return 0;
417  else
418  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
419 }
420 
421 /*
422 ** The GiST PickSplit method for _intments
423 ** We use Guttman's poly time split algorithm
424 */
425 Datum
427 {
430  OffsetNumber i,
431  j;
432  ArrayType *datum_alpha,
433  *datum_beta;
434  ArrayType *datum_l,
435  *datum_r;
436  ArrayType *union_d,
437  *union_dl,
438  *union_dr;
439  ArrayType *inter_d;
440  bool firsttime;
441  float size_alpha,
442  size_beta,
443  size_union,
444  size_inter;
445  float size_waste,
446  waste;
447  float size_l,
448  size_r;
449  int nbytes;
450  OffsetNumber seed_1 = 0,
451  seed_2 = 0;
453  *right;
454  OffsetNumber maxoff;
455  SPLITCOST *costvector;
456 
457 #ifdef GIST_DEBUG
458  elog(DEBUG3, "--------picksplit %d", entryvec->n);
459 #endif
460 
461  maxoff = entryvec->n - 2;
462  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
463  v->spl_left = (OffsetNumber *) palloc(nbytes);
464  v->spl_right = (OffsetNumber *) palloc(nbytes);
465 
466  firsttime = true;
467  waste = 0.0;
468  for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
469  {
470  datum_alpha = GETENTRY(entryvec, i);
471  for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
472  {
473  datum_beta = GETENTRY(entryvec, j);
474 
475  /* compute the wasted space by unioning these guys */
476  /* size_waste = size_union - size_inter; */
477  union_d = inner_int_union(datum_alpha, datum_beta);
478  rt__int_size(union_d, &size_union);
479  inter_d = inner_int_inter(datum_alpha, datum_beta);
480  rt__int_size(inter_d, &size_inter);
481  size_waste = size_union - size_inter;
482 
483  pfree(union_d);
484  pfree(inter_d);
485 
486  /*
487  * are these a more promising split that what we've already seen?
488  */
489 
490  if (size_waste > waste || firsttime)
491  {
492  waste = size_waste;
493  seed_1 = i;
494  seed_2 = j;
495  firsttime = false;
496  }
497  }
498  }
499 
500  left = v->spl_left;
501  v->spl_nleft = 0;
502  right = v->spl_right;
503  v->spl_nright = 0;
504  if (seed_1 == 0 || seed_2 == 0)
505  {
506  seed_1 = 1;
507  seed_2 = 2;
508  }
509 
510  datum_alpha = GETENTRY(entryvec, seed_1);
511  datum_l = copy_intArrayType(datum_alpha);
512  rt__int_size(datum_l, &size_l);
513  datum_beta = GETENTRY(entryvec, seed_2);
514  datum_r = copy_intArrayType(datum_beta);
515  rt__int_size(datum_r, &size_r);
516 
517  maxoff = OffsetNumberNext(maxoff);
518 
519  /*
520  * sort entries
521  */
522  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
523  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
524  {
525  costvector[i - 1].pos = i;
526  datum_alpha = GETENTRY(entryvec, i);
527  union_d = inner_int_union(datum_l, datum_alpha);
528  rt__int_size(union_d, &size_alpha);
529  pfree(union_d);
530  union_d = inner_int_union(datum_r, datum_alpha);
531  rt__int_size(union_d, &size_beta);
532  pfree(union_d);
533  costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
534  }
535  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
536 
537  /*
538  * Now split up the regions between the two seeds. An important property
539  * of this split algorithm is that the split vector v has the indices of
540  * items to be split in order in its left and right vectors. We exploit
541  * this property by doing a merge in the code that actually splits the
542  * page.
543  *
544  * For efficiency, we also place the new index tuple in this loop. This is
545  * handled at the very end, when we have placed all the existing tuples
546  * and i == maxoff + 1.
547  */
548 
549 
550  for (j = 0; j < maxoff; j++)
551  {
552  i = costvector[j].pos;
553 
554  /*
555  * If we've already decided where to place this item, just put it on
556  * the right list. Otherwise, we need to figure out which page needs
557  * the least enlargement in order to store the item.
558  */
559 
560  if (i == seed_1)
561  {
562  *left++ = i;
563  v->spl_nleft++;
564  continue;
565  }
566  else if (i == seed_2)
567  {
568  *right++ = i;
569  v->spl_nright++;
570  continue;
571  }
572 
573  /* okay, which page needs least enlargement? */
574  datum_alpha = GETENTRY(entryvec, i);
575  union_dl = inner_int_union(datum_l, datum_alpha);
576  union_dr = inner_int_union(datum_r, datum_alpha);
577  rt__int_size(union_dl, &size_alpha);
578  rt__int_size(union_dr, &size_beta);
579 
580  /* pick which page to add it to */
581  if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
582  {
583  pfree(datum_l);
584  pfree(union_dr);
585  datum_l = union_dl;
586  size_l = size_alpha;
587  *left++ = i;
588  v->spl_nleft++;
589  }
590  else
591  {
592  pfree(datum_r);
593  pfree(union_dl);
594  datum_r = union_dr;
595  size_r = size_beta;
596  *right++ = i;
597  v->spl_nright++;
598  }
599  }
600  pfree(costvector);
601  *right = *left = FirstOffsetNumber;
602 
603  v->spl_ldatum = PointerGetDatum(datum_l);
604  v->spl_rdatum = PointerGetDatum(datum_r);
605 
607 }
#define GIST_LEAF(entry)
Definition: gist.h:141
Relation rel
Definition: gist.h:132
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:351
OffsetNumber pos
Definition: hstore_gist.c:320
#define PG_INT64_MAX
Definition: c.h:444
#define RTOldContainsStrategyNumber
Definition: stratnum.h:63
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:64
#define DEBUG3
Definition: elog.h:23
void rt__int_size(ArrayType *a, float *size)
Definition: _int_tool.c:184
struct NODE * left
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:252
#define VARSIZE(PTR)
Definition: postgres.h:303
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum g_int_penalty(PG_FUNCTION_ARGS)
Definition: _int_gist.c:348
#define CHECKARRVALID(x)
Definition: _int.h:18
Datum g_int_consistent(PG_FUNCTION_ARGS)
Definition: _int_gist.c:45
Datum g_int_compress(PG_FUNCTION_ARGS)
Definition: _int_gist.c:155
OffsetNumber * spl_left
Definition: gist.h:113
Datum spl_rdatum
Definition: gist.h:120
#define PREPAREARR(x)
Definition: _int.h:37
int32 n
Definition: gist.h:206
#define MAXNUMELTS
Definition: _int_gist.c:23
uint16 StrategyNumber
Definition: stratnum.h:22
struct NODE * right
int internal_size(int *a, int len)
Definition: _int_tool.c:293
ArrayType * inner_int_inter(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:136
#define GETENTRY(vec, pos)
Definition: _int_gist.c:13
#define RTContainedByStrategyNumber
Definition: stratnum.h:58
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Datum g_int_same(PG_FUNCTION_ARGS)
Definition: _int_gist.c:370
int spl_nleft
Definition: gist.h:114
signed int int32
Definition: c.h:346
int32 cost
Definition: hstore_gist.c:321
#define QSORT(a, direction)
Definition: _int.h:162
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:133
#define Abs(x)
Definition: c.h:910
Datum g_int_union(PG_FUNCTION_ARGS)
Definition: _int_gist.c:115
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:251
void pfree(void *pointer)
Definition: mcxt.c:1056
#define WISH_F(a, b, c)
Definition: hstore_gist.c:66
#define ERROR
Definition: elog.h:43
ArrayType * copy_intArrayType(ArrayType *a)
Definition: _int_tool.c:281
int spl_nright
Definition: gist.h:119
bool inner_int_overlap(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:50
ArrayType * resize_intArrayType(ArrayType *a, int num)
Definition: _int_tool.c:250
Datum key
Definition: gist.h:131
PG_FUNCTION_INFO_V1(g_int_consistent)
#define memmove(d, s, c)
Definition: c.h:1238
#define FirstOffsetNumber
Definition: off.h:27
#define ARRISEMPTY(x)
Definition: _int.h:26
#define RTSameStrategyNumber
Definition: stratnum.h:56
#define ereport(elevel, rest)
Definition: elog.h:141
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:621
bool leafkey
Definition: gist.h:135
bool inner_int_contains(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:15
ArrayType * new_intArrayType(int num)
Definition: _int_tool.c:222
ArrayType * _int_unique(ArrayType *a)
Definition: _int_tool.c:311
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
uintptr_t Datum
Definition: postgres.h:367
#define DatumGetArrayTypePCopy(X)
Definition: array.h:250
static int comparecost(const void *a, const void *b)
Definition: _int_gist.c:413
Datum g_int_decompress(PG_FUNCTION_ARGS)
Definition: _int_gist.c:277
#define NOTICE
Definition: elog.h:37
bool execconsistent(QUERYTYPE *query, ArrayType *array, bool calcnot)
Definition: _int_bool.c:310
ArrayType * inner_int_union(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:79
Datum spl_ldatum
Definition: gist.h:115
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:215
float cost
Definition: _int_gist.c:409
#define BooleanSearchStrategy
Definition: _int.h:113
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
OffsetNumber * spl_right
Definition: gist.h:118
#define MAXNUMRANGE
Definition: _int.h:11
#define RTContainsStrategyNumber
Definition: stratnum.h:57
#define ARRNELEMS(x)
Definition: cube.c:26
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:267
#define DatumGetPointer(X)
Definition: postgres.h:549
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:784
#define elog(elevel,...)
Definition: elog.h:226
int i
#define RTOverlapStrategyNumber
Definition: stratnum.h:53
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define ARRPTR(x)
Definition: cube.c:25
#define qsort(a, b, c, d)
Definition: port.h:492
OffsetNumber offset
Definition: gist.h:134
Datum g_int_picksplit(PG_FUNCTION_ARGS)
Definition: _int_gist.c:426
#define DatumGetArrayTypeP(X)
Definition: array.h:249