PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
_int_gist.c
Go to the documentation of this file.
1 /*
2  * contrib/intarray/_int_gist.c
3  */
4 #include "postgres.h"
5 
6 #include <limits.h>
7 
8 #include "access/gist.h"
9 #include "access/stratnum.h"
10 
11 #include "_int.h"
12 
13 #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
14 
15 /*
16 ** GiST support methods
17 */
25 
26 
27 /*
28 ** The GiST Consistent method for _intments
29 ** Should return false if for all data items x below entry,
30 ** the predicate x op query == FALSE, where op is the oper
31 ** corresponding to strategy in the pg_amop table.
32 */
33 Datum
35 {
36  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
39 
40  /* Oid subtype = PG_GETARG_OID(3); */
41  bool *recheck = (bool *) PG_GETARG_POINTER(4);
42  bool retval;
43 
44  /* this is exact except for RTSameStrategyNumber */
45  *recheck = (strategy == RTSameStrategyNumber);
46 
47  if (strategy == BooleanSearchStrategy)
48  {
49  retval = execconsistent((QUERYTYPE *) query,
50  (ArrayType *) DatumGetPointer(entry->key),
51  GIST_LEAF(entry));
52 
53  pfree(query);
54  PG_RETURN_BOOL(retval);
55  }
56 
57  /* sort query for fast search, key is already sorted */
58  CHECKARRVALID(query);
59  PREPAREARR(query);
60 
61  switch (strategy)
62  {
64  retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
65  query);
66  break;
68  if (GIST_LEAF(entry))
70  entry->key,
71  PointerGetDatum(query),
72  PointerGetDatum(&retval));
73  else
74  retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
75  query);
76  break;
79  retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
80  query);
81  break;
84  if (GIST_LEAF(entry))
85  retval = inner_int_contains(query,
86  (ArrayType *) DatumGetPointer(entry->key));
87  else
88  retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
89  query);
90  break;
91  default:
92  retval = FALSE;
93  }
94  pfree(query);
95  PG_RETURN_BOOL(retval);
96 }
97 
98 Datum
100 {
102  int *size = (int *) PG_GETARG_POINTER(1);
103  int32 i,
104  *ptr;
105  ArrayType *res;
106  int totlen = 0;
107 
108  for (i = 0; i < entryvec->n; i++)
109  {
110  ArrayType *ent = GETENTRY(entryvec, i);
111 
112  CHECKARRVALID(ent);
113  totlen += ARRNELEMS(ent);
114  }
115 
116  res = new_intArrayType(totlen);
117  ptr = ARRPTR(res);
118 
119  for (i = 0; i < entryvec->n; i++)
120  {
121  ArrayType *ent = GETENTRY(entryvec, i);
122  int nel;
123 
124  nel = ARRNELEMS(ent);
125  memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
126  ptr += nel;
127  }
128 
129  QSORT(res, 1);
130  res = _int_unique(res);
131  *size = VARSIZE(res);
132  PG_RETURN_POINTER(res);
133 }
134 
135 /*
136 ** GiST Compress and Decompress methods
137 */
138 Datum
140 {
141  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
142  GISTENTRY *retval;
143  ArrayType *r;
144  int len;
145  int *dr;
146  int i,
147  min,
148  cand;
149 
150  if (entry->leafkey)
151  {
152  r = DatumGetArrayTypePCopy(entry->key);
153  CHECKARRVALID(r);
154  PREPAREARR(r);
155 
156  if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
157  elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
158  2 * MAXNUMRANGE - 1, ARRNELEMS(r));
159 
160  retval = palloc(sizeof(GISTENTRY));
161  gistentryinit(*retval, PointerGetDatum(r),
162  entry->rel, entry->page, entry->offset, FALSE);
163 
164  PG_RETURN_POINTER(retval);
165  }
166 
167  /*
168  * leaf entries never compress one more time, only when entry->leafkey
169  * ==true, so now we work only with internal keys
170  */
171 
172  r = DatumGetArrayTypeP(entry->key);
173  CHECKARRVALID(r);
174  if (ARRISEMPTY(r))
175  {
176  if (r != (ArrayType *) DatumGetPointer(entry->key))
177  pfree(r);
178  PG_RETURN_POINTER(entry);
179  }
180 
181  if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
182  { /* compress */
183  if (r == (ArrayType *) DatumGetPointer(entry->key))
184  r = DatumGetArrayTypePCopy(entry->key);
185  r = resize_intArrayType(r, 2 * (len));
186 
187  dr = ARRPTR(r);
188 
189  for (i = len - 1; i >= 0; i--)
190  dr[2 * i] = dr[2 * i + 1] = dr[i];
191 
192  len *= 2;
193  cand = 1;
194  while (len > MAXNUMRANGE * 2)
195  {
196  min = INT_MAX;
197  for (i = 2; i < len; i += 2)
198  if (min > (dr[i] - dr[i - 1]))
199  {
200  min = (dr[i] - dr[i - 1]);
201  cand = i;
202  }
203  memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
204  len -= 2;
205  }
206  r = resize_intArrayType(r, len);
207  retval = palloc(sizeof(GISTENTRY));
208  gistentryinit(*retval, PointerGetDatum(r),
209  entry->rel, entry->page, entry->offset, FALSE);
210  PG_RETURN_POINTER(retval);
211  }
212  else
213  PG_RETURN_POINTER(entry);
214 }
215 
216 Datum
218 {
219  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
220  GISTENTRY *retval;
221  ArrayType *r;
222  int *dr,
223  lenr;
224  ArrayType *in;
225  int lenin;
226  int *din;
227  int i,
228  j;
229 
230  in = DatumGetArrayTypeP(entry->key);
231 
232  CHECKARRVALID(in);
233  if (ARRISEMPTY(in))
234  {
235  if (in != (ArrayType *) DatumGetPointer(entry->key))
236  {
237  retval = palloc(sizeof(GISTENTRY));
238  gistentryinit(*retval, PointerGetDatum(in),
239  entry->rel, entry->page, entry->offset, FALSE);
240  PG_RETURN_POINTER(retval);
241  }
242 
243  PG_RETURN_POINTER(entry);
244  }
245 
246  lenin = ARRNELEMS(in);
247 
248  if (lenin < 2 * MAXNUMRANGE)
249  { /* not compressed value */
250  if (in != (ArrayType *) DatumGetPointer(entry->key))
251  {
252  retval = palloc(sizeof(GISTENTRY));
253  gistentryinit(*retval, PointerGetDatum(in),
254  entry->rel, entry->page, entry->offset, FALSE);
255 
256  PG_RETURN_POINTER(retval);
257  }
258  PG_RETURN_POINTER(entry);
259  }
260 
261  din = ARRPTR(in);
262  lenr = internal_size(din, lenin);
263 
264  r = new_intArrayType(lenr);
265  dr = ARRPTR(r);
266 
267  for (i = 0; i < lenin; i += 2)
268  for (j = din[i]; j <= din[i + 1]; j++)
269  if ((!i) || *(dr - 1) != j)
270  *dr++ = j;
271 
272  if (in != (ArrayType *) DatumGetPointer(entry->key))
273  pfree(in);
274  retval = palloc(sizeof(GISTENTRY));
275  gistentryinit(*retval, PointerGetDatum(r),
276  entry->rel, entry->page, entry->offset, FALSE);
277 
278  PG_RETURN_POINTER(retval);
279 }
280 
281 /*
282 ** The GiST Penalty method for _intments
283 */
284 Datum
286 {
287  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
288  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
289  float *result = (float *) PG_GETARG_POINTER(2);
290  ArrayType *ud;
291  float tmp1,
292  tmp2;
293 
294  ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
295  (ArrayType *) DatumGetPointer(newentry->key));
296  rt__int_size(ud, &tmp1);
297  rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
298  *result = tmp1 - tmp2;
299  pfree(ud);
300 
301  PG_RETURN_POINTER(result);
302 }
303 
304 
305 
306 Datum
308 {
311  bool *result = (bool *) PG_GETARG_POINTER(2);
312  int32 n = ARRNELEMS(a);
313  int32 *da,
314  *db;
315 
316  CHECKARRVALID(a);
317  CHECKARRVALID(b);
318 
319  if (n != ARRNELEMS(b))
320  {
321  *result = false;
322  PG_RETURN_POINTER(result);
323  }
324  *result = TRUE;
325  da = ARRPTR(a);
326  db = ARRPTR(b);
327  while (n--)
328  {
329  if (*da++ != *db++)
330  {
331  *result = FALSE;
332  break;
333  }
334  }
335 
336  PG_RETURN_POINTER(result);
337 }
338 
339 /*****************************************************************
340 ** Common GiST Method
341 *****************************************************************/
342 
343 typedef struct
344 {
345  OffsetNumber pos;
346  float cost;
347 } SPLITCOST;
348 
349 static int
350 comparecost(const void *a, const void *b)
351 {
352  if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
353  return 0;
354  else
355  return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
356 }
357 
358 /*
359 ** The GiST PickSplit method for _intments
360 ** We use Guttman's poly time split algorithm
361 */
362 Datum
364 {
367  OffsetNumber i,
368  j;
369  ArrayType *datum_alpha,
370  *datum_beta;
371  ArrayType *datum_l,
372  *datum_r;
373  ArrayType *union_d,
374  *union_dl,
375  *union_dr;
376  ArrayType *inter_d;
377  bool firsttime;
378  float size_alpha,
379  size_beta,
380  size_union,
381  size_inter;
382  float size_waste,
383  waste;
384  float size_l,
385  size_r;
386  int nbytes;
387  OffsetNumber seed_1 = 0,
388  seed_2 = 0;
390  *right;
391  OffsetNumber maxoff;
392  SPLITCOST *costvector;
393 
394 #ifdef GIST_DEBUG
395  elog(DEBUG3, "--------picksplit %d", entryvec->n);
396 #endif
397 
398  maxoff = entryvec->n - 2;
399  nbytes = (maxoff + 2) * sizeof(OffsetNumber);
400  v->spl_left = (OffsetNumber *) palloc(nbytes);
401  v->spl_right = (OffsetNumber *) palloc(nbytes);
402 
403  firsttime = true;
404  waste = 0.0;
405  for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
406  {
407  datum_alpha = GETENTRY(entryvec, i);
408  for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
409  {
410  datum_beta = GETENTRY(entryvec, j);
411 
412  /* compute the wasted space by unioning these guys */
413  /* size_waste = size_union - size_inter; */
414  union_d = inner_int_union(datum_alpha, datum_beta);
415  rt__int_size(union_d, &size_union);
416  inter_d = inner_int_inter(datum_alpha, datum_beta);
417  rt__int_size(inter_d, &size_inter);
418  size_waste = size_union - size_inter;
419 
420  pfree(union_d);
421  pfree(inter_d);
422 
423  /*
424  * are these a more promising split that what we've already seen?
425  */
426 
427  if (size_waste > waste || firsttime)
428  {
429  waste = size_waste;
430  seed_1 = i;
431  seed_2 = j;
432  firsttime = false;
433  }
434  }
435  }
436 
437  left = v->spl_left;
438  v->spl_nleft = 0;
439  right = v->spl_right;
440  v->spl_nright = 0;
441  if (seed_1 == 0 || seed_2 == 0)
442  {
443  seed_1 = 1;
444  seed_2 = 2;
445  }
446 
447  datum_alpha = GETENTRY(entryvec, seed_1);
448  datum_l = copy_intArrayType(datum_alpha);
449  rt__int_size(datum_l, &size_l);
450  datum_beta = GETENTRY(entryvec, seed_2);
451  datum_r = copy_intArrayType(datum_beta);
452  rt__int_size(datum_r, &size_r);
453 
454  maxoff = OffsetNumberNext(maxoff);
455 
456  /*
457  * sort entries
458  */
459  costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
460  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
461  {
462  costvector[i - 1].pos = i;
463  datum_alpha = GETENTRY(entryvec, i);
464  union_d = inner_int_union(datum_l, datum_alpha);
465  rt__int_size(union_d, &size_alpha);
466  pfree(union_d);
467  union_d = inner_int_union(datum_r, datum_alpha);
468  rt__int_size(union_d, &size_beta);
469  pfree(union_d);
470  costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
471  }
472  qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
473 
474  /*
475  * Now split up the regions between the two seeds. An important property
476  * of this split algorithm is that the split vector v has the indices of
477  * items to be split in order in its left and right vectors. We exploit
478  * this property by doing a merge in the code that actually splits the
479  * page.
480  *
481  * For efficiency, we also place the new index tuple in this loop. This is
482  * handled at the very end, when we have placed all the existing tuples
483  * and i == maxoff + 1.
484  */
485 
486 
487  for (j = 0; j < maxoff; j++)
488  {
489  i = costvector[j].pos;
490 
491  /*
492  * If we've already decided where to place this item, just put it on
493  * the right list. Otherwise, we need to figure out which page needs
494  * the least enlargement in order to store the item.
495  */
496 
497  if (i == seed_1)
498  {
499  *left++ = i;
500  v->spl_nleft++;
501  continue;
502  }
503  else if (i == seed_2)
504  {
505  *right++ = i;
506  v->spl_nright++;
507  continue;
508  }
509 
510  /* okay, which page needs least enlargement? */
511  datum_alpha = GETENTRY(entryvec, i);
512  union_dl = inner_int_union(datum_l, datum_alpha);
513  union_dr = inner_int_union(datum_r, datum_alpha);
514  rt__int_size(union_dl, &size_alpha);
515  rt__int_size(union_dr, &size_beta);
516 
517  /* pick which page to add it to */
518  if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
519  {
520  pfree(datum_l);
521  pfree(union_dr);
522  datum_l = union_dl;
523  size_l = size_alpha;
524  *left++ = i;
525  v->spl_nleft++;
526  }
527  else
528  {
529  pfree(datum_r);
530  pfree(union_dl);
531  datum_r = union_dr;
532  size_r = size_beta;
533  *right++ = i;
534  v->spl_nright++;
535  }
536  }
537  pfree(costvector);
538  *right = *left = FirstOffsetNumber;
539 
540  v->spl_ldatum = PointerGetDatum(datum_l);
541  v->spl_rdatum = PointerGetDatum(datum_r);
542 
544 }
#define GIST_LEAF(entry)
Definition: gist.h:133
Relation rel
Definition: gist.h:124
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
OffsetNumber pos
Definition: hstore_gist.c:323
#define RTOldContainsStrategyNumber
Definition: stratnum.h:56
#define RTOldContainedByStrategyNumber
Definition: stratnum.h:57
#define DEBUG3
Definition: elog.h:23
void rt__int_size(ArrayType *a, float *size)
Definition: _int_tool.c:182
struct NODE * left
#define PG_GETARG_ARRAYTYPE_P_COPY(n)
Definition: array.h:245
#define VARSIZE(PTR)
Definition: postgres.h:304
#define PointerGetDatum(X)
Definition: postgres.h:562
Datum g_int_penalty(PG_FUNCTION_ARGS)
Definition: _int_gist.c:285
#define CHECKARRVALID(x)
Definition: _int.h:18
Datum g_int_consistent(PG_FUNCTION_ARGS)
Definition: _int_gist.c:34
Datum g_int_compress(PG_FUNCTION_ARGS)
Definition: _int_gist.c:139
OffsetNumber * spl_left
Definition: gist.h:105
Datum spl_rdatum
Definition: gist.h:112
#define PREPAREARR(x)
Definition: _int.h:37
int32 n
Definition: gist.h:160
uint16 StrategyNumber
Definition: stratnum.h:22
struct NODE * right
int internal_size(int *a, int len)
Definition: _int_tool.c:278
return result
Definition: formatting.c:1618
ArrayType * inner_int_inter(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:134
#define GETENTRY(vec, pos)
Definition: _int_gist.c:13
#define RTContainedByStrategyNumber
Definition: stratnum.h:51
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
Datum g_int_same(PG_FUNCTION_ARGS)
Definition: _int_gist.c:307
int spl_nleft
Definition: gist.h:106
signed int int32
Definition: c.h:256
int32 cost
Definition: hstore_gist.c:324
#define QSORT(a, direction)
Definition: _int.h:168
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:125
#define Abs(x)
Definition: c.h:812
Datum g_int_union(PG_FUNCTION_ARGS)
Definition: _int_gist.c:99
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:244
void pfree(void *pointer)
Definition: mcxt.c:950
#define WISH_F(a, b, c)
Definition: hstore_gist.c:69
ArrayType * copy_intArrayType(ArrayType *a)
Definition: _int_tool.c:266
#define FALSE
Definition: c.h:221
int spl_nright
Definition: gist.h:111
bool inner_int_overlap(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:48
ArrayType * resize_intArrayType(ArrayType *a, int num)
Definition: _int_tool.c:238
Datum key
Definition: gist.h:123
PG_FUNCTION_INFO_V1(g_int_consistent)
#define memmove(d, s, c)
Definition: c.h:1058
#define FirstOffsetNumber
Definition: off.h:27
#define ARRISEMPTY(x)
Definition: _int.h:26
#define RTSameStrategyNumber
Definition: stratnum.h:49
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:588
bool leafkey
Definition: gist.h:127
bool inner_int_contains(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:13
ArrayType * new_intArrayType(int num)
Definition: _int_tool.c:220
ArrayType * _int_unique(ArrayType *a)
Definition: _int_tool.c:294
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
uintptr_t Datum
Definition: postgres.h:372
#define DatumGetArrayTypePCopy(X)
Definition: array.h:243
static int comparecost(const void *a, const void *b)
Definition: _int_gist.c:350
Datum g_int_decompress(PG_FUNCTION_ARGS)
Definition: _int_gist.c:217
#define NOTICE
Definition: elog.h:37
bool execconsistent(QUERYTYPE *query, ArrayType *array, bool calcnot)
Definition: _int_bool.c:310
ArrayType * inner_int_union(ArrayType *a, ArrayType *b)
Definition: _int_tool.c:77
Datum spl_ldatum
Definition: gist.h:107
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:169
float cost
Definition: _int_gist.c:346
#define BooleanSearchStrategy
Definition: _int.h:119
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
OffsetNumber * spl_right
Definition: gist.h:110
#define MAXNUMRANGE
Definition: _int.h:11
#define RTContainsStrategyNumber
Definition: stratnum.h:50
#define ARRNELEMS(x)
Definition: cube.c:27
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:237
#define DatumGetPointer(X)
Definition: postgres.h:555
void * palloc(Size size)
Definition: mcxt.c:849
int i
#define RTOverlapStrategyNumber
Definition: stratnum.h:46
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define TRUE
Definition: c.h:217
#define ARRPTR(x)
Definition: cube.c:26
#define elog
Definition: elog.h:219
#define qsort(a, b, c, d)
Definition: port.h:440
OffsetNumber offset
Definition: gist.h:126
Datum g_int_picksplit(PG_FUNCTION_ARGS)
Definition: _int_gist.c:363
#define DatumGetArrayTypeP(X)
Definition: array.h:242